migrate_state_to_sqlite.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. migrate_state_to_sqlite.py - 数据迁移脚本 (v5.4)
  5. 将 state.json 中的大数据迁移到 SQLite (index.db):
  6. - entities_v3 → entities 表
  7. - alias_index → aliases 表
  8. - state_changes → state_changes 表
  9. - structured_relationships → relationships 表
  10. 迁移后 state.json 只保留精简数据 (< 5KB):
  11. - progress
  12. - protagonist_state
  13. - strand_tracker
  14. - disambiguation_warnings/pending
  15. - project_info
  16. - world_settings (骨架)
  17. - plot_threads
  18. - relationships (简化版)
  19. - review_checkpoints
  20. 用法:
  21. python -m data_modules.migrate_state_to_sqlite --project-root "D:/wk/斗破苍穹"
  22. python -m data_modules.migrate_state_to_sqlite --project-root "." --dry-run
  23. python -m data_modules.migrate_state_to_sqlite --project-root "." --backup
  24. """
  25. import json
  26. import shutil
  27. from pathlib import Path
  28. from datetime import datetime
  29. from typing import Dict, Any, List
  30. from .config import get_config, DataModulesConfig
  31. from .sql_state_manager import SQLStateManager, EntityData
  32. def migrate_state_to_sqlite(
  33. config: DataModulesConfig,
  34. dry_run: bool = False,
  35. backup: bool = True,
  36. verbose: bool = True
  37. ) -> Dict[str, int]:
  38. """
  39. 执行迁移
  40. 参数:
  41. - config: 配置对象
  42. - dry_run: 只分析不实际写入
  43. - backup: 迁移前备份 state.json
  44. - verbose: 打印详细日志
  45. 返回: 迁移统计
  46. """
  47. stats = {
  48. "entities": 0,
  49. "aliases": 0,
  50. "state_changes": 0,
  51. "relationships": 0,
  52. "skipped": 0,
  53. "errors": 0
  54. }
  55. # 读取 state.json
  56. state_file = config.state_file
  57. if not state_file.exists():
  58. if verbose:
  59. print(f"❌ state.json 不存在: {state_file}")
  60. return stats
  61. with open(state_file, 'r', encoding='utf-8') as f:
  62. state = json.load(f)
  63. if verbose:
  64. file_size = state_file.stat().st_size / 1024
  65. print(f"📄 读取 state.json ({file_size:.1f} KB)")
  66. # 备份
  67. if backup and not dry_run:
  68. backup_file = state_file.with_suffix(f".json.backup-{datetime.now().strftime('%Y%m%d_%H%M%S')}")
  69. shutil.copy(state_file, backup_file)
  70. if verbose:
  71. print(f"💾 已备份到: {backup_file}")
  72. # 初始化 SQLStateManager
  73. sql_manager = SQLStateManager(config)
  74. # 1. 迁移 entities_v3
  75. entities_v3 = state.get("entities_v3", {})
  76. if verbose:
  77. print(f"\n🔄 迁移 entities_v3...")
  78. for entity_type, entities in entities_v3.items():
  79. if not isinstance(entities, dict):
  80. continue
  81. for entity_id, entity_data in entities.items():
  82. if not isinstance(entity_data, dict):
  83. stats["skipped"] += 1
  84. continue
  85. try:
  86. entity = EntityData(
  87. id=entity_id,
  88. type=entity_type,
  89. name=entity_data.get("canonical_name", entity_data.get("name", entity_id)),
  90. tier=entity_data.get("tier", "装饰"),
  91. desc=entity_data.get("desc", ""),
  92. current=entity_data.get("current", {}),
  93. aliases=[], # 别名单独处理
  94. first_appearance=entity_data.get("first_appearance", 0),
  95. last_appearance=entity_data.get("last_appearance", 0),
  96. is_protagonist=entity_data.get("is_protagonist", False)
  97. )
  98. if not dry_run:
  99. sql_manager.upsert_entity(entity)
  100. stats["entities"] += 1
  101. if verbose and stats["entities"] % 50 == 0:
  102. print(f" 已迁移 {stats['entities']} 个实体...")
  103. except Exception as e:
  104. stats["errors"] += 1
  105. if verbose:
  106. print(f" ⚠️ 实体迁移失败 {entity_id}: {e}")
  107. if verbose:
  108. print(f" ✅ 实体: {stats['entities']} 个")
  109. # 2. 迁移 alias_index
  110. alias_index = state.get("alias_index", {})
  111. if verbose:
  112. print(f"\n🔄 迁移 alias_index...")
  113. for alias, entries in alias_index.items():
  114. if not isinstance(entries, list):
  115. continue
  116. for entry in entries:
  117. if not isinstance(entry, dict):
  118. stats["skipped"] += 1
  119. continue
  120. entity_id = entry.get("id")
  121. entity_type = entry.get("type")
  122. if not entity_id or not entity_type:
  123. stats["skipped"] += 1
  124. continue
  125. try:
  126. if not dry_run:
  127. sql_manager.register_alias(alias, entity_id, entity_type)
  128. stats["aliases"] += 1
  129. except Exception as e:
  130. stats["errors"] += 1
  131. if verbose:
  132. print(f" ⚠️ 别名迁移失败 {alias}: {e}")
  133. if verbose:
  134. print(f" ✅ 别名: {stats['aliases']} 个")
  135. # 3. 迁移 state_changes
  136. state_changes = state.get("state_changes", [])
  137. if verbose:
  138. print(f"\n🔄 迁移 state_changes...")
  139. for change in state_changes:
  140. if not isinstance(change, dict):
  141. stats["skipped"] += 1
  142. continue
  143. try:
  144. entity_id = change.get("entity_id", "")
  145. if not entity_id:
  146. stats["skipped"] += 1
  147. continue
  148. if not dry_run:
  149. sql_manager.record_state_change(
  150. entity_id=entity_id,
  151. field=change.get("field", ""),
  152. old_value=change.get("old", change.get("old_value", "")),
  153. new_value=change.get("new", change.get("new_value", "")),
  154. reason=change.get("reason", ""),
  155. chapter=change.get("chapter", 0)
  156. )
  157. stats["state_changes"] += 1
  158. except Exception as e:
  159. stats["errors"] += 1
  160. if verbose:
  161. print(f" ⚠️ 状态变化迁移失败: {e}")
  162. if verbose:
  163. print(f" ✅ 状态变化: {stats['state_changes']} 条")
  164. # 4. 迁移 structured_relationships
  165. relationships = state.get("structured_relationships", [])
  166. if verbose:
  167. print(f"\n🔄 迁移 structured_relationships...")
  168. for rel in relationships:
  169. if not isinstance(rel, dict):
  170. stats["skipped"] += 1
  171. continue
  172. try:
  173. from_entity = rel.get("from", rel.get("from_entity", ""))
  174. to_entity = rel.get("to", rel.get("to_entity", ""))
  175. if not from_entity or not to_entity:
  176. stats["skipped"] += 1
  177. continue
  178. if not dry_run:
  179. sql_manager.upsert_relationship(
  180. from_entity=from_entity,
  181. to_entity=to_entity,
  182. type=rel.get("type", "相识"),
  183. description=rel.get("description", ""),
  184. chapter=rel.get("chapter", 0)
  185. )
  186. stats["relationships"] += 1
  187. except Exception as e:
  188. stats["errors"] += 1
  189. if verbose:
  190. print(f" ⚠️ 关系迁移失败: {e}")
  191. if verbose:
  192. print(f" ✅ 关系: {stats['relationships']} 条")
  193. # 5. 精简 state.json(移除已迁移字段)
  194. if not dry_run:
  195. if verbose:
  196. print(f"\n🔄 精简 state.json...")
  197. # 保留字段
  198. slim_state = {
  199. "project_info": state.get("project_info", {}),
  200. "progress": state.get("progress", {}),
  201. "protagonist_state": state.get("protagonist_state", {}),
  202. "strand_tracker": state.get("strand_tracker", {}),
  203. "world_settings": _slim_world_settings(state.get("world_settings", {})),
  204. "plot_threads": state.get("plot_threads", {}),
  205. "relationships": _slim_relationships(state.get("relationships", {})),
  206. "review_checkpoints": state.get("review_checkpoints", [])[-10:], # 只保留最近10个
  207. "disambiguation_warnings": state.get("disambiguation_warnings", [])[-20:],
  208. "disambiguation_pending": state.get("disambiguation_pending", [])[-10:],
  209. # v5.1 引入标记
  210. "_migrated_to_sqlite": True,
  211. "_migration_timestamp": datetime.now().isoformat()
  212. }
  213. with open(state_file, 'w', encoding='utf-8') as f:
  214. json.dump(slim_state, f, ensure_ascii=False, indent=2)
  215. new_size = state_file.stat().st_size / 1024
  216. if verbose:
  217. print(f" ✅ 精简后: {new_size:.1f} KB")
  218. # 打印统计
  219. if verbose:
  220. print(f"\n" + "=" * 50)
  221. print(f"📊 迁移统计:")
  222. print(f" 实体: {stats['entities']}")
  223. print(f" 别名: {stats['aliases']}")
  224. print(f" 状态变化: {stats['state_changes']}")
  225. print(f" 关系: {stats['relationships']}")
  226. print(f" 跳过: {stats['skipped']}")
  227. print(f" 错误: {stats['errors']}")
  228. if dry_run:
  229. print(f"\n⚠️ 这是 dry-run 模式,实际未写入任何数据")
  230. return stats
  231. def _slim_world_settings(world_settings: Dict) -> Dict:
  232. """精简 world_settings,只保留骨架"""
  233. if not isinstance(world_settings, dict):
  234. return {}
  235. slim = {}
  236. # power_system: 只保留等级名称
  237. power_system = world_settings.get("power_system", [])
  238. if isinstance(power_system, list):
  239. slim["power_system"] = [
  240. p.get("name") if isinstance(p, dict) else p
  241. for p in power_system[:20] # 最多20个等级
  242. ]
  243. # factions: 只保留名称和简述
  244. factions = world_settings.get("factions", [])
  245. if isinstance(factions, list):
  246. slim["factions"] = [
  247. {"name": f.get("name"), "type": f.get("type")}
  248. if isinstance(f, dict) else f
  249. for f in factions[:30] # 最多30个势力
  250. ]
  251. # locations: 只保留名称
  252. locations = world_settings.get("locations", [])
  253. if isinstance(locations, list):
  254. slim["locations"] = [
  255. loc.get("name") if isinstance(loc, dict) else loc
  256. for loc in locations[:50] # 最多50个地点
  257. ]
  258. return slim
  259. def _slim_relationships(relationships: Dict) -> Dict:
  260. """精简 relationships,只保留核心关系"""
  261. if not isinstance(relationships, dict):
  262. return {}
  263. # 只保留 relationships 字典本身,不做额外精简
  264. # 因为这个字段本身应该比较小
  265. return relationships
  266. def main():
  267. import argparse
  268. from .cli_output import print_success, print_error
  269. from .index_manager import IndexManager
  270. parser = argparse.ArgumentParser(description="迁移 state.json 到 SQLite (v5.4)")
  271. parser.add_argument("--project-root", type=str, required=True, help="项目根目录")
  272. parser.add_argument("--dry-run", action="store_true", help="只分析不实际写入")
  273. parser.add_argument("--backup", action="store_true", default=True, help="迁移前备份")
  274. parser.add_argument("--no-backup", action="store_true", help="不备份")
  275. parser.add_argument("--quiet", action="store_true", help="安静模式")
  276. args = parser.parse_args()
  277. # 允许传入“工作区根目录”,统一解析到真正的 book project_root(必须包含 .webnovel/state.json)
  278. from project_locator import resolve_project_root
  279. resolved_root = resolve_project_root(args.project_root)
  280. config = DataModulesConfig.from_project_root(resolved_root)
  281. backup = not args.no_backup
  282. logger = IndexManager(config)
  283. tool_name = "migrate_state_to_sqlite"
  284. try:
  285. stats = migrate_state_to_sqlite(
  286. config=config,
  287. dry_run=args.dry_run,
  288. backup=backup,
  289. verbose=False,
  290. )
  291. except Exception as exc:
  292. print_error("MIGRATE_FAILED", str(exc), suggestion="检查 state.json 与 index.db 权限")
  293. try:
  294. logger.log_tool_call(tool_name, False, error_code="MIGRATE_FAILED", error_message=str(exc))
  295. except Exception:
  296. pass
  297. raise SystemExit(1)
  298. if stats.get("errors", 0) > 0:
  299. print_error("MIGRATE_ERRORS", "迁移出现错误", details=stats)
  300. try:
  301. logger.log_tool_call(tool_name, False, error_code="MIGRATE_ERRORS", error_message="迁移出现错误")
  302. except Exception:
  303. pass
  304. raise SystemExit(1)
  305. print_success({"project": str(config.project_root), **stats}, message="migrated")
  306. try:
  307. logger.log_tool_call(tool_name, True)
  308. except Exception:
  309. pass
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()