state_manager.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. State Manager - 状态管理模块 (v5.4)
  5. 管理 state.json 的读写操作:
  6. - 实体状态管理
  7. - 进度追踪
  8. - 关系记录
  9. v5.1 变更(v5.4 沿用):
  10. - 集成 SQLStateManager,同步写入 SQLite (index.db)
  11. - state.json 保留精简数据,大数据自动迁移到 SQLite
  12. """
  13. import json
  14. import sys
  15. from copy import deepcopy
  16. from pathlib import Path
  17. from typing import Dict, List, Optional, Any
  18. from dataclasses import dataclass, field, asdict
  19. from datetime import datetime
  20. import filelock
  21. from .config import get_config
  22. try:
  23. # 当 scripts 目录在 sys.path 中(常见:从 scripts/ 运行)
  24. from security_utils import atomic_write_json, read_json_safe
  25. except ImportError: # pragma: no cover
  26. # 当以 `python -m scripts.data_modules...` 从仓库根目录运行
  27. from scripts.security_utils import atomic_write_json, read_json_safe
  28. @dataclass
  29. class EntityState:
  30. """实体状态"""
  31. id: str
  32. name: str
  33. type: str # 角色/地点/物品/势力
  34. tier: str = "装饰" # 核心/重要/次要/装饰
  35. aliases: List[str] = field(default_factory=list)
  36. attributes: Dict[str, Any] = field(default_factory=dict)
  37. first_appearance: int = 0
  38. last_appearance: int = 0
  39. @dataclass
  40. class Relationship:
  41. """实体关系"""
  42. from_entity: str
  43. to_entity: str
  44. type: str
  45. description: str
  46. chapter: int
  47. @dataclass
  48. class StateChange:
  49. """状态变化记录"""
  50. entity_id: str
  51. field: str
  52. old_value: Any
  53. new_value: Any
  54. reason: str
  55. chapter: int
  56. timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
  57. @dataclass
  58. class _EntityPatch:
  59. """待写入的实体增量补丁(用于锁内合并)"""
  60. entity_type: str
  61. entity_id: str
  62. replace: bool = False
  63. base_entity: Optional[Dict[str, Any]] = None # 新建实体时的完整快照(用于填充缺失字段)
  64. top_updates: Dict[str, Any] = field(default_factory=dict)
  65. current_updates: Dict[str, Any] = field(default_factory=dict)
  66. appearance_chapter: Optional[int] = None
  67. class StateManager:
  68. """状态管理器(v5.1 entities_v3 格式 + SQLite 同步,v5.4 沿用)"""
  69. # v5.0 引入的实体类型
  70. ENTITY_TYPES = ["角色", "地点", "物品", "势力", "招式"]
  71. def __init__(self, config=None, enable_sqlite_sync: bool = True):
  72. """
  73. 初始化状态管理器
  74. 参数:
  75. - config: 配置对象
  76. - enable_sqlite_sync: 是否启用 SQLite 同步 (默认 True)
  77. """
  78. self.config = config or get_config()
  79. self._state: Dict[str, Any] = {}
  80. # 与 security_utils.atomic_write_json 保持一致:state.json.lock
  81. self._lock_path = self.config.state_file.with_suffix(self.config.state_file.suffix + ".lock")
  82. # v5.1 引入: SQLite 同步
  83. self._enable_sqlite_sync = enable_sqlite_sync
  84. self._sql_state_manager = None
  85. if enable_sqlite_sync:
  86. try:
  87. from .sql_state_manager import SQLStateManager
  88. self._sql_state_manager = SQLStateManager(self.config)
  89. except ImportError:
  90. pass # SQLStateManager 不可用时静默降级
  91. # 待写入的增量(锁内重读 + 合并 + 写入)
  92. self._pending_entity_patches: Dict[tuple[str, str], _EntityPatch] = {}
  93. self._pending_alias_entries: Dict[str, List[Dict[str, str]]] = {}
  94. self._pending_state_changes: List[Dict[str, Any]] = []
  95. self._pending_structured_relationships: List[Dict[str, Any]] = []
  96. self._pending_disambiguation_warnings: List[Dict[str, Any]] = []
  97. self._pending_disambiguation_pending: List[Dict[str, Any]] = []
  98. self._pending_progress_chapter: Optional[int] = None
  99. self._pending_progress_words_delta: int = 0
  100. self._pending_chapter_meta: Dict[str, Any] = {}
  101. # v5.1 引入: 缓存待同步到 SQLite 的数据
  102. self._pending_sqlite_data: Dict[str, Any] = {
  103. "entities_appeared": [],
  104. "entities_new": [],
  105. "state_changes": [],
  106. "relationships_new": [],
  107. "chapter": None
  108. }
  109. self._load_state()
  110. def _now_progress_timestamp(self) -> str:
  111. return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  112. def _ensure_state_schema(self, state: Dict[str, Any]) -> Dict[str, Any]:
  113. """确保 state.json 具备运行所需的关键字段(尽量不破坏既有数据)。"""
  114. if not isinstance(state, dict):
  115. state = {}
  116. state.setdefault("project_info", {})
  117. state.setdefault("progress", {})
  118. state.setdefault("protagonist_state", {})
  119. # relationships: 旧版本可能是 list(实体关系),v5.0 运行态用 dict(人物关系/重要关系)
  120. relationships = state.get("relationships")
  121. if isinstance(relationships, list):
  122. state.setdefault("structured_relationships", [])
  123. if isinstance(state.get("structured_relationships"), list):
  124. state["structured_relationships"].extend(relationships)
  125. state["relationships"] = {}
  126. elif not isinstance(relationships, dict):
  127. state["relationships"] = {}
  128. state.setdefault("world_settings", {"power_system": [], "factions": [], "locations": []})
  129. state.setdefault("plot_threads", {"active_threads": [], "foreshadowing": []})
  130. state.setdefault("review_checkpoints", [])
  131. state.setdefault("chapter_meta", {})
  132. state.setdefault(
  133. "strand_tracker",
  134. {
  135. "last_quest_chapter": 0,
  136. "last_fire_chapter": 0,
  137. "last_constellation_chapter": 0,
  138. "current_dominant": "quest",
  139. "chapters_since_switch": 0,
  140. "history": [],
  141. },
  142. )
  143. entities_v3 = state.get("entities_v3")
  144. # v5.1 引入: entities_v3, alias_index, state_changes, structured_relationships 已迁移到 index.db
  145. # 不再在 state.json 中初始化或维护这些字段
  146. if not isinstance(state.get("disambiguation_warnings"), list):
  147. state["disambiguation_warnings"] = []
  148. if not isinstance(state.get("disambiguation_pending"), list):
  149. state["disambiguation_pending"] = []
  150. # progress 基础字段
  151. progress = state["progress"]
  152. if not isinstance(progress, dict):
  153. progress = {}
  154. state["progress"] = progress
  155. progress.setdefault("current_chapter", 0)
  156. progress.setdefault("total_words", 0)
  157. progress.setdefault("last_updated", self._now_progress_timestamp())
  158. return state
  159. def _load_state(self):
  160. """加载状态文件"""
  161. if self.config.state_file.exists():
  162. self._state = read_json_safe(self.config.state_file, default={})
  163. self._state = self._ensure_state_schema(self._state)
  164. else:
  165. self._state = self._ensure_state_schema({})
  166. def save_state(self):
  167. """
  168. 保存状态文件(锁内重读 + 合并 + 原子写入)。
  169. 解决多 Agent 并行下的“读-改-写覆盖”风险:
  170. - 获取锁
  171. - 重新读取磁盘最新 state.json
  172. - 仅合并本实例产生的增量(pending_*)
  173. - 原子化写入
  174. """
  175. # 无增量时不写入,避免无意义覆盖
  176. has_pending = any(
  177. [
  178. self._pending_entity_patches,
  179. self._pending_alias_entries,
  180. self._pending_state_changes,
  181. self._pending_structured_relationships,
  182. self._pending_disambiguation_warnings,
  183. self._pending_disambiguation_pending,
  184. self._pending_chapter_meta,
  185. self._pending_progress_chapter is not None,
  186. self._pending_progress_words_delta != 0,
  187. ]
  188. )
  189. if not has_pending:
  190. return
  191. self.config.ensure_dirs()
  192. lock = filelock.FileLock(str(self._lock_path), timeout=10)
  193. try:
  194. with lock:
  195. disk_state = read_json_safe(self.config.state_file, default={})
  196. disk_state = self._ensure_state_schema(disk_state)
  197. # progress(合并为 max(chapter) + words_delta 累加)
  198. if self._pending_progress_chapter is not None or self._pending_progress_words_delta != 0:
  199. progress = disk_state.get("progress", {})
  200. if not isinstance(progress, dict):
  201. progress = {}
  202. disk_state["progress"] = progress
  203. try:
  204. current_chapter = int(progress.get("current_chapter", 0) or 0)
  205. except (TypeError, ValueError):
  206. current_chapter = 0
  207. if self._pending_progress_chapter is not None:
  208. progress["current_chapter"] = max(current_chapter, int(self._pending_progress_chapter))
  209. if self._pending_progress_words_delta:
  210. try:
  211. total_words = int(progress.get("total_words", 0) or 0)
  212. except (TypeError, ValueError):
  213. total_words = 0
  214. progress["total_words"] = total_words + int(self._pending_progress_words_delta)
  215. progress["last_updated"] = self._now_progress_timestamp()
  216. # v5.1 引入: 强制使用 SQLite 模式,移除大数据字段
  217. # 确保 state.json 中不存在这些膨胀字段
  218. for field in ["entities_v3", "alias_index", "state_changes", "structured_relationships"]:
  219. disk_state.pop(field, None)
  220. # 标记已迁移
  221. disk_state["_migrated_to_sqlite"] = True
  222. # disambiguation_warnings(追加去重 + 截断)
  223. if self._pending_disambiguation_warnings:
  224. warnings_list = disk_state.get("disambiguation_warnings")
  225. if not isinstance(warnings_list, list):
  226. warnings_list = []
  227. disk_state["disambiguation_warnings"] = warnings_list
  228. def _warn_key(w: Dict[str, Any]) -> tuple:
  229. return (
  230. w.get("chapter"),
  231. w.get("mention"),
  232. w.get("chosen_id"),
  233. w.get("confidence"),
  234. )
  235. existing_keys = {_warn_key(w) for w in warnings_list if isinstance(w, dict)}
  236. for w in self._pending_disambiguation_warnings:
  237. if not isinstance(w, dict):
  238. continue
  239. k = _warn_key(w)
  240. if k in existing_keys:
  241. continue
  242. warnings_list.append(w)
  243. existing_keys.add(k)
  244. # 只保留最近 N 条,避免文件无限增长
  245. max_keep = self.config.max_disambiguation_warnings
  246. if len(warnings_list) > max_keep:
  247. disk_state["disambiguation_warnings"] = warnings_list[-max_keep:]
  248. # disambiguation_pending(追加去重 + 截断)
  249. if self._pending_disambiguation_pending:
  250. pending_list = disk_state.get("disambiguation_pending")
  251. if not isinstance(pending_list, list):
  252. pending_list = []
  253. disk_state["disambiguation_pending"] = pending_list
  254. def _pending_key(w: Dict[str, Any]) -> tuple:
  255. return (
  256. w.get("chapter"),
  257. w.get("mention"),
  258. w.get("suggested_id"),
  259. w.get("confidence"),
  260. )
  261. existing_keys = {_pending_key(w) for w in pending_list if isinstance(w, dict)}
  262. for w in self._pending_disambiguation_pending:
  263. if not isinstance(w, dict):
  264. continue
  265. k = _pending_key(w)
  266. if k in existing_keys:
  267. continue
  268. pending_list.append(w)
  269. existing_keys.add(k)
  270. max_keep = self.config.max_disambiguation_pending
  271. if len(pending_list) > max_keep:
  272. disk_state["disambiguation_pending"] = pending_list[-max_keep:]
  273. # chapter_meta(新增:按章节号覆盖写入)
  274. if self._pending_chapter_meta:
  275. chapter_meta = disk_state.get("chapter_meta")
  276. if not isinstance(chapter_meta, dict):
  277. chapter_meta = {}
  278. disk_state["chapter_meta"] = chapter_meta
  279. chapter_meta.update(self._pending_chapter_meta)
  280. # 原子写入(锁已持有,不再二次加锁)
  281. atomic_write_json(self.config.state_file, disk_state, use_lock=False, backup=True)
  282. # v5.1 引入: 同步到 SQLite(失败时保留 pending 以便重试)
  283. sqlite_pending_snapshot = self._snapshot_sqlite_pending()
  284. sqlite_sync_ok = self._sync_to_sqlite()
  285. # 同步内存为磁盘最新快照
  286. self._state = disk_state
  287. # state.json 侧 pending 已写盘,直接清空
  288. self._pending_disambiguation_warnings.clear()
  289. self._pending_disambiguation_pending.clear()
  290. self._pending_chapter_meta.clear()
  291. self._pending_progress_chapter = None
  292. self._pending_progress_words_delta = 0
  293. # SQLite 侧 pending:成功后清空,失败则恢复快照(避免静默丢数据)
  294. if sqlite_sync_ok:
  295. self._pending_entity_patches.clear()
  296. self._pending_alias_entries.clear()
  297. self._pending_state_changes.clear()
  298. self._pending_structured_relationships.clear()
  299. self._clear_pending_sqlite_data()
  300. else:
  301. self._restore_sqlite_pending(sqlite_pending_snapshot)
  302. except filelock.Timeout:
  303. raise RuntimeError("无法获取 state.json 文件锁,请稍后重试")
  304. def _sync_to_sqlite(self) -> bool:
  305. """同步待处理数据到 SQLite(v5.1 引入,v5.4 沿用)"""
  306. if not self._sql_state_manager:
  307. return True
  308. # 方式1: 通过 process_chapter_result 收集的数据
  309. sqlite_data = self._pending_sqlite_data
  310. chapter = sqlite_data.get("chapter")
  311. # 记录已处理的 (entity_id, chapter) 组合,避免重复写入 appearances
  312. processed_appearances = set()
  313. if chapter is not None:
  314. try:
  315. self._sql_state_manager.process_chapter_entities(
  316. chapter=chapter,
  317. entities_appeared=sqlite_data.get("entities_appeared", []),
  318. entities_new=sqlite_data.get("entities_new", []),
  319. state_changes=sqlite_data.get("state_changes", []),
  320. relationships_new=sqlite_data.get("relationships_new", [])
  321. )
  322. # 标记已处理的出场记录
  323. for entity in sqlite_data.get("entities_appeared", []):
  324. if entity.get("id"):
  325. processed_appearances.add((entity.get("id"), chapter))
  326. for entity in sqlite_data.get("entities_new", []):
  327. eid = entity.get("suggested_id") or entity.get("id")
  328. if eid:
  329. processed_appearances.add((eid, chapter))
  330. except Exception as exc:
  331. print(f"[WARNING] SQLite sync failed (process_chapter_entities): {exc}", file=sys.stderr)
  332. return False
  333. # 方式2: 使用 add_entity/update_entity 收集的增量数据。
  334. # 数据缓存在 _pending_entity_patches 等变量中。
  335. return self._sync_pending_patches_to_sqlite(processed_appearances)
  336. def _sync_pending_patches_to_sqlite(self, processed_appearances: set = None) -> bool:
  337. """同步 _pending_entity_patches 等到 SQLite(v5.1 引入,v5.4 沿用)
  338. Args:
  339. processed_appearances: 已通过 process_chapter_entities 处理的 (entity_id, chapter) 集合,
  340. 用于避免重复写入 appearances 表(防止覆盖 mentions)
  341. """
  342. if not self._sql_state_manager:
  343. return True
  344. if processed_appearances is None:
  345. processed_appearances = set()
  346. # 元数据字段(不应写入 current_json)
  347. METADATA_FIELDS = {"canonical_name", "tier", "desc", "is_protagonist", "is_archived"}
  348. try:
  349. from .sql_state_manager import EntityData
  350. from .index_manager import EntityMeta
  351. # 同步实体补丁
  352. for (entity_type, entity_id), patch in self._pending_entity_patches.items():
  353. if patch.base_entity:
  354. # 新实体
  355. entity_data = EntityData(
  356. id=entity_id,
  357. type=entity_type,
  358. name=patch.base_entity.get("canonical_name", entity_id),
  359. tier=patch.base_entity.get("tier", "装饰"),
  360. desc=patch.base_entity.get("desc", ""),
  361. current=patch.base_entity.get("current", {}),
  362. aliases=[],
  363. first_appearance=patch.base_entity.get("first_appearance", 0),
  364. last_appearance=patch.base_entity.get("last_appearance", 0),
  365. is_protagonist=patch.base_entity.get("is_protagonist", False)
  366. )
  367. self._sql_state_manager.upsert_entity(entity_data)
  368. # 记录首次出场(跳过已处理的,避免覆盖 mentions)
  369. if patch.appearance_chapter is not None:
  370. if (entity_id, patch.appearance_chapter) not in processed_appearances:
  371. self._sql_state_manager._index_manager.record_appearance(
  372. entity_id=entity_id,
  373. chapter=patch.appearance_chapter,
  374. mentions=[entity_data.name],
  375. confidence=1.0,
  376. skip_if_exists=True # 关键:不覆盖已有记录
  377. )
  378. else:
  379. # 更新现有实体
  380. has_metadata_updates = bool(patch.top_updates and
  381. any(k in METADATA_FIELDS for k in patch.top_updates))
  382. # 非元数据的 top_updates 应该当作 current 更新
  383. # 例如:realm, layer, location 等状态字段
  384. non_metadata_top_updates = {
  385. k: v for k, v in patch.top_updates.items()
  386. if k not in METADATA_FIELDS
  387. } if patch.top_updates else {}
  388. # 合并 current_updates 和非元数据的 top_updates
  389. effective_current_updates = {**non_metadata_top_updates}
  390. if patch.current_updates:
  391. effective_current_updates.update(patch.current_updates)
  392. if has_metadata_updates:
  393. # 有元数据更新:使用 upsert_entity(update_metadata=True)
  394. existing = self._sql_state_manager.get_entity(entity_id)
  395. if existing:
  396. # 合并 current
  397. current = existing.get("current_json", {})
  398. if isinstance(current, str):
  399. import json
  400. current = json.loads(current) if current else {}
  401. if effective_current_updates:
  402. current.update(effective_current_updates)
  403. new_canonical_name = patch.top_updates.get("canonical_name")
  404. old_canonical_name = existing.get("canonical_name", "")
  405. entity_meta = EntityMeta(
  406. id=entity_id,
  407. type=existing.get("type", entity_type),
  408. canonical_name=new_canonical_name or old_canonical_name,
  409. tier=patch.top_updates.get("tier", existing.get("tier", "装饰")),
  410. desc=patch.top_updates.get("desc", existing.get("desc", "")),
  411. current=current,
  412. first_appearance=existing.get("first_appearance", 0),
  413. last_appearance=patch.appearance_chapter or existing.get("last_appearance", 0),
  414. is_protagonist=patch.top_updates.get("is_protagonist", existing.get("is_protagonist", False)),
  415. is_archived=patch.top_updates.get("is_archived", existing.get("is_archived", False))
  416. )
  417. self._sql_state_manager._index_manager.upsert_entity(entity_meta, update_metadata=True)
  418. # 如果 canonical_name 改名,自动注册新名字为 alias
  419. if new_canonical_name and new_canonical_name != old_canonical_name:
  420. self._sql_state_manager.register_alias(
  421. new_canonical_name, entity_id, existing.get("type", entity_type)
  422. )
  423. elif effective_current_updates:
  424. # 只有 current 更新(包括非元数据的 top_updates)
  425. self._sql_state_manager.update_entity_current(entity_id, effective_current_updates)
  426. # 更新 last_appearance 并记录出场
  427. if patch.appearance_chapter is not None:
  428. self._sql_state_manager._update_last_appearance(entity_id, patch.appearance_chapter)
  429. # 补充 appearances 记录
  430. # 使用 skip_if_exists=True 避免覆盖已有记录的 mentions
  431. if (entity_id, patch.appearance_chapter) not in processed_appearances:
  432. self._sql_state_manager._index_manager.record_appearance(
  433. entity_id=entity_id,
  434. chapter=patch.appearance_chapter,
  435. mentions=[],
  436. confidence=1.0,
  437. skip_if_exists=True # 关键:不覆盖已有记录
  438. )
  439. # 同步别名
  440. for alias, entries in self._pending_alias_entries.items():
  441. for entry in entries:
  442. entity_type = entry.get("type")
  443. entity_id = entry.get("id")
  444. if entity_type and entity_id:
  445. self._sql_state_manager.register_alias(alias, entity_id, entity_type)
  446. # 同步状态变化
  447. for change in self._pending_state_changes:
  448. self._sql_state_manager.record_state_change(
  449. entity_id=change.get("entity_id", ""),
  450. field=change.get("field", ""),
  451. old_value=change.get("old", change.get("old_value", "")),
  452. new_value=change.get("new", change.get("new_value", "")),
  453. reason=change.get("reason", ""),
  454. chapter=change.get("chapter", 0)
  455. )
  456. # 同步关系
  457. for rel in self._pending_structured_relationships:
  458. self._sql_state_manager.upsert_relationship(
  459. from_entity=rel.get("from_entity", ""),
  460. to_entity=rel.get("to_entity", ""),
  461. type=rel.get("type", "相识"),
  462. description=rel.get("description", ""),
  463. chapter=rel.get("chapter", 0)
  464. )
  465. return True
  466. except Exception as e:
  467. # SQLite 同步失败时记录警告(不中断主流程)
  468. print(f"[WARNING] SQLite sync failed: {e}", file=sys.stderr)
  469. return False
  470. def _snapshot_sqlite_pending(self) -> Dict[str, Any]:
  471. """抓取 SQLite 侧 pending 快照,用于同步失败回滚内存队列。"""
  472. return {
  473. "entity_patches": deepcopy(self._pending_entity_patches),
  474. "alias_entries": deepcopy(self._pending_alias_entries),
  475. "state_changes": deepcopy(self._pending_state_changes),
  476. "structured_relationships": deepcopy(self._pending_structured_relationships),
  477. "sqlite_data": deepcopy(self._pending_sqlite_data),
  478. }
  479. def _restore_sqlite_pending(self, snapshot: Dict[str, Any]) -> None:
  480. """恢复 SQLite 侧 pending 快照,避免同步失败后数据静默丢失。"""
  481. self._pending_entity_patches = snapshot.get("entity_patches", {})
  482. self._pending_alias_entries = snapshot.get("alias_entries", {})
  483. self._pending_state_changes = snapshot.get("state_changes", [])
  484. self._pending_structured_relationships = snapshot.get("structured_relationships", [])
  485. self._pending_sqlite_data = snapshot.get("sqlite_data", {
  486. "entities_appeared": [],
  487. "entities_new": [],
  488. "state_changes": [],
  489. "relationships_new": [],
  490. "chapter": None,
  491. })
  492. def _clear_pending_sqlite_data(self):
  493. """清空待同步的 SQLite 数据"""
  494. self._pending_sqlite_data = {
  495. "entities_appeared": [],
  496. "entities_new": [],
  497. "state_changes": [],
  498. "relationships_new": [],
  499. "chapter": None
  500. }
  501. # ==================== 进度管理 ====================
  502. def get_current_chapter(self) -> int:
  503. """获取当前章节号"""
  504. return self._state.get("progress", {}).get("current_chapter", 0)
  505. def update_progress(self, chapter: int, words: int = 0):
  506. """更新进度"""
  507. if "progress" not in self._state:
  508. self._state["progress"] = {}
  509. self._state["progress"]["current_chapter"] = chapter
  510. if words > 0:
  511. total = self._state["progress"].get("total_words", 0)
  512. self._state["progress"]["total_words"] = total + words
  513. # 记录增量:锁内合并时用 max(chapter) + words_delta 累加
  514. if self._pending_progress_chapter is None:
  515. self._pending_progress_chapter = chapter
  516. else:
  517. self._pending_progress_chapter = max(self._pending_progress_chapter, chapter)
  518. if words > 0:
  519. self._pending_progress_words_delta += int(words)
  520. # ==================== 实体管理 (v5.1 SQLite-first) ====================
  521. def get_entity(self, entity_id: str, entity_type: str = None) -> Optional[Dict]:
  522. """获取实体(v5.1 引入:优先从 SQLite 读取)"""
  523. # v5.1 引入: 优先从 SQLite 读取
  524. if self._sql_state_manager:
  525. entity = self._sql_state_manager._index_manager.get_entity(entity_id)
  526. if entity:
  527. return entity
  528. # 回退到内存 state (兼容未迁移场景)
  529. entities_v3 = self._state.get("entities_v3", {})
  530. if entity_type:
  531. return entities_v3.get(entity_type, {}).get(entity_id)
  532. # 遍历所有类型查找
  533. for type_name, entities in entities_v3.items():
  534. if entity_id in entities:
  535. return entities[entity_id]
  536. return None
  537. def get_entity_type(self, entity_id: str) -> Optional[str]:
  538. """获取实体所属类型"""
  539. # v5.1 引入: 优先从 SQLite 读取
  540. if self._sql_state_manager:
  541. entity = self._sql_state_manager._index_manager.get_entity(entity_id)
  542. if entity:
  543. return entity.get("type")
  544. # 回退到内存 state
  545. for type_name, entities in self._state.get("entities_v3", {}).items():
  546. if entity_id in entities:
  547. return type_name
  548. return None
  549. def get_all_entities(self) -> Dict[str, Dict]:
  550. """获取所有实体(扁平化视图)"""
  551. # v5.1 引入: 优先从 SQLite 读取
  552. if self._sql_state_manager:
  553. result = {}
  554. for entity_type in self.ENTITY_TYPES:
  555. entities = self._sql_state_manager._index_manager.get_entities_by_type(entity_type)
  556. for e in entities:
  557. eid = e.get("id")
  558. if eid:
  559. result[eid] = {**e, "type": entity_type}
  560. if result:
  561. return result
  562. # 回退到内存 state
  563. result = {}
  564. for type_name, entities in self._state.get("entities_v3", {}).items():
  565. for eid, e in entities.items():
  566. result[eid] = {**e, "type": type_name}
  567. return result
  568. def get_entities_by_type(self, entity_type: str) -> Dict[str, Dict]:
  569. """按类型获取实体"""
  570. # v5.1 引入: 优先从 SQLite 读取
  571. if self._sql_state_manager:
  572. entities = self._sql_state_manager._index_manager.get_entities_by_type(entity_type)
  573. if entities:
  574. return {e.get("id"): e for e in entities if e.get("id")}
  575. # 回退到内存 state
  576. return self._state.get("entities_v3", {}).get(entity_type, {})
  577. def get_entities_by_tier(self, tier: str) -> Dict[str, Dict]:
  578. """按层级获取实体"""
  579. # v5.1 引入: 优先从 SQLite 读取
  580. if self._sql_state_manager:
  581. result = {}
  582. for entity_type in self.ENTITY_TYPES:
  583. entities = self._sql_state_manager._index_manager.get_entities_by_tier(tier)
  584. for e in entities:
  585. eid = e.get("id")
  586. if eid and e.get("type") == entity_type:
  587. result[eid] = {**e, "type": entity_type}
  588. if result:
  589. return result
  590. # 回退到内存 state
  591. result = {}
  592. for type_name, entities in self._state.get("entities_v3", {}).items():
  593. for eid, e in entities.items():
  594. if e.get("tier") == tier:
  595. result[eid] = {**e, "type": type_name}
  596. return result
  597. def add_entity(self, entity: EntityState) -> bool:
  598. """添加新实体(v5.0 entities_v3 格式,v5.4 沿用)"""
  599. entity_type = entity.type
  600. if entity_type not in self.ENTITY_TYPES:
  601. entity_type = "角色"
  602. if "entities_v3" not in self._state:
  603. self._state["entities_v3"] = {t: {} for t in self.ENTITY_TYPES}
  604. if entity_type not in self._state["entities_v3"]:
  605. self._state["entities_v3"][entity_type] = {}
  606. # 检查是否已存在
  607. if entity.id in self._state["entities_v3"][entity_type]:
  608. return False
  609. # 转换为 v3 格式
  610. v3_entity = {
  611. "canonical_name": entity.name,
  612. "tier": entity.tier,
  613. "desc": "",
  614. "current": entity.attributes,
  615. "first_appearance": entity.first_appearance,
  616. "last_appearance": entity.last_appearance,
  617. "history": []
  618. }
  619. self._state["entities_v3"][entity_type][entity.id] = v3_entity
  620. # 记录实体补丁(新建:仅填充缺失字段,避免覆盖并发写入)
  621. patch = self._pending_entity_patches.get((entity_type, entity.id))
  622. if patch is None:
  623. patch = _EntityPatch(entity_type=entity_type, entity_id=entity.id)
  624. self._pending_entity_patches[(entity_type, entity.id)] = patch
  625. patch.replace = True
  626. patch.base_entity = v3_entity
  627. # v5.1 引入: 注册别名到 index.db (通过 SQLStateManager)
  628. if self._sql_state_manager:
  629. self._sql_state_manager._index_manager.register_alias(entity.name, entity.id, entity_type)
  630. for alias in entity.aliases:
  631. if alias:
  632. self._sql_state_manager._index_manager.register_alias(alias, entity.id, entity_type)
  633. return True
  634. def _register_alias_internal(self, entity_id: str, entity_type: str, alias: str):
  635. """内部方法:注册别名到 index.db(v5.1 引入)"""
  636. if not alias:
  637. return
  638. # v5.1 引入: 直接写入 SQLite
  639. if self._sql_state_manager:
  640. self._sql_state_manager._index_manager.register_alias(alias, entity_id, entity_type)
  641. def update_entity(self, entity_id: str, updates: Dict[str, Any], entity_type: str = None) -> bool:
  642. """更新实体属性(v5.0 引入,v5.4 沿用)"""
  643. # 查找实体
  644. if entity_type:
  645. if entity_id not in self._state.get("entities_v3", {}).get(entity_type, {}):
  646. return False
  647. entity = self._state["entities_v3"][entity_type][entity_id]
  648. else:
  649. entity_type = self.get_entity_type(entity_id)
  650. if not entity_type:
  651. return False
  652. entity = self._state["entities_v3"][entity_type][entity_id]
  653. for key, value in updates.items():
  654. if key == "attributes" and isinstance(value, dict):
  655. # v5.0 引入: attributes 存在 current 字段
  656. if "current" not in entity:
  657. entity["current"] = {}
  658. entity["current"].update(value)
  659. # 记录补丁(current 增量)
  660. patch = self._pending_entity_patches.get((entity_type, entity_id))
  661. if patch is None:
  662. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  663. self._pending_entity_patches[(entity_type, entity_id)] = patch
  664. patch.current_updates.update(value)
  665. elif key == "current" and isinstance(value, dict):
  666. if "current" not in entity:
  667. entity["current"] = {}
  668. entity["current"].update(value)
  669. patch = self._pending_entity_patches.get((entity_type, entity_id))
  670. if patch is None:
  671. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  672. self._pending_entity_patches[(entity_type, entity_id)] = patch
  673. patch.current_updates.update(value)
  674. else:
  675. entity[key] = value
  676. patch = self._pending_entity_patches.get((entity_type, entity_id))
  677. if patch is None:
  678. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  679. self._pending_entity_patches[(entity_type, entity_id)] = patch
  680. patch.top_updates[key] = value
  681. return True
  682. def update_entity_appearance(self, entity_id: str, chapter: int, entity_type: str = None):
  683. """更新实体出场章节"""
  684. if not entity_type:
  685. entity_type = self.get_entity_type(entity_id)
  686. if not entity_type:
  687. return
  688. entities_v3 = self._state.get("entities_v3")
  689. if not isinstance(entities_v3, dict):
  690. entities_v3 = {t: {} for t in self.ENTITY_TYPES}
  691. self._state["entities_v3"] = entities_v3
  692. entities_v3.setdefault(entity_type, {})
  693. entity = entities_v3[entity_type].get(entity_id)
  694. if entity:
  695. if entity.get("first_appearance", 0) == 0:
  696. entity["first_appearance"] = chapter
  697. entity["last_appearance"] = chapter
  698. # 记录补丁:锁内应用 first=min(non-zero), last=max
  699. patch = self._pending_entity_patches.get((entity_type, entity_id))
  700. if patch is None:
  701. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  702. self._pending_entity_patches[(entity_type, entity_id)] = patch
  703. if patch.appearance_chapter is None:
  704. patch.appearance_chapter = chapter
  705. else:
  706. patch.appearance_chapter = max(int(patch.appearance_chapter), int(chapter))
  707. # ==================== 状态变化记录 ====================
  708. def record_state_change(
  709. self,
  710. entity_id: str,
  711. field: str,
  712. old_value: Any,
  713. new_value: Any,
  714. reason: str,
  715. chapter: int
  716. ):
  717. """记录状态变化"""
  718. if "state_changes" not in self._state:
  719. self._state["state_changes"] = []
  720. change = StateChange(
  721. entity_id=entity_id,
  722. field=field,
  723. old_value=old_value,
  724. new_value=new_value,
  725. reason=reason,
  726. chapter=chapter
  727. )
  728. change_dict = asdict(change)
  729. self._state["state_changes"].append(change_dict)
  730. self._pending_state_changes.append(change_dict)
  731. # 同时更新实体属性
  732. self.update_entity(entity_id, {"attributes": {field: new_value}})
  733. def get_state_changes(self, entity_id: Optional[str] = None) -> List[Dict]:
  734. """获取状态变化历史"""
  735. changes = self._state.get("state_changes", [])
  736. if entity_id:
  737. changes = [c for c in changes if c.get("entity_id") == entity_id]
  738. return changes
  739. # ==================== 关系管理 ====================
  740. def add_relationship(
  741. self,
  742. from_entity: str,
  743. to_entity: str,
  744. rel_type: str,
  745. description: str,
  746. chapter: int
  747. ):
  748. """添加关系"""
  749. rel = Relationship(
  750. from_entity=from_entity,
  751. to_entity=to_entity,
  752. type=rel_type,
  753. description=description,
  754. chapter=chapter
  755. )
  756. # v5.0 引入: 实体关系存入 structured_relationships,避免与 relationships(人物关系字典) 冲突
  757. if "structured_relationships" not in self._state:
  758. self._state["structured_relationships"] = []
  759. rel_dict = asdict(rel)
  760. self._state["structured_relationships"].append(rel_dict)
  761. self._pending_structured_relationships.append(rel_dict)
  762. def get_relationships(self, entity_id: Optional[str] = None) -> List[Dict]:
  763. """获取关系列表"""
  764. rels = self._state.get("structured_relationships", [])
  765. if entity_id:
  766. rels = [
  767. r for r in rels
  768. if r.get("from_entity") == entity_id or r.get("to_entity") == entity_id
  769. ]
  770. return rels
  771. # ==================== 批量操作 ====================
  772. def _record_disambiguation(self, chapter: int, uncertain_items: Any) -> List[str]:
  773. """
  774. 记录消歧反馈到 state.json,便于 Writer/Context Agent 感知风险。
  775. 约定:
  776. - >= extraction_confidence_medium:写入 disambiguation_warnings(采用但警告)
  777. - < extraction_confidence_medium:写入 disambiguation_pending(需人工确认)
  778. """
  779. if not isinstance(uncertain_items, list) or not uncertain_items:
  780. return []
  781. warnings: List[str] = []
  782. now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  783. for item in uncertain_items:
  784. if not isinstance(item, dict):
  785. continue
  786. mention = str(item.get("mention", "") or "").strip()
  787. if not mention:
  788. continue
  789. raw_conf = item.get("confidence", 0.0)
  790. try:
  791. confidence = float(raw_conf)
  792. except (TypeError, ValueError):
  793. confidence = 0.0
  794. # 候选:支持 [{"type","id"}...] 或 ["id1","id2"] 两种形式
  795. candidates_raw = item.get("candidates", [])
  796. candidates: List[Dict[str, str]] = []
  797. if isinstance(candidates_raw, list):
  798. for c in candidates_raw:
  799. if isinstance(c, dict):
  800. cid = str(c.get("id", "") or "").strip()
  801. ctype = str(c.get("type", "") or "").strip()
  802. entry: Dict[str, str] = {}
  803. if ctype:
  804. entry["type"] = ctype
  805. if cid:
  806. entry["id"] = cid
  807. if entry:
  808. candidates.append(entry)
  809. else:
  810. cid = str(c).strip()
  811. if cid:
  812. candidates.append({"id": cid})
  813. entity_type = str(item.get("type", "") or "").strip()
  814. suggested_id = str(item.get("suggested", "") or "").strip()
  815. adopted_raw = item.get("adopted", None)
  816. chosen_id = ""
  817. if isinstance(adopted_raw, str):
  818. chosen_id = adopted_raw.strip()
  819. elif adopted_raw is True:
  820. chosen_id = suggested_id
  821. else:
  822. # 兼容字段名:entity_id / chosen_id
  823. chosen_id = str(item.get("entity_id") or item.get("chosen_id") or "").strip() or suggested_id
  824. context = str(item.get("context", "") or "").strip()
  825. note = str(item.get("warning", "") or "").strip()
  826. record: Dict[str, Any] = {
  827. "chapter": int(chapter),
  828. "mention": mention,
  829. "type": entity_type,
  830. "suggested_id": suggested_id,
  831. "chosen_id": chosen_id,
  832. "confidence": confidence,
  833. "candidates": candidates,
  834. "context": context,
  835. "note": note,
  836. "created_at": now,
  837. }
  838. if confidence >= float(self.config.extraction_confidence_medium):
  839. self._state.setdefault("disambiguation_warnings", []).append(record)
  840. self._pending_disambiguation_warnings.append(record)
  841. chosen_part = f" → {chosen_id}" if chosen_id else ""
  842. warnings.append(f"消歧警告: {mention}{chosen_part} (confidence: {confidence:.2f})")
  843. else:
  844. self._state.setdefault("disambiguation_pending", []).append(record)
  845. self._pending_disambiguation_pending.append(record)
  846. warnings.append(f"消歧需人工确认: {mention} (confidence: {confidence:.2f})")
  847. return warnings
  848. def process_chapter_result(self, chapter: int, result: Dict) -> List[str]:
  849. """
  850. 处理 Data Agent 的章节处理结果(v5.1 引入,v5.4 沿用)
  851. 输入格式:
  852. - entities_appeared: 出场实体列表
  853. - entities_new: 新实体列表
  854. - state_changes: 状态变化列表
  855. - relationships_new: 新关系列表
  856. 返回警告列表
  857. """
  858. warnings = []
  859. # v5.1 引入: 记录章节号用于 SQLite 同步
  860. self._pending_sqlite_data["chapter"] = chapter
  861. # 处理出场实体
  862. for entity in result.get("entities_appeared", []):
  863. entity_id = entity.get("id")
  864. entity_type = entity.get("type")
  865. if entity_id:
  866. self.update_entity_appearance(entity_id, chapter, entity_type)
  867. # v5.1 引入: 缓存用于 SQLite 同步
  868. self._pending_sqlite_data["entities_appeared"].append(entity)
  869. # 处理新实体
  870. for entity in result.get("entities_new", []):
  871. entity_id = entity.get("suggested_id") or entity.get("id")
  872. if entity_id and entity_id != "NEW":
  873. new_entity = EntityState(
  874. id=entity_id,
  875. name=entity.get("name", ""),
  876. type=entity.get("type", "角色"),
  877. tier=entity.get("tier", "装饰"),
  878. aliases=entity.get("mentions", []),
  879. first_appearance=chapter,
  880. last_appearance=chapter
  881. )
  882. if not self.add_entity(new_entity):
  883. warnings.append(f"实体已存在: {entity_id}")
  884. # v5.1 引入: 缓存用于 SQLite 同步
  885. self._pending_sqlite_data["entities_new"].append(entity)
  886. # 处理状态变化
  887. for change in result.get("state_changes", []):
  888. self.record_state_change(
  889. entity_id=change.get("entity_id", ""),
  890. field=change.get("field", ""),
  891. old_value=change.get("old"),
  892. new_value=change.get("new"),
  893. reason=change.get("reason", ""),
  894. chapter=chapter
  895. )
  896. # v5.1 引入: 缓存用于 SQLite 同步
  897. self._pending_sqlite_data["state_changes"].append(change)
  898. # 处理关系
  899. for rel in result.get("relationships_new", []):
  900. self.add_relationship(
  901. from_entity=rel.get("from", ""),
  902. to_entity=rel.get("to", ""),
  903. rel_type=rel.get("type", ""),
  904. description=rel.get("description", ""),
  905. chapter=chapter
  906. )
  907. # v5.1 引入: 缓存用于 SQLite 同步
  908. self._pending_sqlite_data["relationships_new"].append(rel)
  909. # 处理消歧不确定项(不影响实体写入,但必须对 Writer 可见)
  910. warnings.extend(self._record_disambiguation(chapter, result.get("uncertain", [])))
  911. # 写入 chapter_meta(钩子/模式/结束状态)
  912. chapter_meta = result.get("chapter_meta")
  913. if isinstance(chapter_meta, dict):
  914. meta_key = f"{int(chapter):04d}"
  915. self._state.setdefault("chapter_meta", {})
  916. self._state["chapter_meta"][meta_key] = chapter_meta
  917. self._pending_chapter_meta[meta_key] = chapter_meta
  918. # 更新进度
  919. self.update_progress(chapter)
  920. # 同步主角状态(entities_v3 → protagonist_state)
  921. self.sync_protagonist_from_entity()
  922. return warnings
  923. # ==================== 导出 ====================
  924. def export_for_context(self) -> Dict:
  925. """导出用于上下文的精简版状态(v5.0 引入,v5.4 沿用)"""
  926. # 从 entities_v3 构建精简视图
  927. entities_flat = {}
  928. for type_name, entities in self._state.get("entities_v3", {}).items():
  929. for eid, e in entities.items():
  930. entities_flat[eid] = {
  931. "name": e.get("canonical_name", eid),
  932. "type": type_name,
  933. "tier": e.get("tier", "装饰"),
  934. "current": e.get("current", {})
  935. }
  936. return {
  937. "progress": self._state.get("progress", {}),
  938. "entities": entities_flat,
  939. # v5.1 引入: alias_index 已迁移到 index.db,这里返回空(兼容性)
  940. "alias_index": {},
  941. "recent_changes": [], # v5.1 引入: 从 index.db 查询
  942. "disambiguation": {
  943. "warnings": self._state.get("disambiguation_warnings", [])[-self.config.export_disambiguation_slice:],
  944. "pending": self._state.get("disambiguation_pending", [])[-self.config.export_disambiguation_slice:],
  945. },
  946. }
  947. # ==================== 主角同步 ====================
  948. def get_protagonist_entity_id(self) -> Optional[str]:
  949. """获取主角实体 ID(通过 is_protagonist 标记或 SQLite 查询)"""
  950. # 方式1: 通过 SQLStateManager 查询 (v5.1)
  951. if self._sql_state_manager:
  952. protagonist = self._sql_state_manager.get_protagonist()
  953. if protagonist:
  954. return protagonist.get("id")
  955. # 方式2: 通过 protagonist_state.name 查找别名
  956. protag_name = self._state.get("protagonist_state", {}).get("name")
  957. if protag_name and self._sql_state_manager:
  958. entities = self._sql_state_manager._index_manager.get_entities_by_alias(protag_name)
  959. for entry in entities:
  960. if entry.get("type") == "角色":
  961. return entry.get("id")
  962. return None
  963. def sync_protagonist_from_entity(self, entity_id: str = None):
  964. """
  965. 将主角实体的状态同步到 protagonist_state (v5.1: 从 SQLite 读取)
  966. 用于确保 consistency-checker 等依赖 protagonist_state 的组件获取最新数据
  967. """
  968. if entity_id is None:
  969. entity_id = self.get_protagonist_entity_id()
  970. if entity_id is None:
  971. return
  972. entity = self.get_entity(entity_id, "角色")
  973. if not entity:
  974. return
  975. current = entity.get("current")
  976. if not isinstance(current, dict):
  977. current = entity.get("current_json", {})
  978. if isinstance(current, str):
  979. try:
  980. current = json.loads(current) if current else {}
  981. except (json.JSONDecodeError, TypeError):
  982. current = {}
  983. if not isinstance(current, dict):
  984. current = {}
  985. protag = self._state.setdefault("protagonist_state", {})
  986. # 同步境界
  987. if "realm" in current:
  988. power = protag.setdefault("power", {})
  989. power["realm"] = current["realm"]
  990. if "layer" in current:
  991. power["layer"] = current["layer"]
  992. # 同步位置
  993. if "location" in current:
  994. loc = protag.setdefault("location", {})
  995. loc["current"] = current["location"]
  996. if "last_chapter" in current:
  997. loc["last_chapter"] = current["last_chapter"]
  998. def sync_protagonist_to_entity(self, entity_id: str = None):
  999. """
  1000. 将 protagonist_state 同步到 entities_v3 中的主角实体
  1001. 用于初始化或手动编辑 protagonist_state 后保持一致性
  1002. """
  1003. if entity_id is None:
  1004. entity_id = self.get_protagonist_entity_id()
  1005. if entity_id is None:
  1006. return
  1007. protag = self._state.get("protagonist_state", {})
  1008. if not protag:
  1009. return
  1010. updates = {}
  1011. # 同步境界
  1012. power = protag.get("power", {})
  1013. if power.get("realm"):
  1014. updates["realm"] = power["realm"]
  1015. if power.get("layer"):
  1016. updates["layer"] = power["layer"]
  1017. # 同步位置
  1018. loc = protag.get("location", {})
  1019. if loc.get("current"):
  1020. updates["location"] = loc["current"]
  1021. if updates:
  1022. self.update_entity(entity_id, updates, "角色")
  1023. # ==================== CLI 接口 ====================
  1024. def main():
  1025. import argparse
  1026. from pydantic import ValidationError
  1027. from .cli_output import print_success, print_error
  1028. from .schemas import validate_data_agent_output, format_validation_error, normalize_data_agent_output
  1029. from .index_manager import IndexManager
  1030. parser = argparse.ArgumentParser(description="State Manager CLI (v5.4)")
  1031. parser.add_argument("--project-root", type=str, help="项目根目录")
  1032. subparsers = parser.add_subparsers(dest="command")
  1033. # 读取进度
  1034. subparsers.add_parser("get-progress")
  1035. # 获取实体
  1036. get_entity_parser = subparsers.add_parser("get-entity")
  1037. get_entity_parser.add_argument("--id", required=True)
  1038. # 列出实体
  1039. list_parser = subparsers.add_parser("list-entities")
  1040. list_parser.add_argument("--type", help="按类型过滤")
  1041. list_parser.add_argument("--tier", help="按层级过滤")
  1042. # 处理章节结果
  1043. process_parser = subparsers.add_parser("process-chapter")
  1044. process_parser.add_argument("--chapter", type=int, required=True, help="章节号")
  1045. process_parser.add_argument("--data", required=True, help="JSON 格式的处理结果")
  1046. args = parser.parse_args()
  1047. # 初始化
  1048. config = None
  1049. if args.project_root:
  1050. from .config import DataModulesConfig
  1051. config = DataModulesConfig.from_project_root(args.project_root)
  1052. manager = StateManager(config)
  1053. logger = IndexManager(config)
  1054. tool_name = f"state_manager:{args.command or 'unknown'}"
  1055. def emit_success(data=None, message: str = "ok"):
  1056. print_success(data, message=message)
  1057. try:
  1058. logger.log_tool_call(tool_name, True)
  1059. except Exception:
  1060. pass
  1061. def emit_error(code: str, message: str, suggestion: str | None = None):
  1062. print_error(code, message, suggestion=suggestion)
  1063. try:
  1064. logger.log_tool_call(tool_name, False, error_code=code, error_message=message)
  1065. except Exception:
  1066. pass
  1067. if args.command == "get-progress":
  1068. emit_success(manager._state.get("progress", {}), message="progress")
  1069. elif args.command == "get-entity":
  1070. entity = manager.get_entity(args.id)
  1071. if entity:
  1072. emit_success(entity, message="entity")
  1073. else:
  1074. emit_error("NOT_FOUND", f"未找到实体: {args.id}")
  1075. elif args.command == "list-entities":
  1076. if args.type:
  1077. entities = manager.get_entities_by_type(args.type)
  1078. elif args.tier:
  1079. entities = manager.get_entities_by_tier(args.tier)
  1080. else:
  1081. entities = manager.get_all_entities()
  1082. payload = [{"id": eid, **e} for eid, e in entities.items()]
  1083. emit_success(payload, message="entities")
  1084. elif args.command == "process-chapter":
  1085. data = json.loads(args.data)
  1086. validated = None
  1087. last_exc = None
  1088. for _ in range(3):
  1089. try:
  1090. validated = validate_data_agent_output(data)
  1091. break
  1092. except ValidationError as exc:
  1093. last_exc = exc
  1094. data = normalize_data_agent_output(data)
  1095. if validated is None:
  1096. err = format_validation_error(last_exc) if last_exc else {
  1097. "code": "SCHEMA_VALIDATION_FAILED",
  1098. "message": "数据结构校验失败",
  1099. "details": {"errors": []},
  1100. "suggestion": "请检查 data-agent 输出字段是否完整且类型正确",
  1101. }
  1102. emit_error(err["code"], err["message"], suggestion=err.get("suggestion"))
  1103. return
  1104. warnings = manager.process_chapter_result(args.chapter, validated.model_dump(by_alias=True))
  1105. manager.save_state()
  1106. emit_success({"chapter": args.chapter, "warnings": warnings}, message="chapter_processed")
  1107. else:
  1108. emit_error("UNKNOWN_COMMAND", "未指定有效命令", suggestion="请查看 --help")
  1109. if __name__ == "__main__":
  1110. if sys.platform == "win32":
  1111. import io
  1112. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
  1113. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
  1114. main()