state_manager.py 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. State Manager - 状态管理模块
  5. 管理 state.json 的读写操作:
  6. - 实体状态管理
  7. - 进度追踪
  8. - 关系记录
  9. """
  10. import json
  11. from pathlib import Path
  12. from typing import Dict, List, Optional, Any
  13. from dataclasses import dataclass, field, asdict
  14. from datetime import datetime
  15. import filelock
  16. from .config import get_config
  17. try:
  18. # 当 scripts 目录在 sys.path 中(常见:从 scripts/ 运行)
  19. from security_utils import atomic_write_json, read_json_safe
  20. except ImportError: # pragma: no cover
  21. # 当以 `python -m scripts.data_modules...` 从仓库根目录运行
  22. from scripts.security_utils import atomic_write_json, read_json_safe
  23. @dataclass
  24. class EntityState:
  25. """实体状态"""
  26. id: str
  27. name: str
  28. type: str # 角色/地点/物品/势力
  29. tier: str = "装饰" # 核心/支线/装饰
  30. aliases: List[str] = field(default_factory=list)
  31. attributes: Dict[str, Any] = field(default_factory=dict)
  32. first_appearance: int = 0
  33. last_appearance: int = 0
  34. @dataclass
  35. class Relationship:
  36. """实体关系"""
  37. from_entity: str
  38. to_entity: str
  39. type: str
  40. description: str
  41. chapter: int
  42. @dataclass
  43. class StateChange:
  44. """状态变化记录"""
  45. entity_id: str
  46. field: str
  47. old_value: Any
  48. new_value: Any
  49. reason: str
  50. chapter: int
  51. timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
  52. @dataclass
  53. class _EntityPatch:
  54. """待写入的实体增量补丁(用于锁内合并)"""
  55. entity_type: str
  56. entity_id: str
  57. replace: bool = False
  58. base_entity: Optional[Dict[str, Any]] = None # 新建实体时的完整快照(用于填充缺失字段)
  59. top_updates: Dict[str, Any] = field(default_factory=dict)
  60. current_updates: Dict[str, Any] = field(default_factory=dict)
  61. appearance_chapter: Optional[int] = None
  62. class StateManager:
  63. """状态管理器 (v5.0 entities_v3 格式)"""
  64. # v5.0 支持的实体类型
  65. ENTITY_TYPES = ["角色", "地点", "物品", "势力", "招式"]
  66. def __init__(self, config=None):
  67. self.config = config or get_config()
  68. self._state: Dict[str, Any] = {}
  69. # 与 security_utils.atomic_write_json 保持一致:state.json.lock
  70. self._lock_path = self.config.state_file.with_suffix(self.config.state_file.suffix + ".lock")
  71. # 待写入的增量(锁内重读 + 合并 + 写入)
  72. self._pending_entity_patches: Dict[tuple[str, str], _EntityPatch] = {}
  73. self._pending_alias_entries: Dict[str, List[Dict[str, str]]] = {}
  74. self._pending_state_changes: List[Dict[str, Any]] = []
  75. self._pending_structured_relationships: List[Dict[str, Any]] = []
  76. self._pending_disambiguation_warnings: List[Dict[str, Any]] = []
  77. self._pending_disambiguation_pending: List[Dict[str, Any]] = []
  78. self._pending_progress_chapter: Optional[int] = None
  79. self._pending_progress_words_delta: int = 0
  80. self._load_state()
  81. def _now_progress_timestamp(self) -> str:
  82. return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  83. def _ensure_state_schema(self, state: Dict[str, Any]) -> Dict[str, Any]:
  84. """确保 state.json 具备运行所需的关键字段(尽量不破坏既有数据)。"""
  85. if not isinstance(state, dict):
  86. state = {}
  87. state.setdefault("project_info", {})
  88. state.setdefault("progress", {})
  89. state.setdefault("protagonist_state", {})
  90. # relationships: 旧版本可能是 list(实体关系),v5.0 运行态用 dict(人物关系/重要关系)
  91. relationships = state.get("relationships")
  92. if isinstance(relationships, list):
  93. state.setdefault("structured_relationships", [])
  94. if isinstance(state.get("structured_relationships"), list):
  95. state["structured_relationships"].extend(relationships)
  96. state["relationships"] = {}
  97. elif not isinstance(relationships, dict):
  98. state["relationships"] = {}
  99. state.setdefault("world_settings", {"power_system": [], "factions": [], "locations": []})
  100. state.setdefault("plot_threads", {"active_threads": [], "foreshadowing": []})
  101. state.setdefault("review_checkpoints", [])
  102. state.setdefault(
  103. "strand_tracker",
  104. {
  105. "last_quest_chapter": 0,
  106. "last_fire_chapter": 0,
  107. "last_constellation_chapter": 0,
  108. "current_dominant": "quest",
  109. "chapters_since_switch": 0,
  110. "history": [],
  111. },
  112. )
  113. entities_v3 = state.get("entities_v3")
  114. if not isinstance(entities_v3, dict):
  115. entities_v3 = {}
  116. state["entities_v3"] = entities_v3
  117. for t in self.ENTITY_TYPES:
  118. if not isinstance(entities_v3.get(t), dict):
  119. entities_v3[t] = {}
  120. if not isinstance(state.get("alias_index"), dict):
  121. state["alias_index"] = {}
  122. if not isinstance(state.get("state_changes"), list):
  123. state["state_changes"] = []
  124. if not isinstance(state.get("structured_relationships"), list):
  125. state["structured_relationships"] = []
  126. if not isinstance(state.get("disambiguation_warnings"), list):
  127. state["disambiguation_warnings"] = []
  128. if not isinstance(state.get("disambiguation_pending"), list):
  129. state["disambiguation_pending"] = []
  130. # progress 基础字段
  131. progress = state["progress"]
  132. if not isinstance(progress, dict):
  133. progress = {}
  134. state["progress"] = progress
  135. progress.setdefault("current_chapter", 0)
  136. progress.setdefault("total_words", 0)
  137. progress.setdefault("last_updated", self._now_progress_timestamp())
  138. return state
  139. def _load_state(self):
  140. """加载状态文件"""
  141. if self.config.state_file.exists():
  142. self._state = read_json_safe(self.config.state_file, default={})
  143. self._state = self._ensure_state_schema(self._state)
  144. else:
  145. self._state = self._ensure_state_schema({})
  146. def save_state(self):
  147. """
  148. 保存状态文件(锁内重读 + 合并 + 原子写入)。
  149. 解决多 Agent 并行下的“读-改-写覆盖”风险:
  150. - 获取锁
  151. - 重新读取磁盘最新 state.json
  152. - 仅合并本实例产生的增量(pending_*)
  153. - 原子化写入
  154. """
  155. # 无增量时不写入,避免无意义覆盖
  156. has_pending = any(
  157. [
  158. self._pending_entity_patches,
  159. self._pending_alias_entries,
  160. self._pending_state_changes,
  161. self._pending_structured_relationships,
  162. self._pending_disambiguation_warnings,
  163. self._pending_disambiguation_pending,
  164. self._pending_progress_chapter is not None,
  165. self._pending_progress_words_delta != 0,
  166. ]
  167. )
  168. if not has_pending:
  169. return
  170. self.config.ensure_dirs()
  171. lock = filelock.FileLock(str(self._lock_path), timeout=10)
  172. try:
  173. with lock:
  174. disk_state = read_json_safe(self.config.state_file, default={})
  175. disk_state = self._ensure_state_schema(disk_state)
  176. # progress(合并为 max(chapter) + words_delta 累加)
  177. if self._pending_progress_chapter is not None or self._pending_progress_words_delta != 0:
  178. progress = disk_state.get("progress", {})
  179. if not isinstance(progress, dict):
  180. progress = {}
  181. disk_state["progress"] = progress
  182. try:
  183. current_chapter = int(progress.get("current_chapter", 0) or 0)
  184. except (TypeError, ValueError):
  185. current_chapter = 0
  186. if self._pending_progress_chapter is not None:
  187. progress["current_chapter"] = max(current_chapter, int(self._pending_progress_chapter))
  188. if self._pending_progress_words_delta:
  189. try:
  190. total_words = int(progress.get("total_words", 0) or 0)
  191. except (TypeError, ValueError):
  192. total_words = 0
  193. progress["total_words"] = total_words + int(self._pending_progress_words_delta)
  194. progress["last_updated"] = self._now_progress_timestamp()
  195. # entities_v3(按补丁应用)
  196. entities_v3 = disk_state.get("entities_v3", {})
  197. if not isinstance(entities_v3, dict):
  198. entities_v3 = {}
  199. disk_state["entities_v3"] = entities_v3
  200. for t in self.ENTITY_TYPES:
  201. if not isinstance(entities_v3.get(t), dict):
  202. entities_v3[t] = {}
  203. for (entity_type, entity_id), patch in self._pending_entity_patches.items():
  204. bucket = entities_v3.setdefault(entity_type, {})
  205. if not isinstance(bucket, dict):
  206. bucket = {}
  207. entities_v3[entity_type] = bucket
  208. entity = bucket.get(entity_id)
  209. if not isinstance(entity, dict):
  210. entity = {}
  211. bucket[entity_id] = entity
  212. # 新建实体时:只填充缺失字段,避免覆盖并发写入的更完整信息
  213. if patch.base_entity:
  214. for k, v in patch.base_entity.items():
  215. if k not in entity:
  216. entity[k] = v
  217. elif isinstance(entity.get(k), dict) and isinstance(v, dict):
  218. # 递归填充缺失
  219. for kk, vv in v.items():
  220. if kk not in entity[k]:
  221. entity[k][kk] = vv
  222. # top-level updates(明确写入)
  223. for k, v in patch.top_updates.items():
  224. entity[k] = v
  225. # current updates(明确写入)
  226. if patch.current_updates:
  227. current = entity.get("current")
  228. if not isinstance(current, dict):
  229. current = {}
  230. entity["current"] = current
  231. current.update(patch.current_updates)
  232. # appearance updates(first=min(non-zero), last=max)
  233. if patch.appearance_chapter is not None:
  234. chapter = int(patch.appearance_chapter)
  235. try:
  236. first = int(entity.get("first_appearance", 0) or 0)
  237. except (TypeError, ValueError):
  238. first = 0
  239. try:
  240. last = int(entity.get("last_appearance", 0) or 0)
  241. except (TypeError, ValueError):
  242. last = 0
  243. if first <= 0:
  244. entity["first_appearance"] = chapter
  245. else:
  246. entity["first_appearance"] = min(first, chapter)
  247. entity["last_appearance"] = max(last, chapter)
  248. # alias_index(一对多:合并去重)
  249. alias_index = disk_state.get("alias_index", {})
  250. if not isinstance(alias_index, dict):
  251. alias_index = {}
  252. disk_state["alias_index"] = alias_index
  253. for alias, entries in self._pending_alias_entries.items():
  254. if not alias:
  255. continue
  256. existing = alias_index.get(alias)
  257. if not isinstance(existing, list):
  258. existing = []
  259. alias_index[alias] = existing
  260. for entry in entries:
  261. et = entry.get("type")
  262. eid = entry.get("id")
  263. if not et or not eid:
  264. continue
  265. if any(e.get("type") == et and e.get("id") == eid for e in existing if isinstance(e, dict)):
  266. continue
  267. existing.append({"type": et, "id": eid})
  268. # state_changes(追加)
  269. if self._pending_state_changes:
  270. changes = disk_state.get("state_changes")
  271. if not isinstance(changes, list):
  272. changes = []
  273. disk_state["state_changes"] = changes
  274. changes.extend(self._pending_state_changes)
  275. # structured_relationships(追加去重)
  276. if self._pending_structured_relationships:
  277. rels = disk_state.get("structured_relationships")
  278. if not isinstance(rels, list):
  279. rels = []
  280. disk_state["structured_relationships"] = rels
  281. def _rel_key(r: Dict[str, Any]) -> tuple:
  282. return (
  283. r.get("from_entity"),
  284. r.get("to_entity"),
  285. r.get("type"),
  286. r.get("description"),
  287. r.get("chapter"),
  288. )
  289. existing_keys = {_rel_key(r) for r in rels if isinstance(r, dict)}
  290. for r in self._pending_structured_relationships:
  291. if not isinstance(r, dict):
  292. continue
  293. k = _rel_key(r)
  294. if k in existing_keys:
  295. continue
  296. rels.append(r)
  297. existing_keys.add(k)
  298. # disambiguation_warnings(追加去重 + 截断)
  299. if self._pending_disambiguation_warnings:
  300. warnings_list = disk_state.get("disambiguation_warnings")
  301. if not isinstance(warnings_list, list):
  302. warnings_list = []
  303. disk_state["disambiguation_warnings"] = warnings_list
  304. def _warn_key(w: Dict[str, Any]) -> tuple:
  305. return (
  306. w.get("chapter"),
  307. w.get("mention"),
  308. w.get("chosen_id"),
  309. w.get("confidence"),
  310. )
  311. existing_keys = {_warn_key(w) for w in warnings_list if isinstance(w, dict)}
  312. for w in self._pending_disambiguation_warnings:
  313. if not isinstance(w, dict):
  314. continue
  315. k = _warn_key(w)
  316. if k in existing_keys:
  317. continue
  318. warnings_list.append(w)
  319. existing_keys.add(k)
  320. # 只保留最近 N 条,避免文件无限增长
  321. max_keep = self.config.max_disambiguation_warnings
  322. if len(warnings_list) > max_keep:
  323. disk_state["disambiguation_warnings"] = warnings_list[-max_keep:]
  324. # disambiguation_pending(追加去重 + 截断)
  325. if self._pending_disambiguation_pending:
  326. pending_list = disk_state.get("disambiguation_pending")
  327. if not isinstance(pending_list, list):
  328. pending_list = []
  329. disk_state["disambiguation_pending"] = pending_list
  330. def _pending_key(w: Dict[str, Any]) -> tuple:
  331. return (
  332. w.get("chapter"),
  333. w.get("mention"),
  334. w.get("suggested_id"),
  335. w.get("confidence"),
  336. )
  337. existing_keys = {_pending_key(w) for w in pending_list if isinstance(w, dict)}
  338. for w in self._pending_disambiguation_pending:
  339. if not isinstance(w, dict):
  340. continue
  341. k = _pending_key(w)
  342. if k in existing_keys:
  343. continue
  344. pending_list.append(w)
  345. existing_keys.add(k)
  346. max_keep = self.config.max_disambiguation_pending
  347. if len(pending_list) > max_keep:
  348. disk_state["disambiguation_pending"] = pending_list[-max_keep:]
  349. # 原子写入(锁已持有,不再二次加锁)
  350. atomic_write_json(self.config.state_file, disk_state, use_lock=False, backup=True)
  351. # 同步内存为磁盘最新快照,并清空增量队列
  352. self._state = disk_state
  353. self._pending_entity_patches.clear()
  354. self._pending_alias_entries.clear()
  355. self._pending_state_changes.clear()
  356. self._pending_structured_relationships.clear()
  357. self._pending_disambiguation_warnings.clear()
  358. self._pending_disambiguation_pending.clear()
  359. self._pending_progress_chapter = None
  360. self._pending_progress_words_delta = 0
  361. except filelock.Timeout:
  362. raise RuntimeError("无法获取 state.json 文件锁,请稍后重试")
  363. # ==================== 进度管理 ====================
  364. def get_current_chapter(self) -> int:
  365. """获取当前章节号"""
  366. return self._state.get("progress", {}).get("current_chapter", 0)
  367. def update_progress(self, chapter: int, words: int = 0):
  368. """更新进度"""
  369. if "progress" not in self._state:
  370. self._state["progress"] = {}
  371. self._state["progress"]["current_chapter"] = chapter
  372. if words > 0:
  373. total = self._state["progress"].get("total_words", 0)
  374. self._state["progress"]["total_words"] = total + words
  375. # 记录增量:锁内合并时用 max(chapter) + words_delta 累加
  376. if self._pending_progress_chapter is None:
  377. self._pending_progress_chapter = chapter
  378. else:
  379. self._pending_progress_chapter = max(self._pending_progress_chapter, chapter)
  380. if words > 0:
  381. self._pending_progress_words_delta += int(words)
  382. # ==================== 实体管理 (v5.0 entities_v3) ====================
  383. def get_entity(self, entity_id: str, entity_type: str = None) -> Optional[Dict]:
  384. """获取实体 (v5.0 entities_v3 格式)"""
  385. entities_v3 = self._state.get("entities_v3", {})
  386. if entity_type:
  387. return entities_v3.get(entity_type, {}).get(entity_id)
  388. # 遍历所有类型查找
  389. for type_name, entities in entities_v3.items():
  390. if entity_id in entities:
  391. return entities[entity_id]
  392. return None
  393. def get_entity_type(self, entity_id: str) -> Optional[str]:
  394. """获取实体所属类型"""
  395. for type_name, entities in self._state.get("entities_v3", {}).items():
  396. if entity_id in entities:
  397. return type_name
  398. return None
  399. def get_all_entities(self) -> Dict[str, Dict]:
  400. """获取所有实体(扁平化视图,兼容旧代码)"""
  401. result = {}
  402. for type_name, entities in self._state.get("entities_v3", {}).items():
  403. for eid, e in entities.items():
  404. result[eid] = {**e, "type": type_name}
  405. return result
  406. def get_entities_by_type(self, entity_type: str) -> Dict[str, Dict]:
  407. """按类型获取实体"""
  408. return self._state.get("entities_v3", {}).get(entity_type, {})
  409. def get_entities_by_tier(self, tier: str) -> Dict[str, Dict]:
  410. """按层级获取实体"""
  411. result = {}
  412. for type_name, entities in self._state.get("entities_v3", {}).items():
  413. for eid, e in entities.items():
  414. if e.get("tier") == tier:
  415. result[eid] = {**e, "type": type_name}
  416. return result
  417. def add_entity(self, entity: EntityState) -> bool:
  418. """添加新实体 (v5.0 entities_v3 格式)"""
  419. entity_type = entity.type
  420. if entity_type not in self.ENTITY_TYPES:
  421. entity_type = "角色"
  422. if "entities_v3" not in self._state:
  423. self._state["entities_v3"] = {t: {} for t in self.ENTITY_TYPES}
  424. if entity_type not in self._state["entities_v3"]:
  425. self._state["entities_v3"][entity_type] = {}
  426. # 检查是否已存在
  427. if entity.id in self._state["entities_v3"][entity_type]:
  428. return False
  429. # 转换为 v3 格式
  430. v3_entity = {
  431. "canonical_name": entity.name,
  432. "tier": entity.tier,
  433. "desc": "",
  434. "current": entity.attributes,
  435. "first_appearance": entity.first_appearance,
  436. "last_appearance": entity.last_appearance,
  437. "history": []
  438. }
  439. self._state["entities_v3"][entity_type][entity.id] = v3_entity
  440. # 记录实体补丁(新建:仅填充缺失字段,避免覆盖并发写入)
  441. patch = self._pending_entity_patches.get((entity_type, entity.id))
  442. if patch is None:
  443. patch = _EntityPatch(entity_type=entity_type, entity_id=entity.id)
  444. self._pending_entity_patches[(entity_type, entity.id)] = patch
  445. patch.replace = True
  446. patch.base_entity = v3_entity
  447. # 注册别名到 alias_index
  448. self._register_alias_internal(entity.id, entity_type, entity.name)
  449. for alias in entity.aliases:
  450. self._register_alias_internal(entity.id, entity_type, alias)
  451. return True
  452. def _register_alias_internal(self, entity_id: str, entity_type: str, alias: str):
  453. """内部方法:注册别名到 state.json 的 alias_index"""
  454. if not alias:
  455. return
  456. if "alias_index" not in self._state:
  457. self._state["alias_index"] = {}
  458. if alias not in self._state["alias_index"]:
  459. self._state["alias_index"][alias] = []
  460. # 检查是否已存在
  461. exists = any(
  462. e.get("type") == entity_type and e.get("id") == entity_id
  463. for e in self._state["alias_index"][alias]
  464. )
  465. if not exists:
  466. self._state["alias_index"][alias].append({
  467. "type": entity_type,
  468. "id": entity_id
  469. })
  470. # 记录待合并增量:避免锁外读-改-写覆盖
  471. pending = self._pending_alias_entries.setdefault(alias, [])
  472. if not any(e.get("type") == entity_type and e.get("id") == entity_id for e in pending):
  473. pending.append({"type": entity_type, "id": entity_id})
  474. def update_entity(self, entity_id: str, updates: Dict[str, Any], entity_type: str = None) -> bool:
  475. """更新实体属性 (v5.0)"""
  476. # 查找实体
  477. if entity_type:
  478. if entity_id not in self._state.get("entities_v3", {}).get(entity_type, {}):
  479. return False
  480. entity = self._state["entities_v3"][entity_type][entity_id]
  481. else:
  482. entity_type = self.get_entity_type(entity_id)
  483. if not entity_type:
  484. return False
  485. entity = self._state["entities_v3"][entity_type][entity_id]
  486. for key, value in updates.items():
  487. if key == "attributes" and isinstance(value, dict):
  488. # v5.0: attributes 存在 current 字段
  489. if "current" not in entity:
  490. entity["current"] = {}
  491. entity["current"].update(value)
  492. # 记录补丁(current 增量)
  493. patch = self._pending_entity_patches.get((entity_type, entity_id))
  494. if patch is None:
  495. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  496. self._pending_entity_patches[(entity_type, entity_id)] = patch
  497. patch.current_updates.update(value)
  498. elif key == "current" and isinstance(value, dict):
  499. if "current" not in entity:
  500. entity["current"] = {}
  501. entity["current"].update(value)
  502. patch = self._pending_entity_patches.get((entity_type, entity_id))
  503. if patch is None:
  504. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  505. self._pending_entity_patches[(entity_type, entity_id)] = patch
  506. patch.current_updates.update(value)
  507. else:
  508. entity[key] = value
  509. patch = self._pending_entity_patches.get((entity_type, entity_id))
  510. if patch is None:
  511. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  512. self._pending_entity_patches[(entity_type, entity_id)] = patch
  513. patch.top_updates[key] = value
  514. return True
  515. def update_entity_appearance(self, entity_id: str, chapter: int, entity_type: str = None):
  516. """更新实体出场章节"""
  517. if not entity_type:
  518. entity_type = self.get_entity_type(entity_id)
  519. if not entity_type:
  520. return
  521. entity = self._state["entities_v3"][entity_type].get(entity_id)
  522. if entity:
  523. if entity.get("first_appearance", 0) == 0:
  524. entity["first_appearance"] = chapter
  525. entity["last_appearance"] = chapter
  526. # 记录补丁:锁内应用 first=min(non-zero), last=max
  527. patch = self._pending_entity_patches.get((entity_type, entity_id))
  528. if patch is None:
  529. patch = _EntityPatch(entity_type=entity_type, entity_id=entity_id)
  530. self._pending_entity_patches[(entity_type, entity_id)] = patch
  531. if patch.appearance_chapter is None:
  532. patch.appearance_chapter = chapter
  533. else:
  534. patch.appearance_chapter = max(int(patch.appearance_chapter), int(chapter))
  535. # ==================== 状态变化记录 ====================
  536. def record_state_change(
  537. self,
  538. entity_id: str,
  539. field: str,
  540. old_value: Any,
  541. new_value: Any,
  542. reason: str,
  543. chapter: int
  544. ):
  545. """记录状态变化"""
  546. if "state_changes" not in self._state:
  547. self._state["state_changes"] = []
  548. change = StateChange(
  549. entity_id=entity_id,
  550. field=field,
  551. old_value=old_value,
  552. new_value=new_value,
  553. reason=reason,
  554. chapter=chapter
  555. )
  556. change_dict = asdict(change)
  557. self._state["state_changes"].append(change_dict)
  558. self._pending_state_changes.append(change_dict)
  559. # 同时更新实体属性
  560. self.update_entity(entity_id, {"attributes": {field: new_value}})
  561. def get_state_changes(self, entity_id: Optional[str] = None) -> List[Dict]:
  562. """获取状态变化历史"""
  563. changes = self._state.get("state_changes", [])
  564. if entity_id:
  565. changes = [c for c in changes if c.get("entity_id") == entity_id]
  566. return changes
  567. # ==================== 关系管理 ====================
  568. def add_relationship(
  569. self,
  570. from_entity: str,
  571. to_entity: str,
  572. rel_type: str,
  573. description: str,
  574. chapter: int
  575. ):
  576. """添加关系"""
  577. rel = Relationship(
  578. from_entity=from_entity,
  579. to_entity=to_entity,
  580. type=rel_type,
  581. description=description,
  582. chapter=chapter
  583. )
  584. # v5.0: 实体关系存入 structured_relationships,避免与 relationships(人物关系字典) 冲突
  585. if "structured_relationships" not in self._state:
  586. self._state["structured_relationships"] = []
  587. rel_dict = asdict(rel)
  588. self._state["structured_relationships"].append(rel_dict)
  589. self._pending_structured_relationships.append(rel_dict)
  590. def get_relationships(self, entity_id: Optional[str] = None) -> List[Dict]:
  591. """获取关系列表"""
  592. rels = self._state.get("structured_relationships", [])
  593. if entity_id:
  594. rels = [
  595. r for r in rels
  596. if r.get("from_entity") == entity_id or r.get("to_entity") == entity_id
  597. ]
  598. return rels
  599. # ==================== 批量操作 ====================
  600. def _record_disambiguation(self, chapter: int, uncertain_items: Any) -> List[str]:
  601. """
  602. 记录消歧反馈到 state.json,便于 Writer/Context Agent 感知风险。
  603. 约定:
  604. - >= extraction_confidence_medium:写入 disambiguation_warnings(采用但警告)
  605. - < extraction_confidence_medium:写入 disambiguation_pending(需人工确认)
  606. """
  607. if not isinstance(uncertain_items, list) or not uncertain_items:
  608. return []
  609. warnings: List[str] = []
  610. now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  611. for item in uncertain_items:
  612. if not isinstance(item, dict):
  613. continue
  614. mention = str(item.get("mention", "") or "").strip()
  615. if not mention:
  616. continue
  617. raw_conf = item.get("confidence", 0.0)
  618. try:
  619. confidence = float(raw_conf)
  620. except (TypeError, ValueError):
  621. confidence = 0.0
  622. # 候选:支持 [{"type","id"}...] 或 ["id1","id2"] 两种形式
  623. candidates_raw = item.get("candidates", [])
  624. candidates: List[Dict[str, str]] = []
  625. if isinstance(candidates_raw, list):
  626. for c in candidates_raw:
  627. if isinstance(c, dict):
  628. cid = str(c.get("id", "") or "").strip()
  629. ctype = str(c.get("type", "") or "").strip()
  630. entry: Dict[str, str] = {}
  631. if ctype:
  632. entry["type"] = ctype
  633. if cid:
  634. entry["id"] = cid
  635. if entry:
  636. candidates.append(entry)
  637. else:
  638. cid = str(c).strip()
  639. if cid:
  640. candidates.append({"id": cid})
  641. entity_type = str(item.get("type", "") or "").strip()
  642. suggested_id = str(item.get("suggested", "") or "").strip()
  643. adopted_raw = item.get("adopted", None)
  644. chosen_id = ""
  645. if isinstance(adopted_raw, str):
  646. chosen_id = adopted_raw.strip()
  647. elif adopted_raw is True:
  648. chosen_id = suggested_id
  649. else:
  650. # 兼容字段名:entity_id / chosen_id
  651. chosen_id = str(item.get("entity_id") or item.get("chosen_id") or "").strip() or suggested_id
  652. context = str(item.get("context", "") or "").strip()
  653. note = str(item.get("warning", "") or "").strip()
  654. record: Dict[str, Any] = {
  655. "chapter": int(chapter),
  656. "mention": mention,
  657. "type": entity_type,
  658. "suggested_id": suggested_id,
  659. "chosen_id": chosen_id,
  660. "confidence": confidence,
  661. "candidates": candidates,
  662. "context": context,
  663. "note": note,
  664. "created_at": now,
  665. }
  666. if confidence >= float(self.config.extraction_confidence_medium):
  667. self._state.setdefault("disambiguation_warnings", []).append(record)
  668. self._pending_disambiguation_warnings.append(record)
  669. chosen_part = f" → {chosen_id}" if chosen_id else ""
  670. warnings.append(f"消歧警告: {mention}{chosen_part} (confidence: {confidence:.2f})")
  671. else:
  672. self._state.setdefault("disambiguation_pending", []).append(record)
  673. self._pending_disambiguation_pending.append(record)
  674. warnings.append(f"消歧需人工确认: {mention} (confidence: {confidence:.2f})")
  675. return warnings
  676. def process_chapter_result(self, chapter: int, result: Dict) -> List[str]:
  677. """
  678. 处理 Data Agent 的章节处理结果 (v5.0)
  679. 输入格式:
  680. - entities_appeared: 出场实体列表
  681. - entities_new: 新实体列表
  682. - state_changes: 状态变化列表
  683. - relationships_new: 新关系列表
  684. 返回警告列表
  685. """
  686. warnings = []
  687. # 处理出场实体
  688. for entity in result.get("entities_appeared", []):
  689. entity_id = entity.get("id")
  690. entity_type = entity.get("type")
  691. if entity_id:
  692. self.update_entity_appearance(entity_id, chapter, entity_type)
  693. # 处理新实体
  694. for entity in result.get("entities_new", []):
  695. entity_id = entity.get("suggested_id") or entity.get("id")
  696. if entity_id and entity_id != "NEW":
  697. new_entity = EntityState(
  698. id=entity_id,
  699. name=entity.get("name", ""),
  700. type=entity.get("type", "角色"),
  701. tier=entity.get("tier", "装饰"),
  702. aliases=entity.get("mentions", []),
  703. first_appearance=chapter,
  704. last_appearance=chapter
  705. )
  706. if not self.add_entity(new_entity):
  707. warnings.append(f"实体已存在: {entity_id}")
  708. # 处理状态变化
  709. for change in result.get("state_changes", []):
  710. self.record_state_change(
  711. entity_id=change.get("entity_id", ""),
  712. field=change.get("field", ""),
  713. old_value=change.get("old"),
  714. new_value=change.get("new"),
  715. reason=change.get("reason", ""),
  716. chapter=chapter
  717. )
  718. # 处理关系
  719. for rel in result.get("relationships_new", []):
  720. self.add_relationship(
  721. from_entity=rel.get("from", ""),
  722. to_entity=rel.get("to", ""),
  723. rel_type=rel.get("type", ""),
  724. description=rel.get("description", ""),
  725. chapter=chapter
  726. )
  727. # 处理消歧不确定项(不影响实体写入,但必须对 Writer 可见)
  728. warnings.extend(self._record_disambiguation(chapter, result.get("uncertain", [])))
  729. # 更新进度
  730. self.update_progress(chapter)
  731. # 同步主角状态(entities_v3 → protagonist_state)
  732. self.sync_protagonist_from_entity()
  733. return warnings
  734. # ==================== 导出 ====================
  735. def export_for_context(self) -> Dict:
  736. """导出用于上下文的精简版状态 (v5.0)"""
  737. # 从 entities_v3 构建精简视图
  738. entities_flat = {}
  739. for type_name, entities in self._state.get("entities_v3", {}).items():
  740. for eid, e in entities.items():
  741. entities_flat[eid] = {
  742. "name": e.get("canonical_name", eid),
  743. "type": type_name,
  744. "tier": e.get("tier", "装饰"),
  745. "current": e.get("current", {})
  746. }
  747. return {
  748. "progress": self._state.get("progress", {}),
  749. "entities": entities_flat,
  750. "alias_index": self._state.get("alias_index", {}),
  751. "recent_changes": self._state.get("state_changes", [])[-self.config.export_recent_changes_slice:],
  752. "disambiguation": {
  753. "warnings": self._state.get("disambiguation_warnings", [])[-self.config.export_disambiguation_slice:],
  754. "pending": self._state.get("disambiguation_pending", [])[-self.config.export_disambiguation_slice:],
  755. },
  756. }
  757. # ==================== 主角同步 ====================
  758. def get_protagonist_entity_id(self) -> Optional[str]:
  759. """获取主角实体 ID(通过 is_protagonist 标记或 protagonist_state.name 查找)"""
  760. # 方式1: 查找 is_protagonist 标记
  761. for eid, e in self._state.get("entities_v3", {}).get("角色", {}).items():
  762. if e.get("is_protagonist"):
  763. return eid
  764. # 方式2: 通过 protagonist_state.name 查找
  765. protag_name = self._state.get("protagonist_state", {}).get("name")
  766. if protag_name:
  767. alias_entries = self._state.get("alias_index", {}).get(protag_name, [])
  768. for entry in alias_entries:
  769. if entry.get("type") == "角色":
  770. return entry.get("id")
  771. return None
  772. def sync_protagonist_from_entity(self, entity_id: str = None):
  773. """
  774. 将 entities_v3 中主角实体的状态同步到 protagonist_state
  775. 用于确保 consistency-checker 等依赖 protagonist_state 的组件获取最新数据
  776. """
  777. if entity_id is None:
  778. entity_id = self.get_protagonist_entity_id()
  779. if entity_id is None:
  780. return
  781. entity = self.get_entity(entity_id, "角色")
  782. if not entity:
  783. return
  784. current = entity.get("current", {})
  785. protag = self._state.setdefault("protagonist_state", {})
  786. # 同步境界
  787. if "realm" in current:
  788. power = protag.setdefault("power", {})
  789. power["realm"] = current["realm"]
  790. if "layer" in current:
  791. power["layer"] = current["layer"]
  792. # 同步位置
  793. if "location" in current:
  794. loc = protag.setdefault("location", {})
  795. loc["current"] = current["location"]
  796. if "last_chapter" in current:
  797. loc["last_chapter"] = current["last_chapter"]
  798. def sync_protagonist_to_entity(self, entity_id: str = None):
  799. """
  800. 将 protagonist_state 同步到 entities_v3 中的主角实体
  801. 用于初始化或手动编辑 protagonist_state 后保持一致性
  802. """
  803. if entity_id is None:
  804. entity_id = self.get_protagonist_entity_id()
  805. if entity_id is None:
  806. return
  807. protag = self._state.get("protagonist_state", {})
  808. if not protag:
  809. return
  810. updates = {}
  811. # 同步境界
  812. power = protag.get("power", {})
  813. if power.get("realm"):
  814. updates["realm"] = power["realm"]
  815. if power.get("layer"):
  816. updates["layer"] = power["layer"]
  817. # 同步位置
  818. loc = protag.get("location", {})
  819. if loc.get("current"):
  820. updates["location"] = loc["current"]
  821. if updates:
  822. self.update_entity(entity_id, updates, "角色")
  823. # ==================== CLI 接口 ====================
  824. def main():
  825. import argparse
  826. parser = argparse.ArgumentParser(description="State Manager CLI")
  827. parser.add_argument("--project-root", type=str, help="项目根目录")
  828. subparsers = parser.add_subparsers(dest="command")
  829. # 获取进度
  830. subparsers.add_parser("get-progress")
  831. # 获取实体
  832. get_entity_parser = subparsers.add_parser("get-entity")
  833. get_entity_parser.add_argument("--id", required=True, help="实体ID")
  834. # 列出实体
  835. list_parser = subparsers.add_parser("list-entities")
  836. list_parser.add_argument("--type", help="按类型过滤")
  837. list_parser.add_argument("--tier", help="按层级过滤")
  838. # 处理章节结果
  839. process_parser = subparsers.add_parser("process-chapter")
  840. process_parser.add_argument("--chapter", type=int, required=True, help="章节号")
  841. process_parser.add_argument("--data", required=True, help="JSON 格式的处理结果")
  842. args = parser.parse_args()
  843. # 初始化
  844. config = None
  845. if args.project_root:
  846. from .config import DataModulesConfig
  847. config = DataModulesConfig.from_project_root(args.project_root)
  848. manager = StateManager(config)
  849. if args.command == "get-progress":
  850. print(json.dumps(manager._state.get("progress", {}), ensure_ascii=False, indent=2))
  851. elif args.command == "get-entity":
  852. entity = manager.get_entity(args.id)
  853. if entity:
  854. print(json.dumps(entity, ensure_ascii=False, indent=2))
  855. else:
  856. print(f"未找到实体: {args.id}")
  857. elif args.command == "list-entities":
  858. if args.type:
  859. entities = manager.get_entities_by_type(args.type)
  860. elif args.tier:
  861. entities = manager.get_entities_by_tier(args.tier)
  862. else:
  863. entities = manager.get_all_entities()
  864. for eid, e in entities.items():
  865. print(f"{eid}: {e.get('name')} ({e.get('type')}/{e.get('tier')})")
  866. elif args.command == "process-chapter":
  867. data = json.loads(args.data)
  868. warnings = manager.process_chapter_result(args.chapter, data)
  869. manager.save_state()
  870. print(f"✓ 已处理第 {args.chapter} 章")
  871. if warnings:
  872. print("警告:")
  873. for w in warnings:
  874. print(f" - {w}")
  875. if __name__ == "__main__":
  876. main()