index_projection_writer.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. from __future__ import annotations
  4. import json
  5. import re
  6. from pathlib import Path
  7. from typing import Any
  8. from .commit_artifacts import extraction_dict, extraction_list, extraction_text
  9. from .config import DataModulesConfig
  10. from .index_manager import ChapterMeta, IndexManager, SceneMeta, StateChangeMeta
  11. try:
  12. from chapter_paths import find_chapter_file
  13. except ImportError: # pragma: no cover
  14. from scripts.chapter_paths import find_chapter_file
  15. class IndexProjectionWriter:
  16. def __init__(self, project_root: Path):
  17. self.project_root = Path(project_root)
  18. def apply(self, commit_payload: dict) -> dict:
  19. if commit_payload["meta"]["status"] != "accepted":
  20. return {"applied": False, "writer": "index", "reason": "commit_rejected"}
  21. manager = IndexManager(DataModulesConfig.from_project_root(self.project_root))
  22. applied_count = 0
  23. chapter_applied = self._upsert_chapter(manager, commit_payload)
  24. if chapter_applied:
  25. applied_count += 1
  26. scenes_count = self._apply_scenes(manager, commit_payload)
  27. applied_count += scenes_count
  28. appearances_count = self._apply_appearances(manager, commit_payload)
  29. applied_count += appearances_count
  30. state_changes_count = self._apply_state_changes(manager, commit_payload)
  31. applied_count += state_changes_count
  32. entity_delta_count = 0
  33. for delta in self._collect_entity_deltas(commit_payload):
  34. result = manager.apply_entity_delta(delta)
  35. if result:
  36. entity_delta_count += 1
  37. applied_count += 1
  38. return {
  39. "applied": applied_count > 0,
  40. "writer": "index",
  41. "applied_count": applied_count,
  42. "chapters": 1 if chapter_applied else 0,
  43. "scenes": scenes_count,
  44. "appearances": appearances_count,
  45. "state_changes": state_changes_count,
  46. "entity_deltas": entity_delta_count,
  47. }
  48. def _upsert_chapter(self, manager: IndexManager, commit_payload: dict) -> bool:
  49. chapter = int(commit_payload.get("meta", {}).get("chapter") or 0)
  50. if chapter <= 0:
  51. return False
  52. meta = extraction_dict(commit_payload, "chapter_meta")
  53. title = str(
  54. meta.get("title")
  55. or commit_payload.get("chapter_title")
  56. or self._title_from_chapter_file(chapter)
  57. or ""
  58. ).strip()
  59. location = str(meta.get("location") or commit_payload.get("location") or "").strip()
  60. summary = str(extraction_text(commit_payload, "summary_text") or meta.get("summary") or "").strip()
  61. word_count = self._safe_int(meta.get("word_count") or commit_payload.get("word_count"))
  62. if word_count <= 0:
  63. word_count = self._chapter_word_count(chapter)
  64. characters = meta.get("characters") or self._collect_character_ids(commit_payload)
  65. if not isinstance(characters, list):
  66. characters = []
  67. manager.add_chapter(
  68. ChapterMeta(
  69. chapter=chapter,
  70. title=title,
  71. location=location,
  72. word_count=word_count,
  73. characters=[str(c) for c in characters if str(c).strip()],
  74. summary=summary,
  75. )
  76. )
  77. return True
  78. def _apply_scenes(self, manager: IndexManager, commit_payload: dict) -> int:
  79. chapter = int(commit_payload.get("meta", {}).get("chapter") or 0)
  80. scenes = extraction_list(commit_payload, "scenes")
  81. if chapter <= 0 or not isinstance(scenes, list) or not scenes:
  82. return 0
  83. scene_metas: list[SceneMeta] = []
  84. for idx, scene in enumerate(scenes, start=1):
  85. if not isinstance(scene, dict):
  86. continue
  87. scene_index = self._safe_int(scene.get("scene_index") or scene.get("index") or idx)
  88. characters = scene.get("characters") or scene.get("character_ids") or []
  89. if not isinstance(characters, list):
  90. characters = []
  91. scene_metas.append(
  92. SceneMeta(
  93. chapter=chapter,
  94. scene_index=scene_index,
  95. start_line=self._safe_int(scene.get("start_line")),
  96. end_line=self._safe_int(scene.get("end_line")),
  97. location=str(scene.get("location") or "").strip(),
  98. summary=str(scene.get("summary") or scene.get("content") or "").strip(),
  99. characters=[str(c) for c in characters if str(c).strip()],
  100. )
  101. )
  102. if not scene_metas:
  103. return 0
  104. manager.add_scenes(chapter, scene_metas)
  105. return len(scene_metas)
  106. def _apply_appearances(self, manager: IndexManager, commit_payload: dict) -> int:
  107. chapter = int(commit_payload.get("meta", {}).get("chapter") or 0)
  108. entities = extraction_list(commit_payload, "entities_appeared")
  109. if chapter <= 0 or not isinstance(entities, list):
  110. return 0
  111. applied = 0
  112. for entity in entities:
  113. if not isinstance(entity, dict):
  114. continue
  115. entity_id = str(entity.get("id") or entity.get("entity_id") or "").strip()
  116. if not entity_id or entity_id == "NEW":
  117. continue
  118. mentions = entity.get("mentions") or []
  119. if isinstance(mentions, str):
  120. mentions = [mentions]
  121. if not isinstance(mentions, list):
  122. mentions = []
  123. manager.record_appearance(
  124. entity_id=entity_id,
  125. chapter=chapter,
  126. mentions=[str(m) for m in mentions if str(m).strip()],
  127. confidence=self._safe_float(entity.get("confidence"), 1.0),
  128. )
  129. applied += 1
  130. return applied
  131. def _apply_state_changes(self, manager: IndexManager, commit_payload: dict) -> int:
  132. applied = 0
  133. for change in self._collect_state_changes(commit_payload):
  134. entity_id = str(change.get("entity_id") or "").strip()
  135. field = str(change.get("field") or "").strip()
  136. chapter = self._safe_int(change.get("chapter") or commit_payload.get("meta", {}).get("chapter"))
  137. if not entity_id or not field or chapter <= 0:
  138. continue
  139. old_value = self._stringify(change.get("old"))
  140. new_value = self._stringify(change.get("new"))
  141. reason = str(change.get("reason") or "").strip()
  142. if self._state_change_exists(manager, entity_id, field, old_value, new_value, reason, chapter):
  143. continue
  144. manager.record_state_change(
  145. StateChangeMeta(
  146. entity_id=entity_id,
  147. field=field,
  148. old_value=old_value,
  149. new_value=new_value,
  150. reason=reason,
  151. chapter=chapter,
  152. )
  153. )
  154. applied += 1
  155. return applied
  156. def _collect_state_changes(self, commit_payload: dict) -> list[dict]:
  157. deltas = [
  158. self._normalize_state_delta(delta)
  159. for delta in extraction_list(commit_payload, "state_deltas")
  160. if isinstance(delta, dict)
  161. ]
  162. seen = {
  163. (
  164. str(delta.get("entity_id") or "").strip(),
  165. str(delta.get("field") or "").strip(),
  166. self._safe_int(delta.get("chapter") or commit_payload.get("meta", {}).get("chapter")),
  167. )
  168. for delta in deltas
  169. }
  170. for event in extraction_list(commit_payload, "accepted_events"):
  171. if not isinstance(event, dict):
  172. continue
  173. event_type = str(event.get("event_type") or "").strip()
  174. payload = dict(event.get("payload") or {})
  175. if event_type == "power_breakthrough":
  176. field = str(payload.get("field") or payload.get("field_path") or "realm").strip()
  177. elif event_type == "character_state_changed":
  178. field = str(payload.get("field") or payload.get("field_path") or "").strip()
  179. else:
  180. continue
  181. entity_id = str(payload.get("entity_id") or event.get("subject") or "").strip()
  182. chapter = self._safe_int(event.get("chapter") or commit_payload.get("meta", {}).get("chapter"))
  183. key = (entity_id, field, chapter)
  184. if not entity_id or not field or key in seen:
  185. continue
  186. seen.add(key)
  187. deltas.append(
  188. {
  189. "entity_id": entity_id,
  190. "field": field,
  191. "old": (
  192. payload.get("old")
  193. if "old" in payload
  194. else payload.get("from")
  195. if "from" in payload
  196. else payload.get("old_value")
  197. if "old_value" in payload
  198. else payload.get("previous_state")
  199. ),
  200. "new": (
  201. payload.get("new")
  202. if "new" in payload
  203. else payload.get("to")
  204. if "to" in payload
  205. else payload.get("new_value")
  206. if "new_value" in payload
  207. else payload.get("new_state")
  208. ),
  209. "reason": event_type,
  210. "chapter": chapter,
  211. }
  212. )
  213. return deltas
  214. def _normalize_state_delta(self, delta: dict) -> dict:
  215. result = dict(delta)
  216. if "field" not in result and "field_path" in result:
  217. result["field"] = result["field_path"]
  218. if "new" not in result and "new_value" in result:
  219. result["new"] = result["new_value"]
  220. if "old" not in result and "old_value" in result:
  221. result["old"] = result["old_value"]
  222. return result
  223. def _state_change_exists(
  224. self,
  225. manager: IndexManager,
  226. entity_id: str,
  227. field: str,
  228. old_value: str,
  229. new_value: str,
  230. reason: str,
  231. chapter: int,
  232. ) -> bool:
  233. with manager._get_conn() as conn:
  234. cursor = conn.cursor()
  235. cursor.execute(
  236. """
  237. SELECT 1 FROM state_changes
  238. WHERE entity_id = ?
  239. AND field = ?
  240. AND chapter = ?
  241. AND COALESCE(old_value, '') = ?
  242. AND COALESCE(new_value, '') = ?
  243. AND COALESCE(reason, '') = ?
  244. LIMIT 1
  245. """,
  246. (entity_id, field, chapter, old_value, new_value, reason),
  247. )
  248. return cursor.fetchone() is not None
  249. def _collect_character_ids(self, commit_payload: dict) -> list[str]:
  250. ids: list[str] = []
  251. for entity in extraction_list(commit_payload, "entities_appeared"):
  252. if not isinstance(entity, dict):
  253. continue
  254. entity_id = str(entity.get("id") or entity.get("entity_id") or "").strip()
  255. if entity_id and entity_id != "NEW":
  256. ids.append(entity_id)
  257. for delta in extraction_list(commit_payload, "entity_deltas"):
  258. if not isinstance(delta, dict):
  259. continue
  260. entity_id = str(delta.get("entity_id") or delta.get("id") or "").strip()
  261. entity_type = str(delta.get("type") or delta.get("entity_type") or "").strip()
  262. if entity_id and (not entity_type or entity_type == "角色"):
  263. ids.append(entity_id)
  264. return list(dict.fromkeys(ids))
  265. def _title_from_chapter_file(self, chapter: int) -> str:
  266. path = find_chapter_file(self.project_root, chapter)
  267. if path is None:
  268. return ""
  269. stem = path.stem
  270. match = re.match(r"第0*\d+章[-_ ]+(.+)$", stem)
  271. return match.group(1).strip() if match else ""
  272. def _chapter_word_count(self, chapter: int) -> int:
  273. path = find_chapter_file(self.project_root, chapter)
  274. if path is None:
  275. return 0
  276. try:
  277. text = path.read_text(encoding="utf-8")
  278. except OSError:
  279. return 0
  280. text = re.sub(r"```[\s\S]*?```", "", text)
  281. text = re.sub(r"^#+ .*$", "", text, flags=re.MULTILINE)
  282. text = re.sub(r"---", "", text)
  283. return len(text.strip())
  284. def _stringify(self, value: Any) -> str:
  285. if value is None:
  286. return ""
  287. if isinstance(value, (dict, list)):
  288. return json.dumps(value, ensure_ascii=False, sort_keys=True)
  289. return str(value)
  290. def _safe_int(self, value: object) -> int:
  291. try:
  292. return int(value or 0)
  293. except (TypeError, ValueError):
  294. return 0
  295. def _safe_float(self, value: object, default: float) -> float:
  296. try:
  297. return float(value)
  298. except (TypeError, ValueError):
  299. return default
  300. def _collect_entity_deltas(self, commit_payload: dict) -> list[dict]:
  301. deltas = [dict(delta) for delta in extraction_list(commit_payload, "entity_deltas") if isinstance(delta, dict)]
  302. for event in extraction_list(commit_payload, "accepted_events"):
  303. if not isinstance(event, dict):
  304. continue
  305. event_type = str(event.get("event_type") or "").strip()
  306. payload = dict(event.get("payload") or {})
  307. chapter = int(event.get("chapter") or commit_payload.get("meta", {}).get("chapter") or 0)
  308. if event_type == "relationship_changed":
  309. from_entity = str(payload.get("from_entity") or event.get("subject") or "").strip()
  310. to_entity = str(payload.get("to_entity") or payload.get("to") or "").strip()
  311. rel_type = str(
  312. payload.get("relationship_type")
  313. or payload.get("relation_type")
  314. or payload.get("type")
  315. or ""
  316. ).strip()
  317. if from_entity and to_entity and rel_type:
  318. deltas.append(
  319. {
  320. "from_entity": from_entity,
  321. "to_entity": to_entity,
  322. "relationship_type": rel_type,
  323. "description": str(payload.get("description") or "").strip(),
  324. "chapter": chapter,
  325. }
  326. )
  327. elif event_type == "artifact_obtained":
  328. entity_id = str(
  329. payload.get("artifact_id")
  330. or payload.get("entity_id")
  331. or payload.get("id")
  332. or event.get("subject")
  333. or ""
  334. ).strip()
  335. if not entity_id:
  336. continue
  337. current = {}
  338. owner = str(payload.get("owner") or payload.get("holder") or "").strip()
  339. location = str(payload.get("location") or "").strip()
  340. if owner:
  341. current["holder"] = owner
  342. if location:
  343. current["location"] = location
  344. deltas.append(
  345. {
  346. "entity_id": entity_id,
  347. "canonical_name": str(payload.get("name") or event.get("subject") or entity_id).strip(),
  348. "type": str(payload.get("type") or "物品").strip() or "物品",
  349. "current": current,
  350. "desc": str(payload.get("description") or "").strip(),
  351. "chapter": chapter,
  352. }
  353. )
  354. return deltas