sql_state_manager.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. SQL State Manager - SQLite 状态管理模块 (v5.1)
  5. 基于 IndexManager 扩展,提供与 StateManager 兼容的高级接口,
  6. 将大数据(实体、别名、状态变化、关系)存储到 SQLite 而非 JSON。
  7. 目标:
  8. - 替代 state.json 中的大数据字段
  9. - 保持与 Data Agent / Context Agent 的接口兼容
  10. - 支持增量写入和按需查询
  11. """
  12. import json
  13. from typing import Dict, List, Optional, Any
  14. from dataclasses import dataclass, field
  15. from datetime import datetime
  16. from .index_manager import (
  17. IndexManager,
  18. EntityMeta,
  19. StateChangeMeta,
  20. RelationshipMeta
  21. )
  22. from .config import get_config
  23. @dataclass
  24. class EntityData:
  25. """实体数据(用于 Data Agent 输入)"""
  26. id: str
  27. type: str # 角色/地点/物品/势力/招式
  28. name: str
  29. tier: str = "装饰"
  30. desc: str = ""
  31. current: Dict[str, Any] = field(default_factory=dict)
  32. aliases: List[str] = field(default_factory=list)
  33. first_appearance: int = 0
  34. last_appearance: int = 0
  35. is_protagonist: bool = False
  36. class SQLStateManager:
  37. """
  38. SQLite 状态管理器 (v5.1)
  39. 提供与 StateManager 兼容的接口,但数据存储在 SQLite (index.db) 中。
  40. 用于替代 state.json 中膨胀的数据结构。
  41. 用法:
  42. ```python
  43. manager = SQLStateManager(config)
  44. # 写入实体
  45. manager.upsert_entity(EntityData(
  46. id="xiaoyan",
  47. type="角色",
  48. name="萧炎",
  49. tier="核心",
  50. current={"realm": "斗师", "location": "天云宗"},
  51. aliases=["小炎子", "废柴"],
  52. is_protagonist=True
  53. ))
  54. # 写入状态变化
  55. manager.record_state_change(
  56. entity_id="xiaoyan",
  57. field="realm",
  58. old_value="斗者",
  59. new_value="斗师",
  60. reason="闭关突破",
  61. chapter=100
  62. )
  63. # 写入关系
  64. manager.upsert_relationship(
  65. from_entity="xiaoyan",
  66. to_entity="yaolao",
  67. type="师徒",
  68. description="药老收萧炎为徒",
  69. chapter=5
  70. )
  71. # 读取
  72. protagonist = manager.get_protagonist()
  73. core_entities = manager.get_core_entities()
  74. changes = manager.get_recent_state_changes(limit=50)
  75. ```
  76. """
  77. # v5.0 支持的实体类型
  78. ENTITY_TYPES = ["角色", "地点", "物品", "势力", "招式"]
  79. def __init__(self, config=None):
  80. self.config = config or get_config()
  81. self._index_manager = IndexManager(config)
  82. # ==================== 实体操作 ====================
  83. def upsert_entity(self, entity: EntityData) -> bool:
  84. """
  85. 插入或更新实体
  86. 自动处理:
  87. - 实体基本信息写入 entities 表
  88. - 别名写入 aliases 表
  89. - canonical_name 自动添加为别名
  90. 返回: 是否为新实体
  91. """
  92. # 构建 EntityMeta
  93. meta = EntityMeta(
  94. id=entity.id,
  95. type=entity.type,
  96. canonical_name=entity.name,
  97. tier=entity.tier,
  98. desc=entity.desc,
  99. current=entity.current,
  100. first_appearance=entity.first_appearance,
  101. last_appearance=entity.last_appearance,
  102. is_protagonist=entity.is_protagonist,
  103. is_archived=False
  104. )
  105. is_new = self._index_manager.upsert_entity(meta)
  106. # 注册别名
  107. # 1. canonical_name 本身作为别名
  108. self._index_manager.register_alias(entity.name, entity.id, entity.type)
  109. # 2. 其他别名
  110. for alias in entity.aliases:
  111. if alias and alias != entity.name:
  112. self._index_manager.register_alias(alias, entity.id, entity.type)
  113. return is_new
  114. def get_entity(self, entity_id: str) -> Optional[Dict]:
  115. """获取实体详情"""
  116. entity = self._index_manager.get_entity(entity_id)
  117. if entity:
  118. # 添加别名
  119. entity["aliases"] = self._index_manager.get_entity_aliases(entity_id)
  120. return entity
  121. def get_entities_by_type(self, entity_type: str, include_archived: bool = False) -> List[Dict]:
  122. """按类型获取实体"""
  123. entities = self._index_manager.get_entities_by_type(entity_type, include_archived)
  124. for e in entities:
  125. e["aliases"] = self._index_manager.get_entity_aliases(e["id"])
  126. return entities
  127. def get_core_entities(self) -> List[Dict]:
  128. """
  129. 获取核心实体(用于 Context Agent 全量加载)
  130. 返回所有 tier=核心/重要/次要 或 is_protagonist=1 的实体
  131. """
  132. entities = self._index_manager.get_core_entities()
  133. for e in entities:
  134. e["aliases"] = self._index_manager.get_entity_aliases(e["id"])
  135. return entities
  136. def get_protagonist(self) -> Optional[Dict]:
  137. """获取主角实体"""
  138. protagonist = self._index_manager.get_protagonist()
  139. if protagonist:
  140. protagonist["aliases"] = self._index_manager.get_entity_aliases(protagonist["id"])
  141. return protagonist
  142. def update_entity_current(self, entity_id: str, updates: Dict) -> bool:
  143. """增量更新实体的 current 字段"""
  144. return self._index_manager.update_entity_current(entity_id, updates)
  145. def resolve_alias(self, alias: str) -> List[Dict]:
  146. """
  147. 根据别名解析实体(一对多)
  148. 返回所有匹配的实体
  149. """
  150. return self._index_manager.get_entities_by_alias(alias)
  151. def register_alias(self, alias: str, entity_id: str, entity_type: str) -> bool:
  152. """注册别名"""
  153. return self._index_manager.register_alias(alias, entity_id, entity_type)
  154. # ==================== 状态变化操作 ====================
  155. def record_state_change(
  156. self,
  157. entity_id: str,
  158. field: str,
  159. old_value: Any,
  160. new_value: Any,
  161. reason: str,
  162. chapter: int
  163. ) -> int:
  164. """
  165. 记录状态变化
  166. 返回: 记录 ID
  167. """
  168. change = StateChangeMeta(
  169. entity_id=entity_id,
  170. field=field,
  171. old_value=str(old_value) if old_value is not None else "",
  172. new_value=str(new_value),
  173. reason=reason,
  174. chapter=chapter
  175. )
  176. return self._index_manager.record_state_change(change)
  177. def get_entity_state_changes(self, entity_id: str, limit: int = 20) -> List[Dict]:
  178. """获取实体的状态变化历史"""
  179. return self._index_manager.get_entity_state_changes(entity_id, limit)
  180. def get_recent_state_changes(self, limit: int = 50) -> List[Dict]:
  181. """获取最近的状态变化"""
  182. return self._index_manager.get_recent_state_changes(limit)
  183. def get_chapter_state_changes(self, chapter: int) -> List[Dict]:
  184. """获取某章的所有状态变化"""
  185. return self._index_manager.get_chapter_state_changes(chapter)
  186. # ==================== 关系操作 ====================
  187. def upsert_relationship(
  188. self,
  189. from_entity: str,
  190. to_entity: str,
  191. type: str,
  192. description: str,
  193. chapter: int
  194. ) -> bool:
  195. """
  196. 插入或更新关系
  197. 返回: 是否为新关系
  198. """
  199. rel = RelationshipMeta(
  200. from_entity=from_entity,
  201. to_entity=to_entity,
  202. type=type,
  203. description=description,
  204. chapter=chapter
  205. )
  206. return self._index_manager.upsert_relationship(rel)
  207. def get_entity_relationships(self, entity_id: str, direction: str = "both") -> List[Dict]:
  208. """获取实体的关系"""
  209. return self._index_manager.get_entity_relationships(entity_id, direction)
  210. def get_relationship_between(self, entity1: str, entity2: str) -> List[Dict]:
  211. """获取两个实体之间的所有关系"""
  212. return self._index_manager.get_relationship_between(entity1, entity2)
  213. def get_recent_relationships(self, limit: int = 30) -> List[Dict]:
  214. """获取最近建立的关系"""
  215. return self._index_manager.get_recent_relationships(limit)
  216. # ==================== 批量写入(供 Data Agent 使用) ====================
  217. def process_chapter_entities(
  218. self,
  219. chapter: int,
  220. entities_appeared: List[Dict],
  221. entities_new: List[Dict],
  222. state_changes: List[Dict],
  223. relationships_new: List[Dict]
  224. ) -> Dict[str, int]:
  225. """
  226. 处理章节的实体数据(Data Agent 主入口)
  227. 参数:
  228. - chapter: 章节号
  229. - entities_appeared: 出场的已有实体
  230. [{"id": "xiaoyan", "type": "角色", "mentions": ["萧炎", "他"], "confidence": 0.95}]
  231. - entities_new: 新发现的实体
  232. [{"suggested_id": "hongyi_girl", "name": "红衣女子", "type": "角色", "tier": "装饰"}]
  233. - state_changes: 状态变化
  234. [{"entity_id": "xiaoyan", "field": "realm", "old": "斗者", "new": "斗师", "reason": "突破"}]
  235. - relationships_new: 新关系
  236. [{"from": "xiaoyan", "to": "hongyi_girl", "type": "相识", "description": "初次见面"}]
  237. 返回: 写入统计
  238. """
  239. stats = {
  240. "entities_updated": 0,
  241. "entities_created": 0,
  242. "state_changes": 0,
  243. "relationships": 0,
  244. "aliases": 0
  245. }
  246. # 1. 处理出场实体(更新 last_appearance)
  247. for entity in entities_appeared:
  248. entity_id = entity.get("id")
  249. if not entity_id:
  250. continue
  251. self._index_manager.update_entity_current(entity_id, {}) # 触发 updated_at
  252. # 更新 last_appearance
  253. existing = self._index_manager.get_entity(entity_id)
  254. if existing:
  255. # 使用 SQL 直接更新 last_appearance
  256. self._update_last_appearance(entity_id, chapter)
  257. stats["entities_updated"] += 1
  258. # 记录出场(保留原有逻辑)
  259. self._index_manager.record_appearance(
  260. entity_id=entity_id,
  261. chapter=chapter,
  262. mentions=entity.get("mentions", []),
  263. confidence=entity.get("confidence", 1.0)
  264. )
  265. # 2. 处理新实体
  266. for entity in entities_new:
  267. suggested_id = entity.get("suggested_id") or entity.get("id")
  268. if not suggested_id:
  269. continue
  270. entity_data = EntityData(
  271. id=suggested_id,
  272. type=entity.get("type", "角色"),
  273. name=entity.get("name", suggested_id),
  274. tier=entity.get("tier", "装饰"),
  275. desc=entity.get("desc", ""),
  276. current=entity.get("current", {}),
  277. aliases=entity.get("aliases", []),
  278. first_appearance=chapter,
  279. last_appearance=chapter,
  280. is_protagonist=entity.get("is_protagonist", False)
  281. )
  282. is_new = self.upsert_entity(entity_data)
  283. if is_new:
  284. stats["entities_created"] += 1
  285. else:
  286. stats["entities_updated"] += 1
  287. # 统计别名
  288. stats["aliases"] += 1 + len(entity_data.aliases)
  289. # 记录新实体的首次出场(解决 appearances 缺失问题)
  290. mentions = entity.get("mentions", [])
  291. if not mentions:
  292. mentions = [entity_data.name] # 至少包含实体名
  293. self._index_manager.record_appearance(
  294. entity_id=suggested_id,
  295. chapter=chapter,
  296. mentions=mentions,
  297. confidence=entity.get("confidence", 1.0)
  298. )
  299. # 3. 处理状态变化
  300. for change in state_changes:
  301. entity_id = change.get("entity_id")
  302. if not entity_id:
  303. continue
  304. self.record_state_change(
  305. entity_id=entity_id,
  306. field=change.get("field", ""),
  307. old_value=change.get("old", change.get("old_value", "")),
  308. new_value=change.get("new", change.get("new_value", "")),
  309. reason=change.get("reason", ""),
  310. chapter=chapter
  311. )
  312. stats["state_changes"] += 1
  313. # 同步更新实体的 current
  314. field_name = change.get("field")
  315. new_value = change.get("new", change.get("new_value"))
  316. # 注意:new_value 可能是 0/""/False 等 falsy 值,需要用 is not None 判断
  317. if field_name and new_value is not None:
  318. self._index_manager.update_entity_current(entity_id, {field_name: new_value})
  319. # 4. 处理新关系
  320. for rel in relationships_new:
  321. from_entity = rel.get("from", rel.get("from_entity"))
  322. to_entity = rel.get("to", rel.get("to_entity"))
  323. if not from_entity or not to_entity:
  324. continue
  325. self.upsert_relationship(
  326. from_entity=from_entity,
  327. to_entity=to_entity,
  328. type=rel.get("type", "相识"),
  329. description=rel.get("description", ""),
  330. chapter=chapter
  331. )
  332. stats["relationships"] += 1
  333. return stats
  334. def _update_last_appearance(self, entity_id: str, chapter: int):
  335. """更新实体的 last_appearance"""
  336. with self._index_manager._get_conn() as conn:
  337. cursor = conn.cursor()
  338. cursor.execute("""
  339. UPDATE entities SET
  340. last_appearance = MAX(last_appearance, ?),
  341. updated_at = CURRENT_TIMESTAMP
  342. WHERE id = ?
  343. """, (chapter, entity_id))
  344. conn.commit()
  345. # ==================== 统计 ====================
  346. def get_stats(self) -> Dict[str, int]:
  347. """获取统计信息"""
  348. return self._index_manager.get_stats()
  349. # ==================== 格式转换(兼容性) ====================
  350. def export_to_entities_v3_format(self) -> Dict[str, Dict[str, Dict]]:
  351. """
  352. 导出为 entities_v3 格式(用于兼容性)
  353. 返回: {"角色": {"xiaoyan": {...}}, "地点": {...}, ...}
  354. """
  355. result = {t: {} for t in self.ENTITY_TYPES}
  356. for entity_type in self.ENTITY_TYPES:
  357. entities = self.get_entities_by_type(entity_type, include_archived=True)
  358. for e in entities:
  359. entity_dict = {
  360. "canonical_name": e.get("canonical_name"),
  361. "name": e.get("canonical_name"), # 兼容性别名
  362. "tier": e.get("tier", "装饰"),
  363. "aliases": e.get("aliases", []),
  364. "desc": e.get("desc", ""),
  365. "current": e.get("current_json", {}),
  366. "history": [], # 历史记录需要从 state_changes 表查询
  367. "first_appearance": e.get("first_appearance", 0),
  368. "last_appearance": e.get("last_appearance", 0)
  369. }
  370. if e.get("is_protagonist"):
  371. entity_dict["is_protagonist"] = True
  372. result[entity_type][e["id"]] = entity_dict
  373. return result
  374. def export_to_alias_index_format(self) -> Dict[str, List[Dict[str, str]]]:
  375. """
  376. 导出为 alias_index 格式(用于兼容性)
  377. 返回: {"萧炎": [{"type": "角色", "id": "xiaoyan"}], ...}
  378. """
  379. result = {}
  380. with self._index_manager._get_conn() as conn:
  381. cursor = conn.cursor()
  382. cursor.execute("SELECT alias, entity_id, entity_type FROM aliases")
  383. for row in cursor.fetchall():
  384. alias = row["alias"]
  385. if alias not in result:
  386. result[alias] = []
  387. result[alias].append({
  388. "type": row["entity_type"],
  389. "id": row["entity_id"]
  390. })
  391. return result
  392. # ==================== CLI 接口 ====================
  393. def main():
  394. import argparse
  395. parser = argparse.ArgumentParser(description="SQL State Manager CLI (v5.1)")
  396. parser.add_argument("--project-root", type=str, help="项目根目录")
  397. subparsers = parser.add_subparsers(dest="command")
  398. # 获取统计
  399. subparsers.add_parser("stats")
  400. # 获取主角
  401. subparsers.add_parser("get-protagonist")
  402. # 获取核心实体
  403. subparsers.add_parser("get-core-entities")
  404. # 导出 entities_v3 格式
  405. subparsers.add_parser("export-entities-v3")
  406. # 导出 alias_index 格式
  407. subparsers.add_parser("export-alias-index")
  408. # 处理章节数据
  409. process_parser = subparsers.add_parser("process-chapter")
  410. process_parser.add_argument("--chapter", type=int, required=True)
  411. process_parser.add_argument("--data", required=True, help="JSON 格式的章节数据")
  412. args = parser.parse_args()
  413. # 初始化
  414. config = None
  415. if args.project_root:
  416. from .config import DataModulesConfig
  417. config = DataModulesConfig.from_project_root(args.project_root)
  418. manager = SQLStateManager(config)
  419. if args.command == "stats":
  420. stats = manager.get_stats()
  421. print(json.dumps(stats, ensure_ascii=False, indent=2))
  422. elif args.command == "get-protagonist":
  423. protagonist = manager.get_protagonist()
  424. if protagonist:
  425. print(json.dumps(protagonist, ensure_ascii=False, indent=2))
  426. else:
  427. print("未设置主角")
  428. elif args.command == "get-core-entities":
  429. entities = manager.get_core_entities()
  430. print(json.dumps(entities, ensure_ascii=False, indent=2))
  431. elif args.command == "export-entities-v3":
  432. data = manager.export_to_entities_v3_format()
  433. print(json.dumps(data, ensure_ascii=False, indent=2))
  434. elif args.command == "export-alias-index":
  435. data = manager.export_to_alias_index_format()
  436. print(json.dumps(data, ensure_ascii=False, indent=2))
  437. elif args.command == "process-chapter":
  438. data = json.loads(args.data)
  439. stats = manager.process_chapter_entities(
  440. chapter=args.chapter,
  441. entities_appeared=data.get("entities_appeared", []),
  442. entities_new=data.get("entities_new", []),
  443. state_changes=data.get("state_changes", []),
  444. relationships_new=data.get("relationships_new", [])
  445. )
  446. print(f"✓ 已处理第 {args.chapter} 章")
  447. print(json.dumps(stats, ensure_ascii=False, indent=2))
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()