index_chapter_mixin.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
IndexChapterMixin extracted from IndexManager.

Provides the chapter, scene and appearance-record operations of the index.
"""
  6. from __future__ import annotations
  7. import json
  8. from datetime import datetime
  9. from typing import Any, Dict, List, Optional
  10. class IndexChapterMixin:
  11. def add_chapter(self, meta: ChapterMeta):
  12. """添加/更新章节元数据"""
  13. with self._get_conn() as conn:
  14. cursor = conn.cursor()
  15. cursor.execute(
  16. """
  17. INSERT OR REPLACE INTO chapters
  18. (chapter, title, location, word_count, characters, summary)
  19. VALUES (?, ?, ?, ?, ?, ?)
  20. """,
  21. (
  22. meta.chapter,
  23. meta.title,
  24. meta.location,
  25. meta.word_count,
  26. json.dumps(meta.characters, ensure_ascii=False),
  27. meta.summary,
  28. ),
  29. )
  30. conn.commit()
  31. def get_chapter(self, chapter: int) -> Optional[Dict]:
  32. """获取章节元数据"""
  33. with self._get_conn() as conn:
  34. cursor = conn.cursor()
  35. cursor.execute("SELECT * FROM chapters WHERE chapter = ?", (chapter,))
  36. row = cursor.fetchone()
  37. if row:
  38. return self._row_to_dict(row, parse_json=["characters"])
  39. return None
  40. def get_recent_chapters(self, limit: int = None) -> List[Dict]:
  41. """获取最近章节"""
  42. if limit is None:
  43. limit = self.config.query_recent_chapters_limit
  44. with self._get_conn() as conn:
  45. cursor = conn.cursor()
  46. cursor.execute(
  47. """
  48. SELECT * FROM chapters
  49. ORDER BY chapter DESC
  50. LIMIT ?
  51. """,
  52. (limit,),
  53. )
  54. return [
  55. self._row_to_dict(row, parse_json=["characters"])
  56. for row in cursor.fetchall()
  57. ]
  58. # ==================== 场景操作 ====================
  59. def add_scenes(self, chapter: int, scenes: List[SceneMeta]):
  60. """添加章节场景"""
  61. with self._get_conn() as conn:
  62. cursor = conn.cursor()
  63. # 先删除该章节旧场景
  64. cursor.execute("DELETE FROM scenes WHERE chapter = ?", (chapter,))
  65. # 插入新场景
  66. for scene in scenes:
  67. cursor.execute(
  68. """
  69. INSERT INTO scenes
  70. (chapter, scene_index, start_line, end_line, location, summary, characters)
  71. VALUES (?, ?, ?, ?, ?, ?, ?)
  72. """,
  73. (
  74. scene.chapter,
  75. scene.scene_index,
  76. scene.start_line,
  77. scene.end_line,
  78. scene.location,
  79. scene.summary,
  80. json.dumps(scene.characters, ensure_ascii=False),
  81. ),
  82. )
  83. conn.commit()
  84. def get_scenes(self, chapter: int) -> List[Dict]:
  85. """获取章节场景"""
  86. with self._get_conn() as conn:
  87. cursor = conn.cursor()
  88. cursor.execute(
  89. """
  90. SELECT * FROM scenes
  91. WHERE chapter = ?
  92. ORDER BY scene_index
  93. """,
  94. (chapter,),
  95. )
  96. return [
  97. self._row_to_dict(row, parse_json=["characters"])
  98. for row in cursor.fetchall()
  99. ]
  100. def search_scenes_by_location(self, location: str, limit: int = None) -> List[Dict]:
  101. """按地点搜索场景"""
  102. if limit is None:
  103. limit = self.config.query_scenes_by_location_limit
  104. with self._get_conn() as conn:
  105. cursor = conn.cursor()
  106. cursor.execute(
  107. """
  108. SELECT * FROM scenes
  109. WHERE location LIKE ?
  110. ORDER BY chapter DESC
  111. LIMIT ?
  112. """,
  113. (f"%{location}%", limit),
  114. )
  115. return [
  116. self._row_to_dict(row, parse_json=["characters"])
  117. for row in cursor.fetchall()
  118. ]
  119. # ==================== 出场记录操作 ====================
  120. def record_appearance(
  121. self,
  122. entity_id: str,
  123. chapter: int,
  124. mentions: List[str],
  125. confidence: float = 1.0,
  126. skip_if_exists: bool = False,
  127. ):
  128. """记录实体出场
  129. Args:
  130. entity_id: 实体ID
  131. chapter: 章节号
  132. mentions: 提及列表
  133. confidence: 置信度
  134. skip_if_exists: 如果为True,当记录已存在时跳过(避免覆盖已有mentions)
  135. """
  136. with self._get_conn() as conn:
  137. cursor = conn.cursor()
  138. if skip_if_exists:
  139. # 先检查是否已存在
  140. cursor.execute(
  141. "SELECT 1 FROM appearances WHERE entity_id = ? AND chapter = ?",
  142. (entity_id, chapter),
  143. )
  144. if cursor.fetchone():
  145. return # 已存在,跳过
  146. cursor.execute(
  147. """
  148. INSERT OR REPLACE INTO appearances
  149. (entity_id, chapter, mentions, confidence)
  150. VALUES (?, ?, ?, ?)
  151. """,
  152. (
  153. entity_id,
  154. chapter,
  155. json.dumps(mentions, ensure_ascii=False),
  156. confidence,
  157. ),
  158. )
  159. conn.commit()
  160. def get_entity_appearances(self, entity_id: str, limit: int = None) -> List[Dict]:
  161. """获取实体出场记录"""
  162. if limit is None:
  163. limit = self.config.query_entity_appearances_limit
  164. with self._get_conn() as conn:
  165. cursor = conn.cursor()
  166. cursor.execute(
  167. """
  168. SELECT * FROM appearances
  169. WHERE entity_id = ?
  170. ORDER BY chapter DESC
  171. LIMIT ?
  172. """,
  173. (entity_id, limit),
  174. )
  175. return [
  176. self._row_to_dict(row, parse_json=["mentions"])
  177. for row in cursor.fetchall()
  178. ]
  179. def get_recent_appearances(self, limit: int = None) -> List[Dict]:
  180. """获取最近出场的实体"""
  181. if limit is None:
  182. limit = self.config.query_recent_appearances_limit
  183. with self._get_conn() as conn:
  184. cursor = conn.cursor()
  185. cursor.execute(
  186. """
  187. SELECT entity_id, MAX(chapter) as last_chapter, COUNT(*) as total
  188. FROM appearances
  189. GROUP BY entity_id
  190. ORDER BY last_chapter DESC
  191. LIMIT ?
  192. """,
  193. (limit,),
  194. )
  195. return [dict(row) for row in cursor.fetchall()]
  196. def get_chapter_appearances(self, chapter: int) -> List[Dict]:
  197. """获取某章所有出场实体"""
  198. with self._get_conn() as conn:
  199. cursor = conn.cursor()
  200. cursor.execute(
  201. """
  202. SELECT * FROM appearances
  203. WHERE chapter = ?
  204. ORDER BY confidence DESC
  205. """,
  206. (chapter,),
  207. )
  208. return [
  209. self._row_to_dict(row, parse_json=["mentions"])
  210. for row in cursor.fetchall()
  211. ]
  212. # ==================== v5.1 实体操作 ====================
  213. def process_chapter_data(
  214. self,
  215. chapter: int,
  216. title: str,
  217. location: str,
  218. word_count: int,
  219. entities: List[Dict],
  220. scenes: List[Dict],
  221. ) -> Dict[str, int]:
  222. """
  223. 处理章节数据,批量写入索引
  224. 返回写入统计
  225. """
  226. from .index_manager import ChapterMeta, SceneMeta
  227. stats = {"chapters": 0, "scenes": 0, "appearances": 0}
  228. # 提取出场角色
  229. characters = [e.get("id") for e in entities if e.get("type") == "角色"]
  230. # 写入章节元数据
  231. self.add_chapter(
  232. ChapterMeta(
  233. chapter=chapter,
  234. title=title,
  235. location=location,
  236. word_count=word_count,
  237. characters=characters,
  238. summary="", # 可后续由 Data Agent 生成
  239. )
  240. )
  241. stats["chapters"] = 1
  242. # 写入场景
  243. scene_metas = []
  244. for s in scenes:
  245. scene_metas.append(
  246. SceneMeta(
  247. chapter=chapter,
  248. scene_index=s.get("index", 0),
  249. start_line=s.get("start_line", 0),
  250. end_line=s.get("end_line", 0),
  251. location=s.get("location", ""),
  252. summary=s.get("summary", ""),
  253. characters=s.get("characters", []),
  254. )
  255. )
  256. self.add_scenes(chapter, scene_metas)
  257. stats["scenes"] = len(scene_metas)
  258. # 写入出场记录
  259. for entity in entities:
  260. entity_id = entity.get("id")
  261. if entity_id and entity_id != "NEW":
  262. self.record_appearance(
  263. entity_id=entity_id,
  264. chapter=chapter,
  265. mentions=entity.get("mentions", []),
  266. confidence=entity.get("confidence", 1.0),
  267. )
  268. stats["appearances"] += 1
  269. return stats
  270. # ==================== 辅助方法 ====================