structured_index.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261
  1. #!/usr/bin/env python3
  2. """
  3. 结构化索引系统(Structured Index System)v4.0
  4. ⚠️ DEPRECATED: 本模块已被 v5.1 index_manager 替代。
  5. - v5.1 使用不同的 schema(entities.id, aliases, current_json)
  6. - 本模块仅保留用于兼容旧项目迁移
  7. - 新项目请使用 data_modules.index_manager
  8. 目标:取代向量化检索,使用 SQLite 提供精确、快速的结构化查询
  9. v4.0 变更:
  10. - 新增 entities/entity_aliases/entity_kv/entity_history 表
  11. - 主键从 name 迁移到 entity_id
  12. - relationships 表使用 char1_id/char2_id
  13. - 不再写回 state.json(消除循环依赖)
  14. - 从 entities_v3 + alias_index 同步数据
  15. 核心功能:
  16. 1. 实体索引(entities, entity_aliases, entity_kv, entity_history)
  17. 2. 章节元数据索引(location, characters, word_count)
  18. 3. 伏笔追踪索引(status, urgency calculation)
  19. 4. 文件 Hash 自愈机制(auto-rebuild on change)
  20. 性能目标:
  21. - 查询速度:2-5ms(vs 文件遍历 500ms,提升 250x)
  22. - 索引构建:10ms/章(增量更新)
  23. - 存储开销:200 章 ≈ 100 KB
  24. 使用方式:
  25. # 更新单章索引
  26. python structured_index.py --update-chapter 7 --metadata-file /tmp/ch7.json
  27. # 批量重建索引(历史章节)
  28. python structured_index.py --rebuild-index
  29. # 查询地点相关章节
  30. python structured_index.py --query-location "血煞秘境"
  31. # 查询紧急伏笔
  32. python structured_index.py --query-urgent-foreshadowing
  33. # 模糊查询角色
  34. python structured_index.py --fuzzy-search "姓李" "女弟子"
  35. # 查看统计信息
  36. python structured_index.py --stats
  37. """
  38. import json
  39. import os
  40. import sys
  41. import argparse
  42. import sqlite3
  43. import hashlib
  44. import re
  45. import tempfile
  46. from datetime import datetime
  47. from pathlib import Path
  48. from typing import Optional, List, Dict, Tuple
  49. # ============================================================================
  50. # 安全修复:导入安全工具函数(P1 MEDIUM)
  51. # ============================================================================
  52. from security_utils import create_secure_directory
  53. from project_locator import resolve_project_root
  54. from chapter_paths import find_chapter_file
  55. class StructuredIndex:
  56. """结构化索引管理器(取代向量化检索)"""
  def __init__(self, project_root=None):
      """Open the project's SQLite index and make sure its schema exists.

      Args:
          project_root: Project directory. When None it is obtained from
              resolve_project_root(); if that raises FileNotFoundError the
              current working directory is used instead.
      """
      if project_root is not None:
          project_root = Path(project_root)
      else:
          try:
              project_root = resolve_project_root()
          except FileNotFoundError:
              project_root = Path.cwd()

      webnovel_dir = project_root / ".webnovel"
      self.project_root = project_root
      self.state_file = webnovel_dir / "state.json"
      self.chapters_dir = project_root / "正文"
      self.index_db = webnovel_dir / "index.db"

      # Security hardening (P1 MEDIUM): the directory used to be created with
      # mkdir(parents=True, exist_ok=True), which left permissions at the OS
      # default; the project helper is used instead.
      create_secure_directory(str(self.index_db.parent))

      # Rows are returned as sqlite3.Row so columns can be read by name.
      self.conn = sqlite3.connect(str(self.index_db))
      self.conn.row_factory = sqlite3.Row

      self._create_tables()
  80. def _create_tables(self):
  81. """创建索引表结构(v4.0 主键迁移到 entity_id)"""
  82. # ============== 新增实体表(v4.0)==============
  83. # 实体主表(取代旧 characters 表)
  84. self.conn.execute("""
  85. CREATE TABLE IF NOT EXISTS entities (
  86. entity_id TEXT PRIMARY KEY,
  87. entity_type TEXT NOT NULL,
  88. canonical_name TEXT,
  89. tier TEXT,
  90. desc TEXT,
  91. created_chapter INTEGER,
  92. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  93. )
  94. """)
  95. # 实体类型索引
  96. self.conn.execute("""
  97. CREATE INDEX IF NOT EXISTS idx_entity_type
  98. ON entities(entity_type)
  99. """)
  100. # 别名表(支持一对多查询)
  101. self.conn.execute("""
  102. CREATE TABLE IF NOT EXISTS entity_aliases (
  103. alias TEXT,
  104. entity_id TEXT,
  105. entity_type TEXT,
  106. first_seen_chapter INTEGER,
  107. context TEXT,
  108. PRIMARY KEY (alias, entity_id)
  109. )
  110. """)
  111. # 别名索引(加速反向查询)
  112. self.conn.execute("""
  113. CREATE INDEX IF NOT EXISTS idx_alias
  114. ON entity_aliases(alias)
  115. """)
  116. # 实体属性 KV 表
  117. self.conn.execute("""
  118. CREATE TABLE IF NOT EXISTS entity_kv (
  119. entity_id TEXT,
  120. key TEXT,
  121. value TEXT,
  122. last_chapter INTEGER,
  123. PRIMARY KEY (entity_id, key)
  124. )
  125. """)
  126. # 实体历史表
  127. self.conn.execute("""
  128. CREATE TABLE IF NOT EXISTS entity_history (
  129. id INTEGER PRIMARY KEY AUTOINCREMENT,
  130. entity_id TEXT,
  131. chapter INTEGER,
  132. changes_json TEXT,
  133. reasons_json TEXT,
  134. added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  135. )
  136. """)
  137. # 历史索引
  138. self.conn.execute("""
  139. CREATE INDEX IF NOT EXISTS idx_entity_history
  140. ON entity_history(entity_id, chapter)
  141. """)
  142. # ============== 章节元数据表 ==============
  143. # 1. 章节元数据表(v4.0: characters 改为存 entity_id 列表)
  144. self.conn.execute("""
  145. CREATE TABLE IF NOT EXISTS chapters (
  146. chapter_num INTEGER PRIMARY KEY,
  147. title TEXT,
  148. location TEXT,
  149. location_id TEXT,
  150. characters TEXT, -- JSON: ["entity_id_1", "entity_id_2"]
  151. word_count INTEGER,
  152. content_hash TEXT,
  153. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  154. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  155. )
  156. """)
  157. # 地点索引(加速查询)
  158. self.conn.execute("""
  159. CREATE INDEX IF NOT EXISTS idx_location
  160. ON chapters(location)
  161. """)
  162. # 2. 伏笔追踪表
  163. self.conn.execute("""
  164. CREATE TABLE IF NOT EXISTS foreshadowing_index (
  165. id INTEGER PRIMARY KEY,
  166. content TEXT,
  167. location TEXT,
  168. characters TEXT, -- JSON: ["李雪", "主角"]
  169. introduced_chapter INTEGER,
  170. resolved_chapter INTEGER,
  171. status TEXT, -- '未回收' / '已回收'
  172. urgency INTEGER DEFAULT 0, -- 0-100,自动计算
  173. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  174. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  175. )
  176. """)
  177. # 状态索引
  178. self.conn.execute("""
  179. CREATE INDEX IF NOT EXISTS idx_status
  180. ON foreshadowing_index(status)
  181. """)
  182. # 紧急度索引
  183. self.conn.execute("""
  184. CREATE INDEX IF NOT EXISTS idx_urgency
  185. ON foreshadowing_index(urgency)
  186. """)
  187. # 3. 角色关系表(v4.0: 使用 entity_id)
  188. self.conn.execute("""
  189. CREATE TABLE IF NOT EXISTS relationships (
  190. id INTEGER PRIMARY KEY AUTOINCREMENT,
  191. char1_id TEXT,
  192. char2_id TEXT,
  193. char1_name TEXT,
  194. char2_name TEXT,
  195. relation_type TEXT, -- 'ally', 'enemy', 'romance', 'mentor', 'debtor'
  196. intensity INTEGER, -- 关系强度 0-100
  197. description TEXT,
  198. last_update_chapter INTEGER,
  199. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  200. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  201. UNIQUE(char1_id, char2_id, relation_type) -- 防止重复
  202. )
  203. """)
  204. # 关系索引(v4.0: 使用 entity_id)
  205. self.conn.execute("""
  206. CREATE INDEX IF NOT EXISTS idx_char1_char2
  207. ON relationships(char1_id, char2_id)
  208. """)
  209. # 4. 角色索引表(v4.0 已废弃,保留兼容)
  210. # 新代码应使用 entities 表
  211. self.conn.execute("""
  212. CREATE TABLE IF NOT EXISTS characters (
  213. name TEXT PRIMARY KEY,
  214. description TEXT,
  215. personality TEXT,
  216. importance TEXT, -- 'major' / 'minor'
  217. power_level TEXT,
  218. first_appearance INTEGER,
  219. last_appearance INTEGER,
  220. status TEXT DEFAULT 'active', -- 'active' / 'archived'
  221. archived_at TEXT, -- ISO timestamp
  222. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  223. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  224. )
  225. """)
  226. # 角色名索引(加速模糊搜索)
  227. self.conn.execute("""
  228. CREATE INDEX IF NOT EXISTS idx_character_name
  229. ON characters(name)
  230. """)
  231. # 状态索引
  232. self.conn.execute("""
  233. CREATE INDEX IF NOT EXISTS idx_character_status
  234. ON characters(status)
  235. """)
  236. self.conn.commit()
  237. # ================== 核心功能 1:章节元数据索引 ==================
  238. def index_chapter(self, chapter_num: int, metadata: Dict):
  239. """为新章节建立索引(在 webnovel-write Step 4.6 调用)
  240. Args:
  241. chapter_num: 章节编号
  242. metadata: {
  243. 'title': '章节标题',
  244. 'location': '地点',
  245. 'characters': ['李雪', '主角'],
  246. 'word_count': 3500,
  247. 'hash': 'md5_hash'
  248. }
  249. """
  250. def _normalize_str_list(v) -> List[str]:
  251. if v is None:
  252. return []
  253. if isinstance(v, list):
  254. return [str(x).strip() for x in v if str(x).strip()]
  255. if isinstance(v, str):
  256. return [s.strip() for s in re.split(r"[,,]", v) if s.strip()]
  257. return [str(v).strip()] if str(v).strip() else []
  258. def _exists_entity(entity_id: str, entity_type: str) -> bool:
  259. row = self.conn.execute(
  260. "SELECT 1 FROM entities WHERE entity_id = ? AND entity_type = ? LIMIT 1",
  261. (entity_id, entity_type),
  262. ).fetchone()
  263. return bool(row)
  264. def _resolve_alias_ids(alias: str, entity_type: str) -> List[str]:
  265. rows = self.conn.execute(
  266. "SELECT entity_id FROM entity_aliases WHERE alias = ? AND entity_type = ?",
  267. (alias, entity_type),
  268. ).fetchall()
  269. return [r["entity_id"] for r in rows] if rows else []
  270. # v4.0: chapters.characters 存 entity_id 列表(metadata 允许传入 name/alias,索引层负责解析)
  271. resolved_character_ids: List[str] = []
  272. seen_ids = set()
  273. for ref in _normalize_str_list(metadata.get("characters", [])):
  274. if _exists_entity(ref, "角色"):
  275. if ref not in seen_ids:
  276. resolved_character_ids.append(ref)
  277. seen_ids.add(ref)
  278. continue
  279. candidates = _resolve_alias_ids(ref, "角色")
  280. if len(candidates) == 1:
  281. cid = candidates[0]
  282. if cid not in seen_ids:
  283. resolved_character_ids.append(cid)
  284. seen_ids.add(cid)
  285. continue
  286. if len(candidates) > 1:
  287. print(f"⚠️ 角色别名歧义,跳过: {ref!r} 命中 {len(candidates)} 个角色")
  288. else:
  289. print(f"⚠️ 未知角色,跳过: {ref!r}")
  290. # v4.0: 可选 location_id(只解析为地点实体)
  291. location = str(metadata.get("location", "")).strip()
  292. location_id = ""
  293. if location:
  294. if _exists_entity(location, "地点"):
  295. location_id = location
  296. else:
  297. loc_candidates = _resolve_alias_ids(location, "地点")
  298. if len(loc_candidates) == 1:
  299. location_id = loc_candidates[0]
  300. elif len(loc_candidates) > 1:
  301. print(f"⚠️ 地点别名歧义,location_id 留空: {location!r} 命中 {len(loc_candidates)} 个地点")
  302. self.conn.execute("""
  303. INSERT OR REPLACE INTO chapters
  304. (chapter_num, title, location, location_id, characters, word_count, content_hash, updated_at)
  305. VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
  306. """, (
  307. chapter_num,
  308. metadata['title'],
  309. location,
  310. location_id,
  311. json.dumps(resolved_character_ids, ensure_ascii=False),
  312. metadata['word_count'],
  313. metadata['hash']
  314. ))
  315. self.conn.commit()
  316. print(f"✅ 章节索引已更新:Ch{chapter_num} - {metadata['title']}")
  317. # bump_character_last_appearance_in_state 已删除(v4.0)
  318. # 原因:消除索引层写回 state.json 的循环依赖
  319. # last_appearance_chapter 现在作为 index.db 的派生字段
  320. def query_chapters_by_location(self, location: str, limit: int = 10) -> List[Tuple]:
  321. """O(log n) 查询:返回该地点的最近 N 章
  322. Args:
  323. location: 地点名称
  324. limit: 返回数量
  325. Returns:
  326. [(chapter_num, title, characters), ...]
  327. """
  328. cursor = self.conn.execute("""
  329. SELECT chapter_num, title, characters
  330. FROM chapters
  331. WHERE location = ?
  332. ORDER BY chapter_num DESC
  333. LIMIT ?
  334. """, (location, limit))
  335. return cursor.fetchall()
  def calculate_chapter_hash(self, chapter_file: Path) -> str:
      """Return the MD5 hex digest of the file's raw bytes.

      The digest is used only for change detection (self-healing index).
      Returns an empty string when the file does not exist.
      """
      if chapter_file.exists():
          with open(chapter_file, 'rb') as fh:
              digest = hashlib.md5(fh.read())
          return digest.hexdigest()
      return ""
  342. def get_stored_hash(self, chapter_num: int) -> Optional[str]:
  343. """从索引中读取存储的 Hash"""
  344. cursor = self.conn.execute("""
  345. SELECT content_hash FROM chapters WHERE chapter_num = ?
  346. """, (chapter_num,))
  347. row = cursor.fetchone()
  348. return row['content_hash'] if row else None
  def validate_and_rebuild_if_needed(self, chapter_num: int):
      """Re-index a chapter when its file no longer matches the stored hash.

      The chapter file is located with find_chapter_file(); when no file is
      found nothing happens. Otherwise the file's current hash is compared
      with the hash stored in the index and the chapter is rebuilt only on a
      mismatch (which includes the case of a chapter never indexed before).
      """
      chapter_file = find_chapter_file(self.project_root, chapter_num)
      if chapter_file is not None and chapter_file.exists():
          if self.calculate_chapter_hash(chapter_file) == self.get_stored_hash(chapter_num):
              return  # index is up to date
          print(f"⚠️ 检测到 Ch{chapter_num} 已修改,自动重建索引...")
          self._rebuild_chapter_index(chapter_num, chapter_file)
          print(f"✅ Ch{chapter_num} 索引已更新")
  367. def _rebuild_chapter_index(self, chapter_num: int, chapter_file: Path):
  368. """重建单章索引(自动提取元数据)"""
  369. # 读取章节内容
  370. with open(chapter_file, 'r', encoding='utf-8') as f:
  371. content = f.read()
  372. # 提取元数据
  373. metadata = self._extract_metadata_from_content(content, chapter_num)
  374. # 重建索引
  375. self.index_chapter(chapter_num, metadata)
  376. def _extract_metadata_from_content(self, content: str, chapter_num: int) -> Dict:
  377. """从章节内容中提取元数据"""
  378. # 提取标题(第一行)
  379. lines = content.split('\n')
  380. title = lines[0].strip('# ').strip() if lines else f"第{chapter_num}章"
  381. # 提取地点(在章节开头查找,通常格式为 **地点:XXX**)
  382. location_match = re.search(r'\*\*地点[::]\s*(.+?)\*\*', content)
  383. location = location_match.group(1).strip() if location_match else "未知"
  384. # 提取角色(查找所有对话和描述中的角色名)
  385. # 简化实现:从 state.json 读取已知角色,匹配出现频率
  386. characters = self._extract_characters_from_content(content)
  387. # 计算字数
  388. word_count = len(content)
  389. # 计算 Hash
  390. content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
  391. return {
  392. 'title': title,
  393. 'location': location,
  394. 'characters': characters[:5], # 最多 5 个主要角色
  395. 'word_count': word_count,
  396. 'hash': content_hash
  397. }
  398. def _extract_characters_from_content(self, content: str) -> List[str]:
  399. """从内容中提取角色(简化实现:读取索引中已知角色 canonical_name)"""
  400. # 获取已知角色列表(限制规模,避免超大角色库拖慢)
  401. rows = self.conn.execute(
  402. "SELECT canonical_name FROM entities WHERE entity_type = ? AND canonical_name != '' LIMIT 800",
  403. ("角色",),
  404. ).fetchall()
  405. known_characters = [r["canonical_name"] for r in rows] if rows else []
  406. if not known_characters:
  407. return []
  408. # 统计每个角色在内容中的出现次数
  409. char_counts = {}
  410. for char_name in known_characters:
  411. count = content.count(char_name)
  412. if count > 0:
  413. char_counts[char_name] = count
  414. # 按出现次数排序,返回前 5 个
  415. sorted_chars = sorted(char_counts.items(), key=lambda x: x[1], reverse=True)
  416. return [char for char, _ in sorted_chars[:5]]
  417. # ================== 核心功能 2:伏笔追踪索引 ==================
  418. def sync_foreshadowing_from_state(self):
  419. """从 state.json 同步伏笔数据到索引
  420. 触发时机:
  421. - update_state.py 更新伏笔后调用
  422. - --rebuild-index 批量重建时调用
  423. """
  424. if not self.state_file.exists():
  425. print("❌ state.json 不存在,跳过伏笔同步")
  426. return
  427. # 读取 state.json
  428. with open(self.state_file, 'r', encoding='utf-8') as f:
  429. state = json.load(f)
  430. current_chapter = state.get('progress', {}).get('current_chapter', 0)
  431. plot_threads = state.get('plot_threads', {}) or {}
  432. # 兼容新格式:plot_threads.foreshadowing = [{"content": "...", "status": "active", ...}, ...]
  433. foreshadowing_items = plot_threads.get('foreshadowing', []) or []
  434. active_count = 0
  435. resolved_count = 0
  436. for item in foreshadowing_items:
  437. desc = item.get('description') or item.get('content') or ''
  438. if not desc:
  439. continue
  440. raw_status = (item.get('status') or '').strip()
  441. if raw_status in ['已回收', 'resolved']:
  442. status = '已回收'
  443. resolved_count += 1
  444. else:
  445. # 默认都视为未回收(兼容 active/未回收/pending/空)
  446. status = '未回收'
  447. active_count += 1
  448. normalized = {
  449. 'description': desc,
  450. 'location': item.get('location', ''),
  451. 'characters': item.get('characters', []),
  452. # 如果没有明确记录,至少给一个可用的默认值(避免紧急度恒为0)
  453. 'introduced_chapter': item.get('introduced_chapter') or item.get('planted_chapter') or 1,
  454. 'resolved_chapter': item.get('resolved_chapter', None),
  455. }
  456. self._index_foreshadowing(normalized, current_chapter, status=status)
  457. self.conn.commit()
  458. print(f"✅ 伏笔索引已同步:{active_count} 条活跃 + {resolved_count} 条已回收")
  459. def _index_foreshadowing(self, plot: Dict, current_chapter: int, status: str):
  460. """为单个伏笔建立索引"""
  461. # 计算紧急度
  462. urgency = self._calculate_urgency(plot, current_chapter)
  463. # 提取地点和角色(如果有)
  464. location = plot.get('location', '')
  465. characters = plot.get('characters', [])
  466. self.conn.execute("""
  467. INSERT OR REPLACE INTO foreshadowing_index
  468. (id, content, location, characters, introduced_chapter, resolved_chapter, status, urgency, updated_at)
  469. VALUES ((SELECT id FROM foreshadowing_index WHERE content = ?), ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
  470. """, (
  471. plot.get('description', ''), # 用于查重
  472. plot.get('description', ''),
  473. location,
  474. json.dumps(characters, ensure_ascii=False),
  475. plot.get('introduced_chapter', 0),
  476. plot.get('resolved_chapter', None),
  477. status,
  478. urgency
  479. ))
  480. def _calculate_urgency(self, plot: Dict, current_chapter: int) -> int:
  481. """计算伏笔紧急度(0-100)
  482. 规则:
  483. - 超过 100 章未回收 → 极度紧急(100)
  484. - 超过 50 章未回收 → 中等紧急(60)
  485. - 其他 → 正常(20)
  486. """
  487. introduced_ch = plot.get('introduced_chapter', 0)
  488. chapters_pending = current_chapter - introduced_ch
  489. if chapters_pending > 100:
  490. return 100 # 极度紧急
  491. elif chapters_pending > 50:
  492. return 60 # 中等紧急
  493. else:
  494. return 20 # 正常
  495. # ================== v4.0 实体同步(使用 entities_v3)==================
  496. def sync_entities_from_state(self):
  497. """从 state.json.entities_v3 同步实体到 entities/entity_aliases 表
  498. v4.0 新增:取代旧的 sync_characters_from_state
  499. 数据源:state.json.entities_v3 + alias_index
  500. """
  501. if not self.state_file.exists():
  502. print("❌ state.json 不存在,跳过实体同步")
  503. return
  504. with open(self.state_file, 'r', encoding='utf-8') as f:
  505. state = json.load(f)
  506. entities_v3 = state.get('entities_v3', {})
  507. alias_index = state.get('alias_index', {})
  508. # v4.0:索引层为派生数据,可直接重建(避免重复插入导致膨胀)
  509. self.conn.execute("DELETE FROM entity_kv")
  510. self.conn.execute("DELETE FROM entity_aliases")
  511. self.conn.execute("DELETE FROM entity_history")
  512. self.conn.execute("DELETE FROM entities")
  513. entity_count = 0
  514. alias_count = 0
  515. # 遍历所有实体类型
  516. for entity_type, entities in entities_v3.items():
  517. for entity_id, entity_data in entities.items():
  518. # 写入 entities 主表
  519. canonical_name = entity_data.get('canonical_name', '')
  520. tier = entity_data.get('tier', '')
  521. desc = entity_data.get('desc', '')
  522. created_chapter = entity_data.get('created_chapter', 0)
  523. self.conn.execute("""
  524. INSERT OR REPLACE INTO entities
  525. (entity_id, entity_type, canonical_name, tier, desc, created_chapter, updated_at)
  526. VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
  527. """, (entity_id, entity_type, canonical_name, tier, desc, created_chapter))
  528. entity_count += 1
  529. # 写入实体 KV 属性
  530. current = entity_data.get('current', {})
  531. last_chapter = current.get("last_chapter", created_chapter) if isinstance(current, dict) else created_chapter
  532. try:
  533. last_chapter = int(last_chapter)
  534. except (TypeError, ValueError):
  535. last_chapter = int(created_chapter or 0)
  536. for key, value in current.items():
  537. value_str = json.dumps(value, ensure_ascii=False) if isinstance(value, (dict, list)) else str(value)
  538. self.conn.execute("""
  539. INSERT OR REPLACE INTO entity_kv
  540. (entity_id, key, value, last_chapter)
  541. VALUES (?, ?, ?, ?)
  542. """, (entity_id, key, value_str, last_chapter))
  543. # 写入历史记录
  544. history = entity_data.get('history', [])
  545. for record in history:
  546. chapter = record.get('chapter', 0)
  547. changes = record.get('changes', {})
  548. reasons = record.get('reasons', {})
  549. self.conn.execute("""
  550. INSERT OR IGNORE INTO entity_history
  551. (entity_id, chapter, changes_json, reasons_json)
  552. VALUES (?, ?, ?, ?)
  553. """, (entity_id, chapter, json.dumps(changes, ensure_ascii=False), json.dumps(reasons, ensure_ascii=False)))
  554. # 同步别名索引
  555. for alias, entries in alias_index.items():
  556. # v4.0: entries 必须是数组(一对多)
  557. if not isinstance(entries, list):
  558. raise ValueError(
  559. f"alias_index 数据格式错误:期望 alias_index[{alias!r}] 为 list[{{type,id,...}}],实际为 {type(entries).__name__}"
  560. )
  561. for entry in entries:
  562. entry_type = entry.get('type', '')
  563. entry_id = entry.get('id', '')
  564. first_seen = entry.get('first_seen_chapter', 0)
  565. context = entry.get('context', '')
  566. self.conn.execute("""
  567. INSERT OR REPLACE INTO entity_aliases
  568. (alias, entity_id, entity_type, first_seen_chapter, context)
  569. VALUES (?, ?, ?, ?, ?)
  570. """, (alias, entry_id, entry_type, first_seen, context))
  571. alias_count += 1
  572. self.conn.commit()
  573. print(f"✅ 实体索引已同步:{entity_count} 个实体,{alias_count} 个别名")
  574. def query_entity_by_id(self, entity_id: str) -> Optional[Dict]:
  575. """通过 entity_id 查询实体详情"""
  576. cursor = self.conn.execute("""
  577. SELECT entity_id, entity_type, canonical_name, tier, desc, created_chapter
  578. FROM entities WHERE entity_id = ?
  579. """, (entity_id,))
  580. row = cursor.fetchone()
  581. if not row:
  582. return None
  583. result = dict(row)
  584. # 获取 KV 属性
  585. cursor = self.conn.execute("""
  586. SELECT key, value FROM entity_kv WHERE entity_id = ?
  587. """, (entity_id,))
  588. result['current'] = {}
  589. for kv_row in cursor.fetchall():
  590. try:
  591. result['current'][kv_row['key']] = json.loads(kv_row['value'])
  592. except json.JSONDecodeError:
  593. result['current'][kv_row['key']] = kv_row['value']
  594. # 获取别名
  595. cursor = self.conn.execute("""
  596. SELECT alias FROM entity_aliases WHERE entity_id = ?
  597. """, (entity_id,))
  598. result['aliases'] = [row['alias'] for row in cursor.fetchall()]
  599. return result
  600. def query_entities_by_alias(self, alias: str) -> List[Dict]:
  601. """通过别名查询实体(支持一对多)"""
  602. cursor = self.conn.execute("""
  603. SELECT ea.entity_id, ea.entity_type, e.canonical_name, e.tier
  604. FROM entity_aliases ea
  605. LEFT JOIN entities e ON ea.entity_id = e.entity_id
  606. WHERE ea.alias = ?
  607. """, (alias,))
  608. return [dict(row) for row in cursor.fetchall()]
  609. def query_entities_by_type(self, entity_type: str, limit: int = 50) -> List[Dict]:
  610. """按类型查询实体"""
  611. cursor = self.conn.execute("""
  612. SELECT entity_id, canonical_name, tier, desc
  613. FROM entities
  614. WHERE entity_type = ?
  615. ORDER BY created_chapter DESC
  616. LIMIT ?
  617. """, (entity_type, limit))
  618. return [dict(row) for row in cursor.fetchall()]
  619. def sync_characters_from_state(self):
  620. """从 state.json 同步角色数据到索引(v4.0 已废弃)
  621. 保留兼容:调用新的 sync_entities_from_state
  622. """
  623. # v4.0: 委托给新函数
  624. self.sync_entities_from_state()
  625. def _index_character(self, char: Dict, status: str = 'active'):
  626. """为单个角色建立索引"""
  627. description = char.get('description') or char.get('desc') or ''
  628. tier = str(char.get('tier', '') or '').strip()
  629. importance = char.get('importance') or ('major' if tier == '核心' else 'minor')
  630. first_appearance = char.get('first_appearance_chapter', 0) or 0
  631. try:
  632. first_appearance = int(first_appearance)
  633. except (TypeError, ValueError):
  634. first_appearance = 0
  635. if first_appearance == 0:
  636. src = char.get('first_appearance')
  637. if isinstance(src, str):
  638. m = re.search(r'第(\d+)章', src)
  639. if m:
  640. try:
  641. first_appearance = int(m.group(1))
  642. except ValueError:
  643. first_appearance = 0
  644. last_appearance = char.get('last_appearance_chapter', 0) or first_appearance
  645. try:
  646. last_appearance = int(last_appearance)
  647. except (TypeError, ValueError):
  648. last_appearance = first_appearance
  649. self.conn.execute("""
  650. INSERT OR REPLACE INTO characters
  651. (name, description, personality, importance, power_level,
  652. first_appearance, last_appearance, status, updated_at)
  653. VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
  654. """, (
  655. char.get('name', ''),
  656. description,
  657. char.get('personality', ''),
  658. importance,
  659. char.get('power_level', ''),
  660. first_appearance,
  661. last_appearance,
  662. status
  663. ))
  664. def mark_character_archived(self, name: str, archived_at: str = None):
  665. """标记角色为已归档状态(Priority 2 修复)
  666. Args:
  667. name: 角色名
  668. archived_at: 归档时间戳(ISO格式),默认当前时间
  669. """
  670. if archived_at is None:
  671. from datetime import datetime
  672. archived_at = datetime.now().isoformat()
  673. self.conn.execute("""
  674. UPDATE characters
  675. SET status = 'archived', archived_at = ?, updated_at = CURRENT_TIMESTAMP
  676. WHERE name = ?
  677. """, (archived_at, name))
  678. self.conn.commit()
  679. def mark_character_active(self, name: str):
  680. """恢复角色为活跃状态(与 mark_character_archived 对应)"""
  681. self.conn.execute("""
  682. UPDATE characters
  683. SET status = 'active', archived_at = NULL, updated_at = CURRENT_TIMESTAMP
  684. WHERE name = ?
  685. """, (name,))
  686. self.conn.commit()
  687. def query_urgent_foreshadowing(self, threshold: int = 60) -> List[Dict]:
  688. """查询紧急伏笔(urgency >= threshold)
  689. Args:
  690. threshold: 紧急度阈值(60=中等紧急,80=高度紧急,100=极度紧急)
  691. Returns:
  692. [{'content': '...', 'introduced_chapter': 45, 'urgency': 80}, ...]
  693. """
  694. cursor = self.conn.execute("""
  695. SELECT content, introduced_chapter, urgency
  696. FROM foreshadowing_index
  697. WHERE status = '未回收' AND urgency >= ?
  698. ORDER BY urgency DESC
  699. """, (threshold,))
  700. return [dict(row) for row in cursor.fetchall()]
  def sync_relationships_from_state(self):
      """Sync relationship data from state.json into the index (v4.0: keyed by entity_id).

      Triggered:
      - after extract_entities.py updates relationships
      - during a --rebuild-index batch rebuild

      Data source: the ``structured_relationships`` list in state.json.

      Each usable entry is upserted into the ``relationships`` table, keyed by
      (char1_id, char2_id, relation_type). Entries that are not JSON objects,
      or that lack ``char1_id``/``char2_id``, are skipped with a warning.
      All writes are committed once, after the loop.

      Returns:
          None. Progress and problems are reported via ``print``.
      """
      if not self.state_file.exists():
          print("❌ state.json 不存在,跳过关系同步")
          return

      # Load state.json
      with open(self.state_file, 'r', encoding='utf-8') as f:
          state = json.load(f)

      # Structured relationship list
      relationships = state.get('structured_relationships', [])
      if not relationships:
          print("ℹ️ 无结构化关系数据")
          return

      def display_name(entity_id):
          # Fall back to the raw entity_id when the entity is unknown or unnamed.
          row = self.conn.execute(
              "SELECT canonical_name FROM entities WHERE entity_id = ? LIMIT 1",
              (entity_id,),
          ).fetchone()
          return (row["canonical_name"] if row else "") or entity_id

      count = 0
      for rel in relationships:
          # A malformed (non-object) entry cannot carry ids; treat it like any
          # other entry that is missing them instead of crashing on .get().
          if not isinstance(rel, dict):
              print("⚠️ 跳过无效关系(缺少 char1_id/char2_id)")
              continue

          # v4.0: relationships must be keyed by entity_id (chapter tags are the
          # source of truth; this avoids drift when display names change).
          char1_id = str(rel.get('char1_id', '') or '').strip()
          char2_id = str(rel.get('char2_id', '') or '').strip()
          char1_name = str(rel.get('char1_name', '') or '').strip()
          char2_name = str(rel.get('char2_name', '') or '').strip()

          # An explicit JSON null would be stored as SQL NULL, and the upsert
          # lookup below (`relation_type = ?`) never matches NULL, so each sync
          # would insert a duplicate row. Normalise null to the default.
          rel_type = rel.get('type', 'ally')
          if rel_type is None:
              rel_type = 'ally'

          intensity = rel.get('intensity', 50)
          desc = rel.get('description', '')
          last_chapter = rel.get('last_update_chapter', 0)

          if not char1_id or not char2_id:
              print("⚠️ 跳过无效关系(缺少 char1_id/char2_id)")
              continue

          # Fill in display names when state.json does not provide them (optional).
          if not char1_name:
              char1_name = display_name(char1_id)
          if not char2_name:
              char2_name = display_name(char2_id)

          # Upsert: reuse the existing row id when this (pair, type) is already
          # indexed; a NULL id makes SQLite allocate a new row.
          self.conn.execute("""
              INSERT OR REPLACE INTO relationships
              (id, char1_id, char2_id, char1_name, char2_name, relation_type, intensity, description, last_update_chapter, updated_at)
              VALUES (
                  (SELECT id FROM relationships WHERE char1_id = ? AND char2_id = ? AND relation_type = ?),
                  ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
              )
          """, (
              char1_id, char2_id, rel_type,  # for subquery
              char1_id, char2_id, char1_name, char2_name, rel_type, intensity, desc, last_chapter
          ))
          count += 1

      self.conn.commit()
      print(f"✅ 关系索引已同步:{count} 条关系")
  def query_relationships(self, char_id: str = None, rel_type: str = None) -> List[Dict]:
      """Query character relationships (v4.0: keyed by entity_id).

      Args:
          char_id: Character entity_id (optional). When given, only rows where
              it appears as char1_id or char2_id are returned.
          rel_type: Relationship type (optional). When given, only rows with
              that relation_type are returned.

      Returns:
          One dict per matching row, highest intensity first, e.g.
          [{'char1_id': '...', 'char2_id': '...', 'relation_type': 'romance', 'intensity': 80, ...}, ...]
      """
      # Each active filter pairs its SQL fragment with the values it binds.
      filters = []
      if char_id:
          filters.append(("(char1_id = ? OR char2_id = ?)", (char_id, char_id)))
      if rel_type:
          filters.append(("relation_type = ?", (rel_type,)))

      # With no filters, fall back to a tautology so the WHERE stays valid.
      where_clause = " AND ".join(fragment for fragment, _ in filters) or "1=1"
      params = [value for _, values in filters for value in values]

      cursor = self.conn.execute(f"""
          SELECT char1_id, char2_id, char1_name, char2_name, relation_type, intensity, description, last_update_chapter
          FROM relationships
          WHERE {where_clause}
          ORDER BY intensity DESC
      """, params)
      return list(map(dict, cursor.fetchall()))
  778. # ================== 核心功能 3:模糊查询(Fuzzy Search via SQL LIKE)==================
  def fuzzy_search_entity(self, keywords: List[str], entity_type: str = None) -> List[Dict]:
      """Fuzzy-search entities with SQL LIKE (v4.0: multi-keyword + type filter).

      An entity matches when EVERY keyword occurs as a substring of its
      canonical_name, its desc, or at least one of its aliases. Different
      keywords may be satisfied by different aliases of the same entity.

      Args:
          keywords: Keyword list, e.g. ["李", "女弟子"]. An empty list (or
              None) applies no keyword filter.
          entity_type: Optional entity-type filter (角色/地点/物品/势力/招式).

      Returns:
          At most 20 dicts, ordered by tier then created_chapter (both
          descending), e.g.
          [{'entity_id': '...', 'canonical_name': '...', 'desc': '...', 'tier': '...'}, ...]
      """
      conditions = []
      params = []

      for kw in keywords or []:
          # NOTE: '%' and '_' inside a keyword act as LIKE wildcards.
          pattern = f'%{kw}%'
          # The alias test is a correlated EXISTS rather than a JOIN so each
          # keyword is checked against all aliases independently; with a JOIN
          # every keyword would have to match on the same joined alias row.
          conditions.append(
              "(e.canonical_name LIKE ? OR e.desc LIKE ? OR EXISTS ("
              "SELECT 1 FROM entity_aliases ea "
              "WHERE ea.entity_id = e.entity_id AND ea.alias LIKE ?))"
          )
          params.extend([pattern, pattern, pattern])

      if entity_type:
          conditions.append("e.entity_type = ?")
          params.append(entity_type)

      # Without any condition an empty WHERE would be a SQL syntax error.
      where_clause = " AND ".join(conditions) if conditions else "1=1"

      query = f"""
          SELECT DISTINCT e.entity_id, e.entity_type, e.canonical_name, e.tier, e.desc, e.created_chapter
          FROM entities e
          WHERE {where_clause}
          ORDER BY e.tier DESC, e.created_chapter DESC
          LIMIT 20
      """
      cursor = self.conn.execute(query, params)
      return [dict(row) for row in cursor.fetchall()]
  def fuzzy_search_character(self, keywords: List[str]) -> List[Dict]:
      """Fuzzy-search characters only (v4.0: delegates to fuzzy_search_entity).

      Args:
          keywords: Keyword list, e.g. ["李", "女弟子"].

      Returns:
          Whatever fuzzy_search_entity returns for the "角色" entity type, e.g.
          [{'entity_id': '...', 'canonical_name': '...', 'desc': '...', ...}, ...]
      """
      character_type = "角色"
      matches = self.fuzzy_search_entity(keywords, entity_type=character_type)
      return matches
  816. # ================== 批量操作 ==================
  def rebuild_all_indexes(self):
      """Rebuild the index for every chapter file found on disk.

      Use cases:
      - first rollout of the index system
      - the index database is corrupted

      Chapter files are discovered recursively under ``self.chapters_dir``
      (pattern ``第*.md``) and visited in sorted path order. The chapter number
      is parsed from the file name; files without one are ignored, and only
      the first file seen for a given number is rebuilt. Afterwards the
      foreshadowing, character and relationship data are synced from state.
      """
      chapters_root = self.chapters_dir
      if not chapters_root.exists():
          print("❌ 章节目录不存在")
          return

      # Collect every chapter file
      chapter_files = sorted(chapters_root.rglob("第*.md"))
      print(f"🔍 发现 {len(chapter_files)} 个章节文件,开始重建索引...")

      number_pattern = re.compile(r'第(\d+)章')
      seen = set()
      for path in chapter_files:
          # Pull the chapter number out of the file name
          found = number_pattern.search(path.name)
          if found is None:
              continue

          number = int(found.group(1))
          if number not in seen:
              seen.add(number)
              # Rebuild this chapter's index entry
              self._rebuild_chapter_index(number, path)

      # Refresh the state-derived indexes once all chapters are done
      self.sync_foreshadowing_from_state()
      self.sync_characters_from_state()
      self.sync_relationships_from_state()

      print(f"✅ 批量重建完成:{len(seen)} 章")
  846. # ================== 查询与统计 ==================
  def get_index_stats(self) -> Dict:
      """Collect summary statistics about the index (v4.0: adds entity/alias stats).

      Returns:
          A dict with: ``chapter_count``, ``entity_stats`` (row count per
          entity_type), ``alias_count``, ``foreshadowing_active`` /
          ``foreshadowing_resolved`` (rows with status '未回收' / '已回收'),
          ``relationship_count`` and ``db_size_kb`` (size of the index
          database file in KB, rounded to two decimals).
      """
      run = self.conn.execute

      def count_rows(sql):
          # Single-row COUNT(*) query aliased as "count".
          return run(sql).fetchone()['count']

      def count_by(sql, key):
          # GROUP BY query -> {group value: row count}.
          return {row[key]: row['count'] for row in run(sql).fetchall()}

      # Chapters
      chapter_count = count_rows("SELECT COUNT(*) as count FROM chapters")

      # Entities per type (new in v4.0)
      entity_stats = count_by("""
          SELECT entity_type, COUNT(*) as count
          FROM entities
          GROUP BY entity_type
      """, 'entity_type')

      # Aliases (new in v4.0)
      alias_count = count_rows("SELECT COUNT(*) as count FROM entity_aliases")

      # Foreshadowing per status
      foreshadowing_stats = count_by("""
          SELECT status, COUNT(*) as count
          FROM foreshadowing_index
          GROUP BY status
      """, 'status')

      # Relationships
      relationship_count = count_rows("SELECT COUNT(*) as count FROM relationships")

      # Database file size
      size_in_kb = self.index_db.stat().st_size / 1024

      return dict(
          chapter_count=chapter_count,
          entity_stats=entity_stats,
          alias_count=alias_count,
          foreshadowing_active=foreshadowing_stats.get('未回收', 0),
          foreshadowing_resolved=foreshadowing_stats.get('已回收', 0),
          relationship_count=relationship_count,
          db_size_kb=round(size_in_kb, 2),
      )
  883. def __del__(self):
  884. """析构函数:关闭数据库连接"""
  885. if hasattr(self, 'conn'):
  886. self.conn.close()
  # Fields every externally supplied metadata JSON must contain.
  _REQUIRED_METADATA_FIELDS = ('title', 'location', 'characters', 'word_count', 'hash')


  def _missing_metadata_fields(metadata):
      """Return the required metadata field names that *metadata* lacks."""
      return [field for field in _REQUIRED_METADATA_FIELDS if field not in metadata]


  def _apply_chapter_update(index, chapter_num, metadata):
      """Index one chapter, then refresh the state-derived indexes."""
      # Sync entities first: used to resolve metadata.characters/name to entity_id.
      index.sync_entities_from_state()
      # Update the chapter index
      index.index_chapter(chapter_num, metadata)
      # Sync the foreshadowing index
      index.sync_foreshadowing_from_state()
      # bump_character_last_appearance_in_state was removed in v4.0
      index.sync_relationships_from_state()


  def main():
      """Command-line entry point for the structured index.

      Parses the arguments, builds a ``StructuredIndex`` for ``--project-root``
      and runs the first requested action, checked in this order:
      ``--update-chapter``, ``--rebuild-index``, ``--query-location``,
      ``--query-urgent-foreshadowing``, ``--fuzzy-search``, ``--stats``.
      With no action the help text is printed. Results and errors are written
      with ``print``; the function always returns None.
      """
      parser = argparse.ArgumentParser(description="结构化索引系统(取代向量化检索)")

      # Update operations
      parser.add_argument("--update-chapter", type=int, metavar="NUM", help="更新单章索引")
      parser.add_argument("--metadata", metavar="PATH", help="章节文件路径(配合 --update-chapter)")
      parser.add_argument("--metadata-json", metavar="JSON", help="元数据 JSON 字符串(配合 --update-chapter,由 metadata-extractor agent 提供)")
      parser.add_argument("--metadata-file", metavar="FILE", help="元数据 JSON 文件路径(配合 --update-chapter,Windows 推荐使用此参数)")

      # Batch operations
      parser.add_argument("--rebuild-index", action="store_true", help="批量重建所有索引")

      # Query operations
      parser.add_argument("--query-location", metavar="LOCATION", help="查询地点相关章节")
      parser.add_argument("--query-urgent-foreshadowing", action="store_true", help="查询紧急伏笔")
      parser.add_argument("--fuzzy-search", nargs='+', metavar="KEYWORD", help="模糊查询角色(多个关键词)")

      # Statistics
      parser.add_argument("--stats", action="store_true", help="显示索引统计信息")

      # Project path
      parser.add_argument("--project-root", metavar="PATH", help="项目根目录(默认为当前目录)")

      args = parser.parse_args()

      # Create the index manager
      index = StructuredIndex(project_root=args.project_root)

      # Dispatch
      if args.update_chapter:
          if args.metadata_file or args.metadata_json:
              # Mode 1: read metadata from a JSON file (recommended on Windows,
              #         avoids CLI quote-escaping problems); takes precedence.
              # Mode 2: metadata passed inline as a JSON string (Linux/macOS, tests).
              # The try deliberately spans the whole update, as before, so a
              # JSONDecodeError raised while syncing is reported the same way.
              try:
                  if args.metadata_file:
                      metadata_file = Path(args.metadata_file)
                      if not metadata_file.exists():
                          print(f"❌ 元数据文件不存在: {metadata_file}")
                          return
                      with open(metadata_file, 'r', encoding='utf-8') as f:
                          metadata = json.load(f)
                  else:
                      metadata = json.loads(args.metadata_json)

                  # Validate required fields
                  missing_fields = _missing_metadata_fields(metadata)
                  if missing_fields:
                      print(f"❌ JSON 缺少必需字段: {', '.join(missing_fields)}")
                      return

                  _apply_chapter_update(index, args.update_chapter, metadata)
              except json.JSONDecodeError as e:
                  print(f"❌ JSON 解析失败: {e}")
                  return

          # Mode 3: extract metadata from the chapter file itself (legacy mode,
          #         kept for backward compatibility)
          elif args.metadata:
              chapter_file = Path(args.metadata)
              if not chapter_file.exists():
                  print(f"❌ 章节文件不存在: {chapter_file}")
                  return

              with open(chapter_file, 'r', encoding='utf-8') as f:
                  content = f.read()
              metadata = index._extract_metadata_from_content(content, args.update_chapter)

              _apply_chapter_update(index, args.update_chapter, metadata)

          else:
              print("❌ 缺少参数:--metadata-file (推荐) / --metadata-json / --metadata")
              return

      elif args.rebuild_index:
          index.rebuild_all_indexes()

      elif args.query_location:
          results = index.query_chapters_by_location(args.query_location)
          if not results:
              print(f"未找到地点相关章节: {args.query_location}")
          else:
              print(f"找到 {len(results)} 个相关章节:")
              for chapter_num, title, characters in results:
                  print(f" Ch{chapter_num}: {title} - 角色: {characters}")

      elif args.query_urgent_foreshadowing:
          results = index.query_urgent_foreshadowing(threshold=60)
          if not results:
              print("✅ 无紧急伏笔")
          else:
              print(f"⚠️ 检测到 {len(results)} 条紧急伏笔:")
              for item in results:
                  # content may be NULL in the index; slicing None would raise.
                  content = (item['content'] or '')[:30]
                  print(f" - {content}...(第 {item['introduced_chapter']} 章埋设,紧急度 {item['urgency']}/100)")

      elif args.fuzzy_search:
          results = index.fuzzy_search_character(args.fuzzy_search)
          if not results:
              print(f"未找到匹配角色: {' + '.join(args.fuzzy_search)}")
          else:
              print(f"找到 {len(results)} 个匹配角色:")
              for i, char in enumerate(results, 1):
                  # v4.0: use the new field names
                  name = char.get('canonical_name', char.get('name', ''))
                  # dict.get only applies its default when the key is absent;
                  # a row whose desc is NULL yields None, so fall back with
                  # `or` before slicing.
                  desc = (char.get('desc') or char.get('description') or '')[:50]
                  tier = char.get('tier', '')
                  print(f"{i}. {name} [{tier}] - {desc}...")

      elif args.stats:
          stats = index.get_index_stats()
          print("📊 索引统计信息:")
          print(f" 章节索引: {stats['chapter_count']}")
          # v4.0: show entity statistics
          entity_stats = stats.get('entity_stats', {})
          if entity_stats:
              entity_summary = ", ".join([f"{t}: {c}" for t, c in entity_stats.items()])
              print(f" 实体索引: {entity_summary}")
          print(f" 别名索引: {stats.get('alias_count', 0)}")
          print(f" 伏笔索引: {stats['foreshadowing_active']} 条活跃 + {stats['foreshadowing_resolved']} 条已回收")
          print(f" 关系索引: {stats['relationship_count']}")
          print(f" 数据库大小: {stats['db_size_kb']} KB")

      else:
          parser.print_help()
  if __name__ == "__main__":
      # Windows UTF-8 encoding fix (applied only when run directly as a script):
      # re-wrap both standard streams so output is encoded as UTF-8.
      if sys.platform == 'win32':
          import io
          for stream_name in ("stdout", "stderr"):
              raw_buffer = getattr(sys, stream_name).buffer
              setattr(sys, stream_name, io.TextIOWrapper(raw_buffer, encoding='utf-8'))

      main()