#!/usr/bin/env python3
"""
Context Pack Builder v5.1

Generates a structured context pack for chapter writing, replacing direct
reads of state.json.

v5.1 changes:
- Uses the v5.1 index_manager schema (entities.id, aliases, current_json)
- Drops the dependency on the entity_kv table; uses the current_json field
- Drops the dependency on the entity_aliases table; uses the aliases table

Output schema:
{
    "meta": {
        "chapter": 45,
        "project_root": "...",
        "version": "5.1"
    },
    "core": {
        "chapter_outline": "outline text for this chapter",
        "protagonist_snapshot": {...},
        "recent_summaries": [{...}, ...]
    },
    "scene": {
        "location_context": {...},
        "appearing_characters": [{entity_id, name, snapshot}, ...],
        "urgent_foreshadowing": [{...}, ...]
    },
    "global": {
        "worldview_skeleton": "...",
        "power_system_skeleton": "...",
        "style_contract_ref": "..."
    },
    "alerts": {
        "disambiguation_warnings": [...],
        "disambiguation_pending": [...]
    }
}

Usage:
    python context_pack_builder.py --chapter 45 --project-root /path/to/project
    python context_pack_builder.py --chapter 45 --output /tmp/context_pack.json
"""
  31. import json
  32. import os
  33. import sys
  34. import argparse
  35. import re
  36. import sqlite3
  37. from pathlib import Path
  38. from typing import Optional, Dict, List, Any
  39. # 导入项目工具
  40. from project_locator import resolve_project_root
  41. from chapter_paths import find_chapter_file
  42. # 导入配置
  43. try:
  44. from data_modules.config import get_config, DataModulesConfig
  45. except ImportError:
  46. from scripts.data_modules.config import get_config, DataModulesConfig
  47. class ContextPackBuilder:
  48. """上下文包构建器"""
  49. def __init__(self, project_root: Path = None):
  50. if project_root is None:
  51. try:
  52. project_root = resolve_project_root()
  53. except FileNotFoundError:
  54. project_root = Path.cwd()
  55. else:
  56. project_root = Path(project_root)
  57. self.project_root = project_root
  58. self.config = get_config(project_root)
  59. self.state_file = project_root / ".webnovel" / "state.json"
  60. self.index_db = project_root / ".webnovel" / "index.db"
  61. self.outline_dir = project_root / "大纲"
  62. self.settings_dir = project_root / "设定集"
  63. self.chapters_dir = project_root / "正文"
  64. self._conn: Optional[sqlite3.Connection] = None
  65. def _conn_index(self) -> Optional[sqlite3.Connection]:
  66. if self._conn is not None:
  67. return self._conn
  68. if not self.index_db.exists():
  69. return None
  70. conn = sqlite3.connect(str(self.index_db))
  71. conn.row_factory = sqlite3.Row
  72. self._conn = conn
  73. return conn
  74. def build(self, chapter_num: int) -> Dict[str, Any]:
  75. """构建完整上下文包"""
  76. state = self._load_state()
  77. return {
  78. "meta": {
  79. "chapter": chapter_num,
  80. "project_root": str(self.project_root),
  81. "version": "5.1"
  82. },
  83. "core": self._build_core(chapter_num),
  84. "scene": self._build_scene(chapter_num),
  85. "global": self._build_global(),
  86. "alerts": self._build_alerts(state)
  87. }
  88. def _build_core(self, chapter_num: int) -> Dict[str, Any]:
  89. """核心上下文:大纲、主角状态、近期摘要"""
  90. state = self._load_state()
  91. return {
  92. "chapter_outline": self._get_chapter_outline(chapter_num),
  93. "protagonist_snapshot": self._get_protagonist_snapshot(state),
  94. "recent_summaries": self._get_recent_summaries(
  95. chapter_num, window=self.config.context_recent_summaries_window
  96. )
  97. }
  98. def _build_scene(self, chapter_num: int) -> Dict[str, Any]:
  99. """场景上下文:地点、出场角色、紧急伏笔"""
  100. state = self._load_state()
  101. # 从大纲推断本章地点和角色
  102. outline = self._get_chapter_outline(chapter_num)
  103. predicted_location = self._predict_location(outline, state)
  104. predicted_characters = self._predict_characters(outline, state)
  105. return {
  106. "location_context": predicted_location,
  107. "appearing_characters": predicted_characters,
  108. "urgent_foreshadowing": self._get_urgent_foreshadowing(state, chapter_num)
  109. }
  110. def _build_global(self) -> Dict[str, Any]:
  111. """全局上下文:世界观、力量体系、风格契约"""
  112. return {
  113. "worldview_skeleton": self._load_skeleton("世界观"),
  114. "power_system_skeleton": self._load_skeleton("力量体系"),
  115. "style_contract_ref": self._get_style_contract_ref()
  116. }
  117. def _build_alerts(self, state: Dict) -> Dict[str, Any]:
  118. """风险提示:消歧警告、待确认项(v5.0)"""
  119. slice_size = self.config.context_alerts_slice
  120. return {
  121. "disambiguation_warnings": state.get("disambiguation_warnings", [])[-slice_size:],
  122. "disambiguation_pending": state.get("disambiguation_pending", [])[-slice_size:]
  123. }
  124. # ================== 辅助方法 ==================
  125. def _load_state(self) -> Dict:
  126. """加载 state.json"""
  127. if not self.state_file.exists():
  128. return {}
  129. with open(self.state_file, 'r', encoding='utf-8') as f:
  130. return json.load(f)
  131. def _get_chapter_outline(self, chapter_num: int) -> str:
  132. """获取本章大纲"""
  133. # 尝试多种大纲文件格式
  134. patterns = [
  135. f"第{chapter_num}章*.md",
  136. f"第{chapter_num:02d}章*.md",
  137. f"第{chapter_num:03d}章*.md",
  138. f"第{chapter_num:04d}章*.md",
  139. f"章纲/第{chapter_num}章*.md",
  140. f"章纲/第{chapter_num:02d}章*.md",
  141. ]
  142. for pattern in patterns:
  143. matches = list(self.outline_dir.glob(pattern))
  144. if matches:
  145. with open(matches[0], 'r', encoding='utf-8') as f:
  146. return f.read()
  147. # 尝试从卷纲中提取
  148. volume_outline = self._extract_from_volume_outline(chapter_num)
  149. if volume_outline:
  150. return volume_outline
  151. return f"[大纲未找到: 第{chapter_num}章]"
  152. def _extract_from_volume_outline(self, chapter_num: int) -> Optional[str]:
  153. """从卷纲中提取章节大纲"""
  154. volume_files = list(self.outline_dir.glob("卷纲*.md")) + list(self.outline_dir.glob("*卷*.md"))
  155. for vf in volume_files:
  156. with open(vf, 'r', encoding='utf-8') as f:
  157. content = f.read()
  158. # 查找章节标记
  159. pattern = rf'第{chapter_num}章[^\n]*\n(.*?)(?=第\d+章|$)'
  160. match = re.search(pattern, content, re.DOTALL)
  161. if match:
  162. return match.group(0).strip()
  163. return None
  164. def _get_protagonist_snapshot(self, state: Dict) -> Dict:
  165. """获取主角状态快照"""
  166. protagonist = state.get("protagonist_state", {}) or {}
  167. power = protagonist.get("power", {}) or {}
  168. location = protagonist.get("location", {}) or {}
  169. snapshot: Dict[str, Any] = {
  170. "entity_id": str(protagonist.get("entity_id", "") or "").strip(),
  171. "name": str(protagonist.get("name", "") or "").strip() or "主角",
  172. "realm": str(power.get("realm", "") or "").strip(),
  173. "layer": power.get("layer", 0),
  174. "bottleneck": str(power.get("bottleneck", "") or "").strip(),
  175. "golden_finger": protagonist.get("golden_finger", {}) or {},
  176. "location": str(location.get("current", "") or "").strip(),
  177. }
  178. # 可选:从 index.db 补齐(以 entity_id 为准)
  179. protagonist_id = snapshot.get("entity_id", "")
  180. conn = self._conn_index()
  181. if protagonist_id and conn is not None:
  182. # v5.1 schema: entities 表使用 id 字段,current_json 存储状态
  183. row = conn.execute(
  184. "SELECT canonical_name, current_json FROM entities WHERE id = ? LIMIT 1",
  185. (protagonist_id,),
  186. ).fetchone()
  187. if row:
  188. if row["canonical_name"]:
  189. snapshot["name"] = row["canonical_name"]
  190. # 从 current_json 解析状态
  191. if row["current_json"]:
  192. try:
  193. current = json.loads(row["current_json"])
  194. if isinstance(current.get("realm"), str) and current.get("realm"):
  195. snapshot["realm"] = current["realm"]
  196. if current.get("layer") is not None and current.get("layer") != "":
  197. snapshot["layer"] = current["layer"]
  198. if isinstance(current.get("bottleneck"), str) and current.get("bottleneck"):
  199. snapshot["bottleneck"] = current["bottleneck"]
  200. if isinstance(current.get("location"), str) and current.get("location"):
  201. snapshot["location"] = current["location"]
  202. except (json.JSONDecodeError, TypeError):
  203. pass
  204. return snapshot
  205. def _get_recent_summaries(self, chapter_num: int, window: int = 5) -> List[Dict]:
  206. """获取最近 N 章的摘要"""
  207. summaries = []
  208. start = max(1, chapter_num - window)
  209. for ch in range(start, chapter_num):
  210. chapter_file = find_chapter_file(self.project_root, ch)
  211. if chapter_file and chapter_file.exists():
  212. summary = self._extract_summary_from_chapter(chapter_file, ch)
  213. if summary:
  214. summaries.append(summary)
  215. return summaries
  216. def _extract_summary_from_chapter(self, chapter_file: Path, chapter_num: int) -> Optional[Dict]:
  217. """从章节文件中提取摘要"""
  218. with open(chapter_file, 'r', encoding='utf-8') as f:
  219. content = f.read()
  220. # 查找摘要区块
  221. summary_match = re.search(r'## 本章摘要\s*\n(.*?)(?=\n##|$)', content, re.DOTALL)
  222. if summary_match:
  223. summary_text = summary_match.group(1).strip()
  224. return {
  225. "chapter": chapter_num,
  226. "summary": summary_text
  227. }
  228. # 没有摘要,返回章节标题
  229. title_match = re.match(r'^#\s*(.+)', content)
  230. title = title_match.group(1).strip() if title_match else f"第{chapter_num}章"
  231. return {
  232. "chapter": chapter_num,
  233. "title": title,
  234. "summary": None
  235. }
  236. def _predict_location(self, outline: str, state: Dict) -> Dict:
  237. """从大纲推断地点(优先使用 index.db 别名表)"""
  238. conn = self._conn_index()
  239. if conn is None:
  240. return {"name": "未知地点", "desc": ""}
  241. # v5.1 schema: 使用 aliases 表(替代 entity_aliases)
  242. rows = conn.execute(
  243. "SELECT alias, entity_id FROM aliases WHERE entity_type = ?",
  244. ("地点",),
  245. ).fetchall()
  246. if not rows:
  247. return {"name": "未知地点", "desc": ""}
  248. # 先匹配更长的别名,降低误命中
  249. candidates = sorted(
  250. ((r["alias"], r["entity_id"]) for r in rows if r["alias"]),
  251. key=lambda x: len(x[0]),
  252. reverse=True,
  253. )
  254. for alias, entity_id in candidates:
  255. if len(alias) < 2:
  256. continue
  257. if alias not in outline:
  258. continue
  259. # v5.1 schema: entities 表使用 id 字段
  260. e = conn.execute(
  261. "SELECT canonical_name, desc FROM entities WHERE id = ? LIMIT 1",
  262. (entity_id,),
  263. ).fetchone()
  264. return {
  265. "entity_id": entity_id,
  266. "name": (e["canonical_name"] if e else "") or alias,
  267. "desc": (e["desc"] if e else "") or "",
  268. "match": alias,
  269. }
  270. return {"name": "未知地点", "desc": ""}
  271. def _predict_characters(self, outline: str, state: Dict) -> List[Dict]:
  272. """从大纲推断出场角色(优先使用 index.db 别名表)"""
  273. conn = self._conn_index()
  274. if conn is None:
  275. return []
  276. # v5.1 schema: 使用 aliases 表(替代 entity_aliases)
  277. rows = conn.execute(
  278. "SELECT alias, entity_id FROM aliases WHERE entity_type = ?",
  279. ("角色",),
  280. ).fetchall()
  281. if not rows:
  282. return []
  283. matched_ids: set[str] = set()
  284. for r in rows:
  285. alias = r["alias"] or ""
  286. if len(alias) < 2:
  287. continue
  288. if alias in outline:
  289. matched_ids.add(r["entity_id"])
  290. if not matched_ids:
  291. return []
  292. tier_order = {"核心": 0, "支线": 1, "装饰": 2, "": 3}
  293. matched: List[Dict[str, Any]] = []
  294. for entity_id in matched_ids:
  295. # v5.1 schema: entities 表使用 id 字段,current_json 存储状态
  296. e = conn.execute(
  297. "SELECT canonical_name, tier, current_json FROM entities WHERE id = ? LIMIT 1",
  298. (entity_id,),
  299. ).fetchone()
  300. if not e:
  301. continue
  302. # 从 current_json 解析快照
  303. snapshot = {}
  304. if e["current_json"]:
  305. try:
  306. snapshot = json.loads(e["current_json"])
  307. except (json.JSONDecodeError, TypeError):
  308. pass
  309. matched.append(
  310. {
  311. "entity_id": entity_id,
  312. "name": e["canonical_name"] or entity_id,
  313. "tier": e["tier"] or "",
  314. "snapshot": snapshot,
  315. }
  316. )
  317. matched.sort(key=lambda x: tier_order.get(x.get("tier", ""), 3))
  318. return matched[:self.config.context_max_appearing_characters]
  319. def _get_urgent_foreshadowing(self, state: Dict, chapter_num: int) -> List[Dict]:
  320. """获取紧急伏笔(优先使用 index.db 伏笔索引)"""
  321. conn = self._conn_index()
  322. if conn is not None:
  323. try:
  324. rows = conn.execute(
  325. "SELECT content, introduced_chapter, resolved_chapter, status, urgency, location "
  326. "FROM foreshadowing_index WHERE status = '未回收' ORDER BY urgency DESC LIMIT 5"
  327. ).fetchall()
  328. return [dict(r) for r in rows] if rows else []
  329. except sqlite3.Error:
  330. pass
  331. # fallback:项目未建索引时直接读取 state.json
  332. plot_threads = state.get("plot_threads", {}) or {}
  333. items = plot_threads.get("foreshadowing", []) or []
  334. urgent: List[Dict[str, Any]] = []
  335. for fs in items:
  336. if not isinstance(fs, dict):
  337. continue
  338. status = str(fs.get("status", "")).strip()
  339. if status in {"已回收"}:
  340. continue
  341. planted_chapter = fs.get("planted_chapter") or fs.get("introduced_chapter") or 0
  342. target_chapter = fs.get("target_chapter") or fs.get("target") or 0
  343. try:
  344. planted_chapter = int(planted_chapter)
  345. except (TypeError, ValueError):
  346. planted_chapter = 0
  347. try:
  348. target_chapter = int(target_chapter) if target_chapter else 0
  349. except (TypeError, ValueError):
  350. target_chapter = 0
  351. chapters_pending = chapter_num - planted_chapter if planted_chapter else 0
  352. # 使用配置的紧急度阈值
  353. cfg = self.config
  354. if chapters_pending > cfg.foreshadowing_urgency_pending_high:
  355. urgency = cfg.foreshadowing_urgency_score_high
  356. elif chapters_pending > cfg.foreshadowing_urgency_pending_medium:
  357. urgency = cfg.foreshadowing_urgency_score_medium
  358. elif target_chapter and chapter_num >= target_chapter - cfg.foreshadowing_urgency_target_proximity:
  359. urgency = cfg.foreshadowing_urgency_score_target
  360. else:
  361. urgency = cfg.foreshadowing_urgency_score_low
  362. if urgency >= cfg.foreshadowing_urgency_threshold_show:
  363. urgent.append(
  364. {
  365. "content": fs.get("content") or fs.get("description") or "",
  366. "planted_chapter": planted_chapter,
  367. "target_chapter": target_chapter,
  368. "tier": fs.get("tier", ""),
  369. "urgency": urgency,
  370. }
  371. )
  372. urgent.sort(key=lambda x: x.get("urgency", 0), reverse=True)
  373. return urgent[:self.config.context_max_urgent_foreshadowing]
  374. def _load_skeleton(self, setting_type: str) -> str:
  375. """加载设定骨架"""
  376. patterns = [
  377. f"{setting_type}.md",
  378. f"{setting_type}/*.md",
  379. f"*{setting_type}*.md"
  380. ]
  381. for pattern in patterns:
  382. matches = list(self.settings_dir.glob(pattern))
  383. if matches:
  384. # 如果是目录,合并所有文件
  385. if matches[0].is_dir():
  386. content = []
  387. for f in sorted(matches[0].glob("*.md")):
  388. with open(f, 'r', encoding='utf-8') as file:
  389. content.append(f"## {f.stem}\n{file.read()}")
  390. return "\n\n".join(content)
  391. else:
  392. with open(matches[0], 'r', encoding='utf-8') as f:
  393. return f.read()
  394. return f"[{setting_type}设定未找到]"
  395. def _get_style_contract_ref(self) -> str:
  396. """获取风格契约引用"""
  397. style_file = self.settings_dir / "风格契约.md"
  398. if style_file.exists():
  399. with open(style_file, 'r', encoding='utf-8') as f:
  400. return f.read()
  401. # 检查其他可能的位置
  402. for pattern in ["风格*.md", "写作风格*.md", "style*.md"]:
  403. matches = list(self.settings_dir.glob(pattern))
  404. if matches:
  405. with open(matches[0], 'r', encoding='utf-8') as f:
  406. return f.read()
  407. return "[风格契约未定义]"
  408. def main():
  409. parser = argparse.ArgumentParser(description="Context Pack Builder v5.1")
  410. parser.add_argument("--chapter", type=int, required=True, help="章节编号")
  411. parser.add_argument("--project-root", metavar="PATH", help="项目根目录")
  412. parser.add_argument("--output", metavar="FILE", help="输出文件路径(默认输出到 stdout)")
  413. parser.add_argument("--pretty", action="store_true", help="格式化 JSON 输出")
  414. args = parser.parse_args()
  415. # 构建上下文包
  416. builder = ContextPackBuilder(project_root=args.project_root)
  417. context_pack = builder.build(args.chapter)
  418. # 输出
  419. if args.pretty:
  420. output = json.dumps(context_pack, ensure_ascii=False, indent=2)
  421. else:
  422. output = json.dumps(context_pack, ensure_ascii=False)
  423. if args.output:
  424. with open(args.output, 'w', encoding='utf-8') as f:
  425. f.write(output)
  426. print(f"✅ 上下文包已保存到: {args.output}")
  427. else:
  428. print(output)
  429. if __name__ == "__main__":
  430. # Windows UTF-8 编码修复
  431. if sys.platform == 'win32':
  432. import io
  433. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  434. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  435. main()