context_pack_builder.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. #!/usr/bin/env python3
  2. """
  3. Context Pack Builder v4.0
  4. 为章节写作生成结构化上下文包,取代直接读取 state.json。
  5. 输出 Schema:
  6. {
  7. "core": {
  8. "chapter_outline": "本章大纲内容",
  9. "protagonist_snapshot": {...},
  10. "recent_summaries": [{...}, ...]
  11. },
  12. "scene": {
  13. "location_context": {...},
  14. "appearing_characters": [{entity_id, name, snapshot}, ...],
  15. "urgent_foreshadowing": [{...}, ...]
  16. },
  17. "global": {
  18. "worldview_skeleton": "...",
  19. "power_system_skeleton": "...",
  20. "style_contract_ref": "..."
  21. }
  22. }
  23. 使用方式:
  24. python context_pack_builder.py --chapter 45 --project-root /path/to/project
  25. python context_pack_builder.py --chapter 45 --output /tmp/context_pack.json
  26. """
  27. import json
  28. import os
  29. import sys
  30. import argparse
  31. import re
  32. import sqlite3
  33. from pathlib import Path
  34. from typing import Optional, Dict, List, Any
  35. # 导入项目工具
  36. from project_locator import resolve_project_root
  37. from chapter_paths import find_chapter_file
  38. # 导入配置
  39. try:
  40. from data_modules.config import get_config, DataModulesConfig
  41. except ImportError:
  42. from scripts.data_modules.config import get_config, DataModulesConfig
  43. class ContextPackBuilder:
  44. """上下文包构建器"""
  45. def __init__(self, project_root: Path = None):
  46. if project_root is None:
  47. try:
  48. project_root = resolve_project_root()
  49. except FileNotFoundError:
  50. project_root = Path.cwd()
  51. else:
  52. project_root = Path(project_root)
  53. self.project_root = project_root
  54. self.config = get_config(project_root)
  55. self.state_file = project_root / ".webnovel" / "state.json"
  56. self.index_db = project_root / ".webnovel" / "index.db"
  57. self.outline_dir = project_root / "大纲"
  58. self.settings_dir = project_root / "设定集"
  59. self.chapters_dir = project_root / "正文"
  60. self._conn: Optional[sqlite3.Connection] = None
  61. def _conn_index(self) -> Optional[sqlite3.Connection]:
  62. if self._conn is not None:
  63. return self._conn
  64. if not self.index_db.exists():
  65. return None
  66. conn = sqlite3.connect(str(self.index_db))
  67. conn.row_factory = sqlite3.Row
  68. self._conn = conn
  69. return conn
  70. def build(self, chapter_num: int) -> Dict[str, Any]:
  71. """构建完整上下文包"""
  72. state = self._load_state()
  73. return {
  74. "meta": {
  75. "chapter": chapter_num,
  76. "project_root": str(self.project_root),
  77. "version": "5.0"
  78. },
  79. "core": self._build_core(chapter_num),
  80. "scene": self._build_scene(chapter_num),
  81. "global": self._build_global(),
  82. "alerts": self._build_alerts(state)
  83. }
  84. def _build_core(self, chapter_num: int) -> Dict[str, Any]:
  85. """核心上下文:大纲、主角状态、近期摘要"""
  86. state = self._load_state()
  87. return {
  88. "chapter_outline": self._get_chapter_outline(chapter_num),
  89. "protagonist_snapshot": self._get_protagonist_snapshot(state),
  90. "recent_summaries": self._get_recent_summaries(
  91. chapter_num, window=self.config.context_recent_summaries_window
  92. )
  93. }
  94. def _build_scene(self, chapter_num: int) -> Dict[str, Any]:
  95. """场景上下文:地点、出场角色、紧急伏笔"""
  96. state = self._load_state()
  97. # 从大纲推断本章地点和角色
  98. outline = self._get_chapter_outline(chapter_num)
  99. predicted_location = self._predict_location(outline, state)
  100. predicted_characters = self._predict_characters(outline, state)
  101. return {
  102. "location_context": predicted_location,
  103. "appearing_characters": predicted_characters,
  104. "urgent_foreshadowing": self._get_urgent_foreshadowing(state, chapter_num)
  105. }
  106. def _build_global(self) -> Dict[str, Any]:
  107. """全局上下文:世界观、力量体系、风格契约"""
  108. return {
  109. "worldview_skeleton": self._load_skeleton("世界观"),
  110. "power_system_skeleton": self._load_skeleton("力量体系"),
  111. "style_contract_ref": self._get_style_contract_ref()
  112. }
  113. def _build_alerts(self, state: Dict) -> Dict[str, Any]:
  114. """风险提示:消歧警告、待确认项(v5.0)"""
  115. slice_size = self.config.context_alerts_slice
  116. return {
  117. "disambiguation_warnings": state.get("disambiguation_warnings", [])[-slice_size:],
  118. "disambiguation_pending": state.get("disambiguation_pending", [])[-slice_size:]
  119. }
  120. # ================== 辅助方法 ==================
  121. def _load_state(self) -> Dict:
  122. """加载 state.json"""
  123. if not self.state_file.exists():
  124. return {}
  125. with open(self.state_file, 'r', encoding='utf-8') as f:
  126. return json.load(f)
  127. def _get_chapter_outline(self, chapter_num: int) -> str:
  128. """获取本章大纲"""
  129. # 尝试多种大纲文件格式
  130. patterns = [
  131. f"第{chapter_num}章*.md",
  132. f"第{chapter_num:02d}章*.md",
  133. f"第{chapter_num:03d}章*.md",
  134. f"第{chapter_num:04d}章*.md",
  135. f"章纲/第{chapter_num}章*.md",
  136. f"章纲/第{chapter_num:02d}章*.md",
  137. ]
  138. for pattern in patterns:
  139. matches = list(self.outline_dir.glob(pattern))
  140. if matches:
  141. with open(matches[0], 'r', encoding='utf-8') as f:
  142. return f.read()
  143. # 尝试从卷纲中提取
  144. volume_outline = self._extract_from_volume_outline(chapter_num)
  145. if volume_outline:
  146. return volume_outline
  147. return f"[大纲未找到: 第{chapter_num}章]"
  148. def _extract_from_volume_outline(self, chapter_num: int) -> Optional[str]:
  149. """从卷纲中提取章节大纲"""
  150. volume_files = list(self.outline_dir.glob("卷纲*.md")) + list(self.outline_dir.glob("*卷*.md"))
  151. for vf in volume_files:
  152. with open(vf, 'r', encoding='utf-8') as f:
  153. content = f.read()
  154. # 查找章节标记
  155. pattern = rf'第{chapter_num}章[^\n]*\n(.*?)(?=第\d+章|$)'
  156. match = re.search(pattern, content, re.DOTALL)
  157. if match:
  158. return match.group(0).strip()
  159. return None
  160. def _get_protagonist_snapshot(self, state: Dict) -> Dict:
  161. """获取主角状态快照"""
  162. protagonist = state.get("protagonist_state", {}) or {}
  163. power = protagonist.get("power", {}) or {}
  164. location = protagonist.get("location", {}) or {}
  165. snapshot: Dict[str, Any] = {
  166. "entity_id": str(protagonist.get("entity_id", "") or "").strip(),
  167. "name": str(protagonist.get("name", "") or "").strip() or "主角",
  168. "realm": str(power.get("realm", "") or "").strip(),
  169. "layer": power.get("layer", 0),
  170. "bottleneck": str(power.get("bottleneck", "") or "").strip(),
  171. "golden_finger": protagonist.get("golden_finger", {}) or {},
  172. "location": str(location.get("current", "") or "").strip(),
  173. }
  174. # 可选:从 index.db 补齐(以 entity_id 为准)
  175. protagonist_id = snapshot.get("entity_id", "")
  176. conn = self._conn_index()
  177. if protagonist_id and conn is not None:
  178. row = conn.execute(
  179. "SELECT canonical_name FROM entities WHERE entity_id = ? LIMIT 1",
  180. (protagonist_id,),
  181. ).fetchone()
  182. if row and row["canonical_name"]:
  183. snapshot["name"] = row["canonical_name"]
  184. kv_rows = conn.execute(
  185. "SELECT key, value FROM entity_kv WHERE entity_id = ?",
  186. (protagonist_id,),
  187. ).fetchall()
  188. def _parse(v: str):
  189. try:
  190. return json.loads(v)
  191. except Exception:
  192. return v
  193. kv = {r["key"]: _parse(r["value"]) for r in kv_rows} if kv_rows else {}
  194. if isinstance(kv.get("realm"), str) and kv.get("realm"):
  195. snapshot["realm"] = kv["realm"]
  196. if kv.get("layer") is not None and kv.get("layer") != "":
  197. snapshot["layer"] = kv["layer"]
  198. if isinstance(kv.get("bottleneck"), str) and kv.get("bottleneck"):
  199. snapshot["bottleneck"] = kv["bottleneck"]
  200. if isinstance(kv.get("location"), str) and kv.get("location"):
  201. snapshot["location"] = kv["location"]
  202. return snapshot
  203. def _get_recent_summaries(self, chapter_num: int, window: int = 5) -> List[Dict]:
  204. """获取最近 N 章的摘要"""
  205. summaries = []
  206. start = max(1, chapter_num - window)
  207. for ch in range(start, chapter_num):
  208. chapter_file = find_chapter_file(self.project_root, ch)
  209. if chapter_file and chapter_file.exists():
  210. summary = self._extract_summary_from_chapter(chapter_file, ch)
  211. if summary:
  212. summaries.append(summary)
  213. return summaries
  214. def _extract_summary_from_chapter(self, chapter_file: Path, chapter_num: int) -> Optional[Dict]:
  215. """从章节文件中提取摘要"""
  216. with open(chapter_file, 'r', encoding='utf-8') as f:
  217. content = f.read()
  218. # 查找摘要区块
  219. summary_match = re.search(r'## 本章摘要\s*\n(.*?)(?=\n##|$)', content, re.DOTALL)
  220. if summary_match:
  221. summary_text = summary_match.group(1).strip()
  222. return {
  223. "chapter": chapter_num,
  224. "summary": summary_text
  225. }
  226. # 没有摘要,返回章节标题
  227. title_match = re.match(r'^#\s*(.+)', content)
  228. title = title_match.group(1).strip() if title_match else f"第{chapter_num}章"
  229. return {
  230. "chapter": chapter_num,
  231. "title": title,
  232. "summary": None
  233. }
  234. def _predict_location(self, outline: str, state: Dict) -> Dict:
  235. """从大纲推断地点(优先使用 index.db 别名表)"""
  236. conn = self._conn_index()
  237. if conn is None:
  238. return {"name": "未知地点", "desc": ""}
  239. rows = conn.execute(
  240. "SELECT alias, entity_id FROM entity_aliases WHERE entity_type = ?",
  241. ("地点",),
  242. ).fetchall()
  243. if not rows:
  244. return {"name": "未知地点", "desc": ""}
  245. # 先匹配更长的别名,降低误命中
  246. candidates = sorted(
  247. ((r["alias"], r["entity_id"]) for r in rows if r["alias"]),
  248. key=lambda x: len(x[0]),
  249. reverse=True,
  250. )
  251. for alias, entity_id in candidates:
  252. if len(alias) < 2:
  253. continue
  254. if alias not in outline:
  255. continue
  256. e = conn.execute(
  257. "SELECT canonical_name, desc FROM entities WHERE entity_id = ? LIMIT 1",
  258. (entity_id,),
  259. ).fetchone()
  260. return {
  261. "entity_id": entity_id,
  262. "name": (e["canonical_name"] if e else "") or alias,
  263. "desc": (e["desc"] if e else "") or "",
  264. "match": alias,
  265. }
  266. return {"name": "未知地点", "desc": ""}
  267. def _predict_characters(self, outline: str, state: Dict) -> List[Dict]:
  268. """从大纲推断出场角色(优先使用 index.db 别名表)"""
  269. conn = self._conn_index()
  270. if conn is None:
  271. return []
  272. rows = conn.execute(
  273. "SELECT alias, entity_id FROM entity_aliases WHERE entity_type = ?",
  274. ("角色",),
  275. ).fetchall()
  276. if not rows:
  277. return []
  278. matched_ids: set[str] = set()
  279. for r in rows:
  280. alias = r["alias"] or ""
  281. if len(alias) < 2:
  282. continue
  283. if alias in outline:
  284. matched_ids.add(r["entity_id"])
  285. if not matched_ids:
  286. return []
  287. def _parse(v: str):
  288. try:
  289. return json.loads(v)
  290. except Exception:
  291. return v
  292. tier_order = {"核心": 0, "支线": 1, "装饰": 2, "": 3}
  293. matched: List[Dict[str, Any]] = []
  294. for entity_id in matched_ids:
  295. e = conn.execute(
  296. "SELECT canonical_name, tier FROM entities WHERE entity_id = ? LIMIT 1",
  297. (entity_id,),
  298. ).fetchone()
  299. if not e:
  300. continue
  301. kv_rows = conn.execute(
  302. "SELECT key, value FROM entity_kv WHERE entity_id = ?",
  303. (entity_id,),
  304. ).fetchall()
  305. snapshot = {r["key"]: _parse(r["value"]) for r in kv_rows} if kv_rows else {}
  306. matched.append(
  307. {
  308. "entity_id": entity_id,
  309. "name": e["canonical_name"] or entity_id,
  310. "tier": e["tier"] or "",
  311. "snapshot": snapshot,
  312. }
  313. )
  314. matched.sort(key=lambda x: tier_order.get(x.get("tier", ""), 3))
  315. return matched[:self.config.context_max_appearing_characters]
  316. def _get_urgent_foreshadowing(self, state: Dict, chapter_num: int) -> List[Dict]:
  317. """获取紧急伏笔(优先使用 index.db 伏笔索引)"""
  318. conn = self._conn_index()
  319. if conn is not None:
  320. try:
  321. rows = conn.execute(
  322. "SELECT content, introduced_chapter, resolved_chapter, status, urgency, location "
  323. "FROM foreshadowing_index WHERE status = '未回收' ORDER BY urgency DESC LIMIT 5"
  324. ).fetchall()
  325. return [dict(r) for r in rows] if rows else []
  326. except sqlite3.Error:
  327. pass
  328. # fallback:项目未建索引时直接读取 state.json
  329. plot_threads = state.get("plot_threads", {}) or {}
  330. items = plot_threads.get("foreshadowing", []) or []
  331. urgent: List[Dict[str, Any]] = []
  332. for fs in items:
  333. if not isinstance(fs, dict):
  334. continue
  335. status = str(fs.get("status", "")).strip()
  336. if status in {"已回收"}:
  337. continue
  338. planted_chapter = fs.get("planted_chapter") or fs.get("introduced_chapter") or 0
  339. target_chapter = fs.get("target_chapter") or fs.get("target") or 0
  340. try:
  341. planted_chapter = int(planted_chapter)
  342. except (TypeError, ValueError):
  343. planted_chapter = 0
  344. try:
  345. target_chapter = int(target_chapter) if target_chapter else 0
  346. except (TypeError, ValueError):
  347. target_chapter = 0
  348. chapters_pending = chapter_num - planted_chapter if planted_chapter else 0
  349. # 使用配置的紧急度阈值
  350. cfg = self.config
  351. if chapters_pending > cfg.foreshadowing_urgency_pending_high:
  352. urgency = cfg.foreshadowing_urgency_score_high
  353. elif chapters_pending > cfg.foreshadowing_urgency_pending_medium:
  354. urgency = cfg.foreshadowing_urgency_score_medium
  355. elif target_chapter and chapter_num >= target_chapter - cfg.foreshadowing_urgency_target_proximity:
  356. urgency = cfg.foreshadowing_urgency_score_target
  357. else:
  358. urgency = cfg.foreshadowing_urgency_score_low
  359. if urgency >= cfg.foreshadowing_urgency_threshold_show:
  360. urgent.append(
  361. {
  362. "content": fs.get("content") or fs.get("description") or "",
  363. "planted_chapter": planted_chapter,
  364. "target_chapter": target_chapter,
  365. "tier": fs.get("tier", ""),
  366. "urgency": urgency,
  367. }
  368. )
  369. urgent.sort(key=lambda x: x.get("urgency", 0), reverse=True)
  370. return urgent[:self.config.context_max_urgent_foreshadowing]
  371. def _load_skeleton(self, setting_type: str) -> str:
  372. """加载设定骨架"""
  373. patterns = [
  374. f"{setting_type}.md",
  375. f"{setting_type}/*.md",
  376. f"*{setting_type}*.md"
  377. ]
  378. for pattern in patterns:
  379. matches = list(self.settings_dir.glob(pattern))
  380. if matches:
  381. # 如果是目录,合并所有文件
  382. if matches[0].is_dir():
  383. content = []
  384. for f in sorted(matches[0].glob("*.md")):
  385. with open(f, 'r', encoding='utf-8') as file:
  386. content.append(f"## {f.stem}\n{file.read()}")
  387. return "\n\n".join(content)
  388. else:
  389. with open(matches[0], 'r', encoding='utf-8') as f:
  390. return f.read()
  391. return f"[{setting_type}设定未找到]"
  392. def _get_style_contract_ref(self) -> str:
  393. """获取风格契约引用"""
  394. style_file = self.settings_dir / "风格契约.md"
  395. if style_file.exists():
  396. with open(style_file, 'r', encoding='utf-8') as f:
  397. return f.read()
  398. # 检查其他可能的位置
  399. for pattern in ["风格*.md", "写作风格*.md", "style*.md"]:
  400. matches = list(self.settings_dir.glob(pattern))
  401. if matches:
  402. with open(matches[0], 'r', encoding='utf-8') as f:
  403. return f.read()
  404. return "[风格契约未定义]"
  405. def main():
  406. parser = argparse.ArgumentParser(description="Context Pack Builder v4.0")
  407. parser.add_argument("--chapter", type=int, required=True, help="章节编号")
  408. parser.add_argument("--project-root", metavar="PATH", help="项目根目录")
  409. parser.add_argument("--output", metavar="FILE", help="输出文件路径(默认输出到 stdout)")
  410. parser.add_argument("--pretty", action="store_true", help="格式化 JSON 输出")
  411. args = parser.parse_args()
  412. # 构建上下文包
  413. builder = ContextPackBuilder(project_root=args.project_root)
  414. context_pack = builder.build(args.chapter)
  415. # 输出
  416. if args.pretty:
  417. output = json.dumps(context_pack, ensure_ascii=False, indent=2)
  418. else:
  419. output = json.dumps(context_pack, ensure_ascii=False)
  420. if args.output:
  421. with open(args.output, 'w', encoding='utf-8') as f:
  422. f.write(output)
  423. print(f"✅ 上下文包已保存到: {args.output}")
  424. else:
  425. print(output)
  426. if __name__ == "__main__":
  427. # Windows UTF-8 编码修复
  428. if sys.platform == 'win32':
  429. import io
  430. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  431. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  432. main()