extract_chapter_context.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. extract_chapter_context.py - extract chapter writing context
  5. Features:
  6. - chapter outline snippet
  7. - previous chapter summaries (prefers .webnovel/summaries)
  8. - compact state summary
  9. - ContextManager contract sections (reader_signal / genre_profile / writing_guidance)
  10. """
  11. from __future__ import annotations
  12. import argparse
  13. import json
  14. import re
  15. import sys
  16. from pathlib import Path
  17. from typing import Any, Dict, List
  18. try:
  19. from chapter_paths import find_chapter_file
  20. except ImportError: # pragma: no cover
  21. from scripts.chapter_paths import find_chapter_file
  22. def _ensure_scripts_path():
  23. scripts_dir = Path(__file__).resolve().parent
  24. if str(scripts_dir) not in sys.path:
  25. sys.path.insert(0, str(scripts_dir))
  26. def find_project_root(start_path: Path | None = None) -> Path:
  27. """Find project root containing `.webnovel` directory."""
  28. if start_path is None:
  29. start_path = Path.cwd()
  30. search_paths = [
  31. start_path,
  32. start_path / "webnovel-project",
  33. start_path.parent,
  34. ]
  35. for path in search_paths:
  36. if (path / ".webnovel").exists():
  37. return path
  38. raise FileNotFoundError("未找到 .webnovel 目录,请确认项目路径")
  39. def extract_chapter_outline(project_root: Path, chapter_num: int) -> str:
  40. """Extract chapter outline segment from volume outline file."""
  41. volume_num = (chapter_num - 1) // 50 + 1
  42. outline_file = project_root / "大纲" / f"第{volume_num}卷 详细大纲.md"
  43. if not outline_file.exists():
  44. return f"⚠️ 大纲文件不存在: {outline_file}"
  45. content = outline_file.read_text(encoding="utf-8")
  46. pattern = rf"###\s*第\s*{chapter_num}\s*章[::]\s*(.+?)(?=###\s*第\s*\d+\s*章|##\s|$)"
  47. match = re.search(pattern, content, re.DOTALL)
  48. if not match:
  49. pattern2 = rf"###\s*第{chapter_num}章[::]\s*(.+?)(?=###\s*第\d+章|##\s|$)"
  50. match = re.search(pattern2, content, re.DOTALL)
  51. if match:
  52. outline = match.group(0).strip()
  53. if len(outline) > 1500:
  54. outline = outline[:1500] + "\n...(已截断)"
  55. return outline
  56. return f"⚠️ 未找到第 {chapter_num} 章的大纲"
  57. def _load_summary_file(project_root: Path, chapter_num: int) -> str:
  58. """Load summary section from `.webnovel/summaries/chNNNN.md`."""
  59. summary_path = project_root / ".webnovel" / "summaries" / f"ch{chapter_num:04d}.md"
  60. if not summary_path.exists():
  61. return ""
  62. text = summary_path.read_text(encoding="utf-8")
  63. summary_match = re.search(r"##\s*剧情摘要\s*\r?\n(.+?)(?=\r?\n##|$)", text, re.DOTALL)
  64. if summary_match:
  65. return summary_match.group(1).strip()
  66. return ""
  67. def extract_chapter_summary(project_root: Path, chapter_num: int) -> str:
  68. """Extract chapter summary, fallback to chapter body head."""
  69. summary = _load_summary_file(project_root, chapter_num)
  70. if summary:
  71. return summary
  72. chapter_file = find_chapter_file(project_root, chapter_num)
  73. if not chapter_file or not chapter_file.exists():
  74. return f"⚠️ 第{chapter_num}章文件不存在"
  75. content = chapter_file.read_text(encoding="utf-8")
  76. summary_match = re.search(r"##\s*本章摘要\s*\r?\n(.+?)(?=\r?\n##|$)", content, re.DOTALL)
  77. if summary_match:
  78. return summary_match.group(1).strip()
  79. stats_match = re.search(r"##\s*本章统计\s*\r?\n(.+?)(?=\r?\n##|$)", content, re.DOTALL)
  80. if stats_match:
  81. return f"[无摘要,仅统计]\n{stats_match.group(1).strip()}"
  82. lines = content.split("\n")
  83. text_lines = [line for line in lines if not line.startswith("#") and line.strip()]
  84. text = "\n".join(text_lines)[:500]
  85. return f"[自动截取前500字]\n{text}..."
  86. def extract_state_summary(project_root: Path) -> str:
  87. """Extract key fields from `.webnovel/state.json`."""
  88. state_file = project_root / ".webnovel" / "state.json"
  89. if not state_file.exists():
  90. return "⚠️ state.json 不存在"
  91. state = json.loads(state_file.read_text(encoding="utf-8"))
  92. summary_parts: List[str] = []
  93. if "progress" in state:
  94. progress = state["progress"]
  95. summary_parts.append(
  96. f"**进度**: 第{progress.get('current_chapter', '?')}章 / {progress.get('total_words', '?')}字"
  97. )
  98. if "protagonist_state" in state:
  99. ps = state["protagonist_state"]
  100. power = ps.get("power", {})
  101. summary_parts.append(f"**主角实力**: {power.get('realm', '?')} {power.get('layer', '?')}层")
  102. summary_parts.append(f"**当前位置**: {ps.get('location', '?')}")
  103. golden_finger = ps.get("golden_finger", {})
  104. summary_parts.append(
  105. f"**金手指**: {golden_finger.get('name', '?')} Lv.{golden_finger.get('level', '?')}"
  106. )
  107. if "strand_tracker" in state:
  108. tracker = state["strand_tracker"]
  109. history = tracker.get("history", [])[-5:]
  110. if history:
  111. items: List[str] = []
  112. for row in history:
  113. if not isinstance(row, dict):
  114. continue
  115. chapter = row.get("chapter", "?")
  116. strand = row.get("strand") or row.get("dominant") or "unknown"
  117. items.append(f"Ch{chapter}:{strand}")
  118. if items:
  119. summary_parts.append(f"**近5章Strand**: {', '.join(items)}")
  120. plot_threads = state.get("plot_threads", {}) if isinstance(state.get("plot_threads"), dict) else {}
  121. foreshadowing = plot_threads.get("foreshadowing", [])
  122. if isinstance(foreshadowing, list) and foreshadowing:
  123. active = [row for row in foreshadowing if row.get("status") in {"active", "未回收"}]
  124. urgent = [row for row in active if row.get("urgency", 0) > 50]
  125. if urgent:
  126. urgent_list = [
  127. f"{row.get('content', '?')[:30]}... (紧急度:{row.get('urgency')})"
  128. for row in urgent[:3]
  129. ]
  130. summary_parts.append(f"**紧急伏笔**: {'; '.join(urgent_list)}")
  131. return "\n".join(summary_parts)
  132. def _load_contract_context(project_root: Path, chapter_num: int) -> Dict[str, Any]:
  133. """Build context via ContextManager and return selected sections."""
  134. _ensure_scripts_path()
  135. from data_modules.config import DataModulesConfig
  136. from data_modules.context_manager import ContextManager
  137. config = DataModulesConfig.from_project_root(project_root)
  138. manager = ContextManager(config)
  139. payload = manager.build_context(
  140. chapter=chapter_num,
  141. template="plot",
  142. use_snapshot=True,
  143. save_snapshot=True,
  144. max_chars=8000,
  145. )
  146. sections = payload.get("sections", {})
  147. return {
  148. "context_contract_version": (payload.get("meta") or {}).get("context_contract_version"),
  149. "reader_signal": (sections.get("reader_signal") or {}).get("content", {}),
  150. "genre_profile": (sections.get("genre_profile") or {}).get("content", {}),
  151. "writing_guidance": (sections.get("writing_guidance") or {}).get("content", {}),
  152. }
  153. def build_chapter_context_payload(project_root: Path, chapter_num: int) -> Dict[str, Any]:
  154. """Assemble full chapter context payload for text/json output."""
  155. outline = extract_chapter_outline(project_root, chapter_num)
  156. prev_summaries = []
  157. for prev_ch in range(max(1, chapter_num - 2), chapter_num):
  158. summary = extract_chapter_summary(project_root, prev_ch)
  159. prev_summaries.append(f"### 第{prev_ch}章摘要\n{summary}")
  160. state_summary = extract_state_summary(project_root)
  161. contract_context = _load_contract_context(project_root, chapter_num)
  162. return {
  163. "chapter": chapter_num,
  164. "outline": outline,
  165. "previous_summaries": prev_summaries,
  166. "state_summary": state_summary,
  167. "context_contract_version": contract_context.get("context_contract_version"),
  168. "reader_signal": contract_context.get("reader_signal", {}),
  169. "genre_profile": contract_context.get("genre_profile", {}),
  170. "writing_guidance": contract_context.get("writing_guidance", {}),
  171. }
  172. def _render_text(payload: Dict[str, Any]) -> str:
  173. chapter_num = payload.get("chapter")
  174. lines: List[str] = []
  175. lines.append(f"# 第 {chapter_num} 章创作上下文")
  176. lines.append("")
  177. lines.append("## 本章大纲")
  178. lines.append("")
  179. lines.append(str(payload.get("outline", "")))
  180. lines.append("")
  181. lines.append("---")
  182. lines.append("")
  183. lines.append("## 前文摘要")
  184. lines.append("")
  185. for item in payload.get("previous_summaries", []):
  186. lines.append(item)
  187. lines.append("")
  188. lines.append("---")
  189. lines.append("")
  190. lines.append("## 当前状态")
  191. lines.append("")
  192. lines.append(str(payload.get("state_summary", "")))
  193. lines.append("")
  194. contract_version = payload.get("context_contract_version")
  195. if contract_version:
  196. lines.append(f"## Contract ({contract_version})")
  197. lines.append("")
  198. writing_guidance = payload.get("writing_guidance") or {}
  199. guidance_items = writing_guidance.get("guidance_items") or []
  200. if guidance_items:
  201. lines.append("## 写作执行建议")
  202. lines.append("")
  203. for idx, item in enumerate(guidance_items, start=1):
  204. lines.append(f"{idx}. {item}")
  205. lines.append("")
  206. reader_signal = payload.get("reader_signal") or {}
  207. review_trend = reader_signal.get("review_trend") or {}
  208. if review_trend:
  209. overall_avg = review_trend.get("overall_avg")
  210. lines.append("## 追读信号")
  211. lines.append("")
  212. lines.append(f"- 最近审查均分: {overall_avg}")
  213. low_ranges = reader_signal.get("low_score_ranges") or []
  214. if low_ranges:
  215. lines.append(f"- 低分区间数: {len(low_ranges)}")
  216. lines.append("")
  217. genre_profile = payload.get("genre_profile") or {}
  218. if genre_profile.get("genre"):
  219. lines.append("## 题材锚定")
  220. lines.append("")
  221. lines.append(f"- 题材: {genre_profile.get('genre')}")
  222. refs = genre_profile.get("reference_hints") or []
  223. for row in refs[:3]:
  224. lines.append(f"- {row}")
  225. lines.append("")
  226. return "\n".join(lines).rstrip() + "\n"
  227. def main():
  228. parser = argparse.ArgumentParser(description="提取章节创作所需的精简上下文")
  229. parser.add_argument("--chapter", type=int, required=True, help="目标章节号")
  230. parser.add_argument("--project-root", type=str, help="项目根目录")
  231. parser.add_argument("--format", choices=["text", "json"], default="text", help="输出格式")
  232. args = parser.parse_args()
  233. try:
  234. project_root = Path(args.project_root) if args.project_root else find_project_root()
  235. payload = build_chapter_context_payload(project_root, args.chapter)
  236. if args.format == "json":
  237. print(json.dumps(payload, ensure_ascii=False, indent=2))
  238. else:
  239. print(_render_text(payload), end="")
  240. except Exception as exc:
  241. print(f"❌ 错误: {exc}", file=sys.stderr)
  242. sys.exit(1)
  243. if __name__ == "__main__":
  244. main()