#!/usr/bin/env python3
"""
extract_chapter_context.py - 提取章节创作所需的精简上下文

功能:
- 提取当前章节的大纲片段(~500字)
- 提取前2章的摘要(~400字)
- 提取 state.json 关键字段(~300字)

用法:
    python extract_chapter_context.py --chapter 7
    python extract_chapter_context.py --chapter 7 --project-root ./webnovel-project
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Optional
  17. def find_project_root(start_path: Path = None) -> Path:
  18. """查找包含 .webnovel 目录的项目根目录"""
  19. if start_path is None:
  20. start_path = Path.cwd()
  21. search_paths = [
  22. start_path,
  23. start_path / "webnovel-project",
  24. start_path.parent,
  25. ]
  26. for path in search_paths:
  27. if (path / ".webnovel").exists():
  28. return path
  29. raise FileNotFoundError("未找到 .webnovel 目录,请确认项目路径")
  30. def extract_chapter_outline(project_root: Path, chapter_num: int) -> str:
  31. """从大纲文件中提取指定章节的大纲片段"""
  32. volume_num = (chapter_num - 1) // 50 + 1
  33. outline_file = project_root / "大纲" / f"第{volume_num}卷-详细大纲.md"
  34. if not outline_file.exists():
  35. return f"⚠️ 大纲文件不存在: {outline_file}"
  36. content = outline_file.read_text(encoding="utf-8")
  37. # 匹配章节大纲块
  38. # 格式:### 第 N 章:标题 或 ### 第 N 章: 标题
  39. pattern = rf"###\s*第\s*{chapter_num}\s*章[::]\s*(.+?)(?=###\s*第\s*\d+\s*章|##\s|$)"
  40. match = re.search(pattern, content, re.DOTALL)
  41. if match:
  42. outline = match.group(0).strip()
  43. # 限制长度
  44. if len(outline) > 1500:
  45. outline = outline[:1500] + "\n...(已截断)"
  46. return outline
  47. # 尝试另一种格式:### 第 1 章:标题(无空格)
  48. pattern2 = rf"###\s*第{chapter_num}章[::]\s*(.+?)(?=###\s*第\d+章|##\s|$)"
  49. match2 = re.search(pattern2, content, re.DOTALL)
  50. if match2:
  51. outline = match2.group(0).strip()
  52. if len(outline) > 1500:
  53. outline = outline[:1500] + "\n...(已截断)"
  54. return outline
  55. return f"⚠️ 未找到第 {chapter_num} 章的大纲"
  56. def extract_chapter_summary(project_root: Path, chapter_num: int) -> str:
  57. """提取指定章节的摘要(从章节文件末尾的"本章摘要"部分)"""
  58. volume_num = (chapter_num - 1) // 50 + 1
  59. chapter_dir = project_root / "正文" / f"第{volume_num}卷"
  60. # 尝试匹配章节文件
  61. patterns = [
  62. f"第{chapter_num:03d}章*.md",
  63. f"第{chapter_num:04d}章*.md",
  64. ]
  65. chapter_file = None
  66. for pattern in patterns:
  67. matches = list(chapter_dir.glob(pattern))
  68. if matches:
  69. chapter_file = matches[0]
  70. break
  71. if not chapter_file or not chapter_file.exists():
  72. return f"⚠️ 第 {chapter_num} 章文件不存在"
  73. content = chapter_file.read_text(encoding="utf-8")
  74. # 尝试提取"本章摘要"部分
  75. summary_match = re.search(r"##\s*本章摘要\s*\n(.+?)(?=##|$)", content, re.DOTALL)
  76. if summary_match:
  77. return summary_match.group(1).strip()
  78. # 如果没有摘要,提取"本章统计"部分
  79. stats_match = re.search(r"##\s*本章统计\s*\n(.+?)(?=##|$)", content, re.DOTALL)
  80. if stats_match:
  81. return f"[无摘要,仅统计]\n{stats_match.group(1).strip()}"
  82. # 最后降级:提取前500字作为摘要
  83. # 跳过标题
  84. lines = content.split("\n")
  85. text_lines = [l for l in lines if not l.startswith("#") and l.strip()]
  86. text = "\n".join(text_lines)[:500]
  87. return f"[自动截取前500字]\n{text}..."
  88. def extract_state_summary(project_root: Path) -> str:
  89. """提取 state.json 的关键字段"""
  90. state_file = project_root / ".webnovel" / "state.json"
  91. if not state_file.exists():
  92. return "⚠️ state.json 不存在"
  93. state = json.loads(state_file.read_text(encoding="utf-8"))
  94. # 提取关键字段
  95. summary_parts = []
  96. # 进度
  97. if "progress" in state:
  98. p = state["progress"]
  99. summary_parts.append(f"**进度**: 第 {p.get('current_chapter', '?')} 章 / {p.get('total_words', '?')} 字")
  100. # 主角状态
  101. if "protagonist_state" in state:
  102. ps = state["protagonist_state"]
  103. power = ps.get("power", {})
  104. summary_parts.append(f"**主角实力**: {power.get('realm', '?')} {power.get('layer', '?')}层")
  105. summary_parts.append(f"**当前位置**: {ps.get('location', '?')}")
  106. gf = ps.get("golden_finger", {})
  107. summary_parts.append(f"**金手指**: {gf.get('name', '?')} Lv.{gf.get('level', '?')}")
  108. # Strand 追踪
  109. if "strand_tracker" in state:
  110. st = state["strand_tracker"]
  111. history = st.get("history", [])[-5:] # 最近5章
  112. if history:
  113. strand_str = ", ".join([f"Ch{h['chapter']}:{h['strand']}" for h in history])
  114. summary_parts.append(f"**近5章Strand**: {strand_str}")
  115. # 活跃伏笔(只显示紧急的)
  116. if "foreshadowing" in state:
  117. fs = state["foreshadowing"]
  118. active = [f for f in fs if f.get("status") == "active"]
  119. urgent = [f for f in active if f.get("urgency", 0) > 50]
  120. if urgent:
  121. urgent_list = [f"{f.get('content', '?')[:30]}... (紧急度:{f.get('urgency')})" for f in urgent[:3]]
  122. summary_parts.append(f"**紧急伏笔**: {'; '.join(urgent_list)}")
  123. return "\n".join(summary_parts)
  124. def main():
  125. parser = argparse.ArgumentParser(description="提取章节创作所需的精简上下文")
  126. parser.add_argument("--chapter", type=int, required=True, help="目标章节号")
  127. parser.add_argument("--project-root", type=str, help="项目根目录")
  128. parser.add_argument("--format", choices=["text", "json"], default="text", help="输出格式")
  129. args = parser.parse_args()
  130. try:
  131. if args.project_root:
  132. project_root = Path(args.project_root)
  133. else:
  134. project_root = find_project_root()
  135. chapter_num = args.chapter
  136. # 提取各部分
  137. outline = extract_chapter_outline(project_root, chapter_num)
  138. # 提取前2章摘要
  139. prev_summaries = []
  140. for prev_ch in range(max(1, chapter_num - 2), chapter_num):
  141. summary = extract_chapter_summary(project_root, prev_ch)
  142. prev_summaries.append(f"### 第 {prev_ch} 章摘要\n{summary}")
  143. state_summary = extract_state_summary(project_root)
  144. if args.format == "json":
  145. result = {
  146. "chapter": chapter_num,
  147. "outline": outline,
  148. "previous_summaries": prev_summaries,
  149. "state_summary": state_summary,
  150. }
  151. print(json.dumps(result, ensure_ascii=False, indent=2))
  152. else:
  153. print(f"# 第 {chapter_num} 章创作上下文\n")
  154. print("## 本章大纲\n")
  155. print(outline)
  156. print("\n---\n")
  157. print("## 前文摘要\n")
  158. for s in prev_summaries:
  159. print(s)
  160. print()
  161. print("---\n")
  162. print("## 当前状态\n")
  163. print(state_summary)
  164. except Exception as e:
  165. print(f"❌ 错误: {e}", file=sys.stderr)
  166. sys.exit(1)
  167. if __name__ == "__main__":
  168. main()