chapter_outline_loader.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. from __future__ import annotations
  4. import json
  5. import re
  6. from pathlib import Path
  7. from typing import Any, Dict
  8. try:
  9. from chapter_paths import volume_num_for_chapter
  10. except ImportError: # pragma: no cover
  11. from scripts.chapter_paths import volume_num_for_chapter
  12. _CHAPTER_RANGE_RE = re.compile(r"^\s*(\d+)\s*-\s*(\d+)\s*$")
  13. def _parse_chapters_range(value: object) -> tuple[int, int] | None:
  14. if not isinstance(value, str):
  15. return None
  16. match = _CHAPTER_RANGE_RE.match(value)
  17. if not match:
  18. return None
  19. try:
  20. start = int(match.group(1))
  21. end = int(match.group(2))
  22. except ValueError:
  23. return None
  24. if start <= 0 or end <= 0 or start > end:
  25. return None
  26. return start, end
  27. def volume_num_for_chapter_from_state(project_root: Path, chapter_num: int) -> int | None:
  28. state_path = project_root / ".webnovel" / "state.json"
  29. if not state_path.exists():
  30. return None
  31. try:
  32. state = json.loads(state_path.read_text(encoding="utf-8"))
  33. except Exception:
  34. return None
  35. if not isinstance(state, dict):
  36. return None
  37. progress = state.get("progress")
  38. if not isinstance(progress, dict):
  39. return None
  40. volumes_planned = progress.get("volumes_planned")
  41. if not isinstance(volumes_planned, list):
  42. return None
  43. best: tuple[int, int] | None = None
  44. for item in volumes_planned:
  45. if not isinstance(item, dict):
  46. continue
  47. volume = item.get("volume")
  48. if not isinstance(volume, int) or volume <= 0:
  49. continue
  50. parsed = _parse_chapters_range(item.get("chapters_range"))
  51. if not parsed:
  52. continue
  53. start, end = parsed
  54. if start <= chapter_num <= end:
  55. candidate = (start, volume)
  56. if best is None or candidate[0] > best[0] or (candidate[0] == best[0] and candidate[1] < best[1]):
  57. best = candidate
  58. return best[1] if best else None
  59. def _find_split_outline_file(outline_dir: Path, chapter_num: int) -> Path | None:
  60. patterns = [
  61. f"第{chapter_num}章*.md",
  62. f"第{chapter_num:02d}章*.md",
  63. f"第{chapter_num:03d}章*.md",
  64. f"第{chapter_num:04d}章*.md",
  65. ]
  66. for pattern in patterns:
  67. matches = sorted(outline_dir.glob(pattern))
  68. if matches:
  69. return matches[0]
  70. return None
  71. def _find_volume_outline_file(project_root: Path, chapter_num: int) -> Path | None:
  72. outline_dir = project_root / "大纲"
  73. volume_num = volume_num_for_chapter_from_state(project_root, chapter_num) or volume_num_for_chapter(chapter_num)
  74. candidates = [
  75. outline_dir / f"第{volume_num}卷-详细大纲.md",
  76. outline_dir / f"第{volume_num}卷 - 详细大纲.md",
  77. outline_dir / f"第{volume_num}卷 详细大纲.md",
  78. ]
  79. return next((path for path in candidates if path.exists()), None)
  80. def _extract_outline_section(content: str, chapter_num: int) -> str | None:
  81. patterns = [
  82. rf"###\s*第\s*{chapter_num}\s*章[::]\s*(.+?)(?=###\s*第\s*\d+\s*章|##\s|$)",
  83. rf"###\s*第{chapter_num}章[::]\s*(.+?)(?=###\s*第\d+章|##\s|$)",
  84. ]
  85. for pattern in patterns:
  86. match = re.search(pattern, content, re.DOTALL)
  87. if match:
  88. return match.group(0).strip()
  89. return None
  90. def _parse_chinese_chapter_num(value: str) -> int | None:
  91. text = str(value or "").strip()
  92. if not text:
  93. return None
  94. if text.isdigit():
  95. return int(text)
  96. if text in _CHINESE_NUMERAL_DIGITS:
  97. return _CHINESE_NUMERAL_DIGITS[text]
  98. if text == "十":
  99. return 10
  100. if "十" in text:
  101. left, _, right = text.partition("十")
  102. tens = _CHINESE_NUMERAL_DIGITS.get(left, 1 if not left else 0)
  103. ones = _CHINESE_NUMERAL_DIGITS.get(right, 0) if right else 0
  104. parsed = tens * 10 + ones
  105. return parsed or None
  106. parsed = 0
  107. for char in text:
  108. digit = _CHINESE_NUMERAL_DIGITS.get(char)
  109. if digit is None:
  110. return None
  111. parsed = parsed * 10 + digit
  112. return parsed or None
  113. def _extract_directive_section(content: str, chapter_num: int) -> str | None:
  114. matches = list(_CHAPTER_HEADING_RE.finditer(content))
  115. for index, match in enumerate(matches):
  116. parsed = _parse_chinese_chapter_num(match.group(2))
  117. if parsed != chapter_num:
  118. continue
  119. end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
  120. return content[match.start():end].strip()
  121. return _extract_outline_section(content, chapter_num)
  122. def load_chapter_outline(project_root: Path, chapter_num: int, max_chars: int | None = 1500) -> str:
  123. outline_dir = project_root / "大纲"
  124. split_outline = _find_split_outline_file(outline_dir, chapter_num)
  125. if split_outline is not None:
  126. return split_outline.read_text(encoding="utf-8")
  127. volume_outline = _find_volume_outline_file(project_root, chapter_num)
  128. if volume_outline is None:
  129. return f"⚠️ 大纲文件不存在:第 {chapter_num} 章"
  130. outline = _extract_outline_section(volume_outline.read_text(encoding="utf-8"), chapter_num)
  131. if outline is None:
  132. return f"⚠️ 未找到第 {chapter_num} 章的大纲"
  133. if max_chars and len(outline) > max_chars:
  134. return outline[:max_chars] + "\n...(已截断)"
  135. return outline
  136. _PLOT_SECTION_FIELD_MAP = {
  137. "cbn": "cbn",
  138. "cpns": "cpns",
  139. "cen": "cen",
  140. "必须覆盖节点": "mandatory_nodes",
  141. "本章禁区": "prohibitions",
  142. }
  143. _CHAPTER_HEADING_RE = re.compile(
  144. r"^(#{1,6})\s*第\s*([0-9零〇一二两三四五六七八九十]+)\s*章\b.*$",
  145. re.MULTILINE,
  146. )
  147. _CHINESE_NUMERAL_DIGITS = {
  148. "零": 0,
  149. "〇": 0,
  150. "一": 1,
  151. "二": 2,
  152. "两": 2,
  153. "三": 3,
  154. "四": 4,
  155. "五": 5,
  156. "六": 6,
  157. "七": 7,
  158. "八": 8,
  159. "九": 9,
  160. }
  161. _DIRECTIVE_FIELD_MAP = {
  162. "目标": "goal",
  163. "本章目标": "goal",
  164. "章目标": "goal",
  165. "阻力": "obstacles",
  166. "障碍": "obstacles",
  167. "代价": "cost",
  168. "时间锚点": "time_anchor",
  169. "时间": "time_anchor",
  170. "章内跨度": "chapter_span",
  171. "章节跨度": "chapter_span",
  172. "倒计时状态": "countdown",
  173. "倒计时": "countdown",
  174. "cbn": "cbn",
  175. "cpns": "cpns",
  176. "cen": "cen",
  177. "必须覆盖节点": "must_cover_nodes",
  178. "本章禁区": "forbidden_zones",
  179. "章末未闭合问题": "chapter_end_open_question",
  180. "章末问题": "chapter_end_open_question",
  181. "钩子类型": "hook_type",
  182. "钩子强度": "hook_strength",
  183. "关键实体": "key_entities",
  184. "涉及实体": "key_entities",
  185. "strand": "strand",
  186. "反派层级": "antagonist_tier",
  187. }
  188. _DIRECTIVE_LIST_FIELDS = {"cpns", "must_cover_nodes", "forbidden_zones", "key_entities"}
  189. def _clean_plot_line(line: str) -> str:
  190. text = str(line or "").strip()
  191. text = re.sub(r"^[\-\*•]+\s*", "", text)
  192. text = re.sub(r"^\d+[\.、]\s*", "", text)
  193. return text.strip()
  194. def _append_plot_value(target: Dict[str, Any], field: str, value: str) -> None:
  195. value = _clean_plot_line(value)
  196. if not value:
  197. return
  198. if field in {"cpns", "mandatory_nodes", "prohibitions"}:
  199. target.setdefault(field, [])
  200. candidates = [value]
  201. if field in {"mandatory_nodes", "prohibitions"}:
  202. split_values = [part.strip() for part in re.split(r"[、,,;;|]+", value) if part.strip()]
  203. if split_values:
  204. candidates = split_values
  205. for item in candidates:
  206. if item not in target[field]:
  207. target[field].append(item)
  208. return
  209. if field not in target:
  210. target[field] = value
  211. def _split_directive_values(value: str) -> list[str]:
  212. text = _clean_plot_line(value)
  213. if not text:
  214. return []
  215. return [part.strip() for part in re.split(r"[、,,;;|]+", text) if part.strip()]
  216. def _append_directive_value(target: Dict[str, Any], field: str, value: str) -> None:
  217. value = _clean_plot_line(value)
  218. if not value:
  219. return
  220. if field in _DIRECTIVE_LIST_FIELDS:
  221. target.setdefault(field, [])
  222. for item in _split_directive_values(value) or [value]:
  223. if item not in target[field]:
  224. target[field].append(item)
  225. return
  226. if field not in target:
  227. target[field] = value
  228. def parse_chapter_plot_structure(outline_text: str) -> Dict[str, Any]:
  229. text = str(outline_text or "")
  230. if not text or text.startswith("⚠️"):
  231. return {}
  232. structure: Dict[str, Any] = {}
  233. current_field = ""
  234. for raw_line in text.splitlines():
  235. stripped = raw_line.strip()
  236. if not stripped:
  237. current_field = ""
  238. continue
  239. if re.match(r"^#{1,6}\s*第\s*\d+\s*章", stripped):
  240. current_field = ""
  241. continue
  242. cleaned = _clean_plot_line(stripped)
  243. matched_field = ""
  244. matched_value = ""
  245. for label, field in _PLOT_SECTION_FIELD_MAP.items():
  246. match = re.match(rf"^{re.escape(label)}\s*[::]\s*(.*)$", cleaned, re.IGNORECASE)
  247. if match:
  248. matched_field = field
  249. matched_value = match.group(1).strip()
  250. break
  251. if matched_field:
  252. current_field = matched_field
  253. _append_plot_value(structure, matched_field, matched_value)
  254. continue
  255. if current_field:
  256. _append_plot_value(structure, current_field, cleaned)
  257. cpns = structure.get("cpns") or []
  258. mandatory_nodes = structure.get("mandatory_nodes") or []
  259. prohibitions = structure.get("prohibitions") or []
  260. if not any([structure.get("cbn"), cpns, structure.get("cen"), mandatory_nodes, prohibitions]):
  261. return {}
  262. return {
  263. "cbn": str(structure.get("cbn") or "").strip(),
  264. "cpns": cpns,
  265. "cen": str(structure.get("cen") or "").strip(),
  266. "mandatory_nodes": mandatory_nodes,
  267. "prohibitions": prohibitions,
  268. "source": "chapter_outline",
  269. }
  270. def load_chapter_plot_structure(project_root: Path, chapter_num: int) -> Dict[str, Any]:
  271. outline = load_chapter_outline(project_root, chapter_num, max_chars=None)
  272. return parse_chapter_plot_structure(outline)
  273. def parse_chapter_execution_directive(outline_text: str) -> Dict[str, Any]:
  274. text = str(outline_text or "")
  275. if not text or text.startswith("⚠️"):
  276. return {}
  277. directive: Dict[str, Any] = {}
  278. current_field = ""
  279. for raw_line in text.splitlines():
  280. stripped = raw_line.strip()
  281. if not stripped:
  282. current_field = ""
  283. continue
  284. if _CHAPTER_HEADING_RE.match(stripped):
  285. current_field = ""
  286. continue
  287. cleaned = _clean_plot_line(stripped)
  288. matched_field = ""
  289. matched_value = ""
  290. for label, field in _DIRECTIVE_FIELD_MAP.items():
  291. match = re.match(rf"^{re.escape(label)}\s*[::]\s*(.*)$", cleaned, re.IGNORECASE)
  292. if match:
  293. matched_field = field
  294. matched_value = match.group(1).strip()
  295. break
  296. if matched_field:
  297. current_field = matched_field
  298. _append_directive_value(directive, matched_field, matched_value)
  299. continue
  300. if current_field:
  301. _append_directive_value(directive, current_field, cleaned)
  302. plot_structure = parse_chapter_plot_structure(text)
  303. for source_key, target_key in (
  304. ("cbn", "cbn"),
  305. ("cpns", "cpns"),
  306. ("cen", "cen"),
  307. ("mandatory_nodes", "must_cover_nodes"),
  308. ("prohibitions", "forbidden_zones"),
  309. ):
  310. if plot_structure.get(source_key) and not directive.get(target_key):
  311. directive[target_key] = plot_structure[source_key]
  312. if directive:
  313. directive["source"] = "chapter_outline"
  314. return directive
  315. def load_chapter_execution_directive(project_root: Path, chapter_num: int) -> Dict[str, Any]:
  316. outline_dir = project_root / "大纲"
  317. split_outline = _find_split_outline_file(outline_dir, chapter_num)
  318. if split_outline is not None:
  319. return parse_chapter_execution_directive(split_outline.read_text(encoding="utf-8"))
  320. volume_outline = _find_volume_outline_file(project_root, chapter_num)
  321. if volume_outline is None:
  322. return {}
  323. section = _extract_directive_section(volume_outline.read_text(encoding="utf-8"), chapter_num)
  324. if section is None:
  325. return {}
  326. return parse_chapter_execution_directive(section)