# golden_three_checker.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. 黄金三章检查工具 v2.0 (LLM-Driven)
  4. 功能:检测小说前三章是否符合"黄金三章"标准
  5. v2.0 重大升级:
  6. - 保留关键词预检作为快速模式
  7. - 新增 LLM 深度评估模式(AI Native)
  8. - 生成结构化评估 Prompt,解析 XML 评估结果
  9. 核心检查点:
  10. - 第 1 章:300 字内主角出场 + 金手指线索 + 强冲突开局
  11. - 第 2 章:金手指展示 + 初次小胜 + 即时爽点
  12. - 第 3 章:悬念钩子 + 下一阶段预告 + 爽点密度 >= 1
  13. 使用方法:
  14. python golden_three_checker.py --auto # 快速关键词模式
  15. python golden_three_checker.py --auto --mode llm # LLM 深度评估(推荐)
  16. python golden_three_checker.py --auto --generate-prompt # 仅生成评估 Prompt
  17. """
  18. import sys
  19. import os
  20. import re
  21. import json
  22. import argparse
  23. from pathlib import Path
  24. from runtime_compat import enable_windows_utf8_stdio
  25. from typing import Dict, List, Optional, Any
  26. # 导入项目定位和章节路径模块
  27. from project_locator import resolve_project_root
  28. from chapter_paths import find_chapter_file
# Windows UTF-8 output fix
if sys.platform == "win32":
    enable_windows_utf8_stdio()
# ============================================================================
# LLM evaluation prompt template
# ============================================================================
# Rendered with str.format(); the placeholders are {chapter1_content},
# {chapter2_content} and {chapter3_content}. Any literal brace added to this
# template must be doubled, otherwise str.format() will fail.
LLM_EVALUATION_PROMPT = """你是一位网文编辑,专门负责评估小说开篇的"黄金三章"质量。
请根据以下标准,对这三章内容进行专业评估:
## 黄金三章标准
### 第 1 章核心检查点:
1. **主角 300 字内出场**:主角是否在前 300 字内登场?身份是否清晰?
2. **金手指线索**:是否有金手指/外挂的暗示或线索?
3. **强冲突开局**:开篇是否有足够强的冲突/危机/矛盾?
### 第 2 章核心检查点:
1. **金手指展示**:金手指是否有明确展示?读者能否理解其能力?
2. **初次小胜**:主角是否获得了第一次小规模胜利/成功?
3. **即时爽点**:是否有让读者感到爽快/满足的场景?
### 第 3 章核心检查点:
1. **悬念钩子**:章节结尾是否有悬念?能否驱动读者继续阅读?
2. **下一阶段预告**:是否暗示了接下来的剧情走向/新挑战?
3. **爽点密度**:本章是否至少有 1 个明显的爽点场景?
---
## 待评估内容
### 第 1 章
```
{chapter1_content}
```
### 第 2 章
```
{chapter2_content}
```
### 第 3 章
```
{chapter3_content}
```
---
## 输出要求
请以如下 XML 格式输出你的评估结果(务必严格遵循格式):
```xml
<golden_three_assessment>
<chapter num="1">
<check name="主角300字内出场" passed="true|false" score="0-100">
<evidence>具体证据/引用原文</evidence>
<suggestion>如未通过,给出改进建议</suggestion>
</check>
<check name="金手指线索" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="强冲突开局" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<chapter num="2">
<check name="金手指展示" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="初次小胜" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="即时爽点" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<chapter num="3">
<check name="悬念钩子" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="下一阶段预告" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="爽点密度>=1" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<overall_score>0-100</overall_score>
<verdict>优秀|良好|需改进|严重不足</verdict>
<top_issues>
<issue priority="1">最需要改进的问题</issue>
<issue priority="2">次要问题</issue>
</top_issues>
</golden_three_assessment>
```
现在开始评估:
"""
  121. class GoldenThreeChecker:
  122. """黄金三章检查器 v2.0"""
  123. def __init__(self, chapter_files: List[str], mode: str = "keyword"):
  124. """
  125. 初始化检查器
  126. Args:
  127. chapter_files: 章节文件路径列表(必须是前3章)
  128. mode: 检查模式 ("keyword" 快速模式, "llm" LLM评估模式)
  129. """
  130. if len(chapter_files) != 3:
  131. raise ValueError("必须提供前 3 章的文件路径")
  132. self.chapter_files = chapter_files
  133. self.mode = mode
  134. self.chapters: List[Dict[str, Any]] = []
  135. self.results: Dict[str, Any] = {
  136. "mode": mode,
  137. "ch1": {"主角300字内出场": False, "金手指线索": False, "强冲突开局": False, "详细": {}},
  138. "ch2": {"金手指展示": False, "初次小胜": False, "即时爽点": False, "详细": {}},
  139. "ch3": {"悬念钩子": False, "下一阶段预告": False, "爽点密度>=1": False, "详细": {}},
  140. }
  141. def load_chapters(self) -> None:
  142. """加载章节内容"""
  143. for i, file_path in enumerate(self.chapter_files):
  144. if not os.path.exists(file_path):
  145. raise FileNotFoundError(f"文件不存在: {file_path}")
  146. with open(file_path, 'r', encoding='utf-8') as f:
  147. content = f.read()
  148. self.chapters.append({
  149. "number": i + 1,
  150. "path": file_path,
  151. "content": content,
  152. "word_count": len(re.sub(r'\s+', '', content))
  153. })
  154. # ============================================================================
  155. # 快速关键词模式(保留原有逻辑)
  156. # ============================================================================
  157. def check_chapter1_keywords(self) -> None:
  158. """检查第1章(关键词模式)"""
  159. content = self.chapters[0]["content"]
  160. first_300_chars = content[:300]
  161. # 检查1: 主角 300 字内出场
  162. protagonist_keywords = ["林天", "我", "主角", "少年", "他", "叶凡", "萧炎", "楚枫"]
  163. for keyword in protagonist_keywords:
  164. if keyword in first_300_chars:
  165. self.results["ch1"]["主角300字内出场"] = True
  166. self.results["ch1"]["详细"]["主角出场关键词"] = keyword
  167. break
  168. # 检查2: 金手指线索
  169. golden_finger_keywords = [
  170. "系统", "空间", "重生", "穿越", "戒指", "老爷爷",
  171. "器灵", "传承", "血脉", "觉醒", "签到", "任务", "面板", "属性"
  172. ]
  173. found = [kw for kw in golden_finger_keywords if kw in content]
  174. self.results["ch1"]["金手指线索"] = len(found) > 0
  175. self.results["ch1"]["详细"]["金手指关键词"] = found
  176. # 检查3: 强冲突开局
  177. conflict_keywords = [
  178. "退婚", "羞辱", "嘲讽", "废物", "落魄", "危机",
  179. "追杀", "绝境", "被困", "重伤", "濒死", "灭族"
  180. ]
  181. found = [kw for kw in conflict_keywords if kw in content]
  182. self.results["ch1"]["强冲突开局"] = len(found) > 0
  183. self.results["ch1"]["详细"]["冲突关键词"] = found
  184. def check_chapter2_keywords(self) -> None:
  185. """检查第2章(关键词模式)"""
  186. content = self.chapters[1]["content"]
  187. system_display_keywords = ["【", "╔", "姓名", "境界", "力量", "属性", "获得", "奖励", "升级"]
  188. found = [kw for kw in system_display_keywords if kw in content]
  189. self.results["ch2"]["金手指展示"] = len(found) >= 2
  190. self.results["ch2"]["详细"]["展示关键词"] = found
  191. victory_keywords = ["击败", "胜利", "获胜", "成功", "通过", "突破", "秒杀", "碾压"]
  192. found = [kw for kw in victory_keywords if kw in content]
  193. self.results["ch2"]["初次小胜"] = len(found) > 0
  194. self.results["ch2"]["详细"]["胜利关键词"] = found
  195. cool_keywords = ["震惊", "不可能", "怎么会", "全场哗然", "目瞪口呆", "难以置信"]
  196. found = [kw for kw in cool_keywords if kw in content]
  197. self.results["ch2"]["即时爽点"] = len(found) >= 2
  198. self.results["ch2"]["详细"]["爽点关键词"] = found
  199. def check_chapter3_keywords(self) -> None:
  200. """检查第3章(关键词模式)"""
  201. content = self.chapters[2]["content"]
  202. last_300_chars = content[-300:]
  203. suspense_keywords = ["?", "!", "危机", "即将", "突然", "就在这时", "阴影", "杀机"]
  204. found = [kw for kw in suspense_keywords if kw in last_300_chars]
  205. self.results["ch3"]["悬念钩子"] = len(found) >= 2
  206. self.results["ch3"]["详细"]["悬念关键词"] = found
  207. preview_keywords = ["秘境", "大比", "选拔", "试炼", "任务", "挑战", "前往", "即将"]
  208. found = [kw for kw in preview_keywords if kw in content]
  209. self.results["ch3"]["下一阶段预告"] = len(found) > 0
  210. self.results["ch3"]["详细"]["预告关键词"] = found
  211. cool_count = sum(content.count(kw) for kw in ["震惊", "不可能", "全场哗然", "天才", "击败", "获得"])
  212. self.results["ch3"]["爽点密度>=1"] = cool_count >= 1
  213. self.results["ch3"]["详细"]["爽点统计"] = cool_count
  214. # ============================================================================
  215. # LLM 评估模式
  216. # ============================================================================
  217. def generate_llm_prompt(self) -> str:
  218. """生成 LLM 评估 Prompt"""
  219. # 截取每章内容(避免过长)
  220. max_chars_per_chapter = 6000
  221. ch1 = self.chapters[0]["content"][:max_chars_per_chapter]
  222. ch2 = self.chapters[1]["content"][:max_chars_per_chapter]
  223. ch3 = self.chapters[2]["content"][:max_chars_per_chapter]
  224. prompt = LLM_EVALUATION_PROMPT.format(
  225. chapter1_content=ch1,
  226. chapter2_content=ch2,
  227. chapter3_content=ch3
  228. )
  229. return prompt
  230. def parse_llm_response(self, xml_response: str) -> Dict[str, Any]:
  231. """解析 LLM 返回的 XML 评估结果"""
  232. results: Dict[str, Any] = {
  233. "mode": "llm",
  234. "ch1": {"详细": {}},
  235. "ch2": {"详细": {}},
  236. "ch3": {"详细": {}},
  237. "overall_score": 0,
  238. "verdict": "",
  239. "top_issues": []
  240. }
  241. # 提取 overall_score
  242. score_match = re.search(r'<overall_score>(\d+)</overall_score>', xml_response)
  243. if score_match:
  244. results["overall_score"] = int(score_match.group(1))
  245. # 提取 verdict
  246. verdict_match = re.search(r'<verdict>([^<]+)</verdict>', xml_response)
  247. if verdict_match:
  248. results["verdict"] = verdict_match.group(1).strip()
  249. # 提取每章的检查点
  250. chapter_pattern = re.compile(
  251. r'<chapter num="(\d)">(.*?)</chapter>',
  252. re.DOTALL
  253. )
  254. check_pattern = re.compile(
  255. r'<check name="([^"]+)" passed="(true|false)" score="(\d+)">\s*'
  256. r'<evidence>([^<]*)</evidence>\s*'
  257. r'<suggestion>([^<]*)</suggestion>\s*'
  258. r'</check>',
  259. re.DOTALL
  260. )
  261. for chapter_match in chapter_pattern.finditer(xml_response):
  262. chapter_num = chapter_match.group(1)
  263. chapter_content = chapter_match.group(2)
  264. chapter_key = f"ch{chapter_num}"
  265. for check_match in check_pattern.finditer(chapter_content):
  266. check_name = check_match.group(1)
  267. passed = check_match.group(2) == "true"
  268. score = int(check_match.group(3))
  269. evidence = check_match.group(4).strip()
  270. suggestion = check_match.group(5).strip()
  271. results[chapter_key][check_name] = passed
  272. results[chapter_key]["详细"][check_name] = {
  273. "score": score,
  274. "evidence": evidence,
  275. "suggestion": suggestion
  276. }
  277. # 提取 top_issues
  278. issue_pattern = re.compile(r'<issue priority="(\d)">([^<]+)</issue>')
  279. for issue_match in issue_pattern.finditer(xml_response):
  280. priority = int(issue_match.group(1))
  281. issue_text = issue_match.group(2).strip()
  282. results["top_issues"].append({"priority": priority, "issue": issue_text})
  283. return results
  284. # ============================================================================
  285. # 报告生成
  286. # ============================================================================
  287. def calculate_score(self) -> tuple:
  288. """计算总体得分"""
  289. total_checks = 0
  290. passed_checks = 0
  291. for chapter_key in ["ch1", "ch2", "ch3"]:
  292. for check_key, check_value in self.results[chapter_key].items():
  293. if check_key != "详细" and isinstance(check_value, bool):
  294. total_checks += 1
  295. if check_value:
  296. passed_checks += 1
  297. score = (passed_checks / total_checks) * 100 if total_checks > 0 else 0
  298. return score, passed_checks, total_checks
  299. def generate_report(self) -> str:
  300. """生成检查报告"""
  301. score, passed, total = self.calculate_score()
  302. report = []
  303. report.append("=" * 60)
  304. report.append(f"黄金三章诊断报告 (模式: {self.mode})")
  305. report.append("=" * 60)
  306. report.append(f"\n总体得分: {score:.1f}% ({passed}/{total} 项通过)\n")
  307. # 第 1 章
  308. report.append("-" * 60)
  309. report.append("【第 1 章】检查结果")
  310. report.append("-" * 60)
  311. for check_name in ["主角300字内出场", "金手指线索", "强冲突开局"]:
  312. passed = self.results["ch1"].get(check_name, False)
  313. icon = "✅" if passed else "❌"
  314. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  315. # 显示详细信息
  316. detail = self.results["ch1"]["详细"].get(check_name)
  317. if isinstance(detail, dict):
  318. if detail.get("evidence"):
  319. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  320. if not passed and detail.get("suggestion"):
  321. report.append(f" └─ 建议: {detail['suggestion']}")
  322. elif isinstance(detail, list) and detail:
  323. report.append(f" └─ 关键词: {', '.join(detail[:5])}")
  324. # 第 2 章
  325. report.append("\n" + "-" * 60)
  326. report.append("【第 2 章】检查结果")
  327. report.append("-" * 60)
  328. for check_name in ["金手指展示", "初次小胜", "即时爽点"]:
  329. passed = self.results["ch2"].get(check_name, False)
  330. icon = "✅" if passed else "❌"
  331. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  332. detail = self.results["ch2"]["详细"].get(check_name)
  333. if isinstance(detail, dict) and detail.get("evidence"):
  334. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  335. elif isinstance(detail, list) and detail:
  336. report.append(f" └─ 关键词: {', '.join(detail[:5])}")
  337. # 第 3 章
  338. report.append("\n" + "-" * 60)
  339. report.append("【第 3 章】检查结果")
  340. report.append("-" * 60)
  341. for check_name in ["悬念钩子", "下一阶段预告", "爽点密度>=1"]:
  342. passed = self.results["ch3"].get(check_name, False)
  343. icon = "✅" if passed else "❌"
  344. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  345. detail = self.results["ch3"]["详细"].get(check_name)
  346. if isinstance(detail, dict) and detail.get("evidence"):
  347. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  348. # 改进建议
  349. report.append("\n" + "=" * 60)
  350. report.append("【改进建议】")
  351. report.append("=" * 60)
  352. if score < 60:
  353. report.append("\n🔴 警告: 开篇吸引力不足,严重影响读者留存率!")
  354. elif score < 80:
  355. report.append("\n🟡 注意: 开篇有改进空间")
  356. else:
  357. report.append("\n✅ 很好!开篇符合黄金三章标准")
  358. # LLM 模式的额外信息
  359. if self.mode == "llm" and self.results.get("top_issues"):
  360. report.append("\n优先修复:")
  361. for issue in self.results["top_issues"]:
  362. report.append(f" {issue['priority']}. {issue['issue']}")
  363. report.append("\n" + "=" * 60)
  364. return "\n".join(report)
  365. def run(self) -> None:
  366. """执行检查"""
  367. print("正在加载章节...")
  368. self.load_chapters()
  369. print(f"✅ 已加载 {len(self.chapters)} 章")
  370. for ch in self.chapters:
  371. print(f" - 第 {ch['number']} 章: {ch['word_count']} 字")
  372. print(f"\n正在执行检查 (模式: {self.mode})...\n")
  373. if self.mode == "keyword":
  374. self.check_chapter1_keywords()
  375. self.check_chapter2_keywords()
  376. self.check_chapter3_keywords()
  377. report = self.generate_report()
  378. print(report)
  379. elif self.mode == "llm":
  380. prompt = self.generate_llm_prompt()
  381. print("=" * 60)
  382. print("LLM 评估模式:请将以下 Prompt 发送给 Claude/GPT")
  383. print("=" * 60)
  384. print("\n--- PROMPT START ---\n")
  385. print(prompt[:2000] + "\n...[内容已截断,完整版见输出文件]...")
  386. print("\n--- PROMPT END ---\n")
  387. # 保存完整 prompt
  388. output_dir = Path(".webnovel")
  389. output_dir.mkdir(exist_ok=True)
  390. prompt_file = output_dir / "golden_three_prompt.md"
  391. with open(prompt_file, 'w', encoding='utf-8') as f:
  392. f.write(prompt)
  393. print(f"📄 完整 Prompt 已保存至: {prompt_file}")
  394. print("\n💡 使用方法:")
  395. print(" 1. 将 Prompt 发送给 Claude/GPT")
  396. print(" 2. 获取 XML 格式的评估结果")
  397. print(" 3. 运行: python golden_three_checker.py --parse-response <response.xml>")
  398. # 保存结果
  399. output_dir = Path(".webnovel")
  400. output_dir.mkdir(exist_ok=True)
  401. output_file = output_dir / "golden_three_report.json"
  402. with open(output_file, 'w', encoding='utf-8') as f:
  403. json.dump(self.results, f, ensure_ascii=False, indent=2)
  404. print(f"\n📄 详细结果已保存至: {output_file}")
  405. def main():
  406. parser = argparse.ArgumentParser(
  407. description="黄金三章检查工具 v2.0 (LLM-Driven)",
  408. formatter_class=argparse.RawDescriptionHelpFormatter,
  409. epilog="""
  410. 示例:
  411. # 快速关键词模式(默认)
  412. python golden_three_checker.py --auto
  413. # LLM 深度评估模式(推荐)
  414. python golden_three_checker.py --auto --mode llm
  415. # 解析 LLM 返回的评估结果
  416. python golden_three_checker.py --parse-response response.xml
  417. """.strip(),
  418. )
  419. parser.add_argument("chapter_files", nargs="*", help="前三章文件路径")
  420. parser.add_argument("--auto", action="store_true", help="自动定位前三章文件")
  421. parser.add_argument("--mode", choices=["keyword", "llm"], default="keyword",
  422. help="检查模式: keyword(快速) / llm(深度)")
  423. parser.add_argument("--project-root", default=None, help="项目根目录")
  424. parser.add_argument("--parse-response", metavar="FILE", help="解析 LLM 返回的 XML 文件")
  425. args = parser.parse_args()
  426. # 解析 LLM 响应模式
  427. if args.parse_response:
  428. if not os.path.exists(args.parse_response):
  429. print(f"❌ 文件不存在: {args.parse_response}")
  430. sys.exit(1)
  431. with open(args.parse_response, 'r', encoding='utf-8') as f:
  432. xml_content = f.read()
  433. checker = GoldenThreeChecker(["dummy"] * 3, mode="llm")
  434. checker.results = checker.parse_llm_response(xml_content)
  435. print("=" * 60)
  436. print("LLM 评估结果解析")
  437. print("=" * 60)
  438. print(json.dumps(checker.results, ensure_ascii=False, indent=2))
  439. sys.exit(0)
  440. # 正常检查模式
  441. chapter_files = []
  442. if args.auto or not args.chapter_files:
  443. try:
  444. project_root = resolve_project_root(args.project_root)
  445. except FileNotFoundError as e:
  446. print(f"❌ {e}")
  447. sys.exit(1)
  448. for i in range(1, 4):
  449. chapter_path = find_chapter_file(project_root, i)
  450. if chapter_path:
  451. chapter_files.append(str(chapter_path))
  452. else:
  453. print(f"❌ 找不到第 {i} 章文件")
  454. sys.exit(1)
  455. print(f"📂 项目根目录: {project_root}")
  456. print(f"📄 检测到前三章: {', '.join(Path(f).name for f in chapter_files)}\n")
  457. else:
  458. if len(args.chapter_files) < 3:
  459. print("用法: python golden_three_checker.py <第1章路径> <第2章路径> <第3章路径>")
  460. sys.exit(1)
  461. chapter_files = args.chapter_files[:3]
  462. try:
  463. checker = GoldenThreeChecker(chapter_files, mode=args.mode)
  464. checker.run()
  465. except Exception as e:
  466. print(f"❌ 错误: {e}")
  467. import traceback
  468. traceback.print_exc()
  469. sys.exit(1)
  470. if __name__ == "__main__":
  471. main()