# golden_three_checker.py (22 KB)
  1. # -*- coding: utf-8 -*-
  2. """
  3. 黄金三章检查工具 v2.0 (LLM-Driven)
  4. 功能:检测小说前三章是否符合"黄金三章"标准
  5. v2.0 重大升级:
  6. - 保留关键词预检作为快速模式
  7. - 新增 LLM 深度评估模式(AI Native)
  8. - 生成结构化评估 Prompt,解析 XML 评估结果
  9. 核心检查点:
  10. - 第 1 章:300 字内主角出场 + 金手指线索 + 强冲突开局
  11. - 第 2 章:金手指展示 + 初次小胜 + 即时爽点
  12. - 第 3 章:悬念钩子 + 下一阶段预告 + 爽点密度 >= 1
  13. 使用方法:
  14. python golden_three_checker.py --auto # 快速关键词模式
  15. python golden_three_checker.py --auto --mode llm # LLM 深度评估(推荐)
  16. python golden_three_checker.py --auto --generate-prompt # 仅生成评估 Prompt
  17. """
  18. import sys
  19. import os
  20. import re
  21. import json
  22. import argparse
  23. from pathlib import Path
  24. from typing import Dict, List, Optional, Any
  25. # 导入项目定位和章节路径模块
  26. from project_locator import resolve_project_root
  27. from chapter_paths import find_chapter_file
  28. # Windows UTF-8 输出修复
  29. if sys.platform == 'win32':
  30. import io
  31. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  32. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
# ============================================================================
# LLM evaluation prompt template
#
# Filled in with str.format() using the placeholders {chapter1_content},
# {chapter2_content} and {chapter3_content}.  The XML skeleton in the
# "output requirements" section is the format that
# GoldenThreeChecker.parse_llm_response() extracts with regular expressions,
# so the tag and attribute names here must stay in sync with that parser.
# ============================================================================
LLM_EVALUATION_PROMPT = """你是一位网文编辑,专门负责评估小说开篇的"黄金三章"质量。
请根据以下标准,对这三章内容进行专业评估:
## 黄金三章标准
### 第 1 章核心检查点:
1. **主角 300 字内出场**:主角是否在前 300 字内登场?身份是否清晰?
2. **金手指线索**:是否有金手指/外挂的暗示或线索?
3. **强冲突开局**:开篇是否有足够强的冲突/危机/矛盾?
### 第 2 章核心检查点:
1. **金手指展示**:金手指是否有明确展示?读者能否理解其能力?
2. **初次小胜**:主角是否获得了第一次小规模胜利/成功?
3. **即时爽点**:是否有让读者感到爽快/满足的场景?
### 第 3 章核心检查点:
1. **悬念钩子**:章节结尾是否有悬念?能否驱动读者继续阅读?
2. **下一阶段预告**:是否暗示了接下来的剧情走向/新挑战?
3. **爽点密度**:本章是否至少有 1 个明显的爽点场景?
---
## 待评估内容
### 第 1 章
```
{chapter1_content}
```
### 第 2 章
```
{chapter2_content}
```
### 第 3 章
```
{chapter3_content}
```
---
## 输出要求
请以如下 XML 格式输出你的评估结果(务必严格遵循格式):
```xml
<golden_three_assessment>
<chapter num="1">
<check name="主角300字内出场" passed="true|false" score="0-100">
<evidence>具体证据/引用原文</evidence>
<suggestion>如未通过,给出改进建议</suggestion>
</check>
<check name="金手指线索" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="强冲突开局" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<chapter num="2">
<check name="金手指展示" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="初次小胜" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="即时爽点" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<chapter num="3">
<check name="悬念钩子" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="下一阶段预告" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
<check name="爽点密度>=1" passed="true|false" score="0-100">
<evidence>具体证据</evidence>
<suggestion>改进建议</suggestion>
</check>
</chapter>
<overall_score>0-100</overall_score>
<verdict>优秀|良好|需改进|严重不足</verdict>
<top_issues>
<issue priority="1">最需要改进的问题</issue>
<issue priority="2">次要问题</issue>
</top_issues>
</golden_three_assessment>
```
现在开始评估:
"""
  122. class GoldenThreeChecker:
  123. """黄金三章检查器 v2.0"""
  124. def __init__(self, chapter_files: List[str], mode: str = "keyword"):
  125. """
  126. 初始化检查器
  127. Args:
  128. chapter_files: 章节文件路径列表(必须是前3章)
  129. mode: 检查模式 ("keyword" 快速模式, "llm" LLM评估模式)
  130. """
  131. if len(chapter_files) != 3:
  132. raise ValueError("必须提供前 3 章的文件路径")
  133. self.chapter_files = chapter_files
  134. self.mode = mode
  135. self.chapters: List[Dict[str, Any]] = []
  136. self.results: Dict[str, Any] = {
  137. "mode": mode,
  138. "ch1": {"主角300字内出场": False, "金手指线索": False, "强冲突开局": False, "详细": {}},
  139. "ch2": {"金手指展示": False, "初次小胜": False, "即时爽点": False, "详细": {}},
  140. "ch3": {"悬念钩子": False, "下一阶段预告": False, "爽点密度>=1": False, "详细": {}},
  141. }
  142. def load_chapters(self) -> None:
  143. """加载章节内容"""
  144. for i, file_path in enumerate(self.chapter_files):
  145. if not os.path.exists(file_path):
  146. raise FileNotFoundError(f"文件不存在: {file_path}")
  147. with open(file_path, 'r', encoding='utf-8') as f:
  148. content = f.read()
  149. self.chapters.append({
  150. "number": i + 1,
  151. "path": file_path,
  152. "content": content,
  153. "word_count": len(re.sub(r'\s+', '', content))
  154. })
  155. # ============================================================================
  156. # 快速关键词模式(保留原有逻辑)
  157. # ============================================================================
  158. def check_chapter1_keywords(self) -> None:
  159. """检查第1章(关键词模式)"""
  160. content = self.chapters[0]["content"]
  161. first_300_chars = content[:300]
  162. # 检查1: 主角 300 字内出场
  163. protagonist_keywords = ["林天", "我", "主角", "少年", "他", "叶凡", "萧炎", "楚枫"]
  164. for keyword in protagonist_keywords:
  165. if keyword in first_300_chars:
  166. self.results["ch1"]["主角300字内出场"] = True
  167. self.results["ch1"]["详细"]["主角出场关键词"] = keyword
  168. break
  169. # 检查2: 金手指线索
  170. golden_finger_keywords = [
  171. "系统", "空间", "重生", "穿越", "戒指", "老爷爷",
  172. "器灵", "传承", "血脉", "觉醒", "签到", "任务", "面板", "属性"
  173. ]
  174. found = [kw for kw in golden_finger_keywords if kw in content]
  175. self.results["ch1"]["金手指线索"] = len(found) > 0
  176. self.results["ch1"]["详细"]["金手指关键词"] = found
  177. # 检查3: 强冲突开局
  178. conflict_keywords = [
  179. "退婚", "羞辱", "嘲讽", "废物", "落魄", "危机",
  180. "追杀", "绝境", "被困", "重伤", "濒死", "灭族"
  181. ]
  182. found = [kw for kw in conflict_keywords if kw in content]
  183. self.results["ch1"]["强冲突开局"] = len(found) > 0
  184. self.results["ch1"]["详细"]["冲突关键词"] = found
  185. def check_chapter2_keywords(self) -> None:
  186. """检查第2章(关键词模式)"""
  187. content = self.chapters[1]["content"]
  188. system_display_keywords = ["【", "╔", "姓名", "境界", "力量", "属性", "获得", "奖励", "升级"]
  189. found = [kw for kw in system_display_keywords if kw in content]
  190. self.results["ch2"]["金手指展示"] = len(found) >= 2
  191. self.results["ch2"]["详细"]["展示关键词"] = found
  192. victory_keywords = ["击败", "胜利", "获胜", "成功", "通过", "突破", "秒杀", "碾压"]
  193. found = [kw for kw in victory_keywords if kw in content]
  194. self.results["ch2"]["初次小胜"] = len(found) > 0
  195. self.results["ch2"]["详细"]["胜利关键词"] = found
  196. cool_keywords = ["震惊", "不可能", "怎么会", "全场哗然", "目瞪口呆", "难以置信"]
  197. found = [kw for kw in cool_keywords if kw in content]
  198. self.results["ch2"]["即时爽点"] = len(found) >= 2
  199. self.results["ch2"]["详细"]["爽点关键词"] = found
  200. def check_chapter3_keywords(self) -> None:
  201. """检查第3章(关键词模式)"""
  202. content = self.chapters[2]["content"]
  203. last_300_chars = content[-300:]
  204. suspense_keywords = ["?", "!", "危机", "即将", "突然", "就在这时", "阴影", "杀机"]
  205. found = [kw for kw in suspense_keywords if kw in last_300_chars]
  206. self.results["ch3"]["悬念钩子"] = len(found) >= 2
  207. self.results["ch3"]["详细"]["悬念关键词"] = found
  208. preview_keywords = ["秘境", "大比", "选拔", "试炼", "任务", "挑战", "前往", "即将"]
  209. found = [kw for kw in preview_keywords if kw in content]
  210. self.results["ch3"]["下一阶段预告"] = len(found) > 0
  211. self.results["ch3"]["详细"]["预告关键词"] = found
  212. cool_count = sum(content.count(kw) for kw in ["震惊", "不可能", "全场哗然", "天才", "击败", "获得"])
  213. self.results["ch3"]["爽点密度>=1"] = cool_count >= 1
  214. self.results["ch3"]["详细"]["爽点统计"] = cool_count
  215. # ============================================================================
  216. # LLM 评估模式
  217. # ============================================================================
  218. def generate_llm_prompt(self) -> str:
  219. """生成 LLM 评估 Prompt"""
  220. # 截取每章内容(避免过长)
  221. max_chars_per_chapter = 6000
  222. ch1 = self.chapters[0]["content"][:max_chars_per_chapter]
  223. ch2 = self.chapters[1]["content"][:max_chars_per_chapter]
  224. ch3 = self.chapters[2]["content"][:max_chars_per_chapter]
  225. prompt = LLM_EVALUATION_PROMPT.format(
  226. chapter1_content=ch1,
  227. chapter2_content=ch2,
  228. chapter3_content=ch3
  229. )
  230. return prompt
  231. def parse_llm_response(self, xml_response: str) -> Dict[str, Any]:
  232. """解析 LLM 返回的 XML 评估结果"""
  233. results: Dict[str, Any] = {
  234. "mode": "llm",
  235. "ch1": {"详细": {}},
  236. "ch2": {"详细": {}},
  237. "ch3": {"详细": {}},
  238. "overall_score": 0,
  239. "verdict": "",
  240. "top_issues": []
  241. }
  242. # 提取 overall_score
  243. score_match = re.search(r'<overall_score>(\d+)</overall_score>', xml_response)
  244. if score_match:
  245. results["overall_score"] = int(score_match.group(1))
  246. # 提取 verdict
  247. verdict_match = re.search(r'<verdict>([^<]+)</verdict>', xml_response)
  248. if verdict_match:
  249. results["verdict"] = verdict_match.group(1).strip()
  250. # 提取每章的检查点
  251. chapter_pattern = re.compile(
  252. r'<chapter num="(\d)">(.*?)</chapter>',
  253. re.DOTALL
  254. )
  255. check_pattern = re.compile(
  256. r'<check name="([^"]+)" passed="(true|false)" score="(\d+)">\s*'
  257. r'<evidence>([^<]*)</evidence>\s*'
  258. r'<suggestion>([^<]*)</suggestion>\s*'
  259. r'</check>',
  260. re.DOTALL
  261. )
  262. for chapter_match in chapter_pattern.finditer(xml_response):
  263. chapter_num = chapter_match.group(1)
  264. chapter_content = chapter_match.group(2)
  265. chapter_key = f"ch{chapter_num}"
  266. for check_match in check_pattern.finditer(chapter_content):
  267. check_name = check_match.group(1)
  268. passed = check_match.group(2) == "true"
  269. score = int(check_match.group(3))
  270. evidence = check_match.group(4).strip()
  271. suggestion = check_match.group(5).strip()
  272. results[chapter_key][check_name] = passed
  273. results[chapter_key]["详细"][check_name] = {
  274. "score": score,
  275. "evidence": evidence,
  276. "suggestion": suggestion
  277. }
  278. # 提取 top_issues
  279. issue_pattern = re.compile(r'<issue priority="(\d)">([^<]+)</issue>')
  280. for issue_match in issue_pattern.finditer(xml_response):
  281. priority = int(issue_match.group(1))
  282. issue_text = issue_match.group(2).strip()
  283. results["top_issues"].append({"priority": priority, "issue": issue_text})
  284. return results
  285. # ============================================================================
  286. # 报告生成
  287. # ============================================================================
  288. def calculate_score(self) -> tuple:
  289. """计算总体得分"""
  290. total_checks = 0
  291. passed_checks = 0
  292. for chapter_key in ["ch1", "ch2", "ch3"]:
  293. for check_key, check_value in self.results[chapter_key].items():
  294. if check_key != "详细" and isinstance(check_value, bool):
  295. total_checks += 1
  296. if check_value:
  297. passed_checks += 1
  298. score = (passed_checks / total_checks) * 100 if total_checks > 0 else 0
  299. return score, passed_checks, total_checks
  300. def generate_report(self) -> str:
  301. """生成检查报告"""
  302. score, passed, total = self.calculate_score()
  303. report = []
  304. report.append("=" * 60)
  305. report.append(f"黄金三章诊断报告 (模式: {self.mode})")
  306. report.append("=" * 60)
  307. report.append(f"\n总体得分: {score:.1f}% ({passed}/{total} 项通过)\n")
  308. # 第 1 章
  309. report.append("-" * 60)
  310. report.append("【第 1 章】检查结果")
  311. report.append("-" * 60)
  312. for check_name in ["主角300字内出场", "金手指线索", "强冲突开局"]:
  313. passed = self.results["ch1"].get(check_name, False)
  314. icon = "✅" if passed else "❌"
  315. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  316. # 显示详细信息
  317. detail = self.results["ch1"]["详细"].get(check_name)
  318. if isinstance(detail, dict):
  319. if detail.get("evidence"):
  320. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  321. if not passed and detail.get("suggestion"):
  322. report.append(f" └─ 建议: {detail['suggestion']}")
  323. elif isinstance(detail, list) and detail:
  324. report.append(f" └─ 关键词: {', '.join(detail[:5])}")
  325. # 第 2 章
  326. report.append("\n" + "-" * 60)
  327. report.append("【第 2 章】检查结果")
  328. report.append("-" * 60)
  329. for check_name in ["金手指展示", "初次小胜", "即时爽点"]:
  330. passed = self.results["ch2"].get(check_name, False)
  331. icon = "✅" if passed else "❌"
  332. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  333. detail = self.results["ch2"]["详细"].get(check_name)
  334. if isinstance(detail, dict) and detail.get("evidence"):
  335. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  336. elif isinstance(detail, list) and detail:
  337. report.append(f" └─ 关键词: {', '.join(detail[:5])}")
  338. # 第 3 章
  339. report.append("\n" + "-" * 60)
  340. report.append("【第 3 章】检查结果")
  341. report.append("-" * 60)
  342. for check_name in ["悬念钩子", "下一阶段预告", "爽点密度>=1"]:
  343. passed = self.results["ch3"].get(check_name, False)
  344. icon = "✅" if passed else "❌"
  345. report.append(f"{icon} {check_name}: {'通过' if passed else '未通过'}")
  346. detail = self.results["ch3"]["详细"].get(check_name)
  347. if isinstance(detail, dict) and detail.get("evidence"):
  348. report.append(f" └─ 证据: {detail['evidence'][:100]}...")
  349. # 改进建议
  350. report.append("\n" + "=" * 60)
  351. report.append("【改进建议】")
  352. report.append("=" * 60)
  353. if score < 60:
  354. report.append("\n🔴 警告: 开篇吸引力不足,严重影响读者留存率!")
  355. elif score < 80:
  356. report.append("\n🟡 注意: 开篇有改进空间")
  357. else:
  358. report.append("\n✅ 很好!开篇符合黄金三章标准")
  359. # LLM 模式的额外信息
  360. if self.mode == "llm" and self.results.get("top_issues"):
  361. report.append("\n优先修复:")
  362. for issue in self.results["top_issues"]:
  363. report.append(f" {issue['priority']}. {issue['issue']}")
  364. report.append("\n" + "=" * 60)
  365. return "\n".join(report)
  366. def run(self) -> None:
  367. """执行检查"""
  368. print("正在加载章节...")
  369. self.load_chapters()
  370. print(f"✅ 已加载 {len(self.chapters)} 章")
  371. for ch in self.chapters:
  372. print(f" - 第 {ch['number']} 章: {ch['word_count']} 字")
  373. print(f"\n正在执行检查 (模式: {self.mode})...\n")
  374. if self.mode == "keyword":
  375. self.check_chapter1_keywords()
  376. self.check_chapter2_keywords()
  377. self.check_chapter3_keywords()
  378. report = self.generate_report()
  379. print(report)
  380. elif self.mode == "llm":
  381. prompt = self.generate_llm_prompt()
  382. print("=" * 60)
  383. print("LLM 评估模式:请将以下 Prompt 发送给 Claude/GPT")
  384. print("=" * 60)
  385. print("\n--- PROMPT START ---\n")
  386. print(prompt[:2000] + "\n...[内容已截断,完整版见输出文件]...")
  387. print("\n--- PROMPT END ---\n")
  388. # 保存完整 prompt
  389. output_dir = Path(".webnovel")
  390. output_dir.mkdir(exist_ok=True)
  391. prompt_file = output_dir / "golden_three_prompt.md"
  392. with open(prompt_file, 'w', encoding='utf-8') as f:
  393. f.write(prompt)
  394. print(f"📄 完整 Prompt 已保存至: {prompt_file}")
  395. print("\n💡 使用方法:")
  396. print(" 1. 将 Prompt 发送给 Claude/GPT")
  397. print(" 2. 获取 XML 格式的评估结果")
  398. print(" 3. 运行: python golden_three_checker.py --parse-response <response.xml>")
  399. # 保存结果
  400. output_dir = Path(".webnovel")
  401. output_dir.mkdir(exist_ok=True)
  402. output_file = output_dir / "golden_three_report.json"
  403. with open(output_file, 'w', encoding='utf-8') as f:
  404. json.dump(self.results, f, ensure_ascii=False, indent=2)
  405. print(f"\n📄 详细结果已保存至: {output_file}")
  406. def main():
  407. parser = argparse.ArgumentParser(
  408. description="黄金三章检查工具 v2.0 (LLM-Driven)",
  409. formatter_class=argparse.RawDescriptionHelpFormatter,
  410. epilog="""
  411. 示例:
  412. # 快速关键词模式(默认)
  413. python golden_three_checker.py --auto
  414. # LLM 深度评估模式(推荐)
  415. python golden_three_checker.py --auto --mode llm
  416. # 解析 LLM 返回的评估结果
  417. python golden_three_checker.py --parse-response response.xml
  418. """.strip(),
  419. )
  420. parser.add_argument("chapter_files", nargs="*", help="前三章文件路径")
  421. parser.add_argument("--auto", action="store_true", help="自动定位前三章文件")
  422. parser.add_argument("--mode", choices=["keyword", "llm"], default="keyword",
  423. help="检查模式: keyword(快速) / llm(深度)")
  424. parser.add_argument("--project-root", default=None, help="项目根目录")
  425. parser.add_argument("--parse-response", metavar="FILE", help="解析 LLM 返回的 XML 文件")
  426. args = parser.parse_args()
  427. # 解析 LLM 响应模式
  428. if args.parse_response:
  429. if not os.path.exists(args.parse_response):
  430. print(f"❌ 文件不存在: {args.parse_response}")
  431. sys.exit(1)
  432. with open(args.parse_response, 'r', encoding='utf-8') as f:
  433. xml_content = f.read()
  434. checker = GoldenThreeChecker(["dummy"] * 3, mode="llm")
  435. checker.results = checker.parse_llm_response(xml_content)
  436. print("=" * 60)
  437. print("LLM 评估结果解析")
  438. print("=" * 60)
  439. print(json.dumps(checker.results, ensure_ascii=False, indent=2))
  440. sys.exit(0)
  441. # 正常检查模式
  442. chapter_files = []
  443. if args.auto or not args.chapter_files:
  444. try:
  445. project_root = resolve_project_root(args.project_root)
  446. except FileNotFoundError as e:
  447. print(f"❌ {e}")
  448. sys.exit(1)
  449. for i in range(1, 4):
  450. chapter_path = find_chapter_file(project_root, i)
  451. if chapter_path:
  452. chapter_files.append(str(chapter_path))
  453. else:
  454. print(f"❌ 找不到第 {i} 章文件")
  455. sys.exit(1)
  456. print(f"📂 项目根目录: {project_root}")
  457. print(f"📄 检测到前三章: {', '.join(Path(f).name for f in chapter_files)}\n")
  458. else:
  459. if len(args.chapter_files) < 3:
  460. print("用法: python golden_three_checker.py <第1章路径> <第2章路径> <第3章路径>")
  461. sys.exit(1)
  462. chapter_files = args.chapter_files[:3]
  463. try:
  464. checker = GoldenThreeChecker(chapter_files, mode=args.mode)
  465. checker.run()
  466. except Exception as e:
  467. print(f"❌ 错误: {e}")
  468. import traceback
  469. traceback.print_exc()
  470. sys.exit(1)
  471. if __name__ == "__main__":
  472. main()