1
0

check.test.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. import { test } from 'node:test'
  2. import assert from 'node:assert/strict'
  3. import path from 'node:path'
  4. import { mechanicalCheck } from '../../src/mechanical-check/index.js'
  5. import { repoCtx } from '../commands/_helper.js'
  6. const 文风铁律 = `---
  7. 禁词:
  8. - 眸子一缩
  9. 禁句式:
  10. - '不是.*而是'
  11. ---
  12. ## 铁律
  13. 节奏优先。
  14. `
  15. const 名册 = '| 正名 | 别名 | 类型 | 首现章 |\n|--|--|--|--|\n| 林晚 | 晚晚 | character | 1 |\n'
  16. const 信息差 = '---\n读者已知: false\n登记章: 1\n关键词:\n - 玉佩\n---\n## 内容\n秘密。\n'
  17. // 组装一个含 front matter + 正文的草稿,并放进受控临时仓库
  18. function files(draftBody, { fm, extra } = {}) {
  19. const front =
  20. fm ??
  21. `章号: 3\n标题: 测试章\n卷: 1\n字数: ${[...draftBody.replace(/\s+/g, '')].length}\n章定位: 推进\n钩子: 危机钩-强\n情绪定位: 铺垫`
  22. return {
  23. 'book.yaml': 'spec_version: "7.0"\n书名: 测\n每章目标字数: 50\n',
  24. '文风/文风铁律.md': 文风铁律,
  25. '定稿/设定/名册.md': 名册,
  26. '定稿/设定/信息差/信息差-001-x.md': 信息差,
  27. '工作区/草稿-A.md': `---\n${front}\n---\n${draftBody}`,
  28. ...extra,
  29. }
  30. }
  31. async function run(draftBody, opts) {
  32. const { ctx, cleanup } = await repoCtx(null, files(draftBody, opts))
  33. try {
  34. const draftPath = path.join(ctx.repoPath, '工作区', '草稿-A.md')
  35. const r = await mechanicalCheck(ctx, { chapterNum: 3, draftPath })
  36. return { r, cleanup }
  37. } catch (e) {
  38. await cleanup()
  39. throw e
  40. }
  41. }
  42. const 正常正文 = '林晚立于大殿之前,握紧手中令牌,暗自下定决心,此番定要查明当年旧案,还师门公道。'
  43. test('机检 正常草稿 → pass=true,无阻断 issue', async () => {
  44. const { r, cleanup } = await run(正常正文)
  45. try {
  46. assert.equal(r.ok, true)
  47. assert.equal(r.pass, true, `不应有阻断 issue:${JSON.stringify(r.issues)}`)
  48. } finally {
  49. await cleanup()
  50. }
  51. })
  52. test('机检 字数太短 → 阻断 issue(字数)', async () => {
  53. const { r, cleanup } = await run('林晚。', { fm: '章号: 3\n标题: 测\n卷: 1\n字数: 2\n章定位: 推进\n钩子: 危机钩-强\n情绪定位: 铺垫' })
  54. try {
  55. assert.equal(r.pass, false)
  56. assert.ok(r.issues.some((i) => i.check === '字数'))
  57. } finally {
  58. await cleanup()
  59. }
  60. })
  61. test('机检 命中禁词 → 阻断 issue(禁词)', async () => {
  62. const { r, cleanup } = await run(正常正文 + '他眸子一缩,盯着令牌看了又看,心头警兆大作久久难平。')
  63. try {
  64. assert.ok(r.issues.some((i) => i.check === '禁词'))
  65. assert.equal(r.pass, false)
  66. } finally {
  67. await cleanup()
  68. }
  69. })
  70. test('机检 命中禁句式正则 → 阻断 issue(禁句式)', async () => {
  71. const { r, cleanup } = await run('这把剑不是凡铁而是上古神兵,林晚握着它,只觉一股暖流缓缓涌入四肢百骸之间。')
  72. try {
  73. assert.ok(r.issues.some((i) => i.check === '禁句式'))
  74. } finally {
  75. await cleanup()
  76. }
  77. })
  78. test('机检 本章内复读 → 阻断 issue(复读)', async () => {
  79. const { r, cleanup } = await run('空气仿佛凝固空气仿佛凝固空气仿佛凝固空气仿佛凝固,林晚站在原地一动不动。')
  80. try {
  81. assert.ok(r.issues.some((i) => i.check === '复读'))
  82. } finally {
  83. await cleanup()
  84. }
  85. })
  86. test('机检 缺 front matter 字段 → 阻断 issue(front matter)', async () => {
  87. const { r, cleanup } = await run(正常正文, { fm: '章号: 3\n标题: 测\n卷: 1\n字数: 40\n章定位: 推进' }) // 缺钩子/情绪定位
  88. try {
  89. assert.ok(r.issues.some((i) => i.check === 'front matter'))
  90. } finally {
  91. await cleanup()
  92. }
  93. })
  94. test('机检 新专名比名册 → 候选(非阻断)', async () => {
  95. const { r, cleanup } = await run('赵铁山道:“何人擅闯?”林晚抬眼望去,只见来人一身玄衣气度不凡令人不敢直视。')
  96. try {
  97. assert.ok(r.candidates.some((c) => c.type === '新专名' && c.value === '赵铁山'))
  98. // 新专名非阻断
  99. assert.ok(!r.issues.some((i) => i.check === '新专名'))
  100. } finally {
  101. await cleanup()
  102. }
  103. })
  104. test('机检 信息差关键词命中 → 候选(非阻断)', async () => {
  105. const { r, cleanup } = await run('林晚摩挲着那枚玉佩,心中疑云密布,却始终参不透其中藏着的惊天秘密究竟为何。')
  106. try {
  107. assert.ok(r.candidates.some((c) => c.type === '信息差候选'))
  108. } finally {
  109. await cleanup()
  110. }
  111. })
  112. // —— 条目变动形式检查(spec 0.9 §8 第 5 步,AC6)——
  113. const 条目 = (状态 = '进行') => `---\n强度: 高\n状态: ${状态}\n开启章: 1\n---\n## 履历\n- 第1章:埋下\n`
  114. const 条目库 = {
  115. '大纲/伏笔/伏笔-001-旧案.md': 条目('进行'),
  116. '大纲/伏笔/伏笔-002-旧刀.md': 条目('已收尾'),
  117. }
  118. const declFm = (decl) =>
  119. `章号: 3\n标题: 测\n卷: 1\n字数: ${[...正常正文.replace(/\s+/g, '')].length}\n章定位: 推进\n钩子: 危机钩-强\n情绪定位: 铺垫\n${decl}`
  120. const 条目issues = (r) => r.issues.filter((i) => i.check === '条目变动')
  121. test('机检 条目声明合法(推进进行中 + 埋下新编号)→ 无条目变动 issue', async () => {
  122. const { r, cleanup } = await run(正常正文, {
  123. fm: declFm('伏笔:\n - 推进 伏笔-001\n - 埋下 伏笔-003'),
  124. extra: 条目库,
  125. })
  126. try {
  127. assert.equal(r.ok, true)
  128. assert.deepEqual(条目issues(r), [], JSON.stringify(r.issues))
  129. } finally {
  130. await cleanup()
  131. }
  132. })
  133. test('机检 悬念清单混入伏笔编号 → 阻断(类型一致)', async () => {
  134. const { r, cleanup } = await run(正常正文, {
  135. fm: declFm('悬念:\n - 推进 伏笔-001'),
  136. extra: 条目库,
  137. })
  138. try {
  139. assert.equal(r.pass, false)
  140. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('异类编号')))
  141. } finally {
  142. await cleanup()
  143. }
  144. })
  145. test('机检 开启类动词撞已有编号 → 阻断', async () => {
  146. const { r, cleanup } = await run(正常正文, {
  147. fm: declFm('伏笔:\n - 埋下 伏笔-001'),
  148. extra: 条目库,
  149. })
  150. try {
  151. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('已存在')))
  152. } finally {
  153. await cleanup()
  154. }
  155. })
  156. test('机检 推进不存在的编号 → 阻断(疑似笔误)', async () => {
  157. const { r, cleanup } = await run(正常正文, {
  158. fm: declFm('伏笔:\n - 推进 伏笔-099'),
  159. extra: 条目库,
  160. })
  161. try {
  162. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('不存在')))
  163. } finally {
  164. await cleanup()
  165. }
  166. })
  167. test('机检 推进已收尾条目 → 阻断(状态不兼容)', async () => {
  168. const { r, cleanup } = await run(正常正文, {
  169. fm: declFm('伏笔:\n - 推进 伏笔-002'),
  170. extra: 条目库,
  171. })
  172. try {
  173. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('已收尾')))
  174. } finally {
  175. await cleanup()
  176. }
  177. })
  178. test('机检 类型不认识的动词 → 阻断(合法动词提示)', async () => {
  179. const { r, cleanup } = await run(正常正文, {
  180. fm: declFm('伏笔:\n - 揭晓 伏笔-001'), // 揭晓属悬念,伏笔应为回收
  181. extra: 条目库,
  182. })
  183. try {
  184. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('合法动词')))
  185. } finally {
  186. await cleanup()
  187. }
  188. })
  189. test('机检 声明行不合「动词 编号」格式 → 阻断', async () => {
  190. const { r, cleanup } = await run(正常正文, {
  191. fm: declFm('伏笔:\n - 伏笔-001'),
  192. extra: 条目库,
  193. })
  194. try {
  195. assert.ok(条目issues(r).some((i) => i.blocking && i.description.includes('动词 编号')))
  196. } finally {
  197. await cleanup()
  198. }
  199. })
  200. // —— 体检消费两候选(M5.5:高频意象命中 + 句式偏离 vs 基线指纹,均非阻断)——
  201. // 千字文选段:字字不重,按长度切句不会误触「复读」检查
  202. const 字池 =
  203. '天地玄黄宇宙洪荒日月盈昃辰宿列张寒来暑往秋收冬藏闰余成岁律吕调阳云腾致雨露结为霜金生丽水玉出昆冈剑号巨阙珠称夜光果珍李柰菜重芥姜海咸河淡鳞潜羽翔龙师火帝鸟官人皇始制文字乃服衣裳推位让国有虞陶唐吊民伐罪周发殷汤坐朝问道垂拱平章爱育黎首臣伏戎羌遐迩一体率宾归王鸣凤在竹白驹食场'
  204. function sentencesOfLengths(lengths) {
  205. let pos = 0
  206. const parts = []
  207. for (const n of lengths) {
  208. parts.push(字池.slice(pos, pos + n))
  209. pos += n
  210. }
  211. return parts.join('。') + '。'
  212. }
  213. async function runWithCache(draftBody, { extra, seed } = {}) {
  214. const { ctx, cleanup } = await repoCtx(null, files(draftBody, { extra }))
  215. try {
  216. if (seed) await seed(ctx)
  217. const draftPath = path.join(ctx.repoPath, '工作区', '草稿-A.md')
  218. const r = await mechanicalCheck(ctx, { chapterNum: 3, draftPath })
  219. return { r, cleanup }
  220. } catch (e) {
  221. await cleanup()
  222. throw e
  223. }
  224. }
  225. const 基线指纹 = (avg, variance) => (ctx) =>
  226. ctx.cache.run(
  227. "INSERT INTO fingerprints (chapter_range_start, chapter_range_end, is_baseline, avg_sentence_length, sentence_length_variance, avg_paragraph_length, common_phrase_frequency, vocabulary_richness, fingerprint_data) VALUES (1, 2, 1, ?, ?, 20, '{}', 0.5, '{}')",
  228. [avg, variance]
  229. )
  230. const 目标字数 = (n) => ({ 'book.yaml': `spec_version: "7.0"\n书名: 测\n每章目标字数: ${n}\n` })
  231. test('机检 高频意象命中(体检缓存)→ 候选非阻断,pass 不受影响', async () => {
  232. const seed = (ctx) =>
  233. ctx.cache.run("INSERT OR REPLACE INTO meta (key, value) VALUES ('imagery_top', ?)", [
  234. JSON.stringify([
  235. { phrase: '空气仿佛凝固', count: 47, chapterCount: 12, firstChapter: 3, lastChapter: 40 },
  236. ]),
  237. ])
  238. const { r, cleanup } = await runWithCache(
  239. '林晚推门而入,空气仿佛凝固。她环视四周缓缓落座,空气仿佛凝固,无人开口说话,落针可闻此时无声。',
  240. { seed }
  241. )
  242. try {
  243. const c = r.candidates.find((x) => x.type === '高频意象')
  244. assert.ok(c, JSON.stringify(r.candidates))
  245. assert.equal(c.value, '空气仿佛凝固')
  246. assert.match(c.description, /全书已用 47 次,本章又用 2 次/)
  247. assert.equal(r.pass, true, JSON.stringify(r.issues))
  248. } finally {
  249. await cleanup()
  250. }
  251. })
  252. test('机检 无体检数据 → 高频意象/句式偏离静默跳过', async () => {
  253. const { r, cleanup } = await run(正常正文)
  254. try {
  255. assert.ok(!r.candidates.some((x) => x.type === '高频意象' || x.type === '句式偏离'))
  256. } finally {
  257. await cleanup()
  258. }
  259. })
  260. test('机检 句式偏离边界:平均句长偏 29% 不报', async () => {
  261. const body = sentencesOfLengths([12, 13, 13, 13, 13, 13, 13, 13, 13, 13]) // 均 12.9,基线 10
  262. const { r, cleanup } = await runWithCache(body, { extra: 目标字数(130), seed: 基线指纹(10, 0) })
  263. try {
  264. assert.ok(!r.candidates.some((x) => x.type === '句式偏离'), JSON.stringify(r.candidates))
  265. } finally {
  266. await cleanup()
  267. }
  268. })
  269. test('机检 句式偏离边界:平均句长偏 31% 报(非阻断)', async () => {
  270. const body = sentencesOfLengths([13, 13, 13, 13, 13, 13, 13, 13, 13, 14]) // 均 13.1,基线 10
  271. const { r, cleanup } = await runWithCache(body, { extra: 目标字数(130), seed: 基线指纹(10, 0) })
  272. try {
  273. const c = r.candidates.find((x) => x.type === '句式偏离' && x.value === '平均句长')
  274. assert.ok(c, JSON.stringify(r.candidates))
  275. assert.match(c.description, /偏了 31%/)
  276. assert.ok(!r.issues.some((i) => i.check === '句式偏离'), '句式偏离只进候选不进 issues')
  277. } finally {
  278. await cleanup()
  279. }
  280. })
  281. test('机检 句长方差偏离 ≥50% 报,平均句长未偏不误报', async () => {
  282. const body = sentencesOfLengths([10, 14]) // 均 12 与基线持平;方差 4 vs 基线 1
  283. const { r, cleanup } = await runWithCache(body, { extra: 目标字数(25), seed: 基线指纹(12, 1) })
  284. try {
  285. const c = r.candidates.find((x) => x.type === '句式偏离' && x.value === '句长方差')
  286. assert.ok(c, JSON.stringify(r.candidates))
  287. assert.ok(!r.candidates.some((x) => x.type === '句式偏离' && x.value === '平均句长'))
  288. } finally {
  289. await cleanup()
  290. }
  291. })