// haiany/webnovel-writer
// Mirror of https://github.com/lingfengQAQ/webnovel-writer.git
							import { promises as fs } from 'node:fs'
import path from 'node:path'
import { parseFrontMatter } from '../storage/parsers/front-matter.js'
import { BookConfigReader } from '../storage/adapters/BookConfigReader.js'
import { parseThreadDeclarations, VERBS, OPENING_VERBS } from '../util/thread-declarations.js'
import { styleMetrics } from '../style-stats/index.js'

// Front-matter fields every chapter record must carry (spec §4.1, the machine-consumed part).
const REQUIRED_FM = ['章号', '标题', '卷', '字数', '章定位', '钩子', '情绪定位']

// Sentence-style deviation tolerances, relative to the baseline fingerprint.
// Hard-coded sensible defaults; exceeding them only produces a non-blocking candidate.
const AVG_LEN_TOLERANCE = 0.3
const VARIANCE_TOLERANCE = 0.5

/**
 * Mechanical check: the zero-token, countable checks run on a chapter draft
 * (the seven D2 items plus the formal check of thread declarations; spec 0.9 §8 step 5).
 *
 * Checks that push into `issues` are blocking; `pass` is true only when no issue was
 * recorded. New proper nouns, secret keywords, high-frequency imagery and sentence-style
 * deviation only push into `candidates` and never affect `pass`. The imagery and style
 * checks read data previously stored in the cache (meta list / baseline fingerprint);
 * this function does not scan the whole book itself.
 *
 * Any exception raised along the way is converted into an `ok: false` result.
 *
 * @param {{repoPath: string, cache: object}} ctx - Repository root and the query cache.
 * @param {{chapterNum: number, draftPath: string}} args - `draftPath` is the draft file to
 *   read; `chapterNum` is accepted but not used by this function.
 * @returns {Promise<{ok: boolean, pass: boolean, issues: object[], candidates: object[], error: string}>}
 */
export async function mechanicalCheck(ctx, { chapterNum, draftPath }) {
  const issues = []
  const candidates = []
  try {
    const { repoPath, cache } = ctx
    const draftText = await fs.readFile(draftPath, 'utf8')
    const parsed = parseFrontMatter(draftText)

    // When the front matter cannot be parsed, the whole file is treated as body text.
    let body = draftText
    let fm = {}
    if (parsed.ok) {
      body = parsed.body
      fm = parsed.data
    }

    const configResult = await new BookConfigReader(repoPath).read()
    const bookConfig = configResult.ok ? configResult.data : {}
    const { 禁词: bannedWords, 禁句式: bannedPatterns } = await readStyleRules(repoPath)

    // Blocking checks push into `issues`; advisory checks push into `candidates`.
    checkWordCount(body, bookConfig, issues) // 1
    checkBannedWords(body, bannedWords, issues) // 2
    checkBannedPatterns(body, bannedPatterns, issues) // 3
    checkRepetition(body, issues) // 4
    await checkNewProperNouns(body, cache, candidates) // 5 (candidate)
    checkFrontMatter(parsed, fm, issues) // 6
    await checkSecretKeywords(body, cache, candidates) // 7 (candidate)
    await checkThreadDeclarations(fm, cache, issues) // 8 (thread declarations, form only)
    await checkImageryHits(body, cache, candidates) // 9 (candidate, cached imagery list)
    await checkStyleDeviation(body, cache, candidates) // 10 (candidate, vs baseline fingerprint)

    const pass = issues.length === 0
    return { ok: true, pass, issues, candidates, error: '' }
  } catch (err) {
    return { ok: false, pass: false, issues: [], candidates: [], error: `机检失败：${err.message}` }
  }
}

/**
 * Load the banned-word and banned-pattern lists from the front matter of
 * `<repoPath>/文风/文风铁律.md`.
 *
 * Best-effort: a missing file, unparsable front matter or any other error yields two
 * empty lists. Values found in the front matter are passed through as-is (only falsy
 * values are replaced by an empty list).
 *
 * @param {string} repoPath - Repository root.
 * @returns {Promise<{禁词: any, 禁句式: any}>}
 */
async function readStyleRules(repoPath) {
  const noRules = { 禁词: [], 禁句式: [] }
  try {
    const rulesFile = path.join(repoPath, '文风', '文风铁律.md')
    const parsed = parseFrontMatter(await fs.readFile(rulesFile, 'utf8'))
    if (!parsed.ok) return noRules
    const { data } = parsed
    return { 禁词: data.禁词 || [], 禁句式: data.禁句式 || [] }
  } catch {
    // No style-rules file (or it could not be read/parsed): run without rules.
    return noRules
  }
}

/**
 * Check 1 — word count: the body's character count (whitespace removed, counted by
 * code point) must lie within ±30% of the per-chapter target. The target comes from
 * `config.每章目标字数`, falling back to 3000 when that value is falsy.
 *
 * @param {string} body - Chapter body text.
 * @param {object} config - Book configuration.
 * @param {object[]} issues - Receives at most one blocking issue.
 */
function checkWordCount(body, config, issues) {
  const tolerance = 0.3
  const target = config.每章目标字数 || 3000
  const count = Array.from(body.replace(/\s+/g, '')).length
  const common = { check: '字数', severity: 'medium', blocking: true }

  if (count < target * (1 - tolerance)) {
    issues.push({ ...common, description: `字数 ${count} 低于目标 ${target} 下限` })
    return
  }
  if (count > target * (1 + tolerance)) {
    issues.push({ ...common, description: `字数 ${count} 高于目标 ${target} 上限` })
  }
}

/**
 * Check 2 — banned words: push one blocking issue for every banned word that occurs
 * in the body.
 *
 * `banned` is normally a list, but the value comes from hand-written front matter, so
 * it is normalised first: a single string is treated as a one-word list (iterating it
 * directly would ban every individual character), any other iterable is used as-is,
 * and anything else (null, number, plain object) is treated as "no banned words"
 * instead of throwing.
 *
 * @param {string} body - Chapter body text.
 * @param {Iterable<string>|string|null|undefined} banned - Banned words.
 * @param {object[]} issues - Receives one issue per banned word found.
 */
function checkBannedWords(body, banned, issues) {
  let words = []
  if (typeof banned === 'string') {
    words = [banned]
  } else if (banned != null && typeof banned[Symbol.iterator] === 'function') {
    words = banned
  }

  for (const word of words) {
    // Empty entries are skipped: an empty string would match every body.
    if (word && body.includes(word)) {
      issues.push({ check: '禁词', severity: 'high', blocking: true, description: `命中禁词「${word}」` })
    }
  }
}

/**
 * Check 3 — banned sentence patterns: each entry is compiled as a regular expression
 * and tested against the body; every match pushes one blocking issue.
 *
 * `patterns` is normalised first because it comes from hand-written front matter: a
 * single string is treated as one pattern (iterating it directly would compile each
 * character as its own regex), any other iterable is used as-is, and anything else
 * is treated as "no patterns" instead of throwing.
 *
 * @param {string} body - Chapter body text.
 * @param {Iterable<string>|string|null|undefined} patterns - Regex sources.
 * @param {object[]} issues - Receives one issue per matching pattern.
 */
function checkBannedPatterns(body, patterns, issues) {
  let sources = []
  if (typeof patterns === 'string') {
    sources = [patterns]
  } else if (patterns != null && typeof patterns[Symbol.iterator] === 'function') {
    sources = patterns
  }

  for (const source of sources) {
    if (!source) continue
    try {
      if (new RegExp(source).test(body)) {
        issues.push({ check: '禁句式', severity: 'high', blocking: true, description: `命中禁句式 /${source}/` })
      }
    } catch {
      // Invalid regex: skip it. A typo in the style rules must not crash the check.
    }
  }
}

/**
 * Check 4 — repetition: slide a 6-character window over the body (whitespace removed)
 * and count every window, overlapping ones included. The first window, in order of
 * first appearance, whose total count reaches 3 is reported as a blocking issue; at
 * most one issue is pushed.
 *
 * @param {string} body - Chapter body text.
 * @param {object[]} issues - Receives at most one blocking issue.
 */
function checkRepetition(body, issues) {
  const WINDOW = 6
  const MIN_REPEATS = 3
  const compact = body.replace(/\s+/g, '')

  // Map keeps insertion order, so entries are ordered by first occurrence in the text.
  const tally = new Map()
  for (let start = 0, end = WINDOW; end <= compact.length; start += 1, end += 1) {
    const gram = compact.slice(start, end)
    tally.set(gram, (tally.get(gram) ?? 0) + 1)
  }

  const offender = [...tally].find(([, times]) => times >= MIN_REPEATS)
  if (offender === undefined) return

  const [phrase, times] = offender
  issues.push({ check: '复读', severity: 'medium', blocking: true, description: `短语「${phrase}」重复 ${times} 次` })
}

/**
 * Check 5 (candidate) — new proper nouns. Conservative heuristic: two or three Han
 * characters immediately followed by a speech verb (道/说/问/…) are taken as a
 * possible character name and compared against the roster (entity ids plus aliases).
 * Every name not on the roster is reported once as a non-blocking candidate.
 *
 * If loading the roster fails, matching still runs against whatever was loaded before
 * the failure (possibly nothing), so detected names are then reported as unregistered.
 *
 * @param {string} body - Chapter body text.
 * @param {{query: Function}} cache - Query cache; `query(sql)` is awaited and iterated.
 * @param {object[]} candidates - Receives one candidate per distinct unknown name.
 */
async function checkNewProperNouns(body, cache, candidates) {
  // Names that must not be reported: the roster first, then names already reported.
  const handled = new Set()
  try {
    const entities = await cache.query('SELECT id FROM entities')
    for (const entity of entities) handled.add(entity.id)
    const aliases = await cache.query('SELECT alias FROM entity_aliases')
    for (const row of aliases) handled.add(row.alias)
  } catch {
    // No cache available: continue with whatever part of the roster was loaded.
  }

  const speakerPattern = /([一-龥]{2,3})(冷笑道|笑道|喝道|说道|问道|答道|道|说|喊|问)/g
  for (let hit = speakerPattern.exec(body); hit !== null; hit = speakerPattern.exec(body)) {
    const name = hit[1]
    if (handled.has(name)) continue
    handled.add(name)
    candidates.push({
      type: '新专名',
      value: name,
      description: `正文出现疑似新专名「${name}」，名册未登记，请确认（新实体 or 笔误）`,
    })
  }
}

/**
 * Check 7 (candidate) — secret keywords. For every secret the reader does not know yet
 * (`reader_knows = 0`), report a non-blocking "possible leak" candidate when one of its
 * keywords appears in the body. Only the first matching keyword per secret is reported.
 *
 * The `keywords` column is read as JSON. Malformed rows never abort the check: invalid
 * JSON and JSON values that are not a list (null, number, object) count as "no
 * keywords", and a bare JSON string counts as a single keyword rather than being
 * iterated character by character.
 *
 * If the query itself fails (e.g. no cache), the check is skipped silently.
 *
 * @param {string} body - Chapter body text.
 * @param {{query: Function}} cache - Query cache; `query(sql)` is awaited and iterated.
 * @param {object[]} candidates - Receives at most one candidate per secret.
 */
async function checkSecretKeywords(body, cache, candidates) {
  let secrets = []
  try {
    secrets = await cache.query('SELECT id, keywords FROM secrets WHERE reader_knows = 0')
  } catch {
    return
  }

  for (const secret of secrets) {
    let parsed
    try {
      parsed = JSON.parse(secret.keywords || '[]')
    } catch {
      parsed = []
    }

    // JSON.parse succeeding does not mean we got a list; normalise before iterating.
    let keywords = []
    if (Array.isArray(parsed)) {
      keywords = parsed
    } else if (typeof parsed === 'string') {
      keywords = [parsed]
    }

    const hit = keywords.find((kw) => kw && body.includes(kw))
    if (hit) {
      candidates.push({
        type: '信息差候选',
        value: secret.id,
        description: `正文出现信息差「${secret.id}」关键词「${hit}」，疑似泄密候选（不拦截，请人工确认）`,
      })
    }
  }
}

/**
 * Check 6 — front matter. Pushes a blocking issue when the front matter failed to
 * parse; otherwise pushes one blocking issue listing every required field
 * (`REQUIRED_FM`) that is absent, an empty string, null or undefined.
 *
 * @param {{ok: boolean, error?: string}} parsed - Result of parsing the draft.
 * @param {object} fm - Parsed front-matter data.
 * @param {object[]} issues - Receives at most one blocking issue.
 */
function checkFrontMatter(parsed, fm, issues) {
  const report = (description) => {
    issues.push({ check: 'front matter', severity: 'high', blocking: true, description })
  }

  if (!parsed.ok) {
    report(`front matter 解析失败：${parsed.error}`)
    return
  }

  const isFilled = (key) => key in fm && fm[key] !== '' && fm[key] !== null && fm[key] !== undefined
  const absent = []
  for (const key of REQUIRED_FM) {
    if (!isFilled(key)) absent.push(key)
  }
  if (absent.length) {
    report(`front matter 缺字段：${absent.join('、')}`)
  }
}

/**
 * Check 8 — formal check of thread declarations (spec 0.9 §8 step 5). Purely formal:
 * it only consults the `threads` table and does no semantic analysis.
 *
 * Rules, each violation being a blocking issue:
 *   - a malformed declaration (not "verb id") is always reported;
 *   - the id must start with its list's type prefix (`<type>-`);
 *   - the verb must be one of `VERBS[type]`;
 *   - an opening verb must not reuse an id that already exists;
 *   - any other verb requires the thread to exist with status 「进行」.
 *
 * If the `threads` query fails (no cache), the per-declaration rules are skipped;
 * malformed declarations have already been reported by then.
 *
 * @param {object} fm - Parsed front-matter data.
 * @param {{query: Function}} cache - Query cache; `query(sql)` is awaited and iterated.
 * @param {object[]} issues - Receives the blocking issues.
 */
async function checkThreadDeclarations(fm, cache, issues) {
  const flag = (description) => {
    issues.push({ check: '条目变动', severity: 'high', blocking: true, description })
  }

  const { declarations, malformed } = parseThreadDeclarations(fm)
  for (const bad of malformed) {
    flag(`条目声明格式应为「动词 编号」：${bad}`)
  }
  if (!declarations.length) return

  const statusById = new Map()
  try {
    const rows = await cache.query('SELECT id, status FROM threads')
    for (const row of rows) statusById.set(row.id, row.status)
  } catch {
    return // No cache: the formal check depends on the threads table.
  }

  // Returns the description of the first rule a declaration breaks, or null if it is fine.
  const findProblem = (decl) => {
    if (!decl.id.startsWith(`${decl.type}-`)) {
      return `「${decl.type}」清单里出现异类编号「${decl.id}」`
    }
    if (!VERBS[decl.type].includes(decl.verb)) {
      return `「${decl.type}」没有动词「${decl.verb}」（${decl.raw}），合法动词：${VERBS[decl.type].join('/')}`
    }
    const status = statusById.get(decl.id)
    if (OPENING_VERBS.has(decl.verb)) {
      return status === undefined
        ? null
        : `「${decl.raw}」：${decl.id} 已存在（状态：${status}），开新条目须用新编号`
    }
    if (status === undefined) {
      return `「${decl.raw}」：${decl.id} 不存在，疑似编号笔误`
    }
    if (status !== '进行') {
      return `「${decl.raw}」：${decl.id} 状态是「${status}」，不能再「${decl.verb}」`
    }
    return null
  }

  for (const decl of declarations) {
    const problem = findProblem(decl)
    if (problem !== null) flag(problem)
  }
}

/**
 * Check 9 (candidate) — high-frequency imagery. Reads the cross-chapter imagery list
 * stored under meta key `imagery_top` (a JSON list of `{phrase, count}` entries) and
 * pushes a non-blocking reminder for every listed phrase that occurs in this draft.
 *
 * The check is skipped silently when the list is unavailable: the query fails, the
 * value is not valid JSON, or the stored JSON is not a list (e.g. `null` or an
 * object), which would otherwise throw and abort the whole mechanical check.
 *
 * @param {string} body - Chapter body text.
 * @param {{query: Function}} cache - Query cache; `query(sql)` is awaited.
 * @param {object[]} candidates - Receives one candidate per phrase found.
 */
async function checkImageryHits(body, cache, candidates) {
  let top
  try {
    const rows = await cache.query("SELECT value FROM meta WHERE key = 'imagery_top'")
    top = JSON.parse(rows[0]?.value || '[]')
  } catch {
    return
  }
  // Valid JSON is not necessarily a list; anything else means "no usable list".
  if (!Array.isArray(top)) return

  for (const entry of top) {
    if (!entry?.phrase) continue
    // Number of occurrences = number of pieces produced by splitting, minus one.
    const hits = body.split(entry.phrase).length - 1
    if (hits > 0) {
      candidates.push({
        type: '高频意象',
        value: entry.phrase,
        description: `「${entry.phrase}」全书已用 ${entry.count} 次，本章又用 ${hits} 次，建议换个写法`,
      })
    }
  }
}

/**
 * Check 10 (candidate) — sentence style vs the baseline fingerprint. Compares this
 * draft's metrics (from `styleMetrics`) with the most recent baseline row of the
 * `fingerprints` table. A relative deviation of at least `AVG_LEN_TOLERANCE` in average
 * sentence length, or at least `VARIANCE_TOLERANCE` in sentence-length variance, pushes
 * a non-blocking reminder. A baseline value that is not greater than zero disables the
 * corresponding comparison. No baseline row or a failing query: skipped silently.
 *
 * @param {string} body - Chapter body text.
 * @param {{query: Function}} cache - Query cache; `query(sql)` is awaited.
 * @param {object[]} candidates - Receives up to two candidates.
 */
async function checkStyleDeviation(body, cache, candidates) {
  let baseline
  try {
    const rows = await cache.query(
      'SELECT avg_sentence_length, sentence_length_variance FROM fingerprints WHERE is_baseline = 1 ORDER BY chapter_range_end DESC LIMIT 1'
    )
    baseline = rows[0]
  } catch {
    return
  }
  if (!baseline) return

  const metrics = styleMetrics(body)
  const { avg_sentence_length: baseAvg, sentence_length_variance: baseVar } = baseline
  const asPercent = (ratio) => Math.round(Math.abs(ratio) * 100)

  if (baseAvg > 0) {
    const ratio = (metrics.平均句长 - baseAvg) / baseAvg
    if (Math.abs(ratio) >= AVG_LEN_TOLERANCE) {
      const direction = ratio > 0 ? '变长' : '变短'
      candidates.push({
        type: '句式偏离',
        value: '平均句长',
        description: `本章平均句长 ${metrics.平均句长.toFixed(1)} 字，基线 ${baseAvg.toFixed(1)} 字，偏了 ${asPercent(ratio)}%，句子比基线明显${direction}`,
      })
    }
  }

  if (baseVar > 0) {
    const ratio = (metrics.句长方差 - baseVar) / baseVar
    if (Math.abs(ratio) >= VARIANCE_TOLERANCE) {
      const direction = ratio > 0 ? '更参差' : '更齐整'
      candidates.push({
        type: '句式偏离',
        value: '句长方差',
        description: `本章句长方差 ${metrics.句长方差.toFixed(1)}，基线 ${baseVar.toFixed(1)}，偏了 ${asPercent(ratio)}%，句子长短比基线${direction}`,
      })
    }
  }
}