2 ماه پیش · caa7fe644a
--- a/webnovel-writer/scripts/data_modules/tests/test_prompt_integrity.py
+++ b/webnovel-writer/scripts/data_modules/tests/test_prompt_integrity.py
@@ -0,0 +1,236 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Prompt 完整性静态校验。
			
 
				+
			
 
				+验证 agents/*.md 和 skills/*/SKILL.md 的结构、引用、CLI 命令等，
			
 
				+不需要 LLM 调用，可加入 CI。
			
 
				+"""
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import re
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 基础路径
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				# Plugin root: four directory levels above this test file.
				PLUGIN_ROOT = Path(__file__).resolve().parent.parent.parent.parent

				# Top-level plugin directories inspected by the checks in this module.
				AGENTS_DIR, SKILLS_DIR, REFERENCES_DIR, SCRIPTS_DIR = (
				    PLUGIN_ROOT / dirname
				    for dirname in ("agents", "skills", "references", "scripts")
				)

				# Prompt files under test; sorted so the collection order is deterministic.
				AGENT_FILES = sorted(AGENTS_DIR.glob("*.md"))
				SKILL_FILES = sorted(SKILLS_DIR.glob("*/SKILL.md"))
				ALL_PROMPT_FILES = [*AGENT_FILES, *SKILL_FILES]
			
 
				+
			
 
				# Subcommands registered by webnovel.py (extracted from its add_parser calls).
				REGISTERED_CLI_SUBCOMMANDS = {
				    "where",
				    "preflight",
				    "use",
				    "index",
				    "state",
				    "rag",
				    "style",
				    "entity",
				    "context",
				    "memory",
				    "migrate",
				    "status",
				    "update-state",
				    "backup",
				    "archive",
				    "init",
				    "extract-context",
				    "memory-contract",
				    "review-pipeline",
				}
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Helpers
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				def _read_text(path: Path) -> str:
				    """Return the full contents of *path* decoded as UTF-8."""
				    with path.open(encoding="utf-8") as handle:
				        return handle.read()
			
 
				+
			
 
				+
			
 
				def _extract_frontmatter(text: str) -> dict:
				    """Parse the leading ``---`` delimited frontmatter of *text* into a dict.

				    This is a flat, line-based parse rather than real YAML: every line that
				    contains a colon is split at its first colon into a stripped key and a
				    stripped value. When the text does not start with a frontmatter block an
				    empty dict is returned.
				    """
				    header = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL)
				    if header is None:
				        return {}
				    split_lines = (
				        entry.partition(":")
				        for entry in header.group(1).splitlines()
				        if ":" in entry
				    )
				    # A key that appears more than once keeps its last value.
				    return {key.strip(): value.strip() for key, _, value in split_lines}
			
 
				+
			
 
				+
			
 
				def _extract_referenced_paths(text: str, base_dir: Path) -> list[tuple[str, Path]]:
				    """Collect the markdown file paths referenced inside a prompt.

				    Only relative paths that point into ``references/``, ``skills/`` or
				    ``agents/`` (optionally prefixed by ``../`` segments) and end in ``.md``
				    are considered.

				    Args:
				        text: Markdown source of the prompt.
				        base_dir: Directory that the relative references are resolved against.

				    Returns:
				        A list of ``(raw_ref, resolved_path)`` tuples. Each distinct raw
				        reference appears once, in order of first appearance (inline matches
				        first, then any list-item-only matches).
				    """
				    patterns = (
				        # Inline references wrapped in backticks or double quotes, e.g.
				        # `references/xxx.md` or "../../references/xxx.md".
				        (r'[`"]((?:\.\./)*(?:references|skills|agents)/[^\s`"]+\.md)[`"]', 0),
				        # Bullet-list entries of the form "- `path.md`" at the start of a line.
				        (r'^- `((?:\.\./)*(?:references|skills|agents)/[^\s`]+\.md)`', re.MULTILINE),
				    )
				    # The two patterns overlap (a backticked list item matches both), so key
				    # on the raw reference to avoid reporting the same path twice. A dict
				    # keeps first-seen order.
				    resolved_by_raw: dict[str, Path] = {}
				    for pattern, flags in patterns:
				        for match in re.finditer(pattern, text, flags):
				            raw = match.group(1)
				            if raw not in resolved_by_raw:
				                resolved_by_raw[raw] = (base_dir / raw).resolve()
				    return list(resolved_by_raw.items())
			
 
				+
			
 
				+
			
 
				def _extract_cli_subcommands(text: str) -> list[str]:
				    """Return the sorted, distinct webnovel.py subcommands invoked in *text*.

				    A subcommand is the lowercase word that follows
				    ``webnovel.py --project-root <value>`` in a command line.
				    """
				    invocation = r'webnovel\.py["\s]+--project-root\s+[^\s]+\s+([a-z][\w-]*)'
				    return sorted({hit.group(1) for hit in re.finditer(invocation, text)})
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 1. Frontmatter 完整性
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				@pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda f: f.name)
				def test_agent_frontmatter_complete(agent_file: Path):
				    """Every agent prompt must declare name, description and tools in its frontmatter."""
				    frontmatter = _extract_frontmatter(_read_text(agent_file))
				    for required_key in ("name", "description", "tools"):
				        assert required_key in frontmatter, f"{agent_file.name}: 缺少 {required_key}"
			
 
				+
			
 
				+
			
 
				@pytest.mark.parametrize("skill_file", SKILL_FILES, ids=lambda f: f.parent.name)
				def test_skill_frontmatter_complete(skill_file: Path):
				    """Every skill prompt must declare name and description in its frontmatter."""
				    frontmatter = _extract_frontmatter(_read_text(skill_file))
				    # Skills are identified by their directory name; the file is always SKILL.md.
				    skill_name = skill_file.parent.name
				    for required_key in ("name", "description"):
				        assert required_key in frontmatter, f"{skill_name}: 缺少 {required_key}"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 2. Agent 模板结构（9 段）
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				# Numbered "## N. <title>" headings every agent prompt is expected to contain.
				EXPECTED_AGENT_SECTIONS = [
				    "1. 身份与目标",  # identity and goal
				    "2. 可用工具",  # available tools
				    "3. 思维链",  # chain of thought
				    "4. 输入",  # input
				    "5. 执行流程",  # execution flow
				    "6. 边界与禁区",  # boundaries and off-limits areas
				    "7. 检查清单",  # checklist
				    "8. 输出格式",  # output format
				    "9. 错误处理",  # error handling
				]
			
 
				+
			
 
				+
			
 
				@pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda f: f.name)
				def test_agent_template_structure(agent_file: Path):
				    """Every agent prompt must contain all nine numbered sections."""
				    body = _read_text(agent_file)
				    # A heading only has to start with the expected title, so a suffix such
				    # as "## 2. 可用工具与脚本" is still accepted.
				    missing = [
				        section
				        for section in EXPECTED_AGENT_SECTIONS
				        if re.search(rf"^## {re.escape(section)}", body, re.MULTILINE) is None
				    ]
				    assert not missing, f"{agent_file.name}: 缺少段落 {missing}"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 3. 引用完整性
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				@pytest.mark.parametrize(
				    "prompt_file",
				    ALL_PROMPT_FILES,
				    # Every skill prompt is named SKILL.md, so the bare file name is not a
				    # unique id; prefix it with the parent directory name.
				    ids=lambda f: f"{f.parent.name}/{f.name}",
				)
				def test_all_references_exist(prompt_file: Path):
				    """Every file path referenced from a prompt must exist on disk.

				    References are resolved relative to the directory containing the prompt.
				    """
				    text = _read_text(prompt_file)
				    refs = _extract_referenced_paths(text, prompt_file.parent)
				    # Report each missing reference once, even if the prompt mentions it
				    # several times; dict.fromkeys keeps first-seen order.
				    missing = list(
				        dict.fromkeys(raw for raw, resolved in refs if not resolved.exists())
				    )
				    label = f"{prompt_file.parent.name}/{prompt_file.name}"
				    assert not missing, f"{label}: 引用了不存在的文件 {missing}"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 4. CLI 命令有效性
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				@pytest.mark.parametrize(
				    "prompt_file",
				    ALL_PROMPT_FILES,
				    # Every skill prompt is named SKILL.md, so the bare file name is not a
				    # unique id; prefix it with the parent directory name.
				    ids=lambda f: f"{f.parent.name}/{f.name}",
				)
				def test_cli_commands_valid(prompt_file: Path):
				    """Every webnovel.py subcommand used in a prompt must be a registered one."""
				    cmds = _extract_cli_subcommands(_read_text(prompt_file))
				    # Known exceptions are keyed by the prompt's parent directory name (for
				    # example, the webnovel-review workflow commands that still await a
				    # refactor). _KNOWN_CLI_EXCEPTIONS is defined further down in this module
				    # and is looked up when the test runs.
				    skill_name = prompt_file.parent.name
				    allowed = REGISTERED_CLI_SUBCOMMANDS | _KNOWN_CLI_EXCEPTIONS.get(skill_name, set())
				    invalid = [cmd for cmd in cmds if cmd not in allowed]
				    label = f"{skill_name}/{prompt_file.name}"
				    assert not invalid, f"{label}: 使用了未注册的 CLI 子命令 {invalid}"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 5. Review Schema 一致性
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				def test_review_schema_consistency():
				    """Issue fields shown in reviewer.md must all exist on ReviewIssue in review_schema.py."""
				    reviewer_text = _read_text(AGENTS_DIR / "reviewer.md")

				    # Prompt side: the quoted keys of the first object inside the "issues"
				    # array of the JSON example.
				    first_issue = re.search(r'"issues":\s*\[\s*\{([^}]+)\}', reviewer_text, re.DOTALL)
				    issue_fields_in_prompt = (
				        set(re.findall(r'"(\w+)":', first_issue.group(1))) if first_issue else set()
				    )

				    # Schema side: scan review_schema.py as text, line by line.
				    schema_text = _read_text(SCRIPTS_DIR / "data_modules" / "review_schema.py")
				    schema_lines = iter(schema_text.splitlines())

				    # Skip ahead to the first line that mentions the ReviewIssue class.
				    for line in schema_lines:
				        if "class ReviewIssue" in line:
				            break

				    # Collect indented "name: ..." lines until the next class or def begins.
				    schema_fields = set()
				    for line in schema_lines:
				        if "class ReviewIssue" in line:
				            continue
				        if line.strip().startswith(("class ", "def ")):
				            break
				        field = re.match(r"\s+(\w+):\s+", line)
				        if field:
				            schema_fields.add(field.group(1))

				    # The prompt's fields should be a subset of the schema's fields.
				    assert issue_fields_in_prompt, "无法从 reviewer.md 提取 issue 字段"
				    assert schema_fields, "无法从 review_schema.py 提取字段"
				    extra = issue_fields_in_prompt - schema_fields
				    assert not extra, f"reviewer.md 中有字段不在 review_schema.py 中: {extra}"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 6. 无残留引用（已删文件）
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				# Names of files known to have been deleted; prompts must no longer mention them.
				KNOWN_DELETED_FILES = [
				    # Markdown documents
				    "step-1.5-contract.md",
				    "step-3-review-gate.md",
				    "step-5-debt-switch.md",
				    "workflow-details.md",
				    "checker-output-schema.md",
				    # Other names
				    "workflow_manager.py",
				    "webnovel-resume",
				]
			
 
				+
			
 
				# Per-prompt-directory allowances for subcommands that are not registered.
				# The webnovel-review SKILL.md still contains many workflow commands and
				# needs a separate refactor (missed in Phase 1).
				_KNOWN_CLI_EXCEPTIONS = {"webnovel-review": {"workflow"}}
			
 
				+
			
 
				+
			
 
				@pytest.mark.parametrize(
				    "prompt_file",
				    ALL_PROMPT_FILES,
				    # Every skill prompt is named SKILL.md, so the bare file name is not a
				    # unique id; prefix it with the parent directory name.
				    ids=lambda f: f"{f.parent.name}/{f.name}",
				)
				def test_no_stale_references(prompt_file: Path):
				    """Prompts must not mention any file listed in KNOWN_DELETED_FILES.

				    This is a plain substring check against the prompt text.
				    """
				    text = _read_text(prompt_file)
				    found = [name for name in KNOWN_DELETED_FILES if name in text]
				    label = f"{prompt_file.parent.name}/{prompt_file.name}"
				    assert not found, f"{label}: 残留引用已删除文件 {found}"