#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Prompt 完整性静态校验。 验证 agents/*.md 和 skills/*/SKILL.md 的结构、引用、CLI 命令等, 不需要 LLM 调用,可加入 CI。 """ from __future__ import annotations import re import sys from pathlib import Path import pytest # --------------------------------------------------------------------------- # 基础路径 # --------------------------------------------------------------------------- PLUGIN_ROOT = Path(__file__).resolve().parent.parent.parent.parent AGENTS_DIR = PLUGIN_ROOT / "agents" SKILLS_DIR = PLUGIN_ROOT / "skills" REFERENCES_DIR = PLUGIN_ROOT / "references" SCRIPTS_DIR = PLUGIN_ROOT / "scripts" AGENT_FILES = sorted(AGENTS_DIR.glob("*.md")) SKILL_FILES = sorted(SKILLS_DIR.glob("*/SKILL.md")) ALL_PROMPT_FILES = AGENT_FILES + SKILL_FILES # webnovel.py 注册的子命令(从 add_parser 提取) REGISTERED_CLI_SUBCOMMANDS = { "where", "preflight", "use", "index", "state", "rag", "style", "entity", "context", "memory", "migrate", "status", "update-state", "backup", "archive", "init", "extract-context", "memory-contract", "review-pipeline", } # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _read_text(path: Path) -> str: return path.read_text(encoding="utf-8") def _extract_frontmatter(text: str) -> dict: """提取 YAML frontmatter 为 dict。""" m = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL) if not m: return {} result = {} for line in m.group(1).splitlines(): if ":" in line: key, _, value = line.partition(":") result[key.strip()] = value.strip() return result def _extract_referenced_paths(text: str, base_dir: Path) -> list[tuple[str, Path]]: """从 markdown 中提取被引用的文件路径(references/, skills/, agents/ 等)。 返回 (raw_ref, resolved_path) 列表。 """ refs = [] # 匹配 `references/xxx.md`、`../../references/xxx.md`、`skills/xxx` 等相对路径 for m in re.finditer(r'[`"]((?:\.\./)*(?:references|skills|agents)/[^\s`"]+\.md)[`"]', text): raw = m.group(1) resolved = (base_dir / raw).resolve() refs.append((raw, resolved)) # 匹配 references 段落中列出的路径(不带引号) for m in re.finditer(r'^- `((?:\.\./)*(?:references|skills|agents)/[^\s`]+\.md)`', text, re.MULTILINE): raw = m.group(1) resolved = (base_dir / raw).resolve() refs.append((raw, resolved)) return refs def _extract_cli_subcommands(text: str) -> list[str]: """从 prompt 中提取 webnovel.py 调用的子命令。""" cmds = set() for m in re.finditer(r'webnovel\.py["\s]+--project-root\s+[^\s]+\s+([a-z][\w-]*)', text): cmd = m.group(1) cmds.add(cmd) return sorted(cmds) # --------------------------------------------------------------------------- # 1. Frontmatter 完整性 # --------------------------------------------------------------------------- @pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda f: f.name) def test_agent_frontmatter_complete(agent_file: Path): """每个 agent 必须有 name, description, tools。""" fm = _extract_frontmatter(_read_text(agent_file)) assert "name" in fm, f"{agent_file.name}: 缺少 name" assert "description" in fm, f"{agent_file.name}: 缺少 description" assert "tools" in fm, f"{agent_file.name}: 缺少 tools" @pytest.mark.parametrize("skill_file", SKILL_FILES, ids=lambda f: f.parent.name) def test_skill_frontmatter_complete(skill_file: Path): """每个 skill 必须有 name, description。""" fm = _extract_frontmatter(_read_text(skill_file)) assert "name" in fm, f"{skill_file.parent.name}: 缺少 name" assert "description" in fm, f"{skill_file.parent.name}: 缺少 description" # --------------------------------------------------------------------------- # 2. Agent 模板结构(9 段) # --------------------------------------------------------------------------- EXPECTED_AGENT_SECTIONS = [ "1. 身份与目标", "2. 可用工具", "3. 思维链", "4. 输入", "5. 执行流程", "6. 边界与禁区", "7. 检查清单", "8. 输出格式", "9. 错误处理", ] @pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda f: f.name) def test_agent_template_structure(agent_file: Path): """每个 agent 必须包含 9 个编号段。""" text = _read_text(agent_file) missing = [] for section in EXPECTED_AGENT_SECTIONS: # 匹配 "## 1. 身份与目标" 或 "## 2. 可用工具与脚本"(允许后缀) pattern = rf"^## {re.escape(section)}" if not re.search(pattern, text, re.MULTILINE): missing.append(section) assert not missing, f"{agent_file.name}: 缺少段落 {missing}" # --------------------------------------------------------------------------- # 3. 引用完整性 # --------------------------------------------------------------------------- @pytest.mark.parametrize("prompt_file", ALL_PROMPT_FILES, ids=lambda f: f.name) def test_all_references_exist(prompt_file: Path): """prompt 中引用的所有文件路径都必须真实存在。""" text = _read_text(prompt_file) base_dir = prompt_file.parent refs = _extract_referenced_paths(text, base_dir) missing = [] for raw, resolved in refs: if not resolved.exists(): missing.append(raw) assert not missing, f"{prompt_file.name}: 引用了不存在的文件 {missing}" # --------------------------------------------------------------------------- # 4. CLI 命令有效性 # --------------------------------------------------------------------------- @pytest.mark.parametrize("prompt_file", ALL_PROMPT_FILES, ids=lambda f: f.name) def test_cli_commands_valid(prompt_file: Path): """prompt 中的 webnovel.py 子命令都必须在 CLI 注册表中。""" text = _read_text(prompt_file) cmds = _extract_cli_subcommands(text) # 排除已知例外(如 webnovel-review 的 workflow 命令待重构) skill_name = prompt_file.parent.name exceptions = _KNOWN_CLI_EXCEPTIONS.get(skill_name, set()) invalid = [c for c in cmds if c not in REGISTERED_CLI_SUBCOMMANDS and c not in exceptions] assert not invalid, f"{prompt_file.name}: 使用了未注册的 CLI 子命令 {invalid}" # --------------------------------------------------------------------------- # 5. Review Schema 一致性 # --------------------------------------------------------------------------- def test_review_schema_consistency(): """reviewer.md 输出格式中的字段必须与 review_schema.py 定义匹配。""" reviewer_text = _read_text(AGENTS_DIR / "reviewer.md") # 从 reviewer.md 的 JSON 示例中提取 issue 字段 issue_fields_in_prompt = set() json_block = re.search(r'"issues":\s*\[\s*\{([^}]+)\}', reviewer_text, re.DOTALL) if json_block: for m in re.finditer(r'"(\w+)":', json_block.group(1)): issue_fields_in_prompt.add(m.group(1)) # 从 review_schema.py 提取 ReviewIssue 字段 schema_path = SCRIPTS_DIR / "data_modules" / "review_schema.py" schema_text = _read_text(schema_path) schema_fields = set() in_review_issue = False for line in schema_text.splitlines(): if "class ReviewIssue" in line: in_review_issue = True continue if in_review_issue: if line.strip().startswith("class ") or line.strip().startswith("def "): break m = re.match(r"\s+(\w+):\s+", line) if m: schema_fields.add(m.group(1)) # reviewer prompt 中的字段应该是 schema 字段的子集 assert issue_fields_in_prompt, "无法从 reviewer.md 提取 issue 字段" assert schema_fields, "无法从 review_schema.py 提取字段" extra = issue_fields_in_prompt - schema_fields assert not extra, f"reviewer.md 中有字段不在 review_schema.py 中: {extra}" # --------------------------------------------------------------------------- # 6. 无残留引用(已删文件) # --------------------------------------------------------------------------- KNOWN_DELETED_FILES = [ "step-1.5-contract.md", "step-3-review-gate.md", "step-5-debt-switch.md", "workflow-details.md", "checker-output-schema.md", "workflow_manager.py", "webnovel-resume", ] _KNOWN_CLI_EXCEPTIONS = {} @pytest.mark.parametrize("prompt_file", ALL_PROMPT_FILES, ids=lambda f: f.name) def test_no_stale_references(prompt_file: Path): """不得引用已知已删除的文件。""" text = _read_text(prompt_file) found = [name for name in KNOWN_DELETED_FILES if name in text] assert not found, f"{prompt_file.name}: 残留引用已删除文件 {found}" def test_webnovel_review_skill_uses_unified_reviewer_pipeline(): """webnovel-review 必须与 webnovel-write 使用同一套 reviewer + review-pipeline 链路。""" skill_text = _read_text(SKILLS_DIR / "webnovel-review" / "SKILL.md") assert "`reviewer`" in skill_text assert "review-pipeline" in skill_text assert ".webnovel/tmp/review_results.json" in skill_text assert ".webnovel/tmp/review_metrics.json" in skill_text for legacy_agent in ( "consistency-checker", "continuity-checker", "ooc-checker", "reader-pull-checker", "high-point-checker", "pacing-checker", ): assert legacy_agent not in skill_text assert " workflow " not in skill_text