|
|
@@ -0,0 +1,236 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+Prompt 完整性静态校验。
|
|
|
+
|
|
|
+验证 agents/*.md 和 skills/*/SKILL.md 的结构、引用、CLI 命令等,
|
|
|
+不需要 LLM 调用,可加入 CI。
|
|
|
+"""
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+import re
|
|
|
+import sys
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
+import pytest
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 基础路径
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
# The plugin root is the fourth ancestor directory of this test file.
PLUGIN_ROOT = Path(__file__).resolve().parent.parent.parent.parent
AGENTS_DIR, SKILLS_DIR, REFERENCES_DIR, SCRIPTS_DIR = (
    PLUGIN_ROOT / subdir
    for subdir in ("agents", "skills", "references", "scripts")
)

# Prompt files under test; sorted so the parametrized order is deterministic.
AGENT_FILES = sorted(AGENTS_DIR.glob("*.md"))
SKILL_FILES = sorted(SKILLS_DIR.glob("*/SKILL.md"))
ALL_PROMPT_FILES = [*AGENT_FILES, *SKILL_FILES]
|
|
|
+
|
|
|
# Sub-commands registered by webnovel.py (collected from its add_parser calls).
# Keep this set in sync by hand whenever the CLI gains or loses a sub-command.
REGISTERED_CLI_SUBCOMMANDS = {
    # project location / selection
    "where",
    "preflight",
    "use",
    # data modules
    "index",
    "state",
    "rag",
    "style",
    "entity",
    "context",
    "memory",
    # maintenance
    "migrate",
    "status",
    "update-state",
    "backup",
    "archive",
    # workflow helpers
    "init",
    "extract-context",
    "memory-contract",
    "review-pipeline",
}
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# Helpers
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
def _read_text(path: Path) -> str:
    """Return the full contents of *path*, decoded as UTF-8."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
|
|
|
+
|
|
|
+
|
|
|
def _extract_frontmatter(text: str) -> dict:
    """Parse the leading ``---`` frontmatter block into a flat dict.

    This is a deliberately small line-based parser, not a YAML implementation:
    only top-level ``key: value`` lines are recorded, and values are returned
    as stripped strings (a key that introduces a nested block or list maps to
    an empty string).

    Args:
        text: Full contents of a markdown prompt file.

    Returns:
        Mapping of top-level frontmatter keys to their raw string values, or
        an empty dict when the text does not start with a frontmatter block.
    """
    # A UTF-8 BOM in front of the opening delimiter must not hide the block.
    m = re.match(r"^\ufeff?---\s*\n(.*?)\n---", text, re.DOTALL)
    if not m:
        return {}

    result = {}
    for line in m.group(1).splitlines():
        # Skip blanks, comments, list items and indented (nested) lines:
        # a colon inside any of them does not define a top-level key, and
        # counting them would let a nested "name:" satisfy a required-key
        # check.
        if not line or line[0] in " \t#-":
            continue
        key, sep, value = line.partition(":")
        if sep:
            result[key.strip()] = value.strip()
    return result
|
|
|
+
|
|
|
+
|
|
|
def _extract_referenced_paths(text: str, base_dir: Path) -> list[tuple[str, Path]]:
    """Collect the ``.md`` paths under references/, skills/ or agents/ cited in *text*.

    Two shapes are recognised: a path wrapped in backticks or double quotes
    anywhere in the text, and a backticked path that opens a ``- `` bullet.
    Every raw path is reported once, in order of first appearance, so a
    single broken reference is not listed several times by callers.

    Args:
        text: Markdown source of a prompt file.
        base_dir: Directory the relative paths are resolved against.

    Returns:
        List of ``(raw_ref, resolved_path)`` tuples, where ``resolved_path``
        is ``(base_dir / raw_ref).resolve()``.
    """
    patterns = (
        # `references/x.md`, "../../references/x.md", `skills/x/SKILL.md`, ...
        (r'[`"]((?:\.\./)*(?:references|skills|agents)/[^\s`"]+\.md)[`"]', 0),
        # Bulleted entries: "- `references/x.md`". Nearly all of these are
        # also matched by the pattern above, hence the de-duplication below.
        (r'^- `((?:\.\./)*(?:references|skills|agents)/[^\s`]+\.md)`', re.MULTILINE),
    )

    refs = []
    seen = set()
    for pattern, flags in patterns:
        for m in re.finditer(pattern, text, flags):
            raw = m.group(1)
            if raw in seen:
                continue
            seen.add(raw)
            refs.append((raw, (base_dir / raw).resolve()))
    return refs
|
|
|
+
|
|
|
+
|
|
|
def _extract_cli_subcommands(text: str) -> list[str]:
    """Return the webnovel.py sub-commands invoked in *text*.

    An invocation is recognised as ``webnovel.py`` followed by
    ``--project-root <value>`` (or ``--project-root=<value>``) and then the
    sub-command token. The project-root value may be a quoted string that
    contains spaces; otherwise part of the path would be mistaken for the
    sub-command.

    Args:
        text: Markdown source of a prompt file.

    Returns:
        Sorted list of the distinct sub-command names found.
    """
    pattern = (
        r'webnovel\.py["\s]+--project-root(?:\s+|=)'
        # Quoted value (may contain spaces) first, bare token as the fallback.
        r'''(?:"[^"\n]*"|'[^'\n]*'|\S+)'''
        r'\s+([a-z][\w-]*)'
    )
    return sorted({m.group(1) for m in re.finditer(pattern, text)})
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 1. Frontmatter 完整性
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
@pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda path: path.name)
def test_agent_frontmatter_complete(agent_file: Path):
    """Every agent prompt must declare ``name``, ``description`` and ``tools``."""
    frontmatter = _extract_frontmatter(_read_text(agent_file))
    for required_key in ("name", "description", "tools"):
        assert required_key in frontmatter, f"{agent_file.name}: 缺少 {required_key}"
|
|
|
+
|
|
|
+
|
|
|
@pytest.mark.parametrize("skill_file", SKILL_FILES, ids=lambda path: path.parent.name)
def test_skill_frontmatter_complete(skill_file: Path):
    """Every skill prompt must declare ``name`` and ``description``."""
    frontmatter = _extract_frontmatter(_read_text(skill_file))
    skill_name = skill_file.parent.name
    for required_key in ("name", "description"):
        assert required_key in frontmatter, f"{skill_name}: 缺少 {required_key}"
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 2. Agent 模板结构(9 段)
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
# Numbered "## N. <title>" headings every agent prompt must contain.
# Each entry is matched as a heading prefix, so a heading may carry a suffix.
EXPECTED_AGENT_SECTIONS = [
    "1. 身份与目标", "2. 可用工具", "3. 思维链",
    "4. 输入", "5. 执行流程", "6. 边界与禁区",
    "7. 检查清单", "8. 输出格式", "9. 错误处理",
]
|
|
|
+
|
|
|
+
|
|
|
@pytest.mark.parametrize("agent_file", AGENT_FILES, ids=lambda path: path.name)
def test_agent_template_structure(agent_file: Path):
    """Every agent prompt must contain all nine numbered template sections."""
    content = _read_text(agent_file)
    # Prefix match on the heading line: "## 2. 可用工具与脚本" satisfies
    # the expected section "2. 可用工具".
    missing = [
        section
        for section in EXPECTED_AGENT_SECTIONS
        if re.search(rf"^## {re.escape(section)}", content, re.MULTILINE) is None
    ]
    assert not missing, f"{agent_file.name}: 缺少段落 {missing}"
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 3. 引用完整性
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
@pytest.mark.parametrize(
    "prompt_file",
    ALL_PROMPT_FILES,
    # Every skill file is named SKILL.md, so the bare file name is not a
    # unique test ID; include the parent directory to tell them apart.
    ids=lambda path: f"{path.parent.name}/{path.name}",
)
def test_all_references_exist(prompt_file: Path):
    """Every file path cited by a prompt must exist on disk.

    Cited paths are resolved relative to the directory that contains the
    prompt file.
    """
    label = f"{prompt_file.parent.name}/{prompt_file.name}"
    refs = _extract_referenced_paths(_read_text(prompt_file), prompt_file.parent)
    # dict.fromkeys keeps first-seen order while reporting each path once.
    missing = list(
        dict.fromkeys(raw for raw, resolved in refs if not resolved.exists())
    )
    assert not missing, f"{label}: 引用了不存在的文件 {missing}"
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 4. CLI 命令有效性
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
@pytest.mark.parametrize(
    "prompt_file",
    ALL_PROMPT_FILES,
    # Every skill file is named SKILL.md, so the bare file name is not a
    # unique test ID; include the parent directory to tell them apart.
    ids=lambda path: f"{path.parent.name}/{path.name}",
)
def test_cli_commands_valid(prompt_file: Path):
    """Every webnovel.py sub-command used in a prompt must be registered.

    Sub-commands listed in ``_KNOWN_CLI_EXCEPTIONS`` under the prompt's parent
    directory name are tolerated.
    """
    owner = prompt_file.parent.name
    label = f"{owner}/{prompt_file.name}"
    used = _extract_cli_subcommands(_read_text(prompt_file))
    # _KNOWN_CLI_EXCEPTIONS is looked up at call time, so it may be defined
    # anywhere at module level.
    allowed = REGISTERED_CLI_SUBCOMMANDS | _KNOWN_CLI_EXCEPTIONS.get(owner, set())
    invalid = [cmd for cmd in used if cmd not in allowed]
    assert not invalid, f"{label}: 使用了未注册的 CLI 子命令 {invalid}"
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 5. Review Schema 一致性
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
def test_review_schema_consistency():
    """Issue fields shown in reviewer.md must all exist on ``ReviewIssue``.

    The prompt side is read from the first object of the ``"issues"`` array
    in reviewer.md. The schema side is a textual scan of review_schema.py
    that collects indented ``name: ...`` lines following the
    ``class ReviewIssue`` line, up to the next ``class``/``def`` line.
    """
    prompt_text = _read_text(AGENTS_DIR / "reviewer.md")

    # Keys of the first issue object in the prompt's JSON example.
    first_issue = re.search(r'"issues":\s*\[\s*\{([^}]+)\}', prompt_text, re.DOTALL)
    if first_issue:
        issue_fields_in_prompt = set(re.findall(r'"(\w+)":', first_issue.group(1)))
    else:
        issue_fields_in_prompt = set()

    schema_source = _read_text(SCRIPTS_DIR / "data_modules" / "review_schema.py")
    schema_fields = set()
    remaining = iter(schema_source.splitlines())

    # Advance to the class header; nothing is collected if it is absent.
    for line in remaining:
        if "class ReviewIssue" in line:
            break

    for line in remaining:
        # A later line containing the same marker is skipped, not treated
        # as the end of the class body.
        if "class ReviewIssue" in line:
            continue
        if line.strip().startswith(("class ", "def ")):
            break
        annotated = re.match(r"\s+(\w+):\s+", line)
        if annotated:
            schema_fields.add(annotated.group(1))

    # The prompt's fields are expected to be a subset of the schema's fields.
    assert issue_fields_in_prompt, "无法从 reviewer.md 提取 issue 字段"
    assert schema_fields, "无法从 review_schema.py 提取字段"
    extra = issue_fields_in_prompt - schema_fields
    assert not extra, f"reviewer.md 中有字段不在 review_schema.py 中: {extra}"
|
|
|
+
|
|
|
+
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+# 6. 无残留引用(已删文件)
|
|
|
+# ---------------------------------------------------------------------------
|
|
|
+
|
|
|
# Names that must no longer appear anywhere in a prompt file.
# Each entry is checked as a plain substring of the prompt text.
KNOWN_DELETED_FILES = [
    # markdown documents
    "step-1.5-contract.md",
    "step-3-review-gate.md",
    "step-5-debt-switch.md",
    "workflow-details.md",
    "checker-output-schema.md",
    # scripts
    "workflow_manager.py",
    # other names
    "webnovel-resume",
]
|
|
|
+
|
|
|
# Per-directory allow-list of unregistered sub-commands, keyed by the name of
# the prompt file's parent directory.
# webnovel-review/SKILL.md still contains many `workflow` commands and needs a
# separate refactor (missed in Phase 1).
_KNOWN_CLI_EXCEPTIONS = {"webnovel-review": {"workflow"}}
|
|
|
+
|
|
|
+
|
|
|
@pytest.mark.parametrize(
    "prompt_file",
    ALL_PROMPT_FILES,
    # Every skill file is named SKILL.md, so the bare file name is not a
    # unique test ID; include the parent directory to tell them apart.
    ids=lambda path: f"{path.parent.name}/{path.name}",
)
def test_no_stale_references(prompt_file: Path):
    """A prompt must not mention any name listed in ``KNOWN_DELETED_FILES``.

    The check is a plain substring search over the whole prompt text.
    """
    label = f"{prompt_file.parent.name}/{prompt_file.name}"
    text = _read_text(prompt_file)
    found = [name for name in KNOWN_DELETED_FILES if name in text]
    assert not found, f"{label}: 残留引用已删除文件 {found}"
|