Kaynağa Gözat

test(cross-val): adopt artifact_ownership behavior eval + prompt-level ownership assertions (from Codex, adapted to zh wording)

lingfengQAQ 2 hafta önce
ebeveyn
işleme
cd0f509e2d

+ 5 - 0
webnovel-writer/evals/fixtures/behavior/fast.json

@@ -116,6 +116,11 @@
       "type": "data_agent_boundary",
       "description": "data-agent produces commit artifacts and does not directly write projection read-models."
     },
+    {
+      "id": "artifact_ownership",
+      "type": "artifact_ownership",
+      "description": "reviewer returns JSON while main flow writes review_results.json; data-agent writes only tmp artifacts."
+    },
     {
       "id": "commit_drives_projection",
       "type": "commit_projection_runtime",

+ 15 - 0
webnovel-writer/scripts/data_modules/tests/test_prompt_integrity.py

@@ -651,3 +651,18 @@ def test_write_skill_has_readonly_git_diff_change_surface_check():
     assert "diff --check" in text, (
         "write SKILL 缺少 git diff --check 空白/冲突标记校验"
     )
+
+
+# Class-B red line (write ownership, prompt layer): the write/review skills must
+# state artifact ownership in their own text. Together with the frontmatter test
+# (test_agent_write_ownership_matches_tools_frontmatter) and the behavior eval
+# (artifact_ownership), this gives three checks that guard one another.
+def test_write_review_skills_state_artifact_ownership():
+    """reviewer returns JSON, the main flow persists review_results.json, data-agent is the sole writer."""
+    # Read both SKILL.md files up front (write first, then review) before asserting anything.
+    skill_texts = {
+        skill: _read_text(SKILLS_DIR / skill / "SKILL.md")
+        for skill in ("webnovel-write", "webnovel-review")
+    }
+    for skill, body in skill_texts.items():
+        mentions_main_flow = "主流程" in body
+        mentions_results_path = ".webnovel/tmp/review_results.json" in body
+        assert mentions_main_flow and mentions_results_path, (
+            f"{skill}: 缺 reviewer→主流程落盘 review_results.json 的所有权说明"
+        )
+    # The remaining ownership phrases are only required in the write skill.
+    write_body = skill_texts["webnovel-write"]
+    assert "唯一写入者" in write_body, "webnovel-write 缺 data-agent 唯一写入者说明"
+    for phrase in (
+        "主流程只检查文件存在与 schema",
+        "不直接写 state/index/summaries/memory/vectors/projection",
+    ):
+        assert phrase in write_body

+ 30 - 0
webnovel-writer/scripts/run_behavior_evals.py

@@ -152,6 +152,35 @@ def _eval_data_agent_boundary(root: Path, case: dict[str, Any]) -> dict[str, Any
     )
 
 
+def _eval_artifact_ownership(root: Path, case: dict[str, Any]) -> dict[str, Any]:
+    """Check that artifact write ownership agrees between agent tools and skill prompts.
+
+    Reads the webnovel-write and webnovel-review ``SKILL.md`` files plus the
+    ``tools`` frontmatter entry of ``agents/reviewer.md`` and
+    ``agents/data-agent.md`` under the plugin root, and records every violated
+    expectation:
+
+    * reviewer must not list the ``Write`` tool;
+    * data-agent must list the ``Write`` tool;
+    * both skills must mention "主流程" and ".webnovel/tmp/review_results.json";
+    * webnovel-write must contain the three ownership red-line phrases.
+
+    Tool names are compared as whole identifiers, not by substring, so a tool
+    such as ``TodoWrite`` is not mistaken for ``Write``.
+
+    NOTE(review): assumes ``_frontmatter`` returns a mapping whose ``tools``
+    value is a string or an iterable of strings -- confirm against the helper.
+
+    Args:
+        root: Directory handed to ``_plugin_root`` to locate the plugin.
+        case: The eval case, passed through to ``_result``.
+
+    Returns:
+        Whatever ``_result`` builds; ``passed`` is true only when nothing was
+        recorded as missing, and ``evidence`` lists the violations (or a
+        one-line summary on success).
+    """
+    import re  # local import so this block does not depend on module-level imports
+
+    def _tool_names(raw: Any) -> set[str]:
+        """Split a ``tools`` frontmatter value into a set of whole identifiers."""
+        if not raw:
+            # Missing key, empty string, empty list or a bare ``tools:`` entry.
+            return set()
+        joined = raw if isinstance(raw, str) else " ".join(str(item) for item in raw)
+        return set(re.findall(r"[A-Za-z_][A-Za-z0-9_]*", joined))
+
+    plugin_root = _plugin_root(root)
+    write_text = _read(plugin_root / "skills" / "webnovel-write" / "SKILL.md")
+    review_text = _read(plugin_root / "skills" / "webnovel-review" / "SKILL.md")
+    reviewer_tools = _tool_names(_frontmatter(_read(plugin_root / "agents" / "reviewer.md")).get("tools", ""))
+    data_tools = _tool_names(_frontmatter(_read(plugin_root / "agents" / "data-agent.md")).get("tools", ""))
+
+    missing: list[str] = []
+    if "Write" in reviewer_tools:
+        missing.append("reviewer 不应持 Write(review_results.json 由主流程落盘)")
+    if "Write" not in data_tools:
+        missing.append("data-agent 应持 Write(它是 tmp artifact 的唯一写入者)")
+    for text, owner in ((write_text, "webnovel-write"), (review_text, "webnovel-review")):
+        if "主流程" not in text or ".webnovel/tmp/review_results.json" not in text:
+            missing.append(f"{owner}: 缺 reviewer→主流程落盘 review_results.json 的所有权说明")
+    # These phrases are only required in the write skill.
+    missing.extend(
+        f"webnovel-write 缺写入所有权红线:{item}"
+        for item in (
+            "唯一写入者",
+            "主流程只检查文件存在与 schema",
+            "不直接写 state/index/summaries/memory/vectors/projection",
+        )
+        if item not in write_text
+    )
+    return _result(
+        case,
+        passed=not missing,
+        reason="artifact ownership matches tools and prompts" if not missing else "artifact ownership drifted",
+        evidence=missing or ["reviewer→主流程 review_results.json;data-agent→tmp artifacts"],
+    )
+
+
 def _eval_commit_projection_runtime(root: Path, case: dict[str, Any]) -> dict[str, Any]:
     scripts_dir = _plugin_root(root) / "scripts"
     if str(scripts_dir) not in sys.path:
@@ -206,6 +235,7 @@ EVALUATORS = {
     "skill_contract": _eval_skill_contract,
     "write_blocking_gate": _eval_write_blocking_gate,
     "data_agent_boundary": _eval_data_agent_boundary,
+    "artifact_ownership": _eval_artifact_ownership,
     "commit_projection_runtime": _eval_commit_projection_runtime,
     "dashboard_read_only": _eval_dashboard_read_only,
 }