docs: add evaluator harness config scenario

This commit is contained in:
Affaan Mustafa
2026-05-12 17:48:21 -04:00
committed by Affaan Mustafa
parent cd90c84c32
commit 3dddfc8270
8 changed files with 278 additions and 7 deletions

View File

@@ -0,0 +1,49 @@
# Harness Config Quality Playbook
Candidate id: `adapter-matrix-backed-drift-check`
Use this playbook when a PR, install change, or setup recommendation touches
MCP, plugins, hooks, commands, agents, rules, install targets, or harness
adapter surfaces.
## Accepted Path
1. Identify the touched harness/config surface.
2. Retrieve the adapter state from
`docs/architecture/harness-adapter-compliance.md` or
`scripts/lib/harness-adapter-compliance.js`.
3. Record whether the harness is `Native`, `Adapter-backed`,
`Instruction-backed`, or `Reference-only`.
4. Name the install/onramp path and verification command from the matrix.
5. Preserve existing user and project config by using merge, dry-run, or
explicit no-overwrite behavior.
6. Run the validation gates relevant to the touched surface:
- `npm run harness:adapters -- --check`
- `npm run harness:audit -- --format json`
- `node tests/lib/install-targets.test.js`
- `node tests/opencode-plugin-hooks.test.js`
- `node tests/docs/mcp-management-docs.test.js`
7. Promote a config recommendation only when the evidence matches the harness
state and the config preservation behavior is explicit.
## Rejected Path
Do not claim Claude hook parity for Codex, Gemini, Zed, OpenCode, or other
harnesses unless the adapter matrix and tests prove it.
Do not overwrite `settings.json`, MCP configs, plugin manifests, rule files, or
command surfaces without a merge/dry-run path and a rollback note.
Do not toggle live MCP servers, publish plugins, or edit user-level harness
config from the evaluator run.
## Minimum Validation
- `npm run harness:adapters -- --check`
- `npm run harness:audit -- --format json`
- Focused install, plugin, MCP, or hook test for the changed surface
- `git diff --check`
- Markdown lint when docs are touched
Record the adapter state, risk note, validation commands, and config
preservation behavior in the maintainer PR body or handoff.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"adapter_evidence": 0.94,
"config_preservation": 0.88,
"verification_specificity": 0.9,
"parity_claim_safety": 1,
"publication_safety": 1
},
"findings": [
{
"id": "adapter-state-required",
"severity": "warning",
"summary": "Harness recommendations must retrieve the adapter state before claiming native support or runtime enforcement."
},
{
"id": "config-overwrite-risk",
"severity": "warning",
"summary": "MCP, hook, plugin, command, and rule changes must preserve existing user/project config and use dry-run or merge behavior when available."
},
{
"id": "verification-command-needed",
"severity": "info",
"summary": "The accepted playbook names harness adapter, harness audit, install-target, and plugin-hook regression gates before a config change can merge."
}
],
"recommended_next_action": {
"candidate_id": "adapter-matrix-backed-drift-check",
"action": "Use the promoted harness-config quality playbook for PRs or setup work touching MCP, plugin, hook, command, agent, rule, or adapter surfaces."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "harness-config-quality",
"title": "Detect harness config drift before changing adapters or installs",
"mode": "read_only_prototype",
"objective": "Given a change to MCP, plugin, hook, command, agent, rule, or harness adapter surfaces, retrieve the adapter matrix and validation evidence before promoting a setup recommendation or config change.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/architecture/harness-adapter-compliance.md",
"purpose": "Public adapter matrix that names harness state, install/onramp paths, verification commands, and risk notes"
},
{
"kind": "repo_source",
"path": "scripts/lib/harness-adapter-compliance.js",
"purpose": "Structured source of truth for the adapter compliance matrix"
},
{
"kind": "repo_config",
"path": "hooks/hooks.json",
"purpose": "Claude hook surface that must not be assumed portable without adapter evidence"
},
{
"kind": "repo_config",
"path": "mcp-configs/mcp-servers.json",
"purpose": "Reference MCP config that can drift from harness-specific runtime semantics"
},
{
"kind": "repo_test",
"command": "npm run harness:adapters -- --check",
"purpose": "Adapter matrix consistency gate"
}
],
"retrieval_questions": [
"Which harness or config surface changed: MCP, plugin, hook, command, agent, rule, or adapter?",
"Does the adapter matrix classify this harness as native, adapter-backed, instruction-backed, or reference-only?",
"Which install path, verification command, risk note, owner, and source doc apply?",
"Does the recommendation preserve existing user config rather than overwriting it?",
"Which compatibility regression or harness audit command proves the setup still works?"
],
"forbidden_actions": [
"claiming native support for instruction-backed or reference-only harnesses",
"copying Claude hook semantics into Codex, Gemini, Zed, or OpenCode without adapter evidence",
"silently overwriting existing user MCP, hook, plugin, command, or rule config",
"disabling or enabling live MCP servers from a read-only evaluator run",
"shipping an adapter change without a verification command",
"publishing packages or plugins from this evaluator run"
],
"acceptance_gates": [
"adapter state is retrieved from the matrix",
"install or onramp path is named",
"verification command is named",
"risk note is preserved",
"config-preservation behavior is explicit",
"at least one unsupported parity claim is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A setup recommendation or PR touches MCP, plugin, hook, command, agent, rule, or adapter surfaces. The evaluator records the surface without editing local or user-level config.",
"evidence": [
"docs/architecture/harness-adapter-compliance.md",
"scripts/lib/harness-adapter-compliance.js"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the adapter state, install/onramp path, verification commands, risk notes, and config-preservation tests for the affected harness.",
"evidence": [
"npm run harness:adapters -- --check",
"npm run harness:audit -- --format json",
"node tests/lib/install-targets.test.js"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: an adapter-matrix-backed drift check and an unsupported hook parity claim that copies Claude semantics into every harness.",
"candidate_ids": [
"adapter-matrix-backed-drift-check",
"unsupported-hook-parity-claim"
]
},
{
"phase": "verification",
"summary": "Accepted the matrix-backed drift check because it names state, install path, verification, and preservation behavior. Rejected unsupported hook parity because it overclaims portability.",
"evidence": [
"examples/evaluator-rag-prototype/harness-config-quality/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only harness-config quality playbook. The evaluator does not overwrite configs, toggle MCP servers, publish plugins, or claim native support.",
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "adapter-matrix-backed-drift-check",
"decision": "accepted",
"score": 0.92,
"reasons": [
"retrieves adapter state before making a support claim",
"names install or onramp path and verification commands",
"preserves existing user and project config",
"keeps runtime MCP toggles and plugin publication out of the evaluator run",
"requires focused compatibility regression coverage"
],
"rollback": "Revert the future adapter/config PR or restore the prior config merge behavior; no live user config is changed by this read-only playbook."
},
{
"candidate_id": "unsupported-hook-parity-claim",
"decision": "rejected",
"score": 0.16,
"reasons": [
"claims native support without adapter matrix evidence",
"copies Claude hook semantics into instruction-backed harnesses",
"does not name a verification command",
"does not preserve existing MCP or hook config",
"risks publishing or installing unsupported plugin behavior"
],
"rollback": "Do not publish this setup recommendation; restart from adapter state, risk note, and config-preservation evidence."
}
],
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
}