docs: add deep-analyzer evaluator scenario

This commit is contained in:
Affaan Mustafa
2026-05-12 18:43:28 -04:00
committed by Affaan Mustafa
parent 337ced0828
commit 37c27a60fd
8 changed files with 297 additions and 12 deletions

View File

@@ -0,0 +1,60 @@
# Deep Analyzer Evidence Playbook
Candidate id: `corpus-backed-analyzer-change`
Use this playbook when a PR changes repository analysis, commit analysis,
architecture classification, workflow detection, pattern detection, or
deep-analysis risk-taxonomy behavior.
## Accepted Path
1. Name the changed analyzer surface and source file.
2. Retrieve the Deep Analyzer Evidence contract from `../ECC-Tools/README.md`,
the follow-up logic in `../ECC-Tools/src/lib/analyzer.ts`, and the PR-risk
bucket definition in `../ECC-Tools/src/lib/pr-risk-taxonomy.ts`.
3. Match the change to maintained corpus or reference evidence:
- `../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts`
- `../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts`
- `../ECC-Tools/src/lib/analyzer.compare.test.ts`
4. Compare expected outputs for the affected behavior:
- folder type;
- module organization;
- test location;
- primary language;
- commit message type;
- detected workflow names.
5. Add or update analyzer corpus, expected-output snapshots, fixtures,
benchmarks, golden cases, evals, or reference sets for the same changed
surface.
6. Run the relevant validation gates from `../ECC-Tools/`:
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
- `npm run typecheck`
- `npm run lint`
7. Record the corpus case, expected-output comparison, validation output, and
rollback notes in the maintainer PR body or handoff.
## Rejected Path
Do not promote analyzer threshold, classification, or risk-taxonomy changes
without corpus, snapshot, fixture, benchmark, golden, eval, or reference-set
evidence.
Do not suppress the `Deep Analyzer Evidence` PR-risk bucket just because the
change is small. Suppress it only when co-located evidence covers the same
analyzer surface.
Do not rely only on broad manual review notes. Analyzer changes need
representative repository shapes or commit-history cases with expected outputs.
Do not post PR comments, create check runs, sync Linear, publish packages, edit
plugins, or create release artifacts from the evaluator run.
## Minimum Validation
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
- `npm run typecheck`
- `npm run lint`
- `git diff --check`
- Markdown lint when docs or playbooks are touched
Preserve source attribution for analyzer evidence and include rollback guidance
for the future maintainer PR.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"corpus_retrieval": 0.95,
"expected_output_comparison": 0.91,
"representative_case_coverage": 0.89,
"taxonomy_gap_safety": 0.93,
"publication_safety": 1
},
"findings": [
{
"id": "corpus-required",
"severity": "warning",
"summary": "Deep-analysis behavior changes need maintained corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence before promotion."
},
{
"id": "expected-output-required",
"severity": "warning",
"summary": "Analyzer changes should compare expected folder type, module organization, test location, primary language, commit message type, or detected workflow names."
},
{
"id": "read-only-routing",
"severity": "info",
"summary": "The evaluator can recommend a maintainer PR but cannot itself post PR comments, create check runs, sync Linear, publish packages, edit plugins, or create release artifacts."
}
],
"recommended_next_action": {
"candidate_id": "corpus-backed-analyzer-change",
"action": "Use the promoted deep-analyzer evidence playbook for PRs that change repository, commit, architecture, workflow, pattern, or risk-taxonomy analysis behavior."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "deep-analyzer-evidence",
"title": "Require analyzer corpus evidence before promoting deep-analysis changes",
"mode": "read_only_prototype",
"objective": "Given a change to repository, commit, architecture, workflow, pattern, or deep-analysis logic, retrieve maintained analyzer corpus evidence and expected-output comparisons before promoting analyzer behavior or risk-taxonomy changes.",
"sources": [
{
"kind": "sibling_repo_doc",
"path": "../ECC-Tools/README.md",
"purpose": "Public description of deep-analyzer predictive follow-ups and the Deep Analyzer Evidence PR-risk bucket"
},
{
"kind": "sibling_repo_source",
"path": "../ECC-Tools/src/lib/analyzer.ts",
"purpose": "Predictive follow-up logic that flags analyzer changes without corpus, snapshot, fixture, or benchmark evidence"
},
{
"kind": "sibling_repo_source",
"path": "../ECC-Tools/src/lib/pr-risk-taxonomy.ts",
"purpose": "Non-blocking PR-risk taxonomy bucket for deep-analyzer evidence"
},
{
"kind": "sibling_repo_fixture",
"path": "../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
"purpose": "Maintained corpus cases for representative repository shapes, commit histories, and expected analyzer outputs"
},
{
"kind": "sibling_repo_test",
"command": "npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts",
"purpose": "Regression evidence for analyzer corpus outputs and deep-analyzer follow-up generation"
}
],
"retrieval_questions": [
"Which analyzer surface changed: repository structure, architecture, code style, commit messages, workflow detection, pattern detection, or risk taxonomy?",
"Which maintained corpus case or reference set covers the same analyzer behavior?",
"Do expected outputs compare folder type, module organization, test location, primary language, commit type, and workflow names?",
"Does the PR add analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence alongside analyzer code changes?",
"Does the evaluator keep PR comments, check runs, Linear sync, package changes, and publication actions out of the read-only pass?"
],
"forbidden_actions": [
"promoting repository, commit, architecture, workflow, pattern, or deep-analysis changes without analyzer corpus evidence",
"suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence",
"changing analyzer thresholds or classifications without expected-output comparison",
"relying only on broad manual review notes instead of representative repository and commit-history cases",
"posting PR comments, check runs, or Linear sync updates from this read-only evaluator run",
"changing package, plugin, release, or publication state from this evaluator run"
],
"acceptance_gates": [
"changed analyzer surface is named",
"maintained corpus or reference-set path is included",
"expected analyzer outputs are compared",
"representative repository shape or commit history is described",
"regression command is named",
"at least one no-corpus analyzer change is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A deep-analysis PR changes repository, commit, architecture, workflow, pattern, or risk-taxonomy behavior. The evaluator records the touched analyzer surface and remains read-only.",
"evidence": [
"../ECC-Tools/src/lib/analyzer.ts",
"../ECC-Tools/src/lib/pr-risk-taxonomy.ts"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the maintained analyzer corpus, corpus regression test, and follow-up tests that distinguish corpus-backed analyzer changes from no-evidence analyzer rewrites.",
"evidence": [
"../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
"../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts",
"../ECC-Tools/src/lib/analyzer.compare.test.ts"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: corpus-backed analyzer change, and threshold-only analyzer rewrite without expected-output evidence.",
"candidate_ids": [
"corpus-backed-analyzer-change",
"threshold-only-analyzer-rewrite"
]
},
{
"phase": "verification",
"summary": "Accepted the corpus-backed analyzer change because it names representative repository/commit cases and expected-output comparisons. Rejected the threshold-only rewrite because it lacks corpus or benchmark evidence.",
"evidence": [
"examples/evaluator-rag-prototype/deep-analyzer-evidence/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only deep-analyzer evidence playbook. Future analyzer edits must move through maintainer PRs with corpus evidence, regression commands, and rollback notes.",
"promoted_candidate_id": "corpus-backed-analyzer-change"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "corpus-backed-analyzer-change",
"decision": "accepted",
"score": 0.92,
"reasons": [
"names the changed analyzer surface and matching maintained corpus case",
"compares expected analyzer outputs for representative repository and commit-history inputs",
"keeps Deep Analyzer Evidence taxonomy behavior tied to co-located corpus or benchmark evidence",
"names the regression command that exercises corpus and follow-up behavior",
"keeps PR comments, check runs, Linear sync, and publication actions out of the evaluator run"
],
"rollback": "Revert the future analyzer PR and restore the prior corpus expectations; this read-only playbook changes no hosted check-run, Linear, package, or publication state."
},
{
"candidate_id": "threshold-only-analyzer-rewrite",
"decision": "rejected",
"score": 0.13,
"reasons": [
"changes analyzer thresholds without corpus evidence",
"does not compare expected outputs against representative repository or commit-history cases",
"does not update analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set artifacts",
"would suppress Deep Analyzer Evidence risk without proof",
"does not name a regression command"
],
"rollback": "Do not promote this analyzer rewrite; restart from maintained corpus inputs, expected-output snapshots, and a focused maintainer PR."
}
],
"promoted_candidate_id": "corpus-backed-analyzer-change"
}