mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-05-13 16:13:03 +08:00
docs: add deep-analyzer evaluator scenario
This commit is contained in:
committed by
Affaan Mustafa
parent
337ced0828
commit
37c27a60fd
@@ -0,0 +1,60 @@
|
||||
# Deep Analyzer Evidence Playbook
|
||||
|
||||
Candidate id: `corpus-backed-analyzer-change`
|
||||
|
||||
Use this playbook when a PR changes repository analysis, commit analysis,
|
||||
architecture classification, workflow detection, pattern detection, or
|
||||
deep-analysis risk-taxonomy behavior.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Name the changed analyzer surface and source file.
|
||||
2. Retrieve the Deep Analyzer Evidence contract from `../ECC-Tools/README.md`
|
||||
and the follow-up logic in `../ECC-Tools/src/lib/analyzer.ts`.
|
||||
3. Match the change to maintained corpus or reference evidence:
|
||||
- `../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts`
|
||||
- `../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts`
|
||||
- `../ECC-Tools/src/lib/analyzer.compare.test.ts`
|
||||
4. Compare expected outputs for the affected behavior:
|
||||
- folder type;
|
||||
- module organization;
|
||||
- test location;
|
||||
- primary language;
|
||||
- commit message type;
|
||||
- detected workflow names.
|
||||
5. Add or update analyzer corpus, expected-output snapshots, fixtures,
|
||||
benchmarks, golden cases, evals, or reference sets for the same changed
|
||||
surface.
|
||||
6. Run the relevant validation gate from `../ECC-Tools/`:
|
||||
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
|
||||
- `npm run typecheck`
|
||||
- `npm run lint`
|
||||
7. Record the corpus case, expected-output comparison, validation output, and
|
||||
rollback notes in the maintainer PR body or handoff.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not promote analyzer threshold, classification, or risk-taxonomy changes
|
||||
without corpus, snapshot, fixture, benchmark, golden, eval, or reference-set
|
||||
evidence.
|
||||
|
||||
Do not suppress the `Deep Analyzer Evidence` PR-risk bucket just because the
|
||||
change is small. Suppress it only when co-located evidence covers the same
|
||||
analyzer surface.
|
||||
|
||||
Do not rely only on broad manual review notes. Analyzer changes need
|
||||
representative repository shapes or commit-history cases with expected outputs.
|
||||
|
||||
Do not post PR comments, create check runs, sync Linear, publish packages, edit
|
||||
plugins, or create release artifacts from the evaluator run.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
|
||||
- `npm run typecheck`
|
||||
- `npm run lint`
|
||||
- `git diff --check`
|
||||
- Markdown lint when docs or playbooks are touched
|
||||
|
||||
Preserve source attribution for analyzer evidence and include rollback guidance
|
||||
for the future maintainer PR.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"corpus_retrieval": 0.95,
|
||||
"expected_output_comparison": 0.91,
|
||||
"representative_case_coverage": 0.89,
|
||||
"taxonomy_gap_safety": 0.93,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "corpus-required",
|
||||
"severity": "warning",
|
||||
"summary": "Deep-analysis behavior changes need maintained corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence before promotion."
|
||||
},
|
||||
{
|
||||
"id": "expected-output-required",
|
||||
"severity": "warning",
|
||||
"summary": "Analyzer changes should compare expected folder type, module organization, test location, primary language, commit message type, or detected workflow names."
|
||||
},
|
||||
{
|
||||
"id": "read-only-routing",
|
||||
"severity": "info",
|
||||
"summary": "The evaluator can recommend a maintainer PR but cannot itself post PR comments, create check runs, sync Linear, publish packages, edit plugins, or perform release actions."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "corpus-backed-analyzer-change",
|
||||
"action": "Use the promoted deep-analyzer evidence playbook for PRs that change repository, commit, architecture, workflow, pattern, or risk-taxonomy analysis behavior."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"title": "Require analyzer corpus evidence before promoting deep-analysis changes",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a change to repository, commit, architecture, workflow, pattern, or deep-analysis logic, retrieve maintained analyzer corpus evidence and expected-output comparisons before promoting analyzer behavior or risk-taxonomy changes.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "sibling_repo_doc",
|
||||
"path": "../ECC-Tools/README.md",
|
||||
"purpose": "Public description of deep-analyzer predictive follow-ups and the Deep Analyzer Evidence PR-risk bucket"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_source",
|
||||
"path": "../ECC-Tools/src/lib/analyzer.ts",
|
||||
"purpose": "Predictive follow-up logic that flags analyzer changes without corpus, snapshot, fixture, or benchmark evidence"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_source",
|
||||
"path": "../ECC-Tools/src/lib/pr-risk-taxonomy.ts",
|
||||
"purpose": "Non-blocking PR-risk taxonomy bucket for deep-analyzer evidence"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_fixture",
|
||||
"path": "../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
|
||||
"purpose": "Maintained corpus cases for representative repository shapes, commit histories, and expected analyzer outputs"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_test",
|
||||
"command": "npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts",
|
||||
"purpose": "Regression evidence for analyzer corpus outputs and deep-analyzer follow-up generation"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which analyzer surface changed: repository structure, architecture, code style, commit messages, workflow detection, pattern detection, or risk taxonomy?",
|
||||
"Which maintained corpus case or reference set covers the same analyzer behavior?",
|
||||
"Do the expected-output comparisons cover folder type, module organization, test location, primary language, commit message type, and detected workflow names?",
|
||||
"Does the PR add analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence alongside analyzer code changes?",
|
||||
"Does the evaluator keep PR comments, check runs, Linear sync, package changes, and publication actions out of the read-only pass?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"promoting repository, commit, architecture, or deep-analysis changes without analyzer corpus evidence",
|
||||
"suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence",
|
||||
"changing analyzer thresholds or classifications without expected-output comparison",
|
||||
"relying only on broad manual review notes instead of representative repository and commit-history cases",
|
||||
"posting PR comments, creating check runs, or pushing Linear sync updates from this read-only evaluator run",
|
||||
"changing package, plugin, release, or publication state from this evaluator run"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"changed analyzer surface is named",
|
||||
"maintained corpus or reference-set path is included",
|
||||
"expected analyzer outputs are compared",
|
||||
"representative repository shape or commit history is described",
|
||||
"regression command is named",
|
||||
"at least one no-corpus analyzer change is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A deep-analysis PR changes repository, commit, architecture, workflow, pattern, or risk-taxonomy behavior. The evaluator records the touched analyzer surface and remains read-only.",
|
||||
"evidence": [
|
||||
"../ECC-Tools/src/lib/analyzer.ts",
|
||||
"../ECC-Tools/src/lib/pr-risk-taxonomy.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved the maintained analyzer corpus, corpus regression test, and follow-up tests that distinguish corpus-backed analyzer changes from no-evidence analyzer rewrites.",
|
||||
"evidence": [
|
||||
"../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
|
||||
"../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts",
|
||||
"../ECC-Tools/src/lib/analyzer.compare.test.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: corpus-backed analyzer change, and threshold-only analyzer rewrite without expected-output evidence.",
|
||||
"candidate_ids": [
|
||||
"corpus-backed-analyzer-change",
|
||||
"threshold-only-analyzer-rewrite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the corpus-backed analyzer change because it names representative repository/commit cases and expected-output comparisons. Rejected the threshold-only rewrite because it lacks corpus or benchmark evidence.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/deep-analyzer-evidence/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only deep-analyzer evidence playbook. Future analyzer edits must move through maintainer PRs with corpus evidence, regression commands, and rollback notes.",
|
||||
"promoted_candidate_id": "corpus-backed-analyzer-change"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "corpus-backed-analyzer-change",
|
||||
"decision": "accepted",
|
||||
"score": 0.92,
|
||||
"reasons": [
|
||||
"names the changed analyzer surface and matching maintained corpus case",
|
||||
"compares expected analyzer outputs for representative repository and commit-history inputs",
|
||||
"keeps Deep Analyzer Evidence taxonomy behavior tied to co-located corpus or benchmark evidence",
|
||||
"names the regression command that exercises corpus and follow-up behavior",
|
||||
"keeps PR comments, check runs, Linear sync, and publication actions out of the evaluator run"
|
||||
],
|
||||
"rollback": "Revert the future analyzer PR and restore the prior corpus expectations; no hosted check-run, Linear, package, or publication state changes in this read-only playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "threshold-only-analyzer-rewrite",
|
||||
"decision": "rejected",
|
||||
"score": 0.13,
|
||||
"reasons": [
|
||||
"changes analyzer thresholds without corpus evidence",
|
||||
"does not compare expected outputs against representative repository or commit-history cases",
|
||||
"does not update analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set artifacts",
|
||||
"would suppress Deep Analyzer Evidence risk without proof",
|
||||
"does not name a regression command"
|
||||
],
|
||||
"rollback": "Do not promote this analyzer rewrite; restart from maintained corpus inputs, expected-output snapshots, and a focused maintainer PR."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "corpus-backed-analyzer-change"
|
||||
}
|
||||
Reference in New Issue
Block a user