feat: review army idempotency + cross-review dedup resolver

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-05 11:02:27 -07:00
parent bd8d44d641
commit 5c1b22dbeb
3 changed files with 70 additions and 10 deletions

View File

@@ -11,7 +11,7 @@ import { generateTestFailureTriage } from './preamble';
import { generateCommandReference, generateSnapshotFlags, generateBrowseSetup } from './browse'; import { generateCommandReference, generateSnapshotFlags, generateBrowseSetup } from './browse';
import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsideVoices, generateDesignReviewLite, generateDesignSketch, generateDesignSetup, generateDesignMockup, generateDesignShotgunLoop } from './design'; import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsideVoices, generateDesignReviewLite, generateDesignSketch, generateDesignSetup, generateDesignMockup, generateDesignShotgunLoop } from './design';
import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing'; import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing';
import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift } from './review'; import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift, generateCrossReviewDedup } from './review';
import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer, generateChangelogWorkflow } from './utility'; import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer, generateChangelogWorkflow } from './utility';
import { generateLearningsSearch, generateLearningsLog } from './learnings'; import { generateLearningsSearch, generateLearningsLog } from './learnings';
import { generateConfidenceCalibration } from './confidence'; import { generateConfidenceCalibration } from './confidence';
@@ -60,5 +60,6 @@ export const RESOLVERS: Record<string, ResolverFn> = {
INVOKE_SKILL: generateInvokeSkill, INVOKE_SKILL: generateInvokeSkill,
CHANGELOG_WORKFLOW: generateChangelogWorkflow, CHANGELOG_WORKFLOW: generateChangelogWorkflow,
REVIEW_ARMY: generateReviewArmy, REVIEW_ARMY: generateReviewArmy,
CROSS_REVIEW_DEDUP: generateCrossReviewDedup,
DX_FRAMEWORK: generateDxFramework, DX_FRAMEWORK: generateDxFramework,
}; };

View File

@@ -12,7 +12,11 @@
import type { TemplateContext } from './types'; import type { TemplateContext } from './types';
function generateSpecialistSelection(ctx: TemplateContext): string { function generateSpecialistSelection(ctx: TemplateContext): string {
return `## Step 4.5: Review Army — Specialist Dispatch const isShip = ctx.skillName === 'ship';
const stepSel = isShip ? '3.55' : '4.5';
const stepMerge = isShip ? '3.56' : '4.6';
const nextStep = isShip ? 'the Fix-First flow (item 4)' : 'Step 5';
return `## Step ${stepSel}: Review Army — Specialist Dispatch
### Detect stack and scope ### Detect stack and scope
@@ -26,7 +30,9 @@ STACK=""
[ -f go.mod ] && STACK="\${STACK}go " [ -f go.mod ] && STACK="\${STACK}go "
[ -f Cargo.toml ] && STACK="\${STACK}rust " [ -f Cargo.toml ] && STACK="\${STACK}rust "
echo "STACK: \${STACK:-unknown}" echo "STACK: \${STACK:-unknown}"
DIFF_LINES=$(git diff origin/<base> --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_INS=$(git diff origin/<base> --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/<base> --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_LINES=$((DIFF_INS + DIFF_DEL))
echo "DIFF_LINES: $DIFF_LINES" echo "DIFF_LINES: $DIFF_LINES"
# Detect test framework for specialist test stub generation # Detect test framework for specialist test stub generation
TEST_FW="" TEST_FW=""
@@ -52,7 +58,7 @@ Based on the scope signals above, select which specialists to dispatch.
1. **Testing** — read \`${ctx.paths.skillRoot}/review/specialists/testing.md\` 1. **Testing** — read \`${ctx.paths.skillRoot}/review/specialists/testing.md\`
2. **Maintainability** — read \`${ctx.paths.skillRoot}/review/specialists/maintainability.md\` 2. **Maintainability** — read \`${ctx.paths.skillRoot}/review/specialists/maintainability.md\`
**If DIFF_LINES < 50:** Skip all specialists. Print: "Small diff ($DIFF_LINES lines) — specialists skipped." Continue to Step 5. **If DIFF_LINES < 50:** Skip all specialists. Print: "Small diff ($DIFF_LINES lines) — specialists skipped." Continue to ${nextStep}.
**Conditional (dispatch if the matching scope signal is true):** **Conditional (dispatch if the matching scope signal is true):**
3. **Security** — if SCOPE_AUTH=true, OR if SCOPE_BACKEND=true AND DIFF_LINES > 100. Read \`${ctx.paths.skillRoot}/review/specialists/security.md\` 3. **Security** — if SCOPE_AUTH=true, OR if SCOPE_BACKEND=true AND DIFF_LINES > 100. Read \`${ctx.paths.skillRoot}/review/specialists/security.md\`
@@ -126,8 +132,14 @@ CHECKLIST:
- If any specialist subagent fails or times out, log the failure and continue with results from successful specialists. Specialists are additive — partial results are better than no results.`; - If any specialist subagent fails or times out, log the failure and continue with results from successful specialists. Specialists are additive — partial results are better than no results.`;
} }
function generateFindingsMerge(_ctx: TemplateContext): string { function generateFindingsMerge(ctx: TemplateContext): string {
return `### Step 4.6: Collect and merge findings const isShip = ctx.skillName === 'ship';
const stepMerge = isShip ? '3.56' : '4.6';
const stepSel = isShip ? '3.55' : '4.5';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
const critPassRef = isShip ? 'the checklist pass (Step 3.5)' : 'the CRITICAL pass findings from Step 4';
const persistRef = isShip ? 'the review-log persist' : 'the review-log entry in Step 5.8';
return `### Step ${stepMerge}: Collect and merge findings
After all specialist subagents complete, collect their outputs. After all specialist subagents complete, collect their outputs.
@@ -173,11 +185,11 @@ SPECIALIST REVIEW: N findings (X critical, Y informational) from Z specialists
PR Quality Score: X/10 PR Quality Score: X/10
\`\`\` \`\`\`
These findings flow into Step 5 Fix-First alongside the CRITICAL pass findings from Step 4. These findings flow into ${fixFirstRef} alongside ${critPassRef}.
The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification. The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification.
**Compile per-specialist stats:** **Compile per-specialist stats:**
After merging findings, compile a \`specialists\` object for the review-log entry in Step 5.8. After merging findings, compile a \`specialists\` object for ${persistRef}.
For each specialist (testing, maintainability, security, performance, data-migration, api-contract, design, red-team): For each specialist (testing, maintainability, security, performance, data-migration, api-contract, design, red-team):
- If dispatched: \`{"dispatched": true, "findings": N, "critical": N, "informational": N}\` - If dispatched: \`{"dispatched": true, "findings": N, "critical": N, "informational": N}\`
- If skipped by scope: \`{"dispatched": false, "reason": "scope"}\` - If skipped by scope: \`{"dispatched": false, "reason": "scope"}\`
@@ -189,6 +201,9 @@ Remember these stats — you will need them for the review-log entry in Step 5.8
} }
function generateRedTeam(ctx: TemplateContext): string { function generateRedTeam(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
const stepMerge = isShip ? '3.56' : '4.6';
const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First';
return `### Red Team dispatch (conditional) return `### Red Team dispatch (conditional)
**Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding. **Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding.
@@ -197,7 +212,7 @@ If activated, dispatch one more subagent via the Agent tool (foreground, not bac
The Red Team subagent receives: The Red Team subagent receives:
1. The red-team checklist from \`${ctx.paths.skillRoot}/review/specialists/red-team.md\` 1. The red-team checklist from \`${ctx.paths.skillRoot}/review/specialists/red-team.md\`
2. The merged specialist findings from Step 4.6 (so it knows what was already caught) 2. The merged specialist findings from Step ${stepMerge} (so it knows what was already caught)
3. The git diff command 3. The git diff command
Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists
@@ -208,7 +223,7 @@ concerns, integration boundary issues, and failure modes that specialist checkli
don't cover." don't cover."
If the Red Team finds additional issues, merge them into the findings list before If the Red Team finds additional issues, merge them into the findings list before
Step 5 Fix-First. Red Team findings are tagged with \`"specialist":"red-team"\`. ${fixFirstRef}. Red Team findings are tagged with \`"specialist":"red-team"\`.
If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found." If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found."
If the Red Team subagent fails or times out, skip silently and continue.`; If the Red Team subagent fails or times out, skip silently and continue.`;

View File

@@ -975,3 +975,47 @@ Add a \`## Verification Results\` section to the PR body (Step 8):
- If verification ran: summary of results (N PASS, M FAIL, K SKIPPED) - If verification ran: summary of results (N PASS, M FAIL, K SKIPPED)
- If skipped: reason for skipping (no plan, no server, no verification section)`; - If skipped: reason for skipping (no plan, no server, no verification section)`;
} }
// ─── Cross-Review Finding Dedup ──────────────────────────────────────
export function generateCrossReviewDedup(ctx: TemplateContext): string {
const isShip = ctx.skillName === 'ship';
const stepNum = isShip ? '3.57' : '5.0';
const findingsRef = isShip
? 'the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)'
: 'Step 4 critical pass and Step 4.5-4.6 specialists';
return `### Step ${stepNum}: Cross-review finding dedup
Before classifying findings, check if any were previously skipped by the user in a prior review on this branch.
\`\`\`bash
~/.claude/skills/gstack/bin/gstack-review-read
\`\`\`
Parse the output: only lines BEFORE \`---CONFIG---\` are JSONL entries (the output also contains \`---CONFIG---\` and \`---HEAD---\` footer sections that are not JSONL — ignore those).
For each JSONL entry that has a \`findings\` array:
1. Collect all fingerprints where \`action: "skipped"\`
2. Note the \`commit\` field from that entry
If skipped fingerprints exist, get the list of files changed since that review:
\`\`\`bash
git diff --name-only <prior-review-commit> HEAD
\`\`\`
For each current finding (from both ${findingsRef}), check:
- Does its fingerprint match a previously skipped finding?
- Is the finding's file path NOT in the changed-files set?
If both conditions are true: suppress the finding. It was intentionally skipped and the relevant code hasn't changed.
Print: "Suppressed N findings from prior reviews (previously skipped by user)"
**Only suppress \`skipped\` findings — never \`fixed\` or \`auto-fixed\`** (those might regress and should be re-checked).
If no prior reviews exist or none have a \`findings\` array, skip this step silently.
Output a summary header: \`Pre-Landing Review: N issues (X critical, Y informational)\``;
}