From 422f172fbbcb75774c86bbe5d7c097adaf561380 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 5 Apr 2026 11:43:13 -0700 Subject: [PATCH 1/3] feat: ship re-run executes all verification checks (v0.15.10.0) (#833) * feat: review army idempotency + cross-review dedup resolver Co-Authored-By: Claude Opus 4.6 (1M context) * feat: ship re-run executes all checks, adds review army + dedup Co-Authored-By: Claude Opus 4.6 (1M context) * test: regression guards for ship specialist dispatch + dedup + idempotency Co-Authored-By: Claude Opus 4.6 (1M context) * chore: bump version and changelog (v0.15.10.0) Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 18 +++ VERSION | 2 +- review/SKILL.md | 4 +- review/SKILL.md.tmpl | 34 +--- scripts/resolvers/index.ts | 3 +- scripts/resolvers/review-army.ts | 33 ++-- scripts/resolvers/review.ts | 44 ++++++ ship/SKILL.md | 262 ++++++++++++++++++++++++++++++- ship/SKILL.md.tmpl | 29 +++- test/gen-skill-docs.test.ts | 16 ++ 10 files changed, 390 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46c552cd..7ae03519 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [0.15.11.0] - 2026-04-05 + +### Changed +- `/ship` re-runs now execute every verification step (tests, coverage audit, review, adversarial, TODOS, document-release) regardless of prior runs. Only actions (push, PR creation, VERSION bump) are idempotent. Re-running `/ship` means "run the whole checklist again." +- `/ship` now runs the full Review Army specialist dispatch (testing, maintainability, security, performance, data-migration, api-contract, design, red-team) during pre-landing review, matching `/review`'s depth. + +### Added +- Cross-review finding dedup in `/ship`: findings the user already skipped in a prior `/review` or `/ship` are automatically suppressed on re-run (unless the relevant code changed). +- PR body refresh after `/document-release`: the PR body is re-edited to include the docs commit, so it always reflects the truly final state. + +### Fixed +- Review Army diff size heuristic now counts insertions + deletions (was insertions-only, which missed deletion-heavy refactors). + +### For contributors +- Extracted cross-review dedup to shared `{{CROSS_REVIEW_DEDUP}}` resolver (DRY between `/review` and `/ship`). +- Review Army step numbers adapt per-skill via `ctx.skillName` (ship: 3.55/3.56, review: 4.5/4.6), including prose references. +- Added 3 regression guard tests for new ship template content. + ## [0.15.10.0] - 2026-04-05 — Native OpenClaw Skills + ClawHub Publishing Four methodology skills you can install directly in your OpenClaw agent via ClawHub, no Claude Code session needed. Your agent runs them conversationally via Telegram. diff --git a/VERSION b/VERSION index ac8e8d88..91b13ea2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.15.10.0 +0.15.11.0 diff --git a/review/SKILL.md b/review/SKILL.md index 43b6c200..9bf514e2 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -901,7 +901,9 @@ STACK="" [ -f go.mod ] && STACK="${STACK}go " [ -f Cargo.toml ] && STACK="${STACK}rust " echo "STACK: ${STACK:-unknown}" -DIFF_LINES=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") +DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") +DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") +DIFF_LINES=$((DIFF_INS + DIFF_DEL)) echo "DIFF_LINES: $DIFF_LINES" # Detect test framework for specialist test stub generation TEST_FW="" diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl index e09d7154..9ccb1ec2 100644 --- a/review/SKILL.md.tmpl +++ b/review/SKILL.md.tmpl @@ -103,39 +103,7 @@ Follow the output format specified in the checklist. Respect the suppressions **Every finding gets action — not just critical ones.** -### Step 5.0: Cross-review finding dedup - -Before classifying findings, check if any were previously skipped by the user in a prior review on this branch. - -```bash -~/.claude/skills/gstack/bin/gstack-review-read -``` - -Parse the output: only lines BEFORE `---CONFIG---` are JSONL entries (the output also contains `---CONFIG---` and `---HEAD---` footer sections that are not JSONL — ignore those). - -For each JSONL entry that has a `findings` array: -1. Collect all fingerprints where `action: "skipped"` -2. Note the `commit` field from that entry - -If skipped fingerprints exist, get the list of files changed since that review: - -```bash -git diff --name-only HEAD -``` - -For each current finding (from both Step 4 critical pass and Step 4.5-4.6 specialists), check: -- Does its fingerprint match a previously skipped finding? -- Is the finding's file path NOT in the changed-files set? - -If both conditions are true: suppress the finding. It was intentionally skipped and the relevant code hasn't changed. - -Print: "Suppressed N findings from prior reviews (previously skipped by user)" - -**Only suppress `skipped` findings — never `fixed` or `auto-fixed`** (those might regress and should be re-checked). - -If no prior reviews exist or none have a `findings` array, skip this step silently. - -Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)` +{{CROSS_REVIEW_DEDUP}} ### Step 5a: Classify each finding diff --git a/scripts/resolvers/index.ts b/scripts/resolvers/index.ts index a13e7b6b..072b1a3d 100644 --- a/scripts/resolvers/index.ts +++ b/scripts/resolvers/index.ts @@ -11,7 +11,7 @@ import { generateTestFailureTriage } from './preamble'; import { generateCommandReference, generateSnapshotFlags, generateBrowseSetup } from './browse'; import { generateDesignMethodology, generateDesignHardRules, generateDesignOutsideVoices, generateDesignReviewLite, generateDesignSketch, generateDesignSetup, generateDesignMockup, generateDesignShotgunLoop } from './design'; import { generateTestBootstrap, generateTestCoverageAuditPlan, generateTestCoverageAuditShip, generateTestCoverageAuditReview } from './testing'; -import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift } from './review'; +import { generateReviewDashboard, generatePlanFileReviewReport, generateSpecReviewLoop, generateBenefitsFrom, generateCodexSecondOpinion, generateAdversarialStep, generateCodexPlanReview, generatePlanCompletionAuditShip, generatePlanCompletionAuditReview, generatePlanVerificationExec, generateScopeDrift, generateCrossReviewDedup } from './review'; import { generateSlugEval, generateSlugSetup, generateBaseBranchDetect, generateDeployBootstrap, generateQAMethodology, generateCoAuthorTrailer, generateChangelogWorkflow } from './utility'; import { generateLearningsSearch, generateLearningsLog } from './learnings'; import { generateConfidenceCalibration } from './confidence'; @@ -60,5 +60,6 @@ export const RESOLVERS: Record = { INVOKE_SKILL: generateInvokeSkill, CHANGELOG_WORKFLOW: generateChangelogWorkflow, REVIEW_ARMY: generateReviewArmy, + CROSS_REVIEW_DEDUP: generateCrossReviewDedup, DX_FRAMEWORK: generateDxFramework, }; diff --git a/scripts/resolvers/review-army.ts b/scripts/resolvers/review-army.ts index cb35b9e7..1240b839 100644 --- a/scripts/resolvers/review-army.ts +++ b/scripts/resolvers/review-army.ts @@ -12,7 +12,11 @@ import type { TemplateContext } from './types'; function generateSpecialistSelection(ctx: TemplateContext): string { - return `## Step 4.5: Review Army — Specialist Dispatch + const isShip = ctx.skillName === 'ship'; + const stepSel = isShip ? '3.55' : '4.5'; + const stepMerge = isShip ? '3.56' : '4.6'; + const nextStep = isShip ? 'the Fix-First flow (item 4)' : 'Step 5'; + return `## Step ${stepSel}: Review Army — Specialist Dispatch ### Detect stack and scope @@ -26,7 +30,9 @@ STACK="" [ -f go.mod ] && STACK="\${STACK}go " [ -f Cargo.toml ] && STACK="\${STACK}rust " echo "STACK: \${STACK:-unknown}" -DIFF_LINES=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") +DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") +DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") +DIFF_LINES=$((DIFF_INS + DIFF_DEL)) echo "DIFF_LINES: $DIFF_LINES" # Detect test framework for specialist test stub generation TEST_FW="" @@ -52,7 +58,7 @@ Based on the scope signals above, select which specialists to dispatch. 1. **Testing** — read \`${ctx.paths.skillRoot}/review/specialists/testing.md\` 2. **Maintainability** — read \`${ctx.paths.skillRoot}/review/specialists/maintainability.md\` -**If DIFF_LINES < 50:** Skip all specialists. Print: "Small diff ($DIFF_LINES lines) — specialists skipped." Continue to Step 5. +**If DIFF_LINES < 50:** Skip all specialists. Print: "Small diff ($DIFF_LINES lines) — specialists skipped." Continue to ${nextStep}. **Conditional (dispatch if the matching scope signal is true):** 3. **Security** — if SCOPE_AUTH=true, OR if SCOPE_BACKEND=true AND DIFF_LINES > 100. Read \`${ctx.paths.skillRoot}/review/specialists/security.md\` @@ -126,8 +132,14 @@ CHECKLIST: - If any specialist subagent fails or times out, log the failure and continue with results from successful specialists. Specialists are additive — partial results are better than no results.`; } -function generateFindingsMerge(_ctx: TemplateContext): string { - return `### Step 4.6: Collect and merge findings +function generateFindingsMerge(ctx: TemplateContext): string { + const isShip = ctx.skillName === 'ship'; + const stepMerge = isShip ? '3.56' : '4.6'; + const stepSel = isShip ? '3.55' : '4.5'; + const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First'; + const critPassRef = isShip ? 'the checklist pass (Step 3.5)' : 'the CRITICAL pass findings from Step 4'; + const persistRef = isShip ? 'the review-log persist' : 'the review-log entry in Step 5.8'; + return `### Step ${stepMerge}: Collect and merge findings After all specialist subagents complete, collect their outputs. @@ -173,11 +185,11 @@ SPECIALIST REVIEW: N findings (X critical, Y informational) from Z specialists PR Quality Score: X/10 \`\`\` -These findings flow into Step 5 Fix-First alongside the CRITICAL pass findings from Step 4. +These findings flow into ${fixFirstRef} alongside ${critPassRef}. The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification. **Compile per-specialist stats:** -After merging findings, compile a \`specialists\` object for the review-log entry in Step 5.8. +After merging findings, compile a \`specialists\` object for ${persistRef}. For each specialist (testing, maintainability, security, performance, data-migration, api-contract, design, red-team): - If dispatched: \`{"dispatched": true, "findings": N, "critical": N, "informational": N}\` - If skipped by scope: \`{"dispatched": false, "reason": "scope"}\` @@ -189,6 +201,9 @@ Remember these stats — you will need them for the review-log entry in Step 5.8 } function generateRedTeam(ctx: TemplateContext): string { + const isShip = ctx.skillName === 'ship'; + const stepMerge = isShip ? '3.56' : '4.6'; + const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First'; return `### Red Team dispatch (conditional) **Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding. @@ -197,7 +212,7 @@ If activated, dispatch one more subagent via the Agent tool (foreground, not bac The Red Team subagent receives: 1. The red-team checklist from \`${ctx.paths.skillRoot}/review/specialists/red-team.md\` -2. The merged specialist findings from Step 4.6 (so it knows what was already caught) +2. The merged specialist findings from Step ${stepMerge} (so it knows what was already caught) 3. The git diff command Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists @@ -208,7 +223,7 @@ concerns, integration boundary issues, and failure modes that specialist checkli don't cover." If the Red Team finds additional issues, merge them into the findings list before -Step 5 Fix-First. Red Team findings are tagged with \`"specialist":"red-team"\`. +${fixFirstRef}. Red Team findings are tagged with \`"specialist":"red-team"\`. If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found." If the Red Team subagent fails or times out, skip silently and continue.`; diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index bfe600b6..cbc8053c 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -975,3 +975,47 @@ Add a \`## Verification Results\` section to the PR body (Step 8): - If verification ran: summary of results (N PASS, M FAIL, K SKIPPED) - If skipped: reason for skipping (no plan, no server, no verification section)`; } + +// ─── Cross-Review Finding Dedup ────────────────────────────────────── + +export function generateCrossReviewDedup(ctx: TemplateContext): string { + const isShip = ctx.skillName === 'ship'; + const stepNum = isShip ? '3.57' : '5.0'; + const findingsRef = isShip + ? 'the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)' + : 'Step 4 critical pass and Step 4.5-4.6 specialists'; + + return `### Step ${stepNum}: Cross-review finding dedup + +Before classifying findings, check if any were previously skipped by the user in a prior review on this branch. + +\`\`\`bash +~/.claude/skills/gstack/bin/gstack-review-read +\`\`\` + +Parse the output: only lines BEFORE \`---CONFIG---\` are JSONL entries (the output also contains \`---CONFIG---\` and \`---HEAD---\` footer sections that are not JSONL — ignore those). + +For each JSONL entry that has a \`findings\` array: +1. Collect all fingerprints where \`action: "skipped"\` +2. Note the \`commit\` field from that entry + +If skipped fingerprints exist, get the list of files changed since that review: + +\`\`\`bash +git diff --name-only HEAD +\`\`\` + +For each current finding (from both ${findingsRef}), check: +- Does its fingerprint match a previously skipped finding? +- Is the finding's file path NOT in the changed-files set? + +If both conditions are true: suppress the finding. It was intentionally skipped and the relevant code hasn't changed. + +Print: "Suppressed N findings from prior reviews (previously skipped by user)" + +**Only suppress \`skipped\` findings — never \`fixed\` or \`auto-fixed\`** (those might regress and should be re-checked). + +If no prior reviews exist or none have a \`findings\` array, skip this step silently. + +Output a summary header: \`Pre-Landing Review: N issues (X critical, Y informational)\``; +} diff --git a/ship/SKILL.md b/ship/SKILL.md index 34cfaa7b..05fff987 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -580,6 +580,16 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat - Auto-fixable review findings (dead code, N+1, stale comments — fixed automatically) - Test coverage gaps within target threshold (auto-generate and commit, or flag in PR body) +**Re-run behavior (idempotency):** +Re-running `/ship` means "run the whole checklist again." Every verification step +(tests, coverage audit, plan completion, pre-landing review, adversarial review, +VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation. +Only *actions* are idempotent: +- Step 4: If VERSION already bumped, skip the bump but still read the version +- Step 7: If already pushed, skip the push command +- Step 8: If PR exists, update the body instead of creating a new PR +Never skip a verification step because a prior `/ship` run already performed it. + --- ## Step 1: Pre-flight @@ -1658,7 +1668,244 @@ Present Codex output under a `CODEX (design):` header, merged with the checklist Include any design findings alongside the code review findings. They follow the same Fix-First flow below. -4. **Classify each finding as AUTO-FIX or ASK** per the Fix-First Heuristic in +## Step 3.55: Review Army — Specialist Dispatch + +### Detect stack and scope + +```bash +source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) || true +# Detect stack for specialist context +STACK="" +[ -f Gemfile ] && STACK="${STACK}ruby " +[ -f package.json ] && STACK="${STACK}node " +[ -f requirements.txt ] || [ -f pyproject.toml ] && STACK="${STACK}python " +[ -f go.mod ] && STACK="${STACK}go " +[ -f Cargo.toml ] && STACK="${STACK}rust " +echo "STACK: ${STACK:-unknown}" +DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") +DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") +DIFF_LINES=$((DIFF_INS + DIFF_DEL)) +echo "DIFF_LINES: $DIFF_LINES" +# Detect test framework for specialist test stub generation +TEST_FW="" +{ [ -f jest.config.ts ] || [ -f jest.config.js ]; } && TEST_FW="jest" +[ -f vitest.config.ts ] && TEST_FW="vitest" +{ [ -f spec/spec_helper.rb ] || [ -f .rspec ]; } && TEST_FW="rspec" +{ [ -f pytest.ini ] || [ -f conftest.py ]; } && TEST_FW="pytest" +[ -f go.mod ] && TEST_FW="go-test" +echo "TEST_FW: ${TEST_FW:-unknown}" +``` + +### Read specialist hit rates (adaptive gating) + +```bash +~/.claude/skills/gstack/bin/gstack-specialist-stats 2>/dev/null || true +``` + +### Select specialists + +Based on the scope signals above, select which specialists to dispatch. + +**Always-on (dispatch on every review with 50+ changed lines):** +1. **Testing** — read `~/.claude/skills/gstack/review/specialists/testing.md` +2. **Maintainability** — read `~/.claude/skills/gstack/review/specialists/maintainability.md` + +**If DIFF_LINES < 50:** Skip all specialists. Print: "Small diff ($DIFF_LINES lines) — specialists skipped." Continue to the Fix-First flow (item 4). + +**Conditional (dispatch if the matching scope signal is true):** +3. **Security** — if SCOPE_AUTH=true, OR if SCOPE_BACKEND=true AND DIFF_LINES > 100. Read `~/.claude/skills/gstack/review/specialists/security.md` +4. **Performance** — if SCOPE_BACKEND=true OR SCOPE_FRONTEND=true. Read `~/.claude/skills/gstack/review/specialists/performance.md` +5. **Data Migration** — if SCOPE_MIGRATIONS=true. Read `~/.claude/skills/gstack/review/specialists/data-migration.md` +6. **API Contract** — if SCOPE_API=true. Read `~/.claude/skills/gstack/review/specialists/api-contract.md` +7. **Design** — if SCOPE_FRONTEND=true. Use the existing design review checklist at `~/.claude/skills/gstack/review/design-checklist.md` + +### Adaptive gating + +After scope-based selection, apply adaptive gating based on specialist hit rates: + +For each conditional specialist that passed scope gating, check the `gstack-specialist-stats` output above: +- If tagged `[GATE_CANDIDATE]` (0 findings in 10+ dispatches): skip it. Print: "[specialist] auto-gated (0 findings in N reviews)." +- If tagged `[NEVER_GATE]`: always dispatch regardless of hit rate. Security and data-migration are insurance policy specialists — they should run even when silent. + +**Force flags:** If the user's prompt includes `--security`, `--performance`, `--testing`, `--maintainability`, `--data-migration`, `--api-contract`, `--design`, or `--all-specialists`, force-include that specialist regardless of gating. + +Note which specialists were selected, gated, and skipped. Print the selection: +"Dispatching N specialists: [names]. Skipped: [names] (scope not detected). Gated: [names] (0 findings in N+ reviews)." + +--- + +### Dispatch specialists in parallel + +For each selected specialist, launch an independent subagent via the Agent tool. +**Launch ALL selected specialists in a single message** (multiple Agent tool calls) +so they run in parallel. Each subagent has fresh context — no prior review bias. + +**Each specialist subagent prompt:** + +Construct the prompt for each specialist. The prompt includes: + +1. The specialist's checklist content (you already read the file above) +2. Stack context: "This is a {STACK} project." +3. Past learnings for this domain (if any exist): + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-search --type pitfall --query "{specialist domain}" --limit 5 2>/dev/null || true +``` + +If learnings are found, include them: "Past learnings for this domain: {learnings}" + +4. Instructions: + +"You are a specialist code reviewer. Read the checklist below, then run +`git diff origin/` to get the full diff. Apply the checklist against the diff. + +For each finding, output a JSON object on its own line: +{\"severity\":\"CRITICAL|INFORMATIONAL\",\"confidence\":N,\"path\":\"file\",\"line\":N,\"category\":\"category\",\"summary\":\"description\",\"fix\":\"recommended fix\",\"fingerprint\":\"path:line:category\",\"specialist\":\"name\"} + +Required fields: severity, confidence, path, category, summary, specialist. +Optional: line, fix, fingerprint, evidence, test_stub. + +If you can write a test that would catch this issue, include it in the `test_stub` field. +Use the detected test framework ({TEST_FW}). Write a minimal skeleton — describe/it/test +blocks with clear intent. Skip test_stub for architectural or design-only findings. + +If no findings: output `NO FINDINGS` and nothing else. +Do not output anything else — no preamble, no summary, no commentary. + +Stack context: {STACK} +Past learnings: {learnings or 'none'} + +CHECKLIST: +{checklist content}" + +**Subagent configuration:** +- Use `subagent_type: "general-purpose"` +- Do NOT use `run_in_background` — all specialists must complete before merge +- If any specialist subagent fails or times out, log the failure and continue with results from successful specialists. Specialists are additive — partial results are better than no results. + +--- + +### Step 3.56: Collect and merge findings + +After all specialist subagents complete, collect their outputs. + +**Parse findings:** +For each specialist's output: +1. If output is "NO FINDINGS" — skip, this specialist found nothing +2. Otherwise, parse each line as a JSON object. Skip lines that are not valid JSON. +3. Collect all parsed findings into a single list, tagged with their specialist name. + +**Fingerprint and deduplicate:** +For each finding, compute its fingerprint: +- If `fingerprint` field is present, use it +- Otherwise: `{path}:{line}:{category}` (if line is present) or `{path}:{category}` + +Group findings by fingerprint. For findings sharing the same fingerprint: +- Keep the finding with the highest confidence score +- Tag it: "MULTI-SPECIALIST CONFIRMED ({specialist1} + {specialist2})" +- Boost confidence by +1 (cap at 10) +- Note the confirming specialists in the output + +**Apply confidence gates:** +- Confidence 7+: show normally in the findings output +- Confidence 5-6: show with caveat "Medium confidence — verify this is actually an issue" +- Confidence 3-4: move to appendix (suppress from main findings) +- Confidence 1-2: suppress entirely + +**Compute PR Quality Score:** +After merging, compute the quality score: +`quality_score = max(0, 10 - (critical_count * 2 + informational_count * 0.5))` +Cap at 10. Log this in the review result at the end. + +**Output merged findings:** +Present the merged findings in the same format as the current review: + +``` +SPECIALIST REVIEW: N findings (X critical, Y informational) from Z specialists + +[For each finding, in order: CRITICAL first, then INFORMATIONAL, sorted by confidence descending] +[SEVERITY] (confidence: N/10, specialist: name) path:line — summary + Fix: recommended fix + [If MULTI-SPECIALIST CONFIRMED: show confirmation note] + +PR Quality Score: X/10 +``` + +These findings flow into the Fix-First flow (item 4) alongside the checklist pass (Step 3.5). +The Fix-First heuristic applies identically — specialist findings follow the same AUTO-FIX vs ASK classification. + +**Compile per-specialist stats:** +After merging findings, compile a `specialists` object for the review-log persist. +For each specialist (testing, maintainability, security, performance, data-migration, api-contract, design, red-team): +- If dispatched: `{"dispatched": true, "findings": N, "critical": N, "informational": N}` +- If skipped by scope: `{"dispatched": false, "reason": "scope"}` +- If skipped by gating: `{"dispatched": false, "reason": "gated"}` +- If not applicable (e.g., red-team not activated): omit from the object + +Include the Design specialist even though it uses `design-checklist.md` instead of the specialist schema files. +Remember these stats — you will need them for the review-log entry in Step 5.8. + +--- + +### Red Team dispatch (conditional) + +**Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding. + +If activated, dispatch one more subagent via the Agent tool (foreground, not background). + +The Red Team subagent receives: +1. The red-team checklist from `~/.claude/skills/gstack/review/specialists/red-team.md` +2. The merged specialist findings from Step 3.56 (so it knows what was already caught) +3. The git diff command + +Prompt: "You are a red team reviewer. The code has already been reviewed by N specialists +who found the following issues: {merged findings summary}. Your job is to find what they +MISSED. Read the checklist, run `git diff origin/`, and look for gaps. +Output findings as JSON objects (same schema as the specialists). Focus on cross-cutting +concerns, integration boundary issues, and failure modes that specialist checklists +don't cover." + +If the Red Team finds additional issues, merge them into the findings list before +the Fix-First flow (item 4). Red Team findings are tagged with `"specialist":"red-team"`. + +If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found." +If the Red Team subagent fails or times out, skip silently and continue. + +### Step 3.57: Cross-review finding dedup + +Before classifying findings, check if any were previously skipped by the user in a prior review on this branch. + +```bash +~/.claude/skills/gstack/bin/gstack-review-read +``` + +Parse the output: only lines BEFORE `---CONFIG---` are JSONL entries (the output also contains `---CONFIG---` and `---HEAD---` footer sections that are not JSONL — ignore those). + +For each JSONL entry that has a `findings` array: +1. Collect all fingerprints where `action: "skipped"` +2. Note the `commit` field from that entry + +If skipped fingerprints exist, get the list of files changed since that review: + +```bash +git diff --name-only HEAD +``` + +For each current finding (from both the checklist pass (Step 3.5) and specialist review (Step 3.55-3.56)), check: +- Does its fingerprint match a previously skipped finding? +- Is the finding's file path NOT in the changed-files set? + +If both conditions are true: suppress the finding. It was intentionally skipped and the relevant code hasn't changed. + +Print: "Suppressed N findings from prior reviews (previously skipped by user)" + +**Only suppress `skipped` findings — never `fixed` or `auto-fixed`** (those might regress and should be re-checked). + +If no prior reviews exist or none have a `findings` array, skip this step silently. + +Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)` + +4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX. 5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix: @@ -1680,10 +1927,13 @@ Present Codex output under a `CODEX (design):` header, merged with the checklist 9. Persist the review result to the review log: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"commit":"'"$(git rev-parse --short HEAD)"'","via":"ship"}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"quality_score":SCORE,"specialists":SPECIALISTS_JSON,"findings":FINDINGS_JSON,"commit":"'"$(git rev-parse --short HEAD)"'","via":"ship"}' ``` Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise), and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs. +- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0` +- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}` +- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip). Save the review output — it goes into the PR body in Step 8. @@ -1889,7 +2139,7 @@ echo "BASE: $BASE_VERSION HEAD: $CURRENT_VERSION" if [ "$CURRENT_VERSION" != "$BASE_VERSION" ]; then echo "ALREADY_BUMPED"; fi ``` -If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (prior `/ship` run). Skip the rest of Step 4 and use the current VERSION. Otherwise proceed with the bump. +If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (prior `/ship` run). Skip the bump action (do not modify VERSION), but read the current VERSION value — it is needed for CHANGELOG and PR body. Continue to the next step. Otherwise proceed with the bump. 1. Read the current `VERSION` file (4-digit format: `MAJOR.MINOR.PATCH.MICRO`) @@ -2080,7 +2330,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE" [ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED" ``` -If `ALREADY_PUSHED`, skip the push. Otherwise push with upstream tracking: +If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking: ```bash git push -u origin @@ -2102,7 +2352,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number): glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" ``` -If an **open** PR/MR already exists: **update** the PR body with the latest test results, coverage, and review findings using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Print the existing URL and continue to Step 8.5. +If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5. If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. @@ -2207,6 +2457,8 @@ execute its full workflow: This step is automatic. Do not ask the user for confirmation. The goal is zero-friction doc updates — the user runs `/ship` and documentation stays current without a separate command. +If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release. + --- ## Step 8.75: Persist ship metrics diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index de2ee4b9..76e4873d 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -52,6 +52,16 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat - Auto-fixable review findings (dead code, N+1, stale comments — fixed automatically) - Test coverage gaps within target threshold (auto-generate and commit, or flag in PR body) +**Re-run behavior (idempotency):** +Re-running `/ship` means "run the whole checklist again." Every verification step +(tests, coverage audit, plan completion, pre-landing review, adversarial review, +VERSION/CHANGELOG check, TODOS, document-release) runs on every invocation. +Only *actions* are idempotent: +- Step 4: If VERSION already bumped, skip the bump but still read the version +- Step 7: If already pushed, skip the push command +- Step 8: If PR exists, update the body instead of creating a new PR +Never skip a verification step because a prior `/ship` run already performed it. + --- ## Step 1: Pre-flight @@ -254,7 +264,11 @@ Review the diff for structural issues that tests don't catch. Include any design findings alongside the code review findings. They follow the same Fix-First flow below. -4. **Classify each finding as AUTO-FIX or ASK** per the Fix-First Heuristic in +{{REVIEW_ARMY}} + +{{CROSS_REVIEW_DEDUP}} + +4. **Classify each finding from both the checklist pass and specialist review (Step 3.55-3.56) as AUTO-FIX or ASK** per the Fix-First Heuristic in checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX. 5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix: @@ -276,10 +290,13 @@ Review the diff for structural issues that tests don't catch. 9. Persist the review result to the review log: ```bash -~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"commit":"'"$(git rev-parse --short HEAD)"'","via":"ship"}' +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"quality_score":SCORE,"specialists":SPECIALISTS_JSON,"findings":FINDINGS_JSON,"commit":"'"$(git rev-parse --short HEAD)"'","via":"ship"}' ``` Substitute TIMESTAMP (ISO 8601), STATUS ("clean" if no issues, "issues_found" otherwise), and N values from the summary counts above. The `via:"ship"` distinguishes from standalone `/review` runs. +- `quality_score` = the PR Quality Score computed in Step 3.56 (e.g., 7.5). If specialists were skipped (small diff), use `10.0` +- `specialists` = the per-specialist stats object compiled in Step 3.56. Each specialist that was considered gets an entry: `{"dispatched":true/false,"findings":N,"critical":N,"informational":N}` if dispatched, or `{"dispatched":false,"reason":"scope|gated"}` if skipped. Example: `{"testing":{"dispatched":true,"findings":2,"critical":0,"informational":2},"security":{"dispatched":false,"reason":"scope"}}` +- `findings` = array of per-finding records. For each finding (from checklist pass and specialists), include: `{"fingerprint":"path:line:category","severity":"CRITICAL|INFORMATIONAL","action":"ACTION"}`. ACTION is `"auto-fixed"`, `"fixed"` (user approved), or `"skipped"` (user chose Skip). Save the review output — it goes into the PR body in Step 8. @@ -339,7 +356,7 @@ echo "BASE: $BASE_VERSION HEAD: $CURRENT_VERSION" if [ "$CURRENT_VERSION" != "$BASE_VERSION" ]; then echo "ALREADY_BUMPED"; fi ``` -If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (prior `/ship` run). Skip the rest of Step 4 and use the current VERSION. Otherwise proceed with the bump. +If output shows `ALREADY_BUMPED`, VERSION was already bumped on this branch (prior `/ship` run). Skip the bump action (do not modify VERSION), but read the current VERSION value — it is needed for CHANGELOG and PR body. Continue to the next step. Otherwise proceed with the bump. 1. Read the current `VERSION` file (4-digit format: `MAJOR.MINOR.PATCH.MICRO`) @@ -490,7 +507,7 @@ echo "LOCAL: $LOCAL REMOTE: $REMOTE" [ "$LOCAL" = "$REMOTE" ] && echo "ALREADY_PUSHED" || echo "PUSH_NEEDED" ``` -If `ALREADY_PUSHED`, skip the push. Otherwise push with upstream tracking: +If `ALREADY_PUSHED`, skip the push but continue to Step 8. Otherwise push with upstream tracking: ```bash git push -u origin @@ -512,7 +529,7 @@ gh pr view --json url,number,state -q 'if .state == "OPEN" then "PR #\(.number): glab mr view -F json 2>/dev/null | jq -r 'if .state == "opened" then "MR_EXISTS" else "NO_MR" end' 2>/dev/null || echo "NO_MR" ``` -If an **open** PR/MR already exists: **update** the PR body with the latest test results, coverage, and review findings using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Print the existing URL and continue to Step 8.5. +If an **open** PR/MR already exists: **update** the PR body using `gh pr edit --body "..."` (GitHub) or `glab mr update -d "..."` (GitLab). Always regenerate the PR body from scratch using this run's fresh results (test output, coverage audit, review findings, adversarial review, TODOS summary). Never reuse stale PR body content from a prior run. Print the existing URL and continue to Step 8.5. If no PR/MR exists: create a pull request (GitHub) or merge request (GitLab) using the platform detected in Step 0. @@ -617,6 +634,8 @@ execute its full workflow: This step is automatic. Do not ask the user for confirmation. The goal is zero-friction doc updates — the user runs `/ship` and documentation stays current without a separate command. +If Step 8.5 created a docs commit, re-edit the PR/MR body to include the latest commit SHA in the summary. This ensures the PR body reflects the truly final state after document-release. + --- ## Step 8.75: Persist ship metrics diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 93c2dfc9..3cf2d043 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -749,6 +749,22 @@ describe('TEST_COVERAGE_AUDIT placeholders', () => { expect(shipSkill).toContain(phrase); } }); + + test('ship SKILL.md contains review army specialist dispatch', () => { + expect(shipSkill).toContain('Specialist Dispatch'); + expect(shipSkill).toContain('Step 3.55'); + expect(shipSkill).toContain('Step 3.56'); + }); + + test('ship SKILL.md contains cross-review finding dedup', () => { + expect(shipSkill).toContain('Cross-review finding dedup'); + expect(shipSkill).toContain('Step 3.57'); + }); + + test('ship SKILL.md contains re-run idempotency behavior', () => { + expect(shipSkill).toContain('Re-run behavior (idempotency)'); + expect(shipSkill).toContain('Never skip a verification step'); + }); }); // --- {{TEST_FAILURE_TRIAGE}} resolver tests --- From 542e7836d09c8bae7e0266b39750938a54298f59 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 5 Apr 2026 20:25:12 -0700 Subject: [PATCH 2/3] fix: snapshot -i auto-detects dropdown/popover interactive elements (#844) - Auto-enable cursor-interactive scan (-C) when -i flag is used - Add floating container detection (portals, popovers, dropdowns) - Detects position:fixed/absolute with high z-index - Recognizes data-floating-ui-portal, data-radix-* attributes - Recognizes role=listbox, role=menu containers - Elements inside floating containers bypass the hasRole skip - Catches dropdown items missed by the accessibility tree - Role=option/menuitem elements in floating containers captured even without cursor:pointer/onclick - Tag floating container items with 'popover-child' reason - Include role name in @c ref reasons when present - Add dropdown.html test fixture - Add dropdown/popover detection test suite (6 tests) - Add test: -i alone includes cursor-interactive elements Fixes: Bookface autocomplete, Radix UI combobox, React portals, and similar dynamic dropdown patterns where ariaSnapshot() misses the floating content. Co-authored-by: root --- browse/PLAN-snapshot-dropdown-interactive.md | 102 +++++++++++++++++++ browse/src/snapshot.ts | 43 +++++++- browse/test/fixtures/dropdown.html | 61 +++++++++++ browse/test/snapshot.test.ts | 71 +++++++++++++ 4 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 browse/PLAN-snapshot-dropdown-interactive.md create mode 100644 browse/test/fixtures/dropdown.html diff --git a/browse/PLAN-snapshot-dropdown-interactive.md b/browse/PLAN-snapshot-dropdown-interactive.md new file mode 100644 index 00000000..75356911 --- /dev/null +++ b/browse/PLAN-snapshot-dropdown-interactive.md @@ -0,0 +1,102 @@ +# Plan: Snapshot Dropdown/Autocomplete Interactive Element Detection + +## Problem + +`snapshot -i` misses dropdown/autocomplete items on modern web apps. These elements: +1. Are often `
`/`
  • ` with click handlers but no semantic ARIA roles +2. Live inside dynamically-created portals/popovers (floating containers) +3. Don't appear in Playwright's accessibility tree (`ariaSnapshot()`) + +The `-C` flag (cursor-interactive scan) was designed for this but: +- Requires separate flag — agents using `-i` don't get it automatically +- Skips elements that HAVE an ARIA role (even if the ARIA tree missed them) +- Doesn't prioritize popover/portal containers where dropdown items live + +## Root Cause + +Playwright's `ariaSnapshot()` builds from the browser's accessibility tree. Dynamically-rendered popovers (React portals, Radix Popover, etc.) may not be in the accessibility tree if: +- The component doesn't set ARIA roles +- The portal renders outside the scoped `body` locator's subtree timing +- The browser hasn't updated the accessibility tree yet after DOM mutation + +## Changes + +### 1. Auto-enable cursor-interactive scan with `-i` flag + +**File:** `browse/src/snapshot.ts` + +When `-i` (interactive) is passed, automatically include the cursor-interactive scan. This means agents always see clickable non-ARIA elements when they ask for interactive elements. + +The `-C` flag remains as a standalone option for non-interactive snapshots. + +``` +if (opts.interactive) { + opts.cursorInteractive = true; +} +``` + +### 2. Add popover/portal priority scanning + +**File:** `browse/src/snapshot.ts` (inside cursor-interactive evaluate block) + +Before the general cursor:pointer scan, specifically scan for visible floating containers (popovers, dropdowns, menus) and include ALL their direct children as interactive: + +Detection heuristics for floating containers: +- `position: fixed` or `position: absolute` with `z-index >= 10` +- Has `role="listbox"`, `role="menu"`, `role="dialog"`, `role="tooltip"`, `[data-radix-popper-content-wrapper]`, `[data-floating-ui-portal]`, etc. +- Appeared recently in the DOM (not in initial page load) +- Is visible (`offsetParent !== null` or `position: fixed`) + +For each floating container, include child elements that: +- Have text content +- Are visible +- Have cursor:pointer OR onclick OR role="option" OR role="menuitem" +- Tag with reason `popover-child` for clarity + +### 3. Remove the `hasRole` skip in cursor-interactive scan + +**File:** `browse/src/snapshot.ts` + +Currently: `if (hasRole) continue;` — skips any element with an ARIA role, assuming the ARIA tree already captured it. + +Problem: if the ARIA tree MISSED the element (timing, portal, bad DOM structure), it falls through both systems. + +Fix: Only skip if the element's role is in `INTERACTIVE_ROLES` AND it was actually captured in the main refMap. Otherwise include it. + +Since we can't easily check the refMap from inside `page.evaluate()`, the simpler fix: remove the `hasRole` skip entirely for elements inside detected floating containers. For elements outside floating containers, keep the `hasRole` skip as-is (to avoid duplicates in normal page content). + +### 4. Add dropdown test fixture and tests + +**File:** `browse/test/fixtures/dropdown.html` + +HTML page with: +- A combobox input that shows a dropdown on focus/type +- Dropdown items as `
    ` with click handlers (no ARIA roles) +- Dropdown items as `
  • ` with `role="option"` +- A React-portal-style container (`position: fixed`, high z-index) + +**File:** `browse/test/snapshot.test.ts` + +New test cases: +- `snapshot -i` on dropdown page finds dropdown items via cursor scan +- `snapshot -i` on dropdown page includes popover-child elements +- `@c` refs from dropdown scan are clickable +- Elements inside floating containers with ARIA roles are captured even when ARIA tree misses them + +## Rollout Risk + +**Low.** The `-C` scan is additive — it only adds `@c` refs, never removes `@e` refs. The change to auto-enable it with `-i` increases output size but agents already handle mixed ref types. + +**One concern:** The `-C` scan queries ALL elements (`document.querySelectorAll('*')`) which can be slow on heavy pages. For the popover-specific scan, we limit to elements inside detected floating containers, which is fast (small subtree). + +## Testing + +```bash +cd /data/gstack/browse && bun test snapshot +``` + +## Files Changed + +1. `browse/src/snapshot.ts` — auto-enable -C with -i, popover scanning, remove hasRole skip in floating containers +2. `browse/test/fixtures/dropdown.html` — new test fixture +3. `browse/test/snapshot.test.ts` — new dropdown/popover test cases diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index 840cd686..5581fe6e 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -233,7 +233,12 @@ export async function handleSnapshot( output.push(outputLine); } - // ─── Cursor-interactive scan (-C) ───────────────────────── + // ─── Cursor-interactive scan (-C, or auto with -i) ──────── + // Auto-enable cursor scan when interactive mode is on — agents asking for + // interactive elements should always see clickable non-ARIA items too. + if (opts.interactive && !opts.cursorInteractive) { + opts.cursorInteractive = true; + } if (opts.cursorInteractive) { try { const cursorElements = await target.evaluate(() => { @@ -256,9 +261,37 @@ export async function handleSnapshot( const hasTabindex = el.hasAttribute('tabindex') && parseInt(el.getAttribute('tabindex')!, 10) >= 0; const hasRole = el.hasAttribute('role'); - if (!hasCursorPointer && !hasOnclick && !hasTabindex) continue; - // Skip if it has an ARIA role (likely already captured) - if (hasRole) continue; + // Check if element is inside a floating container (portal/popover/dropdown) + const isInFloating = (() => { + let parent: Element | null = el; + while (parent && parent !== document.documentElement) { + const pStyle = getComputedStyle(parent); + const isFloating = (pStyle.position === 'fixed' || pStyle.position === 'absolute') && + parseInt(pStyle.zIndex || '0', 10) >= 10; + const hasPortalAttr = parent.hasAttribute('data-floating-ui-portal') || + parent.hasAttribute('data-radix-popper-content-wrapper') || + parent.hasAttribute('data-radix-portal') || + parent.hasAttribute('data-popper-placement') || + parent.getAttribute('role') === 'listbox' || + parent.getAttribute('role') === 'menu'; + if (isFloating || hasPortalAttr) return true; + parent = parent.parentElement; + } + return false; + })(); + + if (!hasCursorPointer && !hasOnclick && !hasTabindex) { + // For elements inside floating containers, also check for role="option"/"menuitem" + if (isInFloating && hasRole) { + const role = el.getAttribute('role'); + if (role !== 'option' && role !== 'menuitem' && role !== 'menuitemcheckbox' && role !== 'menuitemradio') continue; + } else { + continue; + } + } + // Skip elements with ARIA roles UNLESS they're inside a floating container + // (floating container items may be missed by the accessibility tree) + if (hasRole && !isInFloating) continue; // Build deterministic nth-child CSS path const parts: string[] = []; @@ -275,9 +308,11 @@ export async function handleSnapshot( const text = (el as HTMLElement).innerText?.trim().slice(0, 80) || el.tagName.toLowerCase(); const reasons: string[] = []; + if (isInFloating) reasons.push('popover-child'); if (hasCursorPointer) reasons.push('cursor:pointer'); if (hasOnclick) reasons.push('onclick'); if (hasTabindex) reasons.push(`tabindex=${el.getAttribute('tabindex')}`); + if (hasRole) reasons.push(`role=${el.getAttribute('role')}`); results.push({ selector, text, reason: reasons.join(', ') }); } diff --git a/browse/test/fixtures/dropdown.html b/browse/test/fixtures/dropdown.html new file mode 100644 index 00000000..7919bceb --- /dev/null +++ b/browse/test/fixtures/dropdown.html @@ -0,0 +1,61 @@ + + + + + Test Page - Dropdown/Autocomplete + + + +

    Dropdown Test

    + +
    + +
    + + + + + + + Normal Link + + + + diff --git a/browse/test/snapshot.test.ts b/browse/test/snapshot.test.ts index db5e8004..bcbf8cd7 100644 --- a/browse/test/snapshot.test.ts +++ b/browse/test/snapshot.test.ts @@ -386,6 +386,77 @@ describe('Cursor-interactive', () => { // And cursor-interactive section expect(result).toContain('cursor-interactive'); }); + + test('snapshot -i alone also includes cursor-interactive elements', async () => { + await handleWriteCommand('goto', [baseUrl + '/cursor-interactive.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // -i now auto-enables -C + expect(result).toContain('[button]'); + expect(result).toContain('[link]'); + expect(result).toContain('cursor-interactive'); + expect(result).toContain('@c'); + }); +}); + +// ─── Dropdown/Popover Detection ───────────────────────────────── + +describe('Dropdown/popover detection', () => { + test('snapshot -i auto-enables cursor scan and finds dropdown items', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Should find standard interactive elements + expect(result).toContain('[button]'); + expect(result).toContain('[link]'); + expect(result).toContain('[textbox]'); + // Should also find cursor-interactive dropdown items + expect(result).toContain('cursor-interactive'); + expect(result).toContain('@c'); + expect(result).toContain('Alice Johnson'); + expect(result).toContain('Bob Smith'); + }); + + test('dropdown items in floating container are tagged as popover-child', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + expect(result).toContain('popover-child'); + }); + + test('dropdown items with role="option" in portal are captured', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Dave Wilson has role="option" — should be captured even though it has a role + expect(result).toContain('Dave Wilson'); + }); + + test('static text in dropdown without interactivity is NOT captured', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // "No results? Try a different search." has no cursor:pointer, no onclick, no tabindex + expect(result).not.toContain('No results'); + }); + + test('@c ref from dropdown is clickable', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); + // Find a @c ref for Alice + const aliceLine = snap.split('\n').find(l => l.includes('@c') && l.includes('Alice')); + if (aliceLine) { + const refMatch = aliceLine.match(/@(c\d+)/); + if (refMatch) { + const result = await handleWriteCommand('click', [`@${refMatch[1]}`], bm); + expect(result).toContain('Clicked'); + } + } + }); + + test('snapshot -C still works standalone without -i', async () => { + await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); + const result = await handleMetaCommand('snapshot', ['-C'], bm, shutdown); + expect(result).toContain('cursor-interactive'); + expect(result).toContain('Alice Johnson'); + // Without -i, should include non-interactive ARIA elements too + expect(result).toContain('[heading]'); + }); }); // ─── Snapshot Error Paths ─────────────────────────────────────── From 237ae2abbec3a05ae5ab4ea29b2f977ceb06bd5f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 6 Apr 2026 03:27:13 +0000 Subject: [PATCH 3/3] Revert "fix: snapshot -i auto-detects dropdown/popover interactive elements (#844)" This reverts commit 542e7836d09c8bae7e0266b39750938a54298f59. --- browse/PLAN-snapshot-dropdown-interactive.md | 102 ------------------- browse/src/snapshot.ts | 43 +------- browse/test/fixtures/dropdown.html | 61 ----------- browse/test/snapshot.test.ts | 71 ------------- 4 files changed, 4 insertions(+), 273 deletions(-) delete mode 100644 browse/PLAN-snapshot-dropdown-interactive.md delete mode 100644 browse/test/fixtures/dropdown.html diff --git a/browse/PLAN-snapshot-dropdown-interactive.md b/browse/PLAN-snapshot-dropdown-interactive.md deleted file mode 100644 index 75356911..00000000 --- a/browse/PLAN-snapshot-dropdown-interactive.md +++ /dev/null @@ -1,102 +0,0 @@ -# Plan: Snapshot Dropdown/Autocomplete Interactive Element Detection - -## Problem - -`snapshot -i` misses dropdown/autocomplete items on modern web apps. These elements: -1. Are often `
    `/`
  • ` with click handlers but no semantic ARIA roles -2. Live inside dynamically-created portals/popovers (floating containers) -3. Don't appear in Playwright's accessibility tree (`ariaSnapshot()`) - -The `-C` flag (cursor-interactive scan) was designed for this but: -- Requires separate flag — agents using `-i` don't get it automatically -- Skips elements that HAVE an ARIA role (even if the ARIA tree missed them) -- Doesn't prioritize popover/portal containers where dropdown items live - -## Root Cause - -Playwright's `ariaSnapshot()` builds from the browser's accessibility tree. Dynamically-rendered popovers (React portals, Radix Popover, etc.) may not be in the accessibility tree if: -- The component doesn't set ARIA roles -- The portal renders outside the scoped `body` locator's subtree timing -- The browser hasn't updated the accessibility tree yet after DOM mutation - -## Changes - -### 1. Auto-enable cursor-interactive scan with `-i` flag - -**File:** `browse/src/snapshot.ts` - -When `-i` (interactive) is passed, automatically include the cursor-interactive scan. This means agents always see clickable non-ARIA elements when they ask for interactive elements. - -The `-C` flag remains as a standalone option for non-interactive snapshots. - -``` -if (opts.interactive) { - opts.cursorInteractive = true; -} -``` - -### 2. Add popover/portal priority scanning - -**File:** `browse/src/snapshot.ts` (inside cursor-interactive evaluate block) - -Before the general cursor:pointer scan, specifically scan for visible floating containers (popovers, dropdowns, menus) and include ALL their direct children as interactive: - -Detection heuristics for floating containers: -- `position: fixed` or `position: absolute` with `z-index >= 10` -- Has `role="listbox"`, `role="menu"`, `role="dialog"`, `role="tooltip"`, `[data-radix-popper-content-wrapper]`, `[data-floating-ui-portal]`, etc. -- Appeared recently in the DOM (not in initial page load) -- Is visible (`offsetParent !== null` or `position: fixed`) - -For each floating container, include child elements that: -- Have text content -- Are visible -- Have cursor:pointer OR onclick OR role="option" OR role="menuitem" -- Tag with reason `popover-child` for clarity - -### 3. Remove the `hasRole` skip in cursor-interactive scan - -**File:** `browse/src/snapshot.ts` - -Currently: `if (hasRole) continue;` — skips any element with an ARIA role, assuming the ARIA tree already captured it. - -Problem: if the ARIA tree MISSED the element (timing, portal, bad DOM structure), it falls through both systems. - -Fix: Only skip if the element's role is in `INTERACTIVE_ROLES` AND it was actually captured in the main refMap. Otherwise include it. - -Since we can't easily check the refMap from inside `page.evaluate()`, the simpler fix: remove the `hasRole` skip entirely for elements inside detected floating containers. For elements outside floating containers, keep the `hasRole` skip as-is (to avoid duplicates in normal page content). - -### 4. Add dropdown test fixture and tests - -**File:** `browse/test/fixtures/dropdown.html` - -HTML page with: -- A combobox input that shows a dropdown on focus/type -- Dropdown items as `
    ` with click handlers (no ARIA roles) -- Dropdown items as `
  • ` with `role="option"` -- A React-portal-style container (`position: fixed`, high z-index) - -**File:** `browse/test/snapshot.test.ts` - -New test cases: -- `snapshot -i` on dropdown page finds dropdown items via cursor scan -- `snapshot -i` on dropdown page includes popover-child elements -- `@c` refs from dropdown scan are clickable -- Elements inside floating containers with ARIA roles are captured even when ARIA tree misses them - -## Rollout Risk - -**Low.** The `-C` scan is additive — it only adds `@c` refs, never removes `@e` refs. The change to auto-enable it with `-i` increases output size but agents already handle mixed ref types. - -**One concern:** The `-C` scan queries ALL elements (`document.querySelectorAll('*')`) which can be slow on heavy pages. For the popover-specific scan, we limit to elements inside detected floating containers, which is fast (small subtree). - -## Testing - -```bash -cd /data/gstack/browse && bun test snapshot -``` - -## Files Changed - -1. `browse/src/snapshot.ts` — auto-enable -C with -i, popover scanning, remove hasRole skip in floating containers -2. `browse/test/fixtures/dropdown.html` — new test fixture -3. `browse/test/snapshot.test.ts` — new dropdown/popover test cases diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index 5581fe6e..840cd686 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -233,12 +233,7 @@ export async function handleSnapshot( output.push(outputLine); } - // ─── Cursor-interactive scan (-C, or auto with -i) ──────── - // Auto-enable cursor scan when interactive mode is on — agents asking for - // interactive elements should always see clickable non-ARIA items too. - if (opts.interactive && !opts.cursorInteractive) { - opts.cursorInteractive = true; - } + // ─── Cursor-interactive scan (-C) ───────────────────────── if (opts.cursorInteractive) { try { const cursorElements = await target.evaluate(() => { @@ -261,37 +256,9 @@ export async function handleSnapshot( const hasTabindex = el.hasAttribute('tabindex') && parseInt(el.getAttribute('tabindex')!, 10) >= 0; const hasRole = el.hasAttribute('role'); - // Check if element is inside a floating container (portal/popover/dropdown) - const isInFloating = (() => { - let parent: Element | null = el; - while (parent && parent !== document.documentElement) { - const pStyle = getComputedStyle(parent); - const isFloating = (pStyle.position === 'fixed' || pStyle.position === 'absolute') && - parseInt(pStyle.zIndex || '0', 10) >= 10; - const hasPortalAttr = parent.hasAttribute('data-floating-ui-portal') || - parent.hasAttribute('data-radix-popper-content-wrapper') || - parent.hasAttribute('data-radix-portal') || - parent.hasAttribute('data-popper-placement') || - parent.getAttribute('role') === 'listbox' || - parent.getAttribute('role') === 'menu'; - if (isFloating || hasPortalAttr) return true; - parent = parent.parentElement; - } - return false; - })(); - - if (!hasCursorPointer && !hasOnclick && !hasTabindex) { - // For elements inside floating containers, also check for role="option"/"menuitem" - if (isInFloating && hasRole) { - const role = el.getAttribute('role'); - if (role !== 'option' && role !== 'menuitem' && role !== 'menuitemcheckbox' && role !== 'menuitemradio') continue; - } else { - continue; - } - } - // Skip elements with ARIA roles UNLESS they're inside a floating container - // (floating container items may be missed by the accessibility tree) - if (hasRole && !isInFloating) continue; + if (!hasCursorPointer && !hasOnclick && !hasTabindex) continue; + // Skip if it has an ARIA role (likely already captured) + if (hasRole) continue; // Build deterministic nth-child CSS path const parts: string[] = []; @@ -308,11 +275,9 @@ export async function handleSnapshot( const text = (el as HTMLElement).innerText?.trim().slice(0, 80) || el.tagName.toLowerCase(); const reasons: string[] = []; - if (isInFloating) reasons.push('popover-child'); if (hasCursorPointer) reasons.push('cursor:pointer'); if (hasOnclick) reasons.push('onclick'); if (hasTabindex) reasons.push(`tabindex=${el.getAttribute('tabindex')}`); - if (hasRole) reasons.push(`role=${el.getAttribute('role')}`); results.push({ selector, text, reason: reasons.join(', ') }); } diff --git a/browse/test/fixtures/dropdown.html b/browse/test/fixtures/dropdown.html deleted file mode 100644 index 7919bceb..00000000 --- a/browse/test/fixtures/dropdown.html +++ /dev/null @@ -1,61 +0,0 @@ - - - - - Test Page - Dropdown/Autocomplete - - - -

    Dropdown Test

    - -
    - -
    - - - - - - - Normal Link - - - - diff --git a/browse/test/snapshot.test.ts b/browse/test/snapshot.test.ts index bcbf8cd7..db5e8004 100644 --- a/browse/test/snapshot.test.ts +++ b/browse/test/snapshot.test.ts @@ -386,77 +386,6 @@ describe('Cursor-interactive', () => { // And cursor-interactive section expect(result).toContain('cursor-interactive'); }); - - test('snapshot -i alone also includes cursor-interactive elements', async () => { - await handleWriteCommand('goto', [baseUrl + '/cursor-interactive.html'], bm); - const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - // -i now auto-enables -C - expect(result).toContain('[button]'); - expect(result).toContain('[link]'); - expect(result).toContain('cursor-interactive'); - expect(result).toContain('@c'); - }); -}); - -// ─── Dropdown/Popover Detection ───────────────────────────────── - -describe('Dropdown/popover detection', () => { - test('snapshot -i auto-enables cursor scan and finds dropdown items', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - // Should find standard interactive elements - expect(result).toContain('[button]'); - expect(result).toContain('[link]'); - expect(result).toContain('[textbox]'); - // Should also find cursor-interactive dropdown items - expect(result).toContain('cursor-interactive'); - expect(result).toContain('@c'); - expect(result).toContain('Alice Johnson'); - expect(result).toContain('Bob Smith'); - }); - - test('dropdown items in floating container are tagged as popover-child', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - expect(result).toContain('popover-child'); - }); - - test('dropdown items with role="option" in portal are captured', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - // Dave Wilson has role="option" — should be captured even though it has a role - expect(result).toContain('Dave Wilson'); - }); - - test('static text in dropdown without interactivity is NOT captured', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const result = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - // "No results? Try a different search." has no cursor:pointer, no onclick, no tabindex - expect(result).not.toContain('No results'); - }); - - test('@c ref from dropdown is clickable', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const snap = await handleMetaCommand('snapshot', ['-i'], bm, shutdown); - // Find a @c ref for Alice - const aliceLine = snap.split('\n').find(l => l.includes('@c') && l.includes('Alice')); - if (aliceLine) { - const refMatch = aliceLine.match(/@(c\d+)/); - if (refMatch) { - const result = await handleWriteCommand('click', [`@${refMatch[1]}`], bm); - expect(result).toContain('Clicked'); - } - } - }); - - test('snapshot -C still works standalone without -i', async () => { - await handleWriteCommand('goto', [baseUrl + '/dropdown.html'], bm); - const result = await handleMetaCommand('snapshot', ['-C'], bm, shutdown); - expect(result).toContain('cursor-interactive'); - expect(result).toContain('Alice Johnson'); - // Without -i, should include non-interactive ARIA elements too - expect(result).toContain('[heading]'); - }); }); // ─── Snapshot Error Paths ───────────────────────────────────────