From 4f9e43c7cb8ba860bcde32d489f2464103ae2700 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 5 May 2026 17:29:37 -0700 Subject: [PATCH] test: static template assertions for delete-then-append + revert autoplan E2E shape 5 new static tests in test/gen-skill-docs.test.ts (4 plan-review SKILL.md files + 1 source resolver) verify the new prompt language is present and the old contradictory bullets are absent. Synthetic regression check confirmed all 5 fail when the prompt fix is reverted. The autoplan E2E (skill-e2e-autoplan-auto-mode.test.ts) reverts to its original AUQ-blocked-gate-surface shape. The mid-file regression scenario the plan briefly proposed isn't reachable in the current PTY harness because --disallowedTools AskUserQuestion makes autoplan bail at the Phase 1 premise gate before any review-write code path runs. Static prompt-text verification covers the load-bearing change. --- test/gen-skill-docs.test.ts | 45 +++++++++++++++++++++++ test/skill-e2e-autoplan-auto-mode.test.ts | 10 +++++ 2 files changed, 55 insertions(+) diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index aa2e2d9f..7249a448 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -2977,3 +2977,48 @@ describe('plan-mode-info resolver (handshake-replacement)', () => { expect(between).toContain('Do NOT proceed to Step 0D or 0F until the user responds to 0C-bis'); }); }); + +// GSTACK REVIEW REPORT report-at-bottom contract — verifies the prompt-text +// fix in scripts/resolvers/review.ts (the load-bearing change for the +// "report not at bottom of plan in plan mode" bug). The bug is in the +// prompt's contradictory write-flow instructions, not in observable +// runtime behavior we can cheaply gate in CI. 
Verifying the prompt text +// directly is the deterministic equivalent of the regression test the +// PTY harness can't reliably drive (autoplan needs auto-progression of +// AskUserQuestions to reach the report-write step, which the harness +// doesn't support today). +describe('GSTACK REVIEW REPORT delete-then-append flow', () => { + const PLAN_REVIEW_SKILLS = [ + 'plan-ceo-review', + 'plan-design-review', + 'plan-devex-review', + 'plan-eng-review', + ]; + + for (const skill of PLAN_REVIEW_SKILLS) { + test(`${skill}/SKILL.md prescribes delete-then-append, not in-place replace`, () => { + const content = fs.readFileSync(path.join(ROOT, skill, 'SKILL.md'), 'utf-8'); + + // The new (correct) instruction must be present. + expect(content).toContain('delete-then-append flow'); + expect(content).toContain('never mid-file'); + expect(content).toContain('Do NOT replace the section in place'); + + // The old contradictory bullets must be gone. The signature phrase + // from the buggy prompt was 'replace it** entirely using the Edit tool' + // (the ** is literal, as asserted below), which is what allowed mid-file reports to stay mid-file. + expect(content).not.toContain('replace it** entirely using the Edit tool'); + expect(content).not.toContain('If it was found mid-file, move it'); + }); + } + + test('scripts/resolvers/review.ts source has the rewritten flow', () => { + const src = fs.readFileSync(path.join(ROOT, 'scripts', 'resolvers', 'review.ts'), 'utf-8'); + expect(src).toContain('delete-then-append flow'); + expect(src).toContain('never mid-file'); + expect(src).toContain('Do NOT replace the section in place'); + // Old contradictory bullets are gone from the source resolver.
+ expect(src).not.toContain('replace it** entirely using the Edit tool'); + expect(src).not.toContain('If it was found mid-file, move it'); + }); +}); diff --git a/test/skill-e2e-autoplan-auto-mode.test.ts b/test/skill-e2e-autoplan-auto-mode.test.ts index f5fe84db..0677917b 100644 --- a/test/skill-e2e-autoplan-auto-mode.test.ts +++ b/test/skill-e2e-autoplan-auto-mode.test.ts @@ -18,6 +18,16 @@ * Filename keeps `auto-mode` for branch-history continuity. Auto-mode (the * AUTO_DECIDE preamble path when QUESTION_TUNING=true) is a related but * distinct silencing mechanism; both share the same fix surface. + * + * Note on report-at-bottom contract: the GSTACK REVIEW REPORT delete-then- + * append flow lives in `scripts/resolvers/review.ts` and is exercised when + * reviews actually run. The PTY harness can't drive autoplan through its + * review phases without auto-progression of AUQs (see runPlanSkillCounting), + * and `--disallowedTools AskUserQuestion` makes autoplan bail at the + * premise gate via the plan-file fallback before any review runs. The + * report-at-bottom prompt change is verified statically in + * `test/gen-skill-docs.test.ts` instead — that's the load-bearing + * verification for the contradictory-prompt fix. */ import { describe, test, expect } from 'bun:test';