// gstack/test/helpers/claude-pty-runner.unit.test.ts

/**
 * Deterministic unit tests for claude-pty-runner.ts behavior changes.
 *
 * Free-tier (no EVALS=1 needed). Runs in <1s on every `bun test`. Catches
 * harness plumbing bugs before stochastic PTY runs surface them.
 *
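 * Two harness surface areas motivated this file (later sections also cover
 * the per-finding count primitives and the per-skill Step 0 boundary
 * predicates):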
 *
 * 1. Permission-dialog short-circuit in 'asked' classification: a TTY frame
 *    that matches BOTH isPermissionDialogVisible AND isNumberedOptionListVisible
 *    must NOT be classified as a skill question — permission dialogs render
 *    as numbered lists too, but they're not what we're guarding.
 *
 * 2. Env passthrough surface: runPlanSkillObservation accepts an `env`
 *    option and threads it to launchClaudePty. We can't fully exercise the
 *    spawn pipeline without paying for a PTY session, so this file only
 *    verifies that the `env` option exists in the ClaudePtyOptions type
 *    signature; the actual handoff is covered by the live PTY test.
 *
 * The PTY test (skill-e2e-plan-ceo-plan-mode.test.ts) is the integration
 * check; this file is the cheap deterministic guard for the harness primitives
 * those tests stand on.
 */

import { describe, test, expect } from 'bun:test';
import {
  isPermissionDialogVisible,
  isNumberedOptionListVisible,
  isProseAUQVisible,
  isPlanReadyVisible,
  parseNumberedOptions,
  classifyVisible,
  TAIL_SCAN_BYTES,
  optionsSignature,
  parseQuestionPrompt,
  auqFingerprint,
  COMPLETION_SUMMARY_RE,
  assertReviewReportAtBottom,
  ceoStep0Boundary,
  engStep0Boundary,
  designStep0Boundary,
  devexStep0Boundary,
  type ClaudePtyOptions,
  type AskUserQuestionFingerprint,
} from './claude-pty-runner';

// Fixtures are raw TTY frames as the runner would see them. Positive cases
// each target one permission-dialog phrasing; negative cases are numbered
// lists (skill questions, plan-ready confirmations) that must not match.
describe('isPermissionDialogVisible', () => {
  test('matches "Bash command requires permission" prompts', () => {
    const sample = `
      Some preamble output

      Bash command \`gstack-config get telemetry\` requires permission to run.

      ❯ 1. Yes
        2. Yes, and always allow
        3. No, abort
    `;
    expect(isPermissionDialogVisible(sample)).toBe(true);
  });

  test('matches "allow all edits" file-edit prompts', () => {
    // Isolated to the "allow all edits" clause only — no overlapping
    // "Do you want to proceed?" co-trigger, so this asserts the clause works.
    const sample = `
      Edit to ~/.gstack/config.yaml

      ❯ 1. Yes
        2. Yes, allow all edits during this session
        3. No
    `;
    expect(isPermissionDialogVisible(sample)).toBe(true);
  });

  test('matches the "Do you want to proceed?" file-edit confirmation by itself', () => {
    // Separate fixture so weakening this clause is detected by a dedicated test.
    const sample = `
      Edit to ~/.gstack/config.yaml

      Do you want to proceed?

      ❯ 1. Yes
        2. No
    `;
    expect(isPermissionDialogVisible(sample)).toBe(true);
  });

  test('matches workspace-trust "always allow access to" prompt', () => {
    const sample = `
      Do you trust the files in this folder?

      ❯ 1. Yes, proceed
        2. Yes, and always allow access to /Users/me/repo
        3. No, exit
    `;
    expect(isPermissionDialogVisible(sample)).toBe(true);
  });

  test('does NOT match a skill AskUserQuestion list', () => {
    const sample = `
      D1 — Premise challenge: do users actually want this?

      ❯ 1. Yes, validated
        2. No, premise is wrong
        3. Need more info
    `;
    expect(isPermissionDialogVisible(sample)).toBe(false);
  });

  test('does NOT match a plan-ready confirmation', () => {
    const sample = `
      Ready to execute the plan?

      ❯ 1. Yes
        2. No, keep planning
    `;
    expect(isPermissionDialogVisible(sample)).toBe(false);
  });

  test('does NOT match a skill question that contains the bare phrase "Do you want to proceed?"', () => {
    // Co-trigger requirement: "Do you want to proceed?" alone is not enough.
    // It must appear with "Edit to <path>" or "Write to <path>" to count as
    // a permission dialog. This guards against a skill question like
    // "Do you want to proceed with HOLD SCOPE?" being mis-classified.
    const sample = `
      Choose your scope mode for this review.
      Do you want to proceed?

      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
        3. SELECTIVE EXPANSION
    `;
    expect(isPermissionDialogVisible(sample)).toBe(false);
  });

  test('KNOWN LIMITATION: prose containing "Edit to <path>" plus the bare proceed phrase IS classified as a permission dialog', () => {
    // Adversarial fixture: a skill question whose body legitimately mentions
    // "Edit to <path>" in prose AND ends with "Do you want to proceed?".
    // Ideally this would NOT count as a permission dialog, but the current
    // co-trigger check fires on it. This test pins that current behavior so
    // a future tightening (e.g. a proximity constraint, or anchoring
    // "Edit to" to a line start) has to flip the assertion — and this
    // title — intentionally rather than by accident.
    const sample = `
      Plan: I will Edit to ./plan.md to capture the decision.
      Do you want to proceed?

      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
    `;
    // Documented as a post-merge follow-up: change to `false` once the
    // detector is tightened.
    expect(isPermissionDialogVisible(sample)).toBe(true);
  });
});

// Cursor-list detection: needs the ❯ cursor on option 1 plus at least one
// further numbered option, per the cases exercised below.
describe('isNumberedOptionListVisible', () => {
  test('matches a basic ❯ 1. + 2. cursor list', () => {
    const frame = `
      ❯ 1. Option one
        2. Option two
        3. Option three
    `;
    const detected = isNumberedOptionListVisible(frame);
    expect(detected).toBe(true);
  });

  test('returns false on a single-option prompt', () => {
    const frame = `
      ❯ 1. Only option
    `;
    const detected = isNumberedOptionListVisible(frame);
    expect(detected).toBe(false);
  });

  test('returns false when no cursor renders', () => {
    const frame = `
      Just some prose with 1. a numbered point and 2. another.
    `;
    const detected = isNumberedOptionListVisible(frame);
    expect(detected).toBe(false);
  });

  test('overlaps permission dialogs (this is why D5 short-circuits)', () => {
    // A permission dialog is also a numbered cursor list, so both detectors
    // fire on this frame. That overlap is the reason the runner has to ask
    // isPermissionDialogVisible before treating a list as a skill question.
    const frame = `
      Bash command \`do-thing\` requires permission to run.

      ❯ 1. Yes
        2. No
    `;
    expect(isNumberedOptionListVisible(frame)).toBe(true);
    expect(isPermissionDialogVisible(frame)).toBe(true);
  });
});

// Prose-rendered AskUserQuestion detection: lettered (A) B) …) or numbered
// (1. 2. …) options that appear WITHOUT the native `❯ 1.` cursor list.
// Fixtures start at column 0 on purpose — option markers are matched at
// line starts (optionally after whitespace / a ❯ prefix, see below).
describe('isProseAUQVisible', () => {
  test('matches 4 lettered options A) B) C) D) at line starts (plan-eng prose AUQ shape)', () => {
    const sample = `
What would you like me to review? Options:
A) Point me at an existing design doc or plan file (path).
B) Describe new work you're planning — I'll explore the codebase.
C) You meant /review for the diff already on this branch.
D) Something else (tell me).
Recommendation: A if you have a doc in mind, otherwise B.
❯
`;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('matches 2 lettered options (minimum threshold)', () => {
    const sample = `
A) First option
B) Second option
`;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('matches 3 numbered options 1. 2. 3. without ❯ 1. cursor (autoplan prose AUQ shape)', () => {
    const sample = `
What's the task? A few options:
  1. You have a plan idea in mind — describe it.
  2. You want to review an existing plan elsewhere.
  3. You meant a different command — /plan-ceo-review etc.
❯
`;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('returns false when ❯ 1. cursor is present in the recent tail (native UI handled by isNumberedOptionListVisible)', () => {
    const sample = `
❯ 1. First option
  2. Second option
  3. Third option
`;
    expect(isProseAUQVisible(sample)).toBe(false);
  });

  test('does NOT suppress numbered-prose detection when ❯ 1. is only in early scrollback (trust dialog)', () => {
    // Boot trust dialog rendered ❯ 1. Yes at startup, then a long body of
    // model output, then prose-rendered numbered options now. The historic
    // ❯ 1. is in the full buffer but NOT in the recent tail. Should detect
    // the prose AUQ.
    const trustHeader = '❯ 1. Yes, trust\n  2. No\n';
    // 5000 filler chars push the trust dialog out of the recent tail.
    // NOTE(review): this comment used to cite a "4KB tail", while
    // TAIL_SCAN_BYTES is asserted as 1500 elsewhere in this file — confirm
    // which window isProseAUQVisible scans. 5000 exceeds both.
    const filler = 'x'.repeat(5000);
    const proseAUQ = `\n  1. Review the docs\n  2. Investigate the code\n  3. Defer to next session\n❯  \n`;
    const sample = trustHeader + filler + proseAUQ;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('returns false on single lettered option', () => {
    const sample = `
A) Only one option mentioned in passing.
`;
    expect(isProseAUQVisible(sample)).toBe(false);
  });

  test('matches 2 numbered options (threshold matches lettered branch — tails miss option 1)', () => {
    // Two numbered lines are enough, mirroring the two-option lettered case.
    const sample = `
1. First note.
2. Second note.
`;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('returns false on a single numbered option', () => {
    const sample = `
1. Only one option mentioned.
`;
    expect(isProseAUQVisible(sample)).toBe(false);
  });

  test('does not match mid-prose lettered text like "(see option B) above"', () => {
    const sample = `
This refers to (see option B) above and also to point A) earlier.
`;
    // The B) and A) markers are mid-line, not at line starts, so they don't count.
    expect(isProseAUQVisible(sample)).toBe(false);
  });

  test('matches with leading whitespace and ❯ prefix on options', () => {
    // Markers may be indented or preceded by a ❯ cursor and still count.
    const sample = `
   A) Option with whitespace prefix
❯  B) Option with cursor prefix
   C) Another option
`;
    expect(isProseAUQVisible(sample)).toBe(true);
  });

  test('returns false on plain text with no option markers', () => {
    // Covers both ordinary prose and the empty-string edge case.
    expect(isProseAUQVisible('Just some plain text output from the model.')).toBe(false);
    expect(isProseAUQVisible('')).toBe(false);
  });
});

// End-to-end classifier tests. Outcomes asserted in this block: 'asked',
// 'plan_ready', 'silent_write', 'wrote_findings_before_asking', or null
// (nothing to report — the comments below describe null as "keep polling").
describe('classifyVisible (runtime path through the runner classifier)', () => {
  // These tests call the actual classifier so a future contributor who
  // reorders branches (e.g. moves the permission short-circuit before
  // isPlanReadyVisible) is caught deterministically.

  test('skill question → returns asked', () => {
    const visible = `
      D1 — Choose your scope mode

      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
        3. SELECTIVE EXPANSION
        4. SCOPE REDUCTION
    `;
    const result = classifyVisible(visible);
    expect(result?.outcome).toBe('asked');
  });

  test('permission dialog (Bash) → returns null (skip, keep polling)', () => {
    const visible = `
      Bash command \`gstack-update-check\` requires permission to run.

      ❯ 1. Yes
        2. No
    `;
    // First assertion proves the frame WOULD look like a numbered prompt;
    // second proves the classifier filters it out as a permission dialog.
    expect(isNumberedOptionListVisible(visible)).toBe(true); // pre-filter
    expect(classifyVisible(visible)).toBeNull(); // post-filter
  });

  test('plan-ready confirmation → returns plan_ready (wins over asked)', () => {
    // This frame is also a numbered cursor list; plan_ready must take priority.
    const visible = `
      Ready to execute the plan?

      ❯ 1. Yes, proceed
        2. No, keep planning
    `;
    const result = classifyVisible(visible);
    expect(result?.outcome).toBe('plan_ready');
  });

  test('silent write to unsanctioned path → returns silent_write', () => {
    const visible = `
      ⏺ Write(src/app/dangerous-write.ts)
      ⎿  Wrote 42 lines
    `;
    const result = classifyVisible(visible);
    expect(result?.outcome).toBe('silent_write');
    // The summary must name the offending path so failures are actionable.
    expect(result?.summary).toContain('src/app/dangerous-write.ts');
  });

  test('write to sanctioned path (.claude/plans) → returns null (allowed)', () => {
    const visible = `
      ⏺ Write(/Users/me/.claude/plans/some-plan.md)
      ⎿  Wrote 42 lines
    `;
    expect(classifyVisible(visible)).toBeNull();
  });

  test('write while a permission dialog is on screen → returns null (gated, not silent, not asked)', () => {
    const visible = `
      ⏺ Write(src/app/edit-with-permission.ts)

      Edit to src/app/edit-with-permission.ts

      Do you want to proceed?

      ❯ 1. Yes
        2. No
    `;
    // The numbered prompt is a permission dialog (Edit to + Do you want to proceed?);
    // silent_write is suppressed because a numbered prompt is visible, AND
    // 'asked' is suppressed because the prompt is a permission dialog.
    expect(classifyVisible(visible)).toBeNull();
  });

  test('write while a real skill question is on screen → returns asked (write is captured but not silent)', () => {
    const visible = `
      ⏺ Write(src/app/foo.ts)

      D1 — Choose your scope mode

      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
    `;
    // The numbered prompt is a skill question, not a permission dialog;
    // silent_write is suppressed (numbered prompt is visible) and the
    // outcome is 'asked' — Step 0 fired.
    const result = classifyVisible(visible);
    expect(result?.outcome).toBe('asked');
  });

  test('idle / no signals → returns null', () => {
    const visible = `
      Some prose without any classifier signals.
    `;
    expect(classifyVisible(visible)).toBeNull();
  });

  test('TAIL_SCAN_BYTES is exported as 1500', () => {
    // Shared between runner and routing test; a regression that desyncs the
    // recent-tail window would surface here.
    expect(TAIL_SCAN_BYTES).toBe(1500);
  });

  // D4-B: strictPlanWrites detector. Catches the transcript bug where the
  // model writes findings to the plan file before any AskUserQuestion fires.
  test('strictPlanWrites: plan write before any AUQ → wrote_findings_before_asking', () => {
    const visible = `
      ⏺ Edit(/Users/me/.claude/plans/some-plan.md)
      ⎿  Updated 12 lines
    `;
    const result = classifyVisible(visible, { strictPlanWrites: true });
    expect(result?.outcome).toBe('wrote_findings_before_asking');
    expect(result?.summary).toContain('.claude/plans/some-plan.md');
  });

  test('strictPlanWrites: plan write AFTER an AUQ render → not flagged', () => {
    // AUQ renders first, then the model writes the plan post-answer. This is
    // the legitimate end-of-workflow flow and must NOT trigger the detector.
    const visible = `
      D1 — Some scope question

      ❯ 1. Option A
        2. Option B

      ⏺ Edit(/Users/me/.claude/plans/some-plan.md)
      ⎿  Updated 12 lines
    `;
    const result = classifyVisible(visible, { strictPlanWrites: true });
    // Outcome is 'asked' (the numbered list rendered); the post-AUQ plan
    // write is ignored by the detector.
    expect(result?.outcome).toBe('asked');
  });

  test('strictPlanWrites: AUQ first then plan write — write_pos > auq_pos → not flagged', () => {
    // Same scenario, more explicit ordering: the regex finds the write at a
    // position AFTER the numbered list. Detector lets it through.
    // Built with join('\n') so the fixture carries no template-literal
    // indentation, unlike the previous test.
    const visible = [
      'D1 — Choose your approach',
      '',
      '❯ 1. Approach A',
      '  2. Approach B',
      '',
      '⏺ Write(/Users/me/.claude/plans/draft.md)',
      '⎿  Wrote 42 lines',
    ].join('\n');
    const result = classifyVisible(visible, { strictPlanWrites: true });
    expect(result?.outcome).toBe('asked');
  });

  test('strictPlanWrites: only a permission dialog visible → plan write still flagged', () => {
    // A permission dialog ❯ 1./2. is NOT an AUQ; pre-AUQ plan writes still
    // hit the detector even when a permission prompt is on screen.
    const visible = `
      ⏺ Edit(/Users/me/.claude/plans/some-plan.md)

      Edit to /Users/me/.claude/plans/some-plan.md

      Do you want to proceed?

      ❯ 1. Yes
        2. No
    `;
    const result = classifyVisible(visible, { strictPlanWrites: true });
    expect(result?.outcome).toBe('wrote_findings_before_asking');
  });

  test('strictPlanWrites OFF: plan write before AUQ → returns null (legacy behavior preserved)', () => {
    const visible = `
      ⏺ Edit(/Users/me/.claude/plans/some-plan.md)
      ⎿  Updated 12 lines
    `;
    // Without strictPlanWrites, the sanctioned-path list lets this through.
    expect(classifyVisible(visible)).toBeNull();
  });
});

// parseNumberedOptions turns a visible TTY frame into `{ index, label }`
// entries. Covered here: the clean start-of-line cursor layout, prose with
// no cursor (empty result), and the box layout where the cursor for option 1
// lands inline after header/prompt text.
describe('parseNumberedOptions', () => {
  test('extracts options from a clean cursor list', () => {
    const visible = `
      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
    `;
    const opts = parseNumberedOptions(visible);
    expect(opts).toHaveLength(2);
    expect(opts[0]).toEqual({ index: 1, label: 'HOLD SCOPE' });
    expect(opts[1]).toEqual({ index: 2, label: 'SCOPE EXPANSION' });
  });

  test('returns empty array on prose-with-numbers (no cursor)', () => {
    expect(parseNumberedOptions('text 1. one 2. two')).toEqual([]);
  });

  test('extracts options when the cursor is INLINE with prompt header (box-layout)', () => {
    // Real /plan-ceo-review rendering: the TTY's cursor-positioning escapes
    // collapse divider + header + prompt + cursor onto one logical line.
    // Subsequent options (2..7) still start their own lines.
    const visible = [
      '────────────────────────────────────────',
      '☐ Review scope                                                     What scope do you want me to CEO-review?                                                     ❯ 1. The branch\'s diff vs main',
      '   Review the full branch: ~10K LOC.',
      '2. A specific plan file or design doc',
      '   You point me at a file (path) and I review that.',
      '3. An idea you\'ll describe inline',
      '4. Cancel — wrong skill',
      '5. Type something.',
      '────────────────────────────────────────',
      '6. Chat about this',
      '7. Skip interview and plan immediately',
    ].join('\n');
    const opts = parseNumberedOptions(visible);
    // Description lines and dividers between options must not become options.
    expect(opts).toHaveLength(7);
    expect(opts[0]).toEqual({ index: 1, label: "The branch's diff vs main" });
    expect(opts[1]?.index).toBe(2);
    expect(opts[6]?.index).toBe(7);
    expect(opts[6]?.label).toBe('Skip interview and plan immediately');
  });

  test('inline-cursor and start-of-line cursor layouts both produce the same 3 options', () => {
    // The inline path captures option 1 from the cursor line itself; the
    // subsequent-lines path captures the remaining options (2..3 here) with
    // the existing optionRe. Both layouts must parse to an identical result.
    const inlineLayout = [
      'header text                                                     ❯ 1. first option',
      '2. second',
      '3. third',
    ].join('\n');
    expect(parseNumberedOptions(inlineLayout)).toEqual([
      { index: 1, label: 'first option' },
      { index: 2, label: 'second' },
      { index: 3, label: 'third' },
    ]);

    const cleanLayout = [
      '  ❯ 1. first option',
      '    2. second',
      '    3. third',
    ].join('\n');
    expect(parseNumberedOptions(cleanLayout)).toEqual([
      { index: 1, label: 'first option' },
      { index: 2, label: 'second' },
      { index: 3, label: 'third' },
    ]);
  });
});

describe('runPlanSkillObservation env passthrough surface', () => {
  test('ClaudePtyOptions exposes env: Record<string, string>', () => {
    // Compile-time guard only. If `env` disappears from ClaudePtyOptions (or
    // changes shape) this file stops type-checking. The real merge lives in
    // launchClaudePty's spawn call (`env: { ...process.env, ...opts.env }`),
    // so losing `env: opts.env` in the runPlanSkillObservation ->
    // launchClaudePty handoff is caught by the live PTY test, not by this one.
    const ptyOptions: ClaudePtyOptions = {
      env: { QUESTION_TUNING: 'false', EXPLAIN_LEVEL: 'default' },
    };
    const { env: passedEnv } = ptyOptions;
    expect(passedEnv).toEqual({ QUESTION_TUNING: 'false', EXPLAIN_LEVEL: 'default' });
  });
});

// ────────────────────────────────────────────────────────────────────────────
// Per-finding count primitives — Section 3 unit tests #1–#5, #7, #12.
// ────────────────────────────────────────────────────────────────────────────

// optionsSignature: stable dedupe key built from parsed option entries.
describe('optionsSignature', () => {
  test('returns a "|"-joined `index:label` string for a clean list', () => {
    const cleanList = [
      { index: 1, label: 'HOLD SCOPE' },
      { index: 2, label: 'SCOPE EXPANSION' },
    ];
    expect(optionsSignature(cleanList)).toBe('1:HOLD SCOPE|2:SCOPE EXPANSION');
  });

  test('order-independent: shuffled inputs produce the same signature', () => {
    // parseNumberedOptions hands back sorted entries already; this guards
    // the defensive sort so any future caller passing unsorted input still
    // gets the same dedupe signature.
    const shuffled = [
      { index: 2, label: 'B' },
      { index: 1, label: 'A' },
      { index: 3, label: 'C' },
    ];
    const ordered = [
      { index: 1, label: 'A' },
      { index: 2, label: 'B' },
      { index: 3, label: 'C' },
    ];
    const shuffledSig = optionsSignature(shuffled);
    const orderedSig = optionsSignature(ordered);
    expect(shuffledSig).toBe(orderedSig);
  });

  test('empty list returns empty string', () => {
    const emptySig = optionsSignature([]);
    expect(emptySig).toBe('');
  });

  test('single-item list returns just that entry', () => {
    const loneEntry = [{ index: 1, label: 'Only' }];
    expect(optionsSignature(loneEntry)).toBe('1:Only');
  });
});

// parseQuestionPrompt extracts the question text shown above (or inline
// before) the `❯ 1.` cursor of a numbered prompt. It returns '' when no
// cursor is rendered, so '' doubles as the "nothing found" sentinel.
describe('parseQuestionPrompt', () => {
  test('captures 1-line prompt above the cursor', () => {
    const visible = `
      D1 — Pick a mode

      ❯ 1. HOLD SCOPE
        2. SCOPE EXPANSION
    `;
    const prompt = parseQuestionPrompt(visible);
    expect(prompt).toBe('D1 — Pick a mode');
  });

  test('captures multi-line prompt above the cursor', () => {
    const visible = `
      D2 — Approach selection

      Which architecture should we follow?

      ❯ 1. Bypass existing helper
        2. Reuse existing helper
    `;
    const prompt = parseQuestionPrompt(visible);
    // Multi-line prompts get joined with single spaces.
    expect(prompt).toContain('D2 — Approach selection');
    expect(prompt).toContain('Which architecture should we follow?');
  });

  test('returns "" when no cursor is rendered', () => {
    expect(parseQuestionPrompt('Just some prose.\nNo cursor.')).toBe('');
  });

  test('truncates to 240 chars', () => {
    const longPrompt = 'A'.repeat(500);
    const visible = `${longPrompt}\n\n      ❯ 1. yes\n        2. no`;
    const prompt = parseQuestionPrompt(visible);
    // '' is the "no prompt found" sentinel; without this guard a parser that
    // captured nothing would satisfy the length bound vacuously.
    expect(prompt.length).toBeGreaterThan(0);
    expect(prompt.length).toBeLessThanOrEqual(240);
  });

  test('does not pull text from a previous numbered list above', () => {
    const visible = `
      ❯ 1. previous answered question
        2. previous option two

      D2 — A new question text

      ❯ 1. fresh option A
        2. fresh option B
    `;
    const prompt = parseQuestionPrompt(visible);
    // Stops at the previous numbered-list line; should NOT contain "previous answered question".
    expect(prompt).toContain('D2 — A new question text');
    expect(prompt).not.toContain('previous answered question');
  });

  test('normalizes whitespace (collapses runs of spaces and tabs)', () => {
    const visible = `D1   —    Spaced     out

      ❯ 1. yes
        2. no`;
    expect(parseQuestionPrompt(visible)).toBe('D1 — Spaced out');
  });

  test('inline-cursor box-layout: extracts prompt text BEFORE ❯1. on the cursor line', () => {
    // Real /plan-ceo-review rendering: divider + ☐ header + prompt text +
    // cursor are all on one logical line because TTY cursor-positioning
    // escapes collapse the box layout under stripAnsi.
    const visible = [
      '──────────────────',
      '☐ Review scope                                                     What scope do you want me to CEO-review?                                                     ❯ 1. The branch\'s diff vs main',
      '2. A specific plan file',
      '3. An idea inline',
    ].join('\n');
    const prompt = parseQuestionPrompt(visible);
    // Should extract "Review scope" and the prompt text, dropping the ☐ box-drawing sigil.
    expect(prompt).toContain('Review scope');
    expect(prompt).toContain('What scope do you want me to CEO-review?');
    expect(prompt).not.toContain('❯');
    expect(prompt).not.toMatch(/^☐/);
  });
});

// auqFingerprint combines prompt text and option list into one dedupe key.
describe('auqFingerprint', () => {
  test('returns the same fingerprint for identical inputs', () => {
    const menu = [
      { index: 1, label: 'A' },
      { index: 2, label: 'B' },
    ];
    const firstCall = auqFingerprint('hello', menu);
    const secondCall = auqFingerprint('hello', menu);
    expect(firstCall).toBe(secondCall);
  });

  test('different prompts with shared option labels produce DIFFERENT fingerprints', () => {
    // Regression for the collision Codex F1 caught: when fingerprints were
    // built from option labels alone, distinct findings that happened to
    // share a menu shape were merged into one.
    const commonMenu = [
      { index: 1, label: 'Add to plan' },
      { index: 2, label: 'Defer' },
      { index: 3, label: 'Build now' },
    ];
    const architectureFp = auqFingerprint('D5 — Architecture: bypass helper?', commonMenu);
    const testsFp = auqFingerprint('D6 — Tests: zero coverage?', commonMenu);
    expect(architectureFp).not.toBe(testsFp);
  });

  test('same prompt with different options produces DIFFERENT fingerprints', () => {
    const question = 'D1 — Pick a mode';
    const expansionMenu = [
      { index: 1, label: 'HOLD SCOPE' },
      { index: 2, label: 'SCOPE EXPANSION' },
    ];
    const reductionMenu = [
      { index: 1, label: 'HOLD SCOPE' },
      { index: 2, label: 'SCOPE REDUCTION' },
    ];
    expect(auqFingerprint(question, expansionMenu)).not.toBe(
      auqFingerprint(question, reductionMenu),
    );
  });

  test('whitespace-only differences in prompt do NOT change the fingerprint', () => {
    // A TTY redraw can reflow the same text with different spacing; the
    // fingerprint has to survive that or dedupe breaks.
    const menu = [{ index: 1, label: 'A' }, { index: 2, label: 'B' }];
    const reflowed = auqFingerprint('Pick   a     mode', menu);
    const compact = auqFingerprint('Pick a mode', menu);
    expect(reflowed).toBe(compact);
  });

  test('empty prompt + same options collide (caller must guard against this)', () => {
    // Contract note: fingerprints computed from an empty prompt WILL collide
    // whenever the options match. runPlanSkillCounting is expected to skip
    // empty-prompt AUQs and re-poll rather than fingerprint them.
    const menu = [{ index: 1, label: 'A' }];
    const left = auqFingerprint('', menu);
    const right = auqFingerprint('', menu);
    expect(left).toBe(right);
  });
});

describe('COMPLETION_SUMMARY_RE', () => {
  // Thin wrapper so each case reads as "does this line count as a summary?".
  // It issues the same `.test()` calls, in the same order, as direct use.
  const isSummaryLine = (line: string): boolean => COMPLETION_SUMMARY_RE.test(line);

  test('matches GSTACK REVIEW REPORT heading', () => {
    expect(isSummaryLine('## GSTACK REVIEW REPORT')).toBe(true);
  });

  test('matches Completion Summary heading (ceo + eng)', () => {
    for (const heading of ['## Completion Summary', '## Completion summary']) {
      expect(isSummaryLine(heading)).toBe(true);
    }
  });

  test('matches Status: clean (CEO review-log shape)', () => {
    for (const statusLine of ['Status: clean', 'Status: issues_open']) {
      expect(isSummaryLine(statusLine)).toBe(true);
    }
  });

  test('matches VERDICT: line', () => {
    expect(isSummaryLine('VERDICT: CLEARED — Eng Review passed')).toBe(true);
  });

  test('does NOT match prose mentions of "verdict" mid-line', () => {
    // Only a line-leading VERDICT counts; a lowercase mid-sentence mention
    // must be ignored.
    expect(isSummaryLine('the final verdict: undecided')).toBe(false);
  });
});

// assertReviewReportAtBottom: the GSTACK REVIEW REPORT section must exist
// and no later `##` heading may follow it (deeper `###` headings are fine).
describe('assertReviewReportAtBottom', () => {
  test('passes when REVIEW REPORT is the only/last ## heading', () => {
    const planDoc = `# Plan

## Context
stuff

## Approach
more stuff

## GSTACK REVIEW REPORT

| col | col |
`;
    const { ok } = assertReviewReportAtBottom(planDoc);
    expect(ok).toBe(true);
  });

  test('fails when REVIEW REPORT is missing', () => {
    const planDoc = `# Plan

## Context
stuff
`;
    const verdict = assertReviewReportAtBottom(planDoc);
    expect(verdict.ok).toBe(false);
    expect(verdict.reason).toMatch(/no GSTACK REVIEW REPORT/);
  });

  test('fails when REVIEW REPORT exists but a ## heading follows it', () => {
    const planDoc = `# Plan

## GSTACK REVIEW REPORT

| col | col |

## Late Section
oops
`;
    const verdict = assertReviewReportAtBottom(planDoc);
    expect(verdict.ok).toBe(false);
    expect(verdict.reason).toMatch(/trailing ## heading/);
    // The offending heading is reported verbatim.
    expect(verdict.trailingHeadings).toEqual(['## Late Section']);
  });

  test('passes when only ### subheadings follow REVIEW REPORT (deeper nesting allowed)', () => {
    const planDoc = `## GSTACK REVIEW REPORT

### Cross-model tension
- F1: resolved
- F2: resolved
`;
    const { ok } = assertReviewReportAtBottom(planDoc);
    expect(ok).toBe(true);
  });

  test('fails with multiple trailing ## headings reported', () => {
    const planDoc = `## GSTACK REVIEW REPORT

## First trailing

## Second trailing
`;
    const verdict = assertReviewReportAtBottom(planDoc);
    expect(verdict.ok).toBe(false);
    expect(verdict.trailingHeadings).toHaveLength(2);
  });
});

describe('Step0BoundaryPredicate per-skill', () => {
  // Builds a synthetic fingerprint for the predicate tests: options are
  // numbered 1..n in the order given, and the signature is derived from the
  // same prompt/options via auqFingerprint.
  function fingerprintFor(promptSnippet: string, optionLabels: string[]): AskUserQuestionFingerprint {
    const options = Array.from(optionLabels, (label, position) => ({
      index: position + 1,
      label,
    }));
    const signature = auqFingerprint(promptSnippet, options);
    return { signature, promptSnippet, options, observedAtMs: 0, preReview: true };
  }

  describe('ceoStep0Boundary', () => {
    test('FIRES on Step 0F mode-pick AUQ (HOLD SCOPE in options)', () => {
      const modePick = fingerprintFor('Pick a mode', [
        'HOLD SCOPE',
        'SCOPE EXPANSION',
        'SELECTIVE EXPANSION',
        'SCOPE REDUCTION',
      ]);
      expect(ceoStep0Boundary(modePick)).toBe(true);
    });

    test('FIRES on scope-selection AUQ with "Skip interview" option (skip-interview path)', () => {
      // Per calibration run 1, plan-ceo opens with a scope-selection AUQ and
      // the harness answers "Skip interview and plan immediately", which
      // bypasses Step 0 altogether. The boundary therefore has to fire on
      // this AUQ so that every later AUQ is counted toward reviewCount.
      const scopeSelection = fingerprintFor('What scope do you want me to CEO-review?', [
        "The branch's diff vs main",
        'A specific plan file',
        "An idea you'll describe inline",
        'Cancel — wrong skill',
        'Type something.',
        'Chat about this',
        'Skip interview and plan immediately',
      ]);
      expect(ceoStep0Boundary(scopeSelection)).toBe(true);
    });

    test('does NOT fire on premise challenge AUQs', () => {
      const premiseChallenge = fingerprintFor(
        'D1 — Premise check: is this the right problem?',
        ['Yes', 'No', 'Other'],
      );
      expect(ceoStep0Boundary(premiseChallenge)).toBe(false);
    });

    test('does NOT fire on review-section AUQs', () => {
      const reviewSection = fingerprintFor('Architecture: bypass helper?', [
        'Reuse existing',
        'Roll new',
        'Defer',
      ]);
      expect(ceoStep0Boundary(reviewSection)).toBe(false);
    });
  });

  describe('engStep0Boundary', () => {
    test('FIRES on cross-project learnings prompt', () => {
      const learningsPrompt = fingerprintFor(
        'Enable cross-project learnings on this machine?',
        ['Yes', 'No'],
      );
      expect(engStep0Boundary(learningsPrompt)).toBe(true);
    });

    test('FIRES on scope reduction recommendation', () => {
      const scopeReduction = fingerprintFor(
        'Scope reduction recommendation: cut to MVP?',
        ['Reduce', 'Proceed', 'Modify'],
      );
      expect(engStep0Boundary(scopeReduction)).toBe(true);
    });

    test('does NOT fire on review-section AUQs', () => {
      const reviewSection = fingerprintFor(
        'Architecture: shared mutable state?',
        ['Refactor', 'Defer', 'Skip'],
      );
      expect(engStep0Boundary(reviewSection)).toBe(false);
    });
  });

  describe('designStep0Boundary', () => {
    test('FIRES on design system / posture mention', () => {
      const posturePick = fingerprintFor(
        'Pick a design posture for this review',
        ['Polish', 'Triage', 'Expansion'],
      );
      expect(designStep0Boundary(posturePick)).toBe(true);
    });

    test('FIRES on first-dimension prompt', () => {
      const firstDimension = fingerprintFor(
        'First dimension: visual hierarchy. Score?',
        ['7', '8', '9'],
      );
      expect(designStep0Boundary(firstDimension)).toBe(true);
    });

    test('does NOT fire on later dimension AUQs', () => {
      const laterDimension = fingerprintFor('Spacing dimension score?', ['7', '8', '9']);
      expect(designStep0Boundary(laterDimension)).toBe(false);
    });
  });

  describe('devexStep0Boundary', () => {
    test('FIRES on developer persona selection', () => {
      const personaPick = fingerprintFor(
        'Pick the target persona for this review',
        ['Senior backend', 'Junior frontend', 'Other'],
      );
      expect(devexStep0Boundary(personaPick)).toBe(true);
    });

    test('FIRES on TTHW target prompt', () => {
      const tthwTarget = fingerprintFor(
        'What is the TTHW target for first run?',
        ['<5 min', '<15 min', '<30 min'],
      );
      expect(devexStep0Boundary(tthwTarget)).toBe(true);
    });

    test('does NOT fire on review-section AUQs', () => {
      const reviewSection = fingerprintFor(
        'Friction point: 5-min CI wait. Address?',
        ['Now', 'Defer', 'Skip'],
      );
      expect(devexStep0Boundary(reviewSection)).toBe(false);
    });
  });
});