Files
gstack/test/model-overlay-opus-4-7.test.ts
Garry Tan a64d70ba35 Merge remote-tracking branch 'origin/main' into garrytan/workspace-aware-ship
Rebumped v1.8.0.0 -> v1.11.0.0 (minor-past main's v1.10.1.0) using
bin/gstack-next-version — the same queue-aware path this branch introduces.
CHANGELOG repositioned so v1.11.0.0 sits above main's new entries
(v1.10.1.0 / v1.10.0.0 / v1.9.0.0).

Conflicts resolved:
- VERSION, package.json: rebumped to v1.11.0.0 (util-picked)
- bin/gstack-config: merged both lists (workspace_root + gbrain keys)
- CHANGELOG.md: hoisted v1.11.0.0 entry above main's new entries

Pre-existing failures in main (4) documented but not fixed in this PR:
1. gstack-brain-sync secret scan > blocks bearer-json (brain-sync tests)
2. no files larger than 2MB (security-bench fixture, already TODO'd)
3. selectTests > skill-specific change (touchfiles scoping)
4. Opus 4.7 overlay pacing directive (expectation stale after v1.10.1.0
   removed the Fan out nudge)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 21:20:25 -07:00

99 lines
3.7 KiB
TypeScript

/**
* Opus 4.7 model overlay — gate-tier assertions on the pacing directive.
*
* v1.6.4.0 regressed plan-review cadence because the Opus 4.7 overlay
* carried a "Batch your questions" directive that physically rendered
* above the skill-level pacing rule. Opus 4.7 read top-to-bottom,
* absorbed batching as the ambient default, and stopped honoring the
* plan-review STOP directives.
*
* v1.7.0.0 replaces that block with "Pace questions to the skill" —
* one-question-at-a-time is now the default when the skill contains
* STOP directives; batching becomes the explicit exception.
*
* This test asserts:
* - The new "Pace questions" directive is present
* - The old "Batch your questions" directive is gone
* - The AUTO_DECIDE-compatible language survives (subordination, skill wins)
*/
import { describe, test, expect } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import type { TemplateContext } from '../scripts/resolvers/types';
import { HOST_PATHS } from '../scripts/resolvers/types';
import { generateModelOverlay } from '../scripts/resolvers/model-overlay';
/**
 * Builds the template context handed to the overlay generator in these tests.
 *
 * Every field is a fixed fixture value for the `claude` host except `model`,
 * which the caller supplies to choose the overlay being exercised.
 *
 * @param model - Model identifier to place in the context (e.g. 'opus-4-7').
 * @returns A context object with the supplied model and fixture defaults.
 */
function makeCtx(model: string): TemplateContext {
  const fixtureCtx: TemplateContext = {
    skillName: 'test-skill',
    tmplPath: 'test.tmpl',
    host: 'claude',
    paths: HOST_PATHS.claude,
    preambleTier: 2,
    model: model,
  };
  return fixtureCtx;
}
// Parent of the directory that contains this test file.
const ROOT = path.resolve(__dirname, '..');

/**
 * Gate-tier assertions on the Opus 4.7 overlay, checked against both the raw
 * markdown file and the output of generateModelOverlay.
 */
describe('Opus 4.7 overlay — pacing directive', () => {
  // Both helpers are evaluated lazily inside each test, so a missing overlay
  // file or a generator error fails only the tests that depend on it.

  /** Reads the unresolved overlay markdown from disk. */
  const readRawOverlay = (): string =>
    fs.readFileSync(path.join(ROOT, 'model-overlays/opus-4-7.md'), 'utf-8');

  /** Runs the overlay generator for the given model id. */
  const renderOverlay = (model: string) => generateModelOverlay(makeCtx(model));

  test('raw opus-4-7.md contains "Pace questions to the skill"', () => {
    expect(readRawOverlay()).toContain('Pace questions to the skill');
  });

  test('raw opus-4-7.md does NOT contain "Batch your questions" directive', () => {
    expect(readRawOverlay()).not.toContain('**Batch your questions.**');
  });

  test('resolved overlay output contains "Pace questions to the skill"', () => {
    expect(renderOverlay('opus-4-7')).toContain('Pace questions to the skill');
  });

  test('resolved overlay inherits from claude base (INHERIT:claude)', () => {
    const out = renderOverlay('opus-4-7');
    // The claude base contributes the subordination wrapper + Todo discipline
    expect(out).toContain('Todo-list discipline');
    expect(out).toContain('subordinate');
  });

  test('resolved overlay says skill STOP directives trigger one-per-turn pacing', () => {
    const out = renderOverlay('opus-4-7');
    expect(out).toMatch(/STOP\. AskUserQuestion/);
    // "pace one question per turn" already contains this phrase, so a single
    // case-insensitive pattern covers both wordings.
    expect(out).toMatch(/one question per turn/i);
  });

  test('resolved overlay requires AskUserQuestion as tool_use', () => {
    expect(renderOverlay('opus-4-7')).toContain('tool_use');
  });

  test('resolved overlay flags "obvious fix" findings still need user approval', () => {
    const out = renderOverlay('opus-4-7');
    expect(out).toMatch(/obvious fix/i);
    expect(out).toMatch(/user approval/i);
  });

  test('resolved overlay keeps Effort-match / Literal interpretation nudges', () => {
    const out = renderOverlay('opus-4-7');
    // v1.10.1.0 removed the "Fan out explicitly" nudge from the overlay, so it
    // is intentionally no longer asserted here; the other two nudges remain.
    expect(out).toContain('Effort-match the step');
    expect(out).toContain('Literal interpretation awareness');
  });

  test('claude overlay (no INHERIT chain) does not carry the pacing directive', () => {
    // Claude is the default overlay; opus-4-7 inherits FROM claude.
    // The pacing directive belongs to opus-4-7 only.
    expect(renderOverlay('claude')).not.toContain('Pace questions to the skill');
  });
});