mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-21 20:28:24 +08:00
tests: split checkpoint-save-resume into context-save + context-restore E2Es
Renames the combined E2E test to match the new skill split:
- checkpoint-save-resume → context-save-writes-file: Extracts the Save flow from context-save/SKILL.md, asserts a file gets written with valid YAML frontmatter.
- New: context-restore-loads-latest: Seeds two saved-context files with different YYYYMMDD-HHMMSS prefixes AND scrambled filesystem mtimes (so mtime DISAGREES with filename order). Hand-feeds the restore flow and asserts the newer-by-filename file is loaded. Locks in the "newest by filename prefix, not mtime" guarantee.
touchfiles.ts: old 'checkpoint-save-resume' key removed from both E2E_TOUCHFILES and E2E_TIERS maps; new keys added to both. Leaving a key in one map but not the other silently breaks test selection.
Golden baselines (claude/codex/factory ship skill) regenerated to match the new preamble routing rules from the previous commit.
This commit is contained in:
3
test/fixtures/golden/claude-ship-SKILL.md
vendored
3
test/fixtures/golden/claude-ship-SKILL.md
vendored
@@ -216,7 +216,8 @@ Key routing rules:
|
|||||||
- Design system, brand → invoke design-consultation
|
- Design system, brand → invoke design-consultation
|
||||||
- Visual audit, design polish → invoke design-review
|
- Visual audit, design polish → invoke design-review
|
||||||
- Architecture review → invoke plan-eng-review
|
- Architecture review → invoke plan-eng-review
|
||||||
- Save progress, checkpoint, resume → invoke checkpoint
|
- Save progress, save state, save my work → invoke context-save
|
||||||
|
- Resume, where was I, pick up where I left off → invoke context-restore
|
||||||
- Code quality, health check → invoke health
|
- Code quality, health check → invoke health
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
3
test/fixtures/golden/codex-ship-SKILL.md
vendored
3
test/fixtures/golden/codex-ship-SKILL.md
vendored
@@ -205,7 +205,8 @@ Key routing rules:
|
|||||||
- Design system, brand → invoke design-consultation
|
- Design system, brand → invoke design-consultation
|
||||||
- Visual audit, design polish → invoke design-review
|
- Visual audit, design polish → invoke design-review
|
||||||
- Architecture review → invoke plan-eng-review
|
- Architecture review → invoke plan-eng-review
|
||||||
- Save progress, checkpoint, resume → invoke checkpoint
|
- Save progress, save state, save my work → invoke context-save
|
||||||
|
- Resume, where was I, pick up where I left off → invoke context-restore
|
||||||
- Code quality, health check → invoke health
|
- Code quality, health check → invoke health
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
3
test/fixtures/golden/factory-ship-SKILL.md
vendored
3
test/fixtures/golden/factory-ship-SKILL.md
vendored
@@ -207,7 +207,8 @@ Key routing rules:
|
|||||||
- Design system, brand → invoke design-consultation
|
- Design system, brand → invoke design-consultation
|
||||||
- Visual audit, design polish → invoke design-review
|
- Visual audit, design polish → invoke design-review
|
||||||
- Architecture review → invoke plan-eng-review
|
- Architecture review → invoke plan-eng-review
|
||||||
- Save progress, checkpoint, resume → invoke checkpoint
|
- Save progress, save state, save my work → invoke context-save
|
||||||
|
- Resume, where was I, pick up where I left off → invoke context-restore
|
||||||
- Code quality, health check → invoke health
|
- Code quality, health check → invoke health
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -107,10 +107,11 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
|||||||
// Learnings
|
// Learnings
|
||||||
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
'learnings-show': ['learn/**', 'bin/gstack-learnings-search', 'bin/gstack-learnings-log', 'scripts/resolvers/learnings.ts'],
|
||||||
|
|
||||||
// Session Intelligence (timeline, context recovery, checkpoint)
|
// Session Intelligence (timeline, context recovery, /context-save + /context-restore)
|
||||||
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
|
'timeline-event-flow': ['bin/gstack-timeline-log', 'bin/gstack-timeline-read'],
|
||||||
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
|
'context-recovery-artifacts': ['scripts/resolvers/preamble.ts', 'bin/gstack-timeline-log', 'bin/gstack-slug', 'learn/**'],
|
||||||
'checkpoint-save-resume': ['checkpoint/**', 'bin/gstack-slug'],
|
'context-save-writes-file': ['context-save/**', 'bin/gstack-slug'],
|
||||||
|
'context-restore-loads-latest': ['context-restore/**', 'bin/gstack-slug'],
|
||||||
|
|
||||||
// Document-release
|
// Document-release
|
||||||
'document-release': ['document-release/**'],
|
'document-release': ['document-release/**'],
|
||||||
@@ -247,9 +248,10 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
|||||||
'codex-offered-eng-review': 'gate',
|
'codex-offered-eng-review': 'gate',
|
||||||
|
|
||||||
// Session Intelligence — gate for data flow, periodic for agent integration
|
// Session Intelligence — gate for data flow, periodic for agent integration
|
||||||
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
|
'timeline-event-flow': 'gate', // Binary data flow (no LLM needed)
|
||||||
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
|
'context-recovery-artifacts': 'gate', // Preamble reads seeded artifacts
|
||||||
'checkpoint-save-resume': 'gate', // Checkpoint round-trip
|
'context-save-writes-file': 'gate', // /context-save writes a file
|
||||||
|
'context-restore-loads-latest': 'gate', // Cross-branch newest-by-filename restore
|
||||||
|
|
||||||
// Ship — gate (end-to-end ship path)
|
// Ship — gate (end-to-end ship path)
|
||||||
'ship-base-branch': 'gate',
|
'ship-base-branch': 'gate',
|
||||||
|
|||||||
@@ -15,10 +15,11 @@ const evalCollector = createEvalCollector('e2e-session-intelligence');
|
|||||||
|
|
||||||
// --- Session Intelligence E2E ---
|
// --- Session Intelligence E2E ---
|
||||||
// Tests the core contract: timeline events flow in, context recovery flows out,
|
// Tests the core contract: timeline events flow in, context recovery flows out,
|
||||||
// checkpoints round-trip.
|
// /context-save + /context-restore round-trip.
|
||||||
|
|
||||||
describeIfSelected('Session Intelligence E2E', [
|
describeIfSelected('Session Intelligence E2E', [
|
||||||
'timeline-event-flow', 'context-recovery-artifacts', 'checkpoint-save-resume',
|
'timeline-event-flow', 'context-recovery-artifacts',
|
||||||
|
'context-save-writes-file', 'context-restore-loads-latest',
|
||||||
], () => {
|
], () => {
|
||||||
let workDir: string;
|
let workDir: string;
|
||||||
let gstackHome: string;
|
let gstackHome: string;
|
||||||
@@ -194,28 +195,28 @@ IMPORTANT:
|
|||||||
console.log(`Context recovery: artifacts=${foundArtifacts}, lastSession=${foundLastSession}, timeline=${foundTimeline}`);
|
console.log(`Context recovery: artifacts=${foundArtifacts}, lastSession=${foundLastSession}, timeline=${foundTimeline}`);
|
||||||
}, 180_000);
|
}, 180_000);
|
||||||
|
|
||||||
// --- Test 3: Checkpoint save and resume ---
|
// --- Test 3: /context-save writes a file ---
|
||||||
// Run /checkpoint save via claude -p, verify file created. Then run /checkpoint resume
|
// Hand-feed the save section of context-save/SKILL.md to claude -p and verify
|
||||||
// and verify it reads the checkpoint back.
|
// a file gets written to the project's checkpoints dir with valid frontmatter.
|
||||||
testConcurrentIfSelected('checkpoint-save-resume', async () => {
|
testConcurrentIfSelected('context-save-writes-file', async () => {
|
||||||
const projectDir = path.join(gstackHome, 'projects', slug);
|
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||||
fs.mkdirSync(path.join(projectDir, 'checkpoints'), { recursive: true });
|
fs.mkdirSync(path.join(projectDir, 'checkpoints'), { recursive: true });
|
||||||
|
|
||||||
// Copy the /checkpoint skill
|
// Copy the /context-save skill
|
||||||
copyDirSync(path.join(ROOT, 'checkpoint'), path.join(workDir, 'checkpoint'));
|
copyDirSync(path.join(ROOT, 'context-save'), path.join(workDir, 'context-save'));
|
||||||
|
|
||||||
// Add a staged change so /checkpoint has something to capture
|
// Add a staged change so /context-save has something to capture
|
||||||
fs.writeFileSync(path.join(workDir, 'feature.ts'), 'export function newFeature() { return true; }\n');
|
fs.writeFileSync(path.join(workDir, 'feature.ts'), 'export function newFeature() { return true; }\n');
|
||||||
spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
spawnSync('git', ['add', 'feature.ts'], { cwd: workDir, stdio: 'pipe', timeout: 5000 });
|
||||||
|
|
||||||
// Extract the checkpoint save section from the skill template
|
// Extract the save section from the skill template (before the List section)
|
||||||
const full = fs.readFileSync(path.join(ROOT, 'checkpoint', 'SKILL.md'), 'utf-8');
|
const full = fs.readFileSync(path.join(ROOT, 'context-save', 'SKILL.md'), 'utf-8');
|
||||||
const saveStart = full.indexOf('## Save');
|
const saveStart = full.indexOf('## Save flow');
|
||||||
const resumeStart = full.indexOf('## Resume');
|
const listStart = full.indexOf('## List flow');
|
||||||
const saveSection = full.slice(saveStart, resumeStart > saveStart ? resumeStart : undefined);
|
const saveSection = full.slice(saveStart, listStart > saveStart ? listStart : undefined);
|
||||||
|
|
||||||
const result = await runSkillTest({
|
const result = await runSkillTest({
|
||||||
prompt: `You are testing the /checkpoint skill. Follow these instructions to save a checkpoint.
|
prompt: `You are testing the /context-save skill. Follow these instructions to save a context file.
|
||||||
|
|
||||||
${saveSection.slice(0, 2000)}
|
${saveSection.slice(0, 2000)}
|
||||||
|
|
||||||
@@ -223,7 +224,7 @@ IMPORTANT:
|
|||||||
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||||
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||||
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
Replace any references to ~/.claude/skills/gstack/bin/ with ./bin/ when running commands.
|
||||||
- Save the checkpoint to ${projectDir}/checkpoints/ with a filename like "20260401-test-checkpoint.md".
|
- Save the file to ${projectDir}/checkpoints/ with a filename like "20260401-test-context.md".
|
||||||
- Include YAML frontmatter with status, branch, and timestamp.
|
- Include YAML frontmatter with status, branch, and timestamp.
|
||||||
- Include a summary of what's being worked on (you can see from git status).
|
- Include a summary of what's being worked on (you can see from git status).
|
||||||
- Do NOT use AskUserQuestion.`,
|
- Do NOT use AskUserQuestion.`,
|
||||||
@@ -231,38 +232,134 @@ IMPORTANT:
|
|||||||
maxTurns: 10,
|
maxTurns: 10,
|
||||||
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
allowedTools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
|
||||||
timeout: 120_000,
|
timeout: 120_000,
|
||||||
testName: 'checkpoint-save-resume',
|
testName: 'context-save-writes-file',
|
||||||
runId,
|
runId,
|
||||||
});
|
});
|
||||||
|
|
||||||
logCost('checkpoint save', result);
|
logCost('context-save', result);
|
||||||
|
|
||||||
// Check that a checkpoint file was created
|
// Check that a context file was created
|
||||||
const checkpointDir = path.join(projectDir, 'checkpoints');
|
const checkpointDir = path.join(projectDir, 'checkpoints');
|
||||||
const checkpointFiles = fs.existsSync(checkpointDir)
|
const files = fs.existsSync(checkpointDir)
|
||||||
? fs.readdirSync(checkpointDir).filter(f => f.endsWith('.md'))
|
? fs.readdirSync(checkpointDir).filter(f => f.endsWith('.md'))
|
||||||
: [];
|
: [];
|
||||||
|
|
||||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||||
const checkpointCreated = checkpointFiles.length > 0;
|
const fileCreated = files.length > 0;
|
||||||
|
|
||||||
let checkpointContent = '';
|
let fileContent = '';
|
||||||
if (checkpointCreated) {
|
if (fileCreated) {
|
||||||
checkpointContent = fs.readFileSync(path.join(checkpointDir, checkpointFiles[0]), 'utf-8');
|
fileContent = fs.readFileSync(path.join(checkpointDir, files[0]), 'utf-8');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify checkpoint has expected structure
|
const hasYamlFrontmatter = fileContent.includes('---') && fileContent.includes('status:');
|
||||||
const hasYamlFrontmatter = checkpointContent.includes('---') && checkpointContent.includes('status:');
|
const hasBranch = fileContent.includes('branch:') || fileContent.includes('main');
|
||||||
const hasBranch = checkpointContent.includes('branch:') || checkpointContent.includes('main');
|
|
||||||
|
|
||||||
recordE2E(evalCollector, 'checkpoint save-resume', 'Session Intelligence E2E', result, {
|
recordE2E(evalCollector, 'context-save writes file', 'Session Intelligence E2E', result, {
|
||||||
passed: exitOk && checkpointCreated && hasYamlFrontmatter,
|
passed: exitOk && fileCreated && hasYamlFrontmatter,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(exitOk).toBe(true);
|
expect(exitOk).toBe(true);
|
||||||
expect(checkpointCreated).toBe(true);
|
expect(fileCreated).toBe(true);
|
||||||
expect(hasYamlFrontmatter).toBe(true);
|
expect(hasYamlFrontmatter).toBe(true);
|
||||||
|
|
||||||
console.log(`Checkpoint: ${checkpointFiles.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
|
console.log(`context-save: ${files.length} files created, YAML frontmatter: ${hasYamlFrontmatter}, branch: ${hasBranch}`);
|
||||||
|
}, 180_000);
|
||||||
|
|
||||||
|
// --- Test 4: /context-restore loads the newest file across branches ---
|
||||||
|
// Seed two saved-context files with different YYYYMMDD-HHMMSS prefixes and
|
||||||
|
// different branches in their frontmatter. Hand-feed the restore section to
|
||||||
|
// claude -p. Verify the agent identifies the newer file (by filename prefix)
|
||||||
|
// and presents its content, regardless of the current branch.
|
||||||
|
testConcurrentIfSelected('context-restore-loads-latest', async () => {
|
||||||
|
const projectDir = path.join(gstackHome, 'projects', slug);
|
||||||
|
const checkpointDir = path.join(projectDir, 'checkpoints');
|
||||||
|
fs.mkdirSync(checkpointDir, { recursive: true });
|
||||||
|
|
||||||
|
// Copy the /context-restore skill
|
||||||
|
copyDirSync(path.join(ROOT, 'context-restore'), path.join(workDir, 'context-restore'));
|
||||||
|
|
||||||
|
// Seed two files: older on branch-a (title "old-work"), newer on branch-b
|
||||||
|
// (title "newer-wintermute-work"). Current branch (main) matches neither.
|
||||||
|
const olderFile = path.join(checkpointDir, '20260101-120000-old-work.md');
|
||||||
|
const newerFile = path.join(checkpointDir, '20260202-130000-newer-wintermute-work.md');
|
||||||
|
fs.writeFileSync(olderFile, `---
|
||||||
|
status: in-progress
|
||||||
|
branch: branch-a
|
||||||
|
timestamp: 2026-01-01T12:00:00-07:00
|
||||||
|
---
|
||||||
|
|
||||||
|
## Working on: old work
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
This is older work on branch-a.
|
||||||
|
|
||||||
|
### Remaining Work
|
||||||
|
1. Should NOT be loaded by default restore.
|
||||||
|
`);
|
||||||
|
fs.writeFileSync(newerFile, `---
|
||||||
|
status: in-progress
|
||||||
|
branch: branch-b
|
||||||
|
timestamp: 2026-02-02T13:00:00-07:00
|
||||||
|
---
|
||||||
|
|
||||||
|
## Working on: newer wintermute work
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
This is the newest saved context. Cross-branch restore should load THIS file.
|
||||||
|
|
||||||
|
### Remaining Work
|
||||||
|
1. Finish the wintermute integration.
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Deliberately scramble mtimes so filesystem mtime DISAGREES with filename
|
||||||
|
// prefix — this proves we're using filename ordering, not ls -1t.
|
||||||
|
const pastOlderMtime = Math.floor(Date.now() / 1000); // now (newest mtime)
|
||||||
|
const pastNewerMtime = pastOlderMtime - 60 * 60 * 24 * 30; // 30 days ago
|
||||||
|
fs.utimesSync(olderFile, pastOlderMtime, pastOlderMtime);
|
||||||
|
fs.utimesSync(newerFile, pastNewerMtime, pastNewerMtime);
|
||||||
|
|
||||||
|
// Extract the restore-flow section from the skill template
|
||||||
|
const full = fs.readFileSync(path.join(ROOT, 'context-restore', 'SKILL.md'), 'utf-8');
|
||||||
|
const restoreStart = full.indexOf('## Restore flow');
|
||||||
|
const importantStart = full.indexOf('## Important Rules', restoreStart);
|
||||||
|
const restoreSection = full.slice(restoreStart, importantStart > restoreStart ? importantStart : undefined);
|
||||||
|
|
||||||
|
const result = await runSkillTest({
|
||||||
|
prompt: `You are testing the /context-restore skill. Follow these instructions to restore the most recent saved context.
|
||||||
|
|
||||||
|
${restoreSection.slice(0, 2500)}
|
||||||
|
|
||||||
|
IMPORTANT:
|
||||||
|
- Use GSTACK_HOME="${gstackHome}" as an environment variable when running bin scripts.
|
||||||
|
- The bin scripts are at ./bin/ (relative to this directory), not at ~/.claude/skills/gstack/bin/.
|
||||||
|
- Look in ${checkpointDir} for saved context files.
|
||||||
|
- Current branch is "main" — do NOT filter by current branch. Load across all branches.
|
||||||
|
- The newest file by YYYYMMDD-HHMMSS prefix is the canonical "most recent". Filesystem mtime has been scrambled — do not use it.
|
||||||
|
- Do NOT use AskUserQuestion. Just present the content of the newest file.`,
|
||||||
|
workingDirectory: workDir,
|
||||||
|
maxTurns: 8,
|
||||||
|
allowedTools: ['Bash', 'Read', 'Grep', 'Glob'],
|
||||||
|
timeout: 120_000,
|
||||||
|
testName: 'context-restore-loads-latest',
|
||||||
|
runId,
|
||||||
|
});
|
||||||
|
|
||||||
|
logCost('context-restore', result);
|
||||||
|
|
||||||
|
const output = result.output ?? '';
|
||||||
|
const loadedNewer = output.includes('newer wintermute work') || output.includes('wintermute integration');
|
||||||
|
const loadedOlder = output.includes('old work') && !output.includes('newer');
|
||||||
|
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||||
|
|
||||||
|
recordE2E(evalCollector, 'context-restore loads latest', 'Session Intelligence E2E', result, {
|
||||||
|
passed: exitOk && loadedNewer && !loadedOlder,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(exitOk).toBe(true);
|
||||||
|
expect(loadedNewer).toBe(true);
|
||||||
|
expect(loadedOlder).toBe(false);
|
||||||
|
|
||||||
|
console.log(`context-restore: loadedNewer=${loadedNewer}, loadedOlder=${loadedOlder}`);
|
||||||
}, 180_000);
|
}, 180_000);
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user