mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-17 01:31:26 +08:00
merge: incorporate origin/main into community-mode branch
Conflicts resolved: - README.md: merge skill lists — keep /gstack-submit from our branch, add /plan-devex-review, /devex-review, /pair-agent from main. Accept main's team mode step 2 text. - setup: keep both our install ping (step 9) and main's team mode hook registration (step 10) - supabase/functions/telemetry-ingest/index.ts: keep our deletion (dead code removed earlier on this branch, main modified it) Main brought in: team mode (--team flag, auto-update hook, session tracking), /plan-devex-review + /devex-review skills, /pair-agent skill, open-gstack-browser, /checkpoint, /health, /humanizer. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,7 @@ let tmpDir: string;
|
||||
describeIfSelected('Skill E2E tests', [
|
||||
'browse-basic', 'browse-snapshot', 'skillmd-setup-discovery',
|
||||
'skillmd-no-local-binary', 'skillmd-outside-git', 'session-awareness',
|
||||
'operational-learning',
|
||||
], () => {
|
||||
beforeAll(() => {
|
||||
testServer = startTestServer();
|
||||
@@ -177,49 +178,96 @@ Report the exact output — either "READY: <path>" or "NEEDS_SETUP".`,
|
||||
try { fs.rmSync(nonGitDir, { recursive: true, force: true }); } catch {}
|
||||
}, 60_000);
|
||||
|
||||
testConcurrentIfSelected('contributor-mode', async () => {
|
||||
const contribDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-contrib-'));
|
||||
const logsDir = path.join(contribDir, 'contributor-logs');
|
||||
fs.mkdirSync(logsDir, { recursive: true });
|
||||
testConcurrentIfSelected('operational-learning', async () => {
|
||||
const opDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oplearn-'));
|
||||
const gstackHome = path.join(opDir, '.gstack-home');
|
||||
|
||||
// Init git repo
|
||||
const run = (cmd: string, args: string[]) =>
|
||||
spawnSync(cmd, args, { cwd: opDir, stdio: 'pipe', timeout: 5000 });
|
||||
run('git', ['init', '-b', 'main']);
|
||||
run('git', ['config', 'user.email', 'test@test.com']);
|
||||
run('git', ['config', 'user.name', 'Test']);
|
||||
fs.writeFileSync(path.join(opDir, 'app.ts'), 'console.log("hello");\n');
|
||||
run('git', ['add', '.']);
|
||||
run('git', ['commit', '-m', 'initial']);
|
||||
|
||||
// Copy bin scripts
|
||||
const binDir = path.join(opDir, 'bin');
|
||||
fs.mkdirSync(binDir, { recursive: true });
|
||||
for (const script of ['gstack-learnings-log', 'gstack-slug']) {
|
||||
fs.copyFileSync(path.join(ROOT, 'bin', script), path.join(binDir, script));
|
||||
fs.chmodSync(path.join(binDir, script), 0o755);
|
||||
}
|
||||
|
||||
// gstack-learnings-log will create the project dir automatically via gstack-slug
|
||||
|
||||
const result = await runSkillTest({
|
||||
prompt: `You are in contributor mode (gstack_contributor=true). You just ran this browse command and it failed:
|
||||
prompt: `You just ran \`npm test\` in this project and it failed with this error:
|
||||
|
||||
$ /nonexistent/browse goto https://example.com
|
||||
/nonexistent/browse: No such file or directory
|
||||
Error: --experimental-vm-modules flag is required for ESM support in this project.
|
||||
Run: npm test --experimental-vm-modules
|
||||
|
||||
Per the contributor mode instructions, file a field report to ${logsDir}/browse-missing-binary.md using the Write tool. Include all required sections: title, what you tried, what happened, rating, repro steps, raw output, what would make it a 10, and the date/version footer.`,
|
||||
workingDirectory: contribDir,
|
||||
Per the Operational Self-Improvement instructions below, log an operational learning about this failure.
|
||||
|
||||
## Operational Self-Improvement
|
||||
|
||||
Before completing, reflect on this session:
|
||||
- Did any commands fail unexpectedly?
|
||||
|
||||
If yes, log an operational learning for future sessions:
|
||||
|
||||
\`\`\`bash
|
||||
GSTACK_HOME="${gstackHome}" ${binDir}/gstack-learnings-log '{"skill":"qa","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}'
|
||||
\`\`\`
|
||||
|
||||
Replace SHORT_KEY with a kebab-case key like "esm-vm-modules-flag".
|
||||
Replace DESCRIPTION with a one-sentence description of what you learned.
|
||||
Replace N with a confidence score 1-10.
|
||||
|
||||
Log the operational learning now. Then say what you logged.`,
|
||||
workingDirectory: opDir,
|
||||
maxTurns: 5,
|
||||
timeout: 30_000,
|
||||
testName: 'contributor-mode',
|
||||
testName: 'operational-learning',
|
||||
runId,
|
||||
});
|
||||
|
||||
logCost('contributor mode', result);
|
||||
// Override passed: this test intentionally triggers a browse error (nonexistent binary)
|
||||
// so browseErrors will be non-empty — that's expected, not a failure
|
||||
recordE2E(evalCollector, 'contributor mode report', 'Skill E2E tests', result, {
|
||||
passed: result.exitReason === 'success',
|
||||
logCost('operational learning', result);
|
||||
|
||||
const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
|
||||
|
||||
// Check if learnings file was created with an operational entry
|
||||
// The slug is derived from the git repo (dirname), so search all project dirs
|
||||
let hasOperational = false;
|
||||
const projectsDir = path.join(gstackHome, 'projects');
|
||||
if (fs.existsSync(projectsDir)) {
|
||||
for (const slug of fs.readdirSync(projectsDir)) {
|
||||
const lPath = path.join(projectsDir, slug, 'learnings.jsonl');
|
||||
if (fs.existsSync(lPath)) {
|
||||
const jsonl = fs.readFileSync(lPath, 'utf-8').trim();
|
||||
if (jsonl) {
|
||||
const entries = jsonl.split('\n').map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
|
||||
const opEntry = entries.find(e => e.type === 'operational');
|
||||
if (opEntry) {
|
||||
hasOperational = true;
|
||||
console.log(`Operational learning logged: key="${opEntry.key}" insight="${opEntry.insight}" (slug: ${slug})`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordE2E(evalCollector, 'operational learning', 'Skill E2E tests', result, {
|
||||
passed: exitOk && hasOperational,
|
||||
});
|
||||
|
||||
// Verify a contributor log was created with expected format
|
||||
const logFiles = fs.readdirSync(logsDir).filter(f => f.endsWith('.md'));
|
||||
expect(logFiles.length).toBeGreaterThan(0);
|
||||
|
||||
// Verify report has key structural sections (agent may phrase differently)
|
||||
const logContent = fs.readFileSync(path.join(logsDir, logFiles[0]), 'utf-8');
|
||||
// Must have a title (# heading)
|
||||
expect(logContent).toMatch(/^#\s/m);
|
||||
// Must mention the failed command or browse
|
||||
expect(logContent).toMatch(/browse|nonexistent|not found|no such file/i);
|
||||
// Must have some kind of rating
|
||||
expect(logContent).toMatch(/rating|\/10/i);
|
||||
// Must have steps or reproduction info
|
||||
expect(logContent).toMatch(/step|repro|reproduce/i);
|
||||
expect(exitOk).toBe(true);
|
||||
expect(hasOperational).toBe(true);
|
||||
|
||||
// Clean up
|
||||
try { fs.rmSync(contribDir, { recursive: true, force: true }); } catch {}
|
||||
try { fs.rmSync(opDir, { recursive: true, force: true }); } catch {}
|
||||
}, 90_000);
|
||||
|
||||
testConcurrentIfSelected('session-awareness', async () => {
|
||||
|
||||
Reference in New Issue
Block a user