mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-15 08:48:42 +08:00
feat: worktree isolation for E2E tests + infrastructure elegance (v0.11.12.0) (#425)
* refactor: extract gen-skill-docs into modular resolver architecture Break the 3000-line monolith into 10 domain modules under scripts/resolvers/: types, constants, preamble, utility, browse, design, testing, review, codex-helpers, and index. Each module owns one domain of template generation. The preamble module introduces a 4-tier composition system (T1-T4) so skills only pay for the preamble sections they actually need, reducing token usage for lightweight skills by ~40%. Adds a token budget dashboard that prints after every generation run showing per-skill and total token counts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: tiered preamble — skills only pay for what they use Tag all 23 templates with preamble-tier (T1-T4). Lightweight skills like /browse and /benchmark get a minimal preamble (~40% fewer tokens), while review skills get the full stack. Regenerate all SKILL.md files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: migrate eval storage to project-scoped paths Move eval results and E2E run artifacts from ~/.gstack-dev/evals/ to ~/.gstack/projects/$SLUG/evals/ so each project's eval history lives alongside its other gstack data. Falls back to legacy path if slug detection fails. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: sync package.json version with VERSION after merge Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: add WorktreeManager for isolated test environments Reusable platform module (lib/worktree.ts) that creates git worktrees for test isolation and harvests useful changes as patches. Includes SHA-256 dedup, original SHA tracking for committed change detection, and automatic gitignored artifact copying (.agents/, browse/dist/). 12 unit tests covering lifecycle, harvest, dedup, and error handling. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: integrate worktree isolation into E2E test infrastructure Add createTestWorktree(), harvestAndCleanup(), and describeWithWorktree() helpers to e2e-helpers.ts. Add harvest field to EvalTestEntry for eval-store integration. Register lib/worktree.ts as a global touchfile. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: run Gemini and Codex E2E tests in worktrees Switch both test suites from cwd: ROOT to worktree isolation. Gemini (--yolo) no longer pollutes the working tree. Codex (read-only) gets worktree for consistency. Useful changes are harvested as patches for cherry-picking. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: skip symlinks in copyDirSync to prevent infinite recursion Adversarial review caught that .claude/skills/gstack may be a symlink back to the repo root, causing copyDirSync to recurse infinitely when copying gitignored artifacts into worktrees. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: bump version and changelog (v0.11.12.0) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: relax session-awareness assertion to accept structured options The LLM consistently presents well-formatted A/B choices with pros/cons but doesn't always use the exact string "RECOMMENDATION". Accept case-insensitive "recommend", "option a", "which do you want", or "which approach" as equivalent signals of a structured recommendation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
132
scripts/resolvers/codex-helpers.ts
Normal file
132
scripts/resolvers/codex-helpers.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
import type { Host } from './types';
|
||||
|
||||
// Upper bound, in characters, on the string returned by condenseOpenAIShortDescription.
const OPENAI_SHORT_DESCRIPTION_LIMIT = 120;
|
||||
|
||||
/**
 * Pull the `name` and `description` fields out of a leading YAML frontmatter block.
 *
 * The frontmatter must open with `---\n` at offset 0 and be closed by a later
 * line that begins with `---`; otherwise both fields come back empty.
 *
 * `description` may be written inline (`description: text`) or as a block:
 * `description:` or `description: |` (optionally with a `-`/`+` chomping
 * indicator) followed by blank or indented lines. Block lines are de-indented
 * by the block's own common indentation, so the result has no residual leading
 * whitespace regardless of the indent width the file uses, while deeper
 * relative indentation inside the description is kept. Folded (`>`) scalars
 * are not interpreted; such a value is returned as inline text.
 *
 * @param content - Full file text, frontmatter included.
 * @returns The trimmed `name` and `description`; `''` for any field not found.
 */
export function extractNameAndDescription(content: string): { name: string; description: string } {
  const OPEN = '---\n';
  if (!content.startsWith(OPEN)) return { name: '', description: '' };
  const fmEnd = content.indexOf('\n---', OPEN.length);
  if (fmEnd === -1) return { name: '', description: '' };

  const frontmatter = content.slice(OPEN.length, fmEnd);
  const nameMatch = /^name:\s*(.+)$/m.exec(frontmatter);
  const name = nameMatch ? nameMatch[1].trim() : '';

  // Hoisted so the patterns are not rebuilt for every frontmatter line.
  const blockHeader = /^description:\s*(\|[+-]?)?\s*$/;
  const inlineValue = /^description:\s*\S/;

  let description = '';
  let inBlock = false;
  const blockLines: string[] = [];
  for (const line of frontmatter.split('\n')) {
    if (blockHeader.test(line)) {
      inBlock = true;
      continue;
    }
    if (inlineValue.test(line)) {
      description = line.replace(/^description:\s*/, '').trim();
      break;
    }
    if (!inBlock) continue;
    // The block ends at the first non-blank line that starts at column 0 (the next key).
    if (line !== '' && !/^\s/.test(line)) break;
    blockLines.push(line);
  }

  if (blockLines.length > 0) {
    // Common indentation = smallest offset of the first non-whitespace character
    // across lines that have content; blank lines do not constrain it.
    const inkOffsets = blockLines.map(l => l.search(/\S/)).filter(offset => offset >= 0);
    const indent = inkOffsets.length > 0 ? Math.min(...inkOffsets) : 0;
    description = blockLines
      .map(l => l.slice(indent))
      .join('\n')
      .trim();
  }

  return { name, description };
}
|
||||
|
||||
/**
 * Reduce a description to a one-line summary no longer than
 * OPENAI_SHORT_DESCRIPTION_LIMIT characters.
 *
 * Only the first paragraph (text before the first blank line) is used, with
 * every whitespace run collapsed to a single space. Text over the limit is cut
 * and suffixed with `...`; the cut backs up to the last space when that space
 * lies beyond index 40, otherwise it is a hard cut.
 *
 * @param description - Possibly multi-line, multi-paragraph description.
 * @returns The condensed single-line description.
 */
export function condenseOpenAIShortDescription(description: string): string {
  const [leadParagraph] = description.split(/\n\s*\n/);
  // An empty first paragraph falls back to the whole description.
  const oneLine = (leadParagraph || description).replace(/\s+/g, ' ').trim();
  if (oneLine.length <= OPENAI_SHORT_DESCRIPTION_LIMIT) {
    return oneLine;
  }

  const ellipsis = '...';
  // Reserve room for the ellipsis so the final string stays within the limit.
  const head = oneLine.slice(0, OPENAI_SHORT_DESCRIPTION_LIMIT - ellipsis.length);
  const wordBreak = head.lastIndexOf(' ');
  if (wordBreak > 40) {
    return head.slice(0, wordBreak) + ellipsis;
  }
  return head + ellipsis;
}
|
||||
|
||||
/**
 * Render the OpenAI agent metadata YAML for a skill.
 *
 * The document has two top-level mappings: `interface` (display name, short
 * description and a default prompt of the form `Use <displayName> for this
 * task.`) and `policy` (implicit invocation enabled). Child keys are indented
 * two spaces so they nest under their parent mapping rather than becoming
 * top-level keys. String values are encoded with `JSON.stringify`, which
 * quotes and escapes them.
 *
 * @param displayName - Skill name shown to the user; also interpolated into the default prompt.
 * @param shortDescription - One-line description of the skill.
 * @returns The YAML text, terminated by a newline.
 */
export function generateOpenAIYaml(displayName: string, shortDescription: string): string {
  const defaultPrompt = `Use ${displayName} for this task.`;
  const lines = [
    'interface:',
    `  display_name: ${JSON.stringify(displayName)}`,
    `  short_description: ${JSON.stringify(shortDescription)}`,
    `  default_prompt: ${JSON.stringify(defaultPrompt)}`,
    'policy:',
    '  allow_implicit_invocation: true',
  ];
  return `${lines.join('\n')}\n`;
}
|
||||
|
||||
/**
 * Derive the Codex skill name for a skill directory.
 *
 * The root directory (`''` or `'.'`) maps to `gstack`. Any other directory is
 * given a `gstack-` prefix unless it already has one.
 *
 * @param skillDir - Skill directory name.
 * @returns The Codex-facing skill name.
 */
export function codexSkillName(skillDir: string): string {
  const prefix = 'gstack-';
  const isRootSkill = skillDir === '' || skillDir === '.';
  if (isRootSkill) {
    return 'gstack';
  }
  // Already-prefixed names pass through: gstack-upgrade stays gstack-upgrade,
  // it does not become gstack-gstack-upgrade.
  return skillDir.startsWith(prefix) ? skillDir : `${prefix}${skillDir}`;
}
|
||||
|
||||
/**
 * Rewrite a skill file's frontmatter for a non-Claude host.
 *
 * For host `'claude'`, or when the content has no well-formed leading
 * frontmatter, the content is returned untouched. Otherwise the frontmatter is
 * replaced by one carrying only `name` and `description` (the latter emitted
 * as a YAML `|` block scalar); every other field, such as allowed-tools, hooks
 * and version, is dropped. Everything after the closing `---` is preserved.
 *
 * @param content - Full file text, frontmatter included.
 * @param host - Target host.
 * @returns The content with rewritten frontmatter, or the original content.
 * @throws Error when the extracted description is longer than 1024 characters.
 */
export function transformFrontmatter(content: string, host: Host): string {
  // Claude consumes the frontmatter exactly as authored.
  if (host === 'claude') return content;

  // Frontmatter only counts when it opens the file and has a closing fence.
  const opener = '---\n';
  if (!content.startsWith(opener)) return content;
  const closeAt = content.indexOf('\n---', opener.length);
  if (closeAt < 0) return content;

  const { name, description } = extractNameAndDescription(content);

  // Codex 1024-char description limit: fail the build rather than ship a broken skill.
  const MAX_DESC = 1024;
  if (description.length > MAX_DESC) {
    throw new Error(
      `Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). Compress the description in the .tmpl file.`
    );
  }

  // Indent each description line so it sits inside the block scalar.
  const blockLines: string[] = [];
  for (const descLine of description.split('\n')) {
    blockLines.push(` ${descLine}`);
  }

  // Skip past "\n---"; the remainder begins with the newline that followed the fence.
  const remainder = content.slice(closeAt + 4);
  const header = ['---', `name: ${name}`, 'description: |', blockLines.join('\n'), '---'].join('\n');
  return header + remainder;
}
|
||||
|
||||
/**
 * Build an inline safety advisory describing the hooks a template declares.
 *
 * The template counts as declaring hooks when some line starts with `hooks:`.
 * The hooked tools are the distinct `matcher: "<Tool>"` values found anywhere
 * in the text, kept in first-seen order. Bash, Edit and Write have dedicated
 * wording; any other tool gets a generic "check <Tool> operations for safety"
 * clause.
 *
 * @param tmplContent - Raw template text.
 * @returns A Markdown blockquote advisory, or `null` when there is no
 *   `hooks:` line or no matcher is found.
 */
export function extractHookSafetyProse(tmplContent: string): string | null {
  if (!/^hooks:/m.test(tmplContent)) return null;

  // A Set both de-duplicates and preserves insertion order.
  const hookedTools = new Set<string>();
  const matcherRegex = /matcher:\s*"(\w+)"/g;
  let found: RegExpExecArray | null;
  while ((found = matcherRegex.exec(tmplContent)) !== null) {
    hookedTools.add(found[1]);
  }
  if (hookedTools.size === 0) return null;

  // A Map (not an object literal) so tool names such as "toString" or
  // "constructor" cannot resolve to members inherited from Object.prototype.
  const toolDescriptions = new Map<string, string>([
    ['Bash', 'check bash commands for destructive operations (rm -rf, DROP TABLE, force-push, git reset --hard, etc.) before execution'],
    ['Edit', 'verify file edits are within the allowed scope boundary before applying'],
    ['Write', 'verify file writes are within the allowed scope boundary before applying'],
  ]);

  const safetyChecks = Array.from(hookedTools)
    .map(tool => toolDescriptions.get(tool) ?? `check ${tool} operations for safety`)
    .join(', and ');

  return `> **Safety Advisory:** This skill includes safety checks that ${safetyChecks}. When using this skill, always pause and verify before executing potentially destructive operations. If uncertain about a command's safety, ask the user for confirmation before proceeding.`;
}
|
||||
Reference in New Issue
Block a user