test(browser-skills): gate-tier E2E for /scrape + /skillify (D4)

Five scenarios cover the productivity loop and the contracts locked during the v1.19.0.0 plan review:

- scrape-match-path — an intent matching the bundled hackernews-frontpage skill routes via $B skill run; no prototype phase
- scrape-prototype-path — no matching skill; drives $B against a local file:// fixture, returns JSON, suggests /skillify
- skillify-happy-path — /scrape then /skillify; skill written to ~/.gstack/browser-skills/<name>/ with the full file tree; SKILL.md prose body must not contain conversation fragments (D2)
- skillify-provenance-refusal — cold /skillify with no prior /scrape refuses with the D1 message; nothing on disk (D1)
- skillify-approval-reject — /scrape then /skillify but reject in the approval gate; temp dir is removed, nothing at the final tier path (D3)

All five are gate-tier (~$0.50-$1.50 each, ~$5 total per CI run). Set EVALS=1 to enable. Uses local file:// fixtures so the prototype + skillify scenarios run deterministically without network.

Touchfiles registers all 5 entries with proper deps on scrape/**, skillify/**, browse/src/browser-skill-write.ts, and the Phase 1 runtime modules. The match-path test depends on the bundled hackernews-frontpage skill, so its touchfile includes browser-skills/hackernews-frontpage/**.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 18:32:28 +08:00 · 2026-04-27 18:34:07 -07:00
parent e0b454fe58
commit b5904dc11f
2 changed files with 482 additions and 0 deletions
--- a/test/helpers/touchfiles.ts
+++ b/test/helpers/touchfiles.ts
@@ -242,6 +242,29 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
  // Multi-provider benchmark adapters — live API smoke against real claude/codex/gemini CLIs
  'benchmark-providers-live': ['bin/gstack-model-benchmark', 'test/helpers/providers/**', 'test/helpers/benchmark-runner.ts', 'test/helpers/pricing.ts'],

+  // Browser-skills Phase 2a — /scrape + /skillify (v1.19.0.0). Gate-tier
+  // E2E covers the D1 (provenance guard) and D3 (atomic write) contracts
+  // plus the basic loop. Deps are drawn, per entry, from: the two skill
+  // templates, the D3 helper, the Phase 1 runtime, and (match-path only)
+  // the bundled hackernews-frontpage reference.
+  'scrape-match-path': [
+    'scrape/**', 'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
+    'browser-skills/hackernews-frontpage/**',
+  ],
+  'scrape-prototype-path': [
+    'scrape/**', 'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
+  ],
+  'skillify-happy-path': [
+    'skillify/**', 'scrape/**', 'browse/src/browser-skill-write.ts',
+    'browse/src/browser-skills.ts', 'browse/src/browser-skill-commands.ts',
+  ],
+  'skillify-provenance-refusal': [
+    'skillify/**', 'browse/src/browser-skill-write.ts',
+  ],
+  'skillify-approval-reject': [
+    'skillify/**', 'scrape/**', 'browse/src/browser-skill-write.ts',
+  ],
+
  // Skill routing — journey-stage tests (depend on ALL skill descriptions)
  'journey-ideation':       ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
  'journey-plan-eng':       ['*/SKILL.md.tmpl', 'SKILL.md.tmpl', 'scripts/gen-skill-docs.ts'],
@@ -478,6 +501,13 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
  // Multi-provider benchmark — periodic (requires external CLIs + auth, paid)
  'benchmark-providers-live': 'periodic',

+  // Browser-skills Phase 2a — gate (D1/D3 contracts must not silently break)
+  'scrape-match-path': 'gate',
+  'scrape-prototype-path': 'gate',
+  'skillify-happy-path': 'gate',
+  'skillify-provenance-refusal': 'gate',
+  'skillify-approval-reject': 'gate',
+
  // Skill routing — periodic (LLM routing is non-deterministic)
  'journey-ideation': 'periodic',
  'journey-plan-eng': 'periodic',