diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c792f1c4..1a3225c07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,61 @@ # Changelog +## [1.38.1.0] - 2026-05-14 + +## **Every review skill ends with a build-actionable task checklist. Federation sync stops dropping office-hours design docs. Surrogate sanitization gets a defense-in-depth second layer on top of v1.38.0.0's choke point.** +## **Two community-filed issues land as one wave: per-skill Implementation Tasks with JSONL handoff to `/autoplan`, and root-level artifact patterns in `.brain-allowlist`. Plus a testable `buildCommandResponse` extraction and JSON-escape sanitizer on top of v1.38.0.0's `handleCommandInternal` choke-point fix for #1440.** + +v1.38.0.0 (just shipped) put surrogate sanitization at the architectural choke point inside `handleCommandInternal` — every command result is now sanitized once before any caller (HTTP, `/batch`, scoped-token dispatch) sees it. This release adds a defense-in-depth second layer: `buildCommandResponse` is extracted from `handleCommand` as an exported pure function, so the HTTP-response boundary is independently unit-testable, and a `stripLoneSurrogateEscapes` pass handles `\uXXXX` JSON escape sequences in case any payload was already JSON-stringified before reaching the choke point. The two layers compose: choke point catches raw surrogates at result-build time, boundary catches anything that slipped through as escape text. + +All four review skills (CEO / design / eng / DX) now end with an `## Implementation Tasks` markdown checklist and write a `jq`-built JSONL artifact to `~/.gstack/projects/$SLUG/tasks-{phase}-{datetime}.jsonl`. `/autoplan`'s Phase 4 reads all four files, scopes by current branch + 5-commit window, dedupes on exact `(component, sorted(files), title)` matches, and renders one aggregated list inside the final approval gate. 
Tasks that derive from the same finding now collapse; tasks that just happen to touch the same file with different titles surface separately so the human can decide whether they're the same work. Standalone review runs (`/plan-eng-review` alone, etc.) produce their own task list and JSONL file even outside autoplan — the JSONL is the handoff contract. + +Federation sync (`gstack-brain-sync`) was silently skipping root-level design and test-plan docs — `/office-hours` and `/plan-eng-review` write at `projects/{slug}/{user}-{branch}-design-*.md`, but the allowlist only knew about `projects/*/designs/*.md` and `projects/*/ceo-plans/*.md`. New patterns ship in `.brain-allowlist`, `.brain-privacy-map.json` (classified as `artifact`), and `.gitattributes` (with `merge=union` to handle cross-machine conflicts). An idempotent jq-based migration (`gstack-upgrade/migrations/v1.38.1.0.sh`) patches existing installs in-place without re-running `gstack-artifacts-init` (which would have done a git commit + push and clobbered user state). + +### The numbers that matter + +Source: `bun test browse/test/sanitize.test.ts browse/test/build-command-response.test.ts test/artifacts-init-migration.test.ts` — 32 new unit tests covering every fix surface, all green. 
+ +| Surface | Before | After | +|---|---|---| +| API 400 from `$B text` on surrogate-containing page | Crash | Sanitized at extraction + chokepoint | +| API 400 from `$B html`, `$B accessibility`, `$B batch` | Crash (chokepoint bypassed) | Sanitized at `buildCommandResponse` + `/batch` envelope | +| Application/json bodies with `\uXXXX` escape surrogates | Still crash (regex matches raw codepoints only) | Second-pass `stripLoneSurrogateEscapes` handles escape text | +| `/autoplan` final output | Decision summary, no task list | Decision summary **plus** aggregated `Implementation Tasks` from all 4 phases | +| Standalone `/plan-eng-review` output | Required-outputs sections, no task list | Same **plus** per-skill `Implementation Tasks` + JSONL handoff | +| `/office-hours` design docs in federation queue | Silently skipped (root-level not in allowlist) | Queued, classified `artifact`, union-merge rule applied | +| Lone surrogate sanitizer perf on 1MB clean text | n/a | <500ms (single regex pass) | +| `buildCommandResponse` testability | Embedded inside `handleCommand`, not exported | Extracted, exported, 7 unit tests cover it | + +### What this means for builders + +Page captures with mixed-script Unicode round-trip cleanly to the Claude API now. Every review skill you run ends with a checkbox list of build tasks you can hand to Claude Code or Codex. Federation sync picks up the design docs that were silently dropping out of your brain repo. Run `/gstack-upgrade` to pick up the migration that patches your `.brain-allowlist`, `.brain-privacy-map.json`, and `.gitattributes` in place; no commit + push, no user-state clobber. + +### Itemized changes + +#### Fixed + +- **Defense in depth on top of v1.38.0.0's surrogate sanitization (#1440)** — v1.38.0.0 sanitizes at `handleCommandInternal` (the choke point all callers go through). 
This release adds a second layer at the HTTP-response boundary: `browse/src/sanitize.ts` (new) exports `stripLoneSurrogates`, `stripLoneSurrogateEscapes` (handles `\uXXXX` JSON-escape variants the raw-codepoint regex misses), and `sanitizeBody` (picks the right pass for text/plain vs application/json). `buildCommandResponse` is extracted from `handleCommand` and exported so the response boundary is unit-testable without spinning up the server. `/batch` also gets a per-result + envelope sanitize as belt-and-suspenders. Defense-in-depth wraps at `getCleanText`, `getCleanTextWithStripping`, `html`, `accessibility`, and `snapshot` extraction sites so downstream consumers (datamarking, envelope wrapping) see clean text before any further processing. +- **Federation sync drops `/office-hours` and `/plan-eng-review` artifacts (#1452)** — `bin/gstack-artifacts-init` adds `projects/*/*-design-*.md` and `projects/*/*-test-plan-*.md` to all three managed blocks: `.brain-allowlist`, `.brain-privacy-map.json` (class `artifact`), and `.gitattributes` (`merge=union`). +- **`/setup-gbrain` wrong config key (#1441)** — verified already-fixed in v1.27.0.0; closed the issue with a comment citing the migration script that aligns legacy `gbrain_sync_mode` installs to the current `artifacts_sync_mode` key. + +#### Added + +- **`## Implementation Tasks` section + JSONL handoff in every review skill (#1454)** — `plan-ceo-review`, `plan-design-review`, `plan-eng-review`, `plan-devex-review` each emit a per-skill markdown checklist and write `~/.gstack/projects/$SLUG/tasks-{phase}-{datetime}.jsonl` via `jq -nc` (never hand-rolled echo). `/autoplan` Phase 4 reads all four phase JSONL files, scopes by current branch and 5-commit window, dedupes on exact `(component, sorted(files), title)` matches, and renders one aggregated list. Near-duplicates surface separately with a possible-duplicate note for human resolution. 
+- **`browse/src/sanitize.ts`** — two surrogate-stripping utilities plus a convenience selector keyed on content-type. Pairs with a refactored `buildCommandResponse` in `server.ts` (exported for testability) and per-result sanitization in the `/batch` handler. +- **`gstack-upgrade/migrations/v1.38.1.0.sh`** — idempotent per-file repair for `.brain-allowlist`, `.brain-privacy-map.json`, and `.gitattributes`. Uses `jq` for the JSON file (preserves validity); falls back with a clear warning if `jq` is missing. Does NOT re-run `gstack-artifacts-init` (which would commit + push to the user's federated repo). +- **32 new unit tests** across `browse/test/sanitize.test.ts` (18), `browse/test/build-command-response.test.ts` (7), `test/artifacts-init-migration.test.ts` (7). All gate-tier (free, runs on every PR). + +#### Changed + +- **`browse/src/snapshot.ts`, `read-commands.ts`, `content-security.ts`** — defense-in-depth surrogate wraps at extraction sites that feed pre-Response consumers (datamarking, envelope wrapping). +- **`scripts/resolvers/tasks-section.ts`** (new) + **`scripts/task-emission-schema.ts`** (new) — shared resolver and schema for the per-skill task emission. Each review template invokes `{{TASKS_SECTION_EMIT:}}` once. + +#### For contributors + +- `/codex review` on Codex CLI ≥0.130.0 was handled separately by v1.34.2.0 (the dual-path bare/exec approach). Our planning surfaced an adjacent concern: the bare path no longer carries the filesystem boundary, so codex may waste tokens reading skill files when the diff happens to touch `.claude/skills/`. Filed as a follow-up issue; not blocking this release. +- The implementation-tasks aggregation in `/autoplan` uses a structured JSONL handoff between phases rather than re-parsing markdown. Schema lives in `scripts/task-emission-schema.ts`. 
Adding a fifth review phase means adding the phase name to `VALID_PHASES` in `scripts/resolvers/tasks-section.ts` and including `{{TASKS_SECTION_EMIT:}}` in the new review template. +- Touchfiles entries are unchanged — the new tests are all gate-tier unit tests that run on `bun test`. Touchfiles is only for E2E + LLM evals. + ## [1.38.0.0] - 2026-05-14 ## **Windows install actually works across every host adapter. Page scrapes survive lone Unicode surrogates on every egress path.** diff --git a/VERSION b/VERSION index be9cbf2dc..446eb32e7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.38.0.0 +1.38.1.0 diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index c64e6e8bd..462cd9dc0 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -1599,6 +1599,81 @@ noting which items are incomplete. Do not loop indefinitely. ## Phase 4: Final Approval Gate +## Implementation Tasks aggregator + +Before rendering the Final Approval Gate output block below, aggregate the +per-phase task lists each review skill wrote. + +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="${HOME}/.gstack/projects/${SLUG:-unknown}" +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +# Commit window: last 5 commits on this branch. Drops stale standalone reviews. +COMMITS_RECENT=$(git log --format=%H -n 5 2>/dev/null | tr '\n' '|' | sed 's/|$//') + +AGGREGATED_TASKS="" +if command -v jq >/dev/null 2>&1; then + # Collect entries from all 4 phases, scoped to current branch + commit window. + # For each phase, keep only the latest run_id. Within the surviving set, + # dedupe by (component, sorted(files), title) — exact match only. + # Sort by priority (P1 > P2 > P3) then by phase order. + ALL_JSONL=$(mktemp -t autoplan-tasks.XXXXXXXX) + for phase in ceo-review design-review eng-review devex-review; do + # Use find instead of glob expansion — zsh nomatch errors otherwise when + # a phase produced no JSONL files. Sorting by name keeps the order stable. 
+ while IFS= read -r f; do + [ -f "$f" ] || continue + # Filter to current branch + recent commits, then keep records for the + # latest run_id only. (Single phase may have multiple files if the user + # re-ran the review; aggregator takes the newest.) + jq -c --arg branch "$BRANCH" --arg commits "$COMMITS_RECENT" \ + 'select(.branch == $branch and ($commits | split("|") | index(.commit) != null))' \ + "$f" 2>/dev/null >> "$ALL_JSONL" || true + done < <(find "$TASKS_DIR" -maxdepth 1 -name "tasks-$phase-*.jsonl" 2>/dev/null | sort) + # Reduce to latest run_id per phase + if [ -s "$ALL_JSONL" ]; then + jq -sc --arg phase "$phase" \ + '[.[] | select(.phase == $phase)] | (max_by(.run_id) // null) as $latest_run | if $latest_run then map(select(.run_id == $latest_run.run_id)) else [] end | .[]' \ + "$ALL_JSONL" > "$ALL_JSONL.phase" 2>/dev/null || true + # Replace with reduced version for this phase, accumulating others + jq -c --arg phase "$phase" 'select(.phase != $phase)' "$ALL_JSONL" > "$ALL_JSONL.other" 2>/dev/null || true + cat "$ALL_JSONL.other" "$ALL_JSONL.phase" > "$ALL_JSONL" + rm -f "$ALL_JSONL.phase" "$ALL_JSONL.other" + fi + done + + # Exact-match dedup by (component, sorted(files), title). Non-matches kept + # separately with a possible-duplicate marker injected by the renderer. 
+ AGGREGATED_TASKS=$(jq -s \ + 'group_by([.component, (.files | sort), .title]) + | map( + # Take the highest-priority entry per group; tie-break by phase order + sort_by({P1:0,P2:1,P3:2}[.priority] // 99, {"ceo-review":0,"design-review":1,"eng-review":2,"devex-review":3}[.phase] // 99) | .[0] + ) + | sort_by({P1:0,P2:1,P3:2}[.priority] // 99, {"ceo-review":0,"design-review":1,"eng-review":2,"devex-review":3}[.phase] // 99) + | if length == 0 then "_No actionable tasks emitted from any phase._" else + map("- [ ] **\(.id) (\(.priority), human: \(.effort_human) / CC: \(.effort_cc)) — \(.component)** — \(.title)\n - Surfaced by: \(.phase) — \(.source_finding)\n - Files: \(.files | join(", "))") | join("\n") + end' "$ALL_JSONL" 2>/dev/null | sed 's/^"//;s/"$//;s/\\n/\n/g') + rm -f "$ALL_JSONL" +else + AGGREGATED_TASKS="_jq not installed — install jq to aggregate per-phase task lists. Skipping._" +fi +``` + +Inside the Final Approval Gate output template below, render the aggregated +markdown in the `### Implementation Tasks (aggregated across phases)` section. +Substitute the contents of `$AGGREGATED_TASKS` (the bash variable set above) +before printing the message to the user. This is NOT a template placeholder +— the agent does the substitution at runtime, not gen-skill-docs at build time. + +If `$AGGREGATED_TASKS` is empty (no JSONL files found — none of the review +skills ran in this session), render: + +`_No per-phase task lists found in $TASKS_DIR for branch $BRANCH. Each review +skill writes its own; if you ran one of them but no list appears here, check +that jq is installed and the tasks--*.jsonl files exist._` + + **STOP here and present the final state to the user.** Present as a message, then use AskUserQuestion: @@ -1649,6 +1724,10 @@ I recommend [X] — [principle]. 
But [Y] is also viable: ### Deferred to TODOS.md [Items auto-deferred with reasons] + +### Implementation Tasks (aggregated across phases) +[Substitute the contents of $AGGREGATED_TASKS computed above. If empty: +"_No per-phase task lists found in $TASKS_DIR for branch $BRANCH._"] ``` **Cognitive load management:** diff --git a/autoplan/SKILL.md.tmpl b/autoplan/SKILL.md.tmpl index 6577a6725..888cddabb 100644 --- a/autoplan/SKILL.md.tmpl +++ b/autoplan/SKILL.md.tmpl @@ -769,6 +769,8 @@ noting which items are incomplete. Do not loop indefinitely. ## Phase 4: Final Approval Gate +{{TASKS_SECTION_AGGREGATE}} + **STOP here and present the final state to the user.** Present as a message, then use AskUserQuestion: @@ -819,6 +821,10 @@ I recommend [X] — [principle]. But [Y] is also viable: ### Deferred to TODOS.md [Items auto-deferred with reasons] + +### Implementation Tasks (aggregated across phases) +[Substitute the contents of $AGGREGATED_TASKS computed above. If empty: +"_No per-phase task lists found in $TASKS_DIR for branch $BRANCH._"] ``` **Cognitive load management:** diff --git a/bin/gstack-artifacts-init b/bin/gstack-artifacts-init index c182077fe..3dcb339ca 100755 --- a/bin/gstack-artifacts-init +++ b/bin/gstack-artifacts-init @@ -227,6 +227,8 @@ projects/*/ceo-plans/*.md projects/*/ceo-plans/*/*.md projects/*/designs/*.md projects/*/designs/*/*.md +projects/*/*-design-*.md +projects/*/*-test-plan-*.md projects/*/timeline.jsonl retros/*.md developer-profile.json @@ -252,6 +254,8 @@ cat > "$GSTACK_HOME/.brain-privacy-map.json" <<'EOF' {"pattern": "projects/*/ceo-plans/*/*.md", "class": "artifact"}, {"pattern": "projects/*/designs/*.md", "class": "artifact"}, {"pattern": "projects/*/designs/*/*.md", "class": "artifact"}, + {"pattern": "projects/*/*-design-*.md", "class": "artifact"}, + {"pattern": "projects/*/*-test-plan-*.md", "class": "artifact"}, {"pattern": "retros/*.md", "class": "artifact"}, {"pattern": "builder-journey.md", "class": "artifact"}, 
{"pattern": "projects/*/timeline.jsonl", "class": "behavioral"}, @@ -268,6 +272,8 @@ cat > "$GSTACK_HOME/.gitattributes" <<'EOF' retros/*.md merge=union projects/*/designs/**/*.md merge=union projects/*/ceo-plans/**/*.md merge=union +projects/*/*-design-*.md merge=union +projects/*/*-test-plan-*.md merge=union EOF # ---- register merge drivers in local git config ---- diff --git a/browse/src/content-security.ts b/browse/src/content-security.ts index 659622679..81993271b 100644 --- a/browse/src/content-security.ts +++ b/browse/src/content-security.ts @@ -12,6 +12,7 @@ import { randomBytes } from 'crypto'; import type { Page, Frame } from 'playwright'; +import { stripLoneSurrogates } from './sanitize'; // ─── Datamarking (Layer 1) ────────────────────────────────────── @@ -167,7 +168,7 @@ export async function markHiddenElements(page: Page | Frame): Promise * Uses clone + remove approach: clones body, removes marked elements, returns innerText. */ export async function getCleanTextWithStripping(page: Page | Frame): Promise { - return page.evaluate(() => { + const raw = await page.evaluate(() => { const body = document.body; if (!body) return ''; const clone = body.cloneNode(true) as HTMLElement; @@ -181,6 +182,7 @@ export async function getCleanTextWithStripping(page: Page | Frame): Promise line.length > 0) .join('\n'); }); + return stripLoneSurrogates(raw); } /** diff --git a/browse/src/read-commands.ts b/browse/src/read-commands.ts index 367770eea..486eac18a 100644 --- a/browse/src/read-commands.ts +++ b/browse/src/read-commands.ts @@ -14,6 +14,7 @@ import * as path from 'path'; import { TEMP_DIR } from './platform'; import { inspectElement, formatInspectorResult, getModificationHistory } from './cdp-inspector'; import { validateReadPath } from './path-security'; +import { stripLoneSurrogates } from './sanitize'; // Re-export for backward compatibility (tests import from read-commands) export { validateReadPath } from './path-security'; @@ -50,7 +51,7 @@ function 
wrapForEvaluate(code: string): string { * Exported for DRY reuse in meta-commands (diff). */ export async function getCleanText(page: Page | Frame): Promise { - return page.evaluate(() => { + const raw = await page.evaluate(() => { const body = document.body; if (!body) return ''; const clone = body.cloneNode(true) as HTMLElement; @@ -61,6 +62,7 @@ export async function getCleanText(page: Page | Frame): Promise { .filter(line => line.length > 0) .join('\n'); }); + return stripLoneSurrogates(raw); } /** @@ -115,9 +117,9 @@ export async function handleReadCommand( if (selector) { const resolved = await session.resolveRef(selector); if ('locator' in resolved) { - return resolved.locator.innerHTML({ timeout: 5000 }); + return stripLoneSurrogates(await resolved.locator.innerHTML({ timeout: 5000 })); } - return target.locator(resolved.selector).innerHTML({ timeout: 5000 }); + return stripLoneSurrogates(await target.locator(resolved.selector).innerHTML({ timeout: 5000 })); } // page.content() is page-only; use evaluate for frame compat const doctype = await target.evaluate(() => { @@ -125,7 +127,7 @@ export async function handleReadCommand( return dt ? `` : ''; }); const html = await target.evaluate(() => document.documentElement.outerHTML); - return doctype ? `${doctype}\n${html}` : html; + return stripLoneSurrogates(doctype ? `${doctype}\n${html}` : html); } case 'links': { @@ -173,7 +175,7 @@ export async function handleReadCommand( case 'accessibility': { const snapshot = await target.locator("body").ariaSnapshot(); - return snapshot; + return stripLoneSurrogates(snapshot); } case 'js': { diff --git a/browse/src/sanitize.ts b/browse/src/sanitize.ts new file mode 100644 index 000000000..800a077d4 --- /dev/null +++ b/browse/src/sanitize.ts @@ -0,0 +1,34 @@ +// Lone Unicode surrogate sanitization. 
+// +// Lone surrogates (\uD800-\uDFFF without a matching pair) are valid UTF-16 +// but invalid UTF-8, so JSON.stringify produces output the Claude API rejects +// with HTTP 400 "no low surrogate in string". Page captures from real-world +// HTML hit this when content contains broken emoji bytes or mid-emoji splits. +// +// Two sanitizers are needed because both forms appear in browse responses: +// - Raw UTF-16 surrogates in text/plain bodies (pre-stringify state). +// - JSON \uXXXX escape sequences after JSON.stringify already ran. +// Both replace lone surrogates with U+FFFD (replacement character). + +const LONE_SURROGATE_HIGH = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])/g; +const LONE_SURROGATE_LOW = /(? { - const cr = await handleCommandInternal(body, tokenInfo); +/** + * Build the HTTP response from a CommandResult. Pure function so it can be + * unit-tested without spinning up the server (#1440). Defense in depth on top + * of handleCommandInternal's choke-point sanitization: this catches any + * \uXXXX JSON-escape surrogate forms that the raw-codepoint regex above + * misses when the body has already been JSON-stringified. + */ +export function buildCommandResponse(cr: CommandResult): Response { const contentType = cr.json ? 'application/json' : 'text/plain'; - return new Response(cr.result, { + const safeBody = typeof cr.result === 'string' ? 
sanitizeBody(cr.result, !!cr.json) : cr.result; + return new Response(safeBody, { status: cr.status, headers: { 'Content-Type': contentType, ...cr.headers }, }); } +/** HTTP wrapper — converts CommandResult to Response */ +async function handleCommand(body: any, tokenInfo?: TokenInfo | null): Promise { + const cr = await handleCommandInternal(body, tokenInfo); + return buildCommandResponse(cr); +} + async function shutdown(exitCode: number = 0) { if (isShuttingDown) return; isShuttingDown = true; @@ -2017,10 +2030,13 @@ export async function start() { tokenInfo, { skipRateCheck: true, skipActivity: true }, ); + // Sanitize lone surrogates per-result (#1440 — /batch bypasses the + // handleCommand chokepoint, so it needs its own sanitization). + const safeResult = typeof cr.result === 'string' ? sanitizeBody(cr.result, !!cr.json) : cr.result; results.push({ index: i, status: cr.status, - result: cr.result, + result: safeResult, command: cmd.command, tabId: cmd.tabId, }); @@ -2040,13 +2056,17 @@ export async function start() { clientId: tokenInfo?.clientId, }); - return new Response(JSON.stringify({ + // Sanitize the JSON envelope a second time (defense in depth) — catches + // any \uXXXX escape sequences for lone surrogates that survived the + // per-result pass. 
+ const batchBody = stripLoneSurrogateEscapes(JSON.stringify({ results, duration, total: commands.length, succeeded: results.filter(r => r.status === 200).length, failed: results.filter(r => r.status !== 200).length, - }), { + })); + return new Response(batchBody, { status: 200, headers: { 'Content-Type': 'application/json' }, }); diff --git a/browse/src/snapshot.ts b/browse/src/snapshot.ts index 103296e3e..0ed80f0c7 100644 --- a/browse/src/snapshot.ts +++ b/browse/src/snapshot.ts @@ -22,6 +22,7 @@ import type { TabSession, RefEntry } from './tab-session'; import * as Diff from 'diff'; import { TEMP_DIR, isPathWithin } from './platform'; import { escapeEnvelopeSentinels } from './content-security'; +import { stripLoneSurrogates } from './sanitize'; // Roles considered "interactive" for the -i flag const INTERACTIVE_ROLES = new Set([ @@ -576,7 +577,7 @@ export async function handleSnapshot( } session.setLastSnapshot(snapshotText); - return diffOutput.join('\n'); + return stripLoneSurrogates(diffOutput.join('\n')); } // Store for future diffs @@ -623,8 +624,8 @@ export async function handleSnapshot( parts.push('═══ BEGIN UNTRUSTED WEB CONTENT ═══'); parts.push(...safeUntrusted); parts.push('═══ END UNTRUSTED WEB CONTENT ═══'); - return parts.join('\n'); + return stripLoneSurrogates(parts.join('\n')); } - return output.join('\n'); + return stripLoneSurrogates(output.join('\n')); } diff --git a/browse/test/build-command-response.test.ts b/browse/test/build-command-response.test.ts new file mode 100644 index 000000000..e11cc9774 --- /dev/null +++ b/browse/test/build-command-response.test.ts @@ -0,0 +1,68 @@ +// Unit test for buildCommandResponse — the exported response builder that +// sanitizes lone Unicode surrogates at the HTTP boundary (#1440, D7 + D13). +// +// The function is exported from server.ts specifically so we can test it +// without spinning up a Bun server. 
Codex flagged in D13 finding 14 that +// "mock cr.result" wasn't testable when handleCommand was the only entry +// point; this refactor solves that. + +import { describe, expect, test } from 'bun:test'; +import { buildCommandResponse } from '../src/server'; + +describe('buildCommandResponse', () => { + test('sanitizes lone surrogates in text/plain body', async () => { + const cr = { status: 200, result: `pre\uD800post`, json: false }; + const res = buildCommandResponse(cr as any); + expect(res.headers.get('content-type')).toBe('text/plain'); + expect(await res.text()).toBe(`pre�post`); + }); + + test('sanitizes lone escape sequences in application/json body', async () => { + // cr.result is already JSON-stringified by handleCommand callers when + // cr.json=true. Surrogate escape sequences in the stringified form must + // be neutralized. + const cr = { status: 200, result: '{"name":"\\uD800"}', json: true }; + const res = buildCommandResponse(cr as any); + expect(res.headers.get('content-type')).toBe('application/json'); + expect(await res.text()).toBe('{"name":"\\uFFFD"}'); + }); + + test('non-string cr.result passes through unchanged', async () => { + // Some commands return Buffers or other ArrayBuffer-shaped bodies (e.g. + // screenshots). Sanitizer must NOT touch them. 
+ const buf = new Uint8Array([1, 2, 3, 4]); + const cr = { status: 200, result: buf, json: false }; + const res = buildCommandResponse(cr as any); + // body returned verbatim; reading as array buffer should give same bytes + const out = new Uint8Array(await res.arrayBuffer()); + expect(out.length).toBe(4); + expect(out[0]).toBe(1); + expect(out[3]).toBe(4); + }); + + test('clean text passes through unchanged', async () => { + const cr = { status: 200, result: 'Hello, world!', json: false }; + const res = buildCommandResponse(cr as any); + expect(await res.text()).toBe('Hello, world!'); + }); + + test('status code propagates', async () => { + const cr = { status: 404, result: 'Not found', json: false }; + const res = buildCommandResponse(cr as any); + expect(res.status).toBe(404); + }); + + test('extra headers propagate', async () => { + const cr = { status: 200, result: 'ok', json: false, headers: { 'X-Custom': 'value' } }; + const res = buildCommandResponse(cr as any); + expect(res.headers.get('x-custom')).toBe('value'); + }); + + test('JSON error body with lone surrogate is sanitized', async () => { + // Errors set cr.json=true; a stringified error containing surrogates would + // still crash the API without this sanitization. 
+ const cr = { status: 500, result: '{"error":"crash at \\uDC00 byte"}', json: true }; + const res = buildCommandResponse(cr as any); + expect(await res.text()).toBe('{"error":"crash at \\uFFFD byte"}'); + }); +}); diff --git a/browse/test/content-security.test.ts b/browse/test/content-security.test.ts index 6c98e3a3d..8c760460d 100644 --- a/browse/test/content-security.test.ts +++ b/browse/test/content-security.test.ts @@ -584,7 +584,17 @@ describe('Envelope sentinel escape', () => { test('scoped snapshot branch applies escapeEnvelopeSentinels to untrusted lines', () => { const branchStart = SNAPSHOT_SRC.indexOf('splitForScoped'); expect(branchStart).toBeGreaterThan(-1); - const branchEnd = SNAPSHOT_SRC.indexOf("return output.join('\\n');", branchStart); + // Match either the original return (pre-#1440) or the surrogate-sanitized + // form (post-#1440) — both end the scoped branch. + const candidates = [ + "return output.join('\\n');", + "return stripLoneSurrogates(output.join('\\n'));", + ]; + let branchEnd = -1; + for (const c of candidates) { + const idx = SNAPSHOT_SRC.indexOf(c, branchStart); + if (idx > branchStart) { branchEnd = idx; break; } + } expect(branchEnd).toBeGreaterThan(branchStart); const branch = SNAPSHOT_SRC.slice(branchStart, branchEnd); // The escape helper must be invoked on the untrusted lines, and diff --git a/browse/test/sanitize.test.ts b/browse/test/sanitize.test.ts new file mode 100644 index 000000000..618ff5070 --- /dev/null +++ b/browse/test/sanitize.test.ts @@ -0,0 +1,112 @@ +// Unit tests for browse/src/sanitize.ts (#1440). +// Covers stripLoneSurrogates (raw UTF-16) and stripLoneSurrogateEscapes +// (\uXXXX escape text) used by the response chokepoints. 
+ +import { describe, expect, test } from 'bun:test'; +import { stripLoneSurrogates, stripLoneSurrogateEscapes, sanitizeBody } from '../src/sanitize'; + +describe('stripLoneSurrogates', () => { + test('replaces lone high surrogate with U+FFFD', () => { + const lone = '\uD800x'; + const out = stripLoneSurrogates(lone); + expect(out).toBe('�x'); + }); + + test('replaces lone low surrogate with U+FFFD', () => { + const lone = 'x\uDC00'; + expect(stripLoneSurrogates(lone)).toBe('x�'); + }); + + test('leaves valid surrogate pairs (emoji) unchanged', () => { + const smiley = '😀'; // U+1F600 = 😀 + expect(stripLoneSurrogates(smiley)).toBe(smiley); + }); + + test('empty string is unchanged', () => { + expect(stripLoneSurrogates('')).toBe(''); + }); + + test('mixed valid + lone surrogates', () => { + const input = `a\uD800b😀c\uDC00d`; + const out = stripLoneSurrogates(input); + expect(out).toBe(`a�b😀c�d`); + }); + + test('clean text passes through unchanged', () => { + const text = 'The quick brown fox jumps over 13 lazy dogs.'; + expect(stripLoneSurrogates(text)).toBe(text); + }); + + test('high surrogate immediately followed by high surrogate replaces both individually', () => { + const input = '\uD800\uD801'; // two lone highs in a row, neither paired + const out = stripLoneSurrogates(input); + expect(out).toBe('��'); + }); +}); + +describe('stripLoneSurrogateEscapes', () => { + test('replaces lone high surrogate ESCAPE with \\uFFFD', () => { + const json = '{"name":"\\uD800"}'; + expect(stripLoneSurrogateEscapes(json)).toBe('{"name":"\\uFFFD"}'); + }); + + test('replaces lone low surrogate ESCAPE with \\uFFFD', () => { + const json = '{"name":"\\uDC00"}'; + expect(stripLoneSurrogateEscapes(json)).toBe('{"name":"\\uFFFD"}'); + }); + + test('leaves valid escape pair unchanged', () => { + // 😀 = 😀 — must NOT be touched + const json = '{"emoji":"\\uD83D\\uDE00"}'; + expect(stripLoneSurrogateEscapes(json)).toBe(json); + }); + + test('mixed escape pairs and lone escapes', () 
=> { + const json = '{"a":"\\uD800","b":"\\uD83D\\uDE00","c":"\\uDC00"}'; + expect(stripLoneSurrogateEscapes(json)).toBe('{"a":"\\uFFFD","b":"\\uD83D\\uDE00","c":"\\uFFFD"}'); + }); + + test('clean JSON passes through unchanged', () => { + const json = '{"results":[{"status":200,"command":"text"}]}'; + expect(stripLoneSurrogateEscapes(json)).toBe(json); + }); + + test('case-insensitive matching: \\uD8aa works like \\uD8AA', () => { + expect(stripLoneSurrogateEscapes('\\uD8aa')).toBe('\\uFFFD'); + }); +}); + +describe('sanitizeBody', () => { + test('text/plain body: applies raw-surrogate strip only', () => { + const input = `pre\uD800post`; + expect(sanitizeBody(input, false)).toBe(`pre�post`); + }); + + test('JSON body: applies both raw and escape passes', () => { + // Both raw and escape variants in the same body + const input = `{"raw":"\uD800","esc":"\\uD800"}`; + const out = sanitizeBody(input, true); + expect(out).toBe(`{"raw":"�","esc":"\\uFFFD"}`); + }); + + test('clean text/plain body unchanged', () => { + const text = 'Hello world\nLine 2'; + expect(sanitizeBody(text, false)).toBe(text); + }); + + test('clean JSON body unchanged', () => { + const json = '{"ok":true}'; + expect(sanitizeBody(json, true)).toBe(json); + }); +}); + +describe('perf smoke', () => { + test('1MB of clean text sanitizes in <500ms', () => { + const big = 'A'.repeat(1024 * 1024); + const start = performance.now(); + const out = stripLoneSurrogates(big); + const elapsed = performance.now() - start; + expect(out.length).toBe(big.length); + expect(elapsed).toBeLessThan(500); + }); +}); diff --git a/gstack-upgrade/migrations/v1.38.1.0.sh b/gstack-upgrade/migrations/v1.38.1.0.sh new file mode 100755 index 000000000..2a56634d7 --- /dev/null +++ b/gstack-upgrade/migrations/v1.38.1.0.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +# Migration: v1.38.1.0 — add root-level design + test-plan patterns to +# .brain-allowlist, .brain-privacy-map.json, and .gitattributes (#1452). 
+# +# Why a migration: gstack-artifacts-init regenerates these files but also +# does `git commit + push` on ~/.gstack/, which would clobber user state on +# upgrade. Instead, we do targeted per-file in-place repairs. +# +# Per-file independent — if one file is missing we still repair the others. +# +# Idempotent: each insertion is gated on `not already present` so re-running +# the migration is a no-op. + +# No `set -e` — we intentionally tolerate per-file failures so other repairs +# still run. `set -u` is fine. +set -u + +GSTACK_HOME="${HOME}/.gstack" +ALLOWLIST="${GSTACK_HOME}/.brain-allowlist" +PRIVACY="${GSTACK_HOME}/.brain-privacy-map.json" +GITATTRS="${GSTACK_HOME}/.gitattributes" + +MIGRATION_DIR="${GSTACK_HOME}/.migrations" +DONE="${MIGRATION_DIR}/v1.38.1.0.done" + +mkdir -p "${MIGRATION_DIR}" 2>/dev/null || true +if [ -f "${DONE}" ]; then + exit 0 +fi + +NEW_PATTERNS=( + 'projects/*/*-design-*.md' + 'projects/*/*-test-plan-*.md' +) + +added_any=0 + +# ----- .brain-allowlist --------------------------------------------------- +if [ -f "${ALLOWLIST}" ]; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + if ! grep -Fq -- "${PATTERN}" "${ALLOWLIST}" 2>/dev/null; then + # Insert before USER ADDITIONS marker. BSD sed (-i.bak) compat for macOS; + # the backup file is removed afterward. + if grep -q '^# ---- USER ADDITIONS BELOW' "${ALLOWLIST}" 2>/dev/null; then + sed -i.bak "/^# ---- USER ADDITIONS BELOW/i\\ +${PATTERN} +" "${ALLOWLIST}" && rm -f "${ALLOWLIST}.bak" + added_any=1 + else + # Marker missing — append at end of file as a fallback. User may have + # custom-edited the file; better to add than skip silently. + printf '%s\n' "${PATTERN}" >> "${ALLOWLIST}" + added_any=1 + fi + fi + done +fi + +# ----- .brain-privacy-map.json ------------------------------------------- +# Uses jq to preserve JSON validity. Skips with a warning if jq is missing. 
+if [ -f "${PRIVACY}" ]; then + if command -v jq >/dev/null 2>&1; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + if ! jq -e --arg p "${PATTERN}" 'map(select(.pattern == $p)) | length > 0' "${PRIVACY}" >/dev/null 2>&1; then + if jq --arg p "${PATTERN}" '. += [{"pattern": $p, "class": "artifact"}]' "${PRIVACY}" > "${PRIVACY}.tmp" 2>/dev/null; then + mv "${PRIVACY}.tmp" "${PRIVACY}" + added_any=1 + else + rm -f "${PRIVACY}.tmp" + echo " [v1.38.1.0] WARN: jq failed to patch ${PRIVACY}; skipping pattern ${PATTERN}." >&2 + fi + fi + done + else + echo " [v1.38.1.0] WARN: jq not found; skipping privacy-map repair. Install jq and re-run gstack-upgrade, or run gstack-artifacts-init manually." >&2 + fi +fi + +# ----- .gitattributes ----------------------------------------------------- +if [ -f "${GITATTRS}" ]; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + RULE="${PATTERN} merge=union" + if ! grep -Fq -- "${RULE}" "${GITATTRS}" 2>/dev/null; then + printf '%s\n' "${RULE}" >> "${GITATTRS}" + added_any=1 + fi + done +fi + +# Mark done. Even if no patches were applied (already-current install), we +# write the touchfile so the migration runs once. +touch "${DONE}" + +if [ "${added_any}" = "1" ]; then + echo " [v1.38.1.0] allowlist/privacy-map/gitattributes patched for root-level design + test-plan artifacts (idempotent)" >&2 +fi + +# NEVER `git commit + push` from this migration. The user controls when the +# patches ship into their federated artifacts repo (next gstack-brain-sync +# --once or a manual commit). + +exit 0 diff --git a/package.json b/package.json index 6cfa824f1..7429a0ffa 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "1.38.0.0", + "version": "1.38.1.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. 
One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 0f1738aec..227db949b 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1816,6 +1816,78 @@ For EXPANSION and SELECTIVE EXPANSION modes: expansion opportunities and delight ### Stale Diagram Audit List every ASCII diagram in files this plan touches. Still accurate? +## Implementation Tasks + +Before closing this review, synthesize the findings above into a flat list of +build-actionable tasks. Each task derives from a specific finding — no padding. +Emit the markdown section AND write a JSONL artifact that `/autoplan` can +aggregate across phases. + +### Markdown section (always emit) + +```markdown +## Implementation Tasks +Synthesized from this review's findings. Each task derives from a specific +finding above. Run with Claude Code or Codex; checkbox as you ship. + +- [ ] **T1 (P1, human: ~2h / CC: ~15min)** — + - Surfaced by:
+ - Files: + - Verify: +- [ ] **T2 (P2, human: ~30min / CC: ~5min)** — ... +``` + +Rules: +- P1 blocks ship; P2 should land same branch; P3 is a follow-up TODO. +- If a finding produced no actionable task, do not invent one. +- If a section had zero findings, emit `_No new tasks from
._` +- Effort uses the AI-compression table from CLAUDE.md. + +### JSONL artifact (always write, even if zero tasks) + +`/autoplan` reads this file to aggregate across phases. Build each line with +`jq -nc` so titles and source findings containing quotes, newlines, or +backslashes serialize cleanly — never use hand-rolled `echo` / `printf`. + +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="${HOME}/.gstack/projects/${SLUG:-unknown}" +mkdir -p "$TASKS_DIR" +TASKS_FILE="$TASKS_DIR/tasks-ceo-review-$(date +%Y%m%d-%H%M%S).jsonl" +COMMIT=$(git rev-parse HEAD 2>/dev/null || echo unknown) +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-$$" + +# Repeat ONE jq invocation per task identified during this review. +# Substitute the placeholders inline with shell variables you set per task: +# TASK_ID (T1, T2, ...), PRIORITY (P1/P2/P3), COMPONENT, TITLE, +# SOURCE_FINDING, EFFORT_HUMAN, EFFORT_CC, FILES_JSON (a JSON array literal +# like '["browse/src/sanitize.ts","browse/src/server.ts"]'). +jq -nc \ + --arg phase 'ceo-review' \ + --arg run_id "$RUN_ID" \ + --arg branch "$BRANCH" \ + --arg commit "$COMMIT" \ + --arg id "$TASK_ID" \ + --arg priority "$PRIORITY" \ + --arg component "$COMPONENT" \ + --arg effort_human "$EFFORT_HUMAN" \ + --arg effort_cc "$EFFORT_CC" \ + --arg title "$TITLE" \ + --arg source_finding "$SOURCE_FINDING" \ + --argjson files "$FILES_JSON" \ + '{phase:$phase, run_id:$run_id, branch:$branch, commit:$commit, id:$id, priority:$priority, component:$component, files:$files, effort_human:$effort_human, effort_cc:$effort_cc, title:$title, source_finding:$source_finding}' \ + >> "$TASKS_FILE" +``` + +If `jq` is not installed, fall back to skipping the JSONL write and warn +the user to install jq for autoplan aggregation. Never hand-roll JSONL. 
+ +If zero tasks were identified in this review, still touch the JSONL file +(`: > "$TASKS_FILE"`) so the aggregator sees that the phase produced output +this run (an empty file means "ran, no findings" — distinct from "didn't run"). + + ### Completion Summary ``` +====================================================================+ diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl index 7da8f5761..6069ff2b1 100644 --- a/plan-ceo-review/SKILL.md.tmpl +++ b/plan-ceo-review/SKILL.md.tmpl @@ -736,6 +736,8 @@ For EXPANSION and SELECTIVE EXPANSION modes: expansion opportunities and delight ### Stale Diagram Audit List every ASCII diagram in files this plan touches. Still accurate? +{{TASKS_SECTION_EMIT:ceo-review}} + ### Completion Summary ``` +====================================================================+ diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 699bacf69..dc2f2136a 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -1587,6 +1587,78 @@ For design debt: missing a11y, unresolved responsive behavior, deferred empty st Then present options: **A)** Add to TODOS.md **B)** Skip — not valuable enough **C)** Build it now in this PR instead of deferring. +## Implementation Tasks + +Before closing this review, synthesize the findings above into a flat list of +build-actionable tasks. Each task derives from a specific finding — no padding. +Emit the markdown section AND write a JSONL artifact that `/autoplan` can +aggregate across phases. + +### Markdown section (always emit) + +```markdown +## Implementation Tasks +Synthesized from this review's findings. Each task derives from a specific +finding above. Run with Claude Code or Codex; checkbox as you ship. + +- [ ] **T1 (P1, human: ~2h / CC: ~15min)** — + - Surfaced by:
+ - Files: + - Verify: +- [ ] **T2 (P2, human: ~30min / CC: ~5min)** — ... +``` + +Rules: +- P1 blocks ship; P2 should land same branch; P3 is a follow-up TODO. +- If a finding produced no actionable task, do not invent one. +- If a section had zero findings, emit `_No new tasks from
._` +- Effort uses the AI-compression table from CLAUDE.md. + +### JSONL artifact (always write, even if zero tasks) + +`/autoplan` reads this file to aggregate across phases. Build each line with +`jq -nc` so titles and source findings containing quotes, newlines, or +backslashes serialize cleanly — never use hand-rolled `echo` / `printf`. + +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="${HOME}/.gstack/projects/${SLUG:-unknown}" +mkdir -p "$TASKS_DIR" +TASKS_FILE="$TASKS_DIR/tasks-design-review-$(date +%Y%m%d-%H%M%S).jsonl" +COMMIT=$(git rev-parse HEAD 2>/dev/null || echo unknown) +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-$$" + +# Repeat ONE jq invocation per task identified during this review. +# Substitute the placeholders inline with shell variables you set per task: +# TASK_ID (T1, T2, ...), PRIORITY (P1/P2/P3), COMPONENT, TITLE, +# SOURCE_FINDING, EFFORT_HUMAN, EFFORT_CC, FILES_JSON (a JSON array literal +# like '["browse/src/sanitize.ts","browse/src/server.ts"]'). +jq -nc \ + --arg phase 'design-review' \ + --arg run_id "$RUN_ID" \ + --arg branch "$BRANCH" \ + --arg commit "$COMMIT" \ + --arg id "$TASK_ID" \ + --arg priority "$PRIORITY" \ + --arg component "$COMPONENT" \ + --arg effort_human "$EFFORT_HUMAN" \ + --arg effort_cc "$EFFORT_CC" \ + --arg title "$TITLE" \ + --arg source_finding "$SOURCE_FINDING" \ + --argjson files "$FILES_JSON" \ + '{phase:$phase, run_id:$run_id, branch:$branch, commit:$commit, id:$id, priority:$priority, component:$component, files:$files, effort_human:$effort_human, effort_cc:$effort_cc, title:$title, source_finding:$source_finding}' \ + >> "$TASKS_FILE" +``` + +If `jq` is not installed, fall back to skipping the JSONL write and warn +the user to install jq for autoplan aggregation. Never hand-roll JSONL. 
+ +If zero tasks were identified in this review, still touch the JSONL file +(`: > "$TASKS_FILE"`) so the aggregator sees that the phase produced output +this run (an empty file means "ran, no findings" — distinct from "didn't run"). + + ### Completion Summary ``` +====================================================================+ diff --git a/plan-design-review/SKILL.md.tmpl b/plan-design-review/SKILL.md.tmpl index 24e89a01e..3c05c76a9 100644 --- a/plan-design-review/SKILL.md.tmpl +++ b/plan-design-review/SKILL.md.tmpl @@ -372,6 +372,8 @@ For design debt: missing a11y, unresolved responsive behavior, deferred empty st Then present options: **A)** Add to TODOS.md **B)** Skip — not valuable enough **C)** Build it now in this PR instead of deferring. +{{TASKS_SECTION_EMIT:design-review}} + ### Completion Summary ``` +====================================================================+ diff --git a/plan-devex-review/SKILL.md b/plan-devex-review/SKILL.md index 886964a58..d1421a59a 100644 --- a/plan-devex-review/SKILL.md +++ b/plan-devex-review/SKILL.md @@ -1840,6 +1840,78 @@ DX IMPLEMENTATION CHECKLIST [ ] Community channel exists and is monitored ``` +## Implementation Tasks + +Before closing this review, synthesize the findings above into a flat list of +build-actionable tasks. Each task derives from a specific finding — no padding. +Emit the markdown section AND write a JSONL artifact that `/autoplan` can +aggregate across phases. + +### Markdown section (always emit) + +```markdown +## Implementation Tasks +Synthesized from this review's findings. Each task derives from a specific +finding above. Run with Claude Code or Codex; checkbox as you ship. + +- [ ] **T1 (P1, human: ~2h / CC: ~15min)** — + - Surfaced by:
+ - Files: + - Verify: +- [ ] **T2 (P2, human: ~30min / CC: ~5min)** — ... +``` + +Rules: +- P1 blocks ship; P2 should land same branch; P3 is a follow-up TODO. +- If a finding produced no actionable task, do not invent one. +- If a section had zero findings, emit `_No new tasks from
._` +- Effort uses the AI-compression table from CLAUDE.md. + +### JSONL artifact (always write, even if zero tasks) + +`/autoplan` reads this file to aggregate across phases. Build each line with +`jq -nc` so titles and source findings containing quotes, newlines, or +backslashes serialize cleanly — never use hand-rolled `echo` / `printf`. + +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="${HOME}/.gstack/projects/${SLUG:-unknown}" +mkdir -p "$TASKS_DIR" +TASKS_FILE="$TASKS_DIR/tasks-devex-review-$(date +%Y%m%d-%H%M%S).jsonl" +COMMIT=$(git rev-parse HEAD 2>/dev/null || echo unknown) +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-$$" + +# Repeat ONE jq invocation per task identified during this review. +# Substitute the placeholders inline with shell variables you set per task: +# TASK_ID (T1, T2, ...), PRIORITY (P1/P2/P3), COMPONENT, TITLE, +# SOURCE_FINDING, EFFORT_HUMAN, EFFORT_CC, FILES_JSON (a JSON array literal +# like '["browse/src/sanitize.ts","browse/src/server.ts"]'). +jq -nc \ + --arg phase 'devex-review' \ + --arg run_id "$RUN_ID" \ + --arg branch "$BRANCH" \ + --arg commit "$COMMIT" \ + --arg id "$TASK_ID" \ + --arg priority "$PRIORITY" \ + --arg component "$COMPONENT" \ + --arg effort_human "$EFFORT_HUMAN" \ + --arg effort_cc "$EFFORT_CC" \ + --arg title "$TITLE" \ + --arg source_finding "$SOURCE_FINDING" \ + --argjson files "$FILES_JSON" \ + '{phase:$phase, run_id:$run_id, branch:$branch, commit:$commit, id:$id, priority:$priority, component:$component, files:$files, effort_human:$effort_human, effort_cc:$effort_cc, title:$title, source_finding:$source_finding}' \ + >> "$TASKS_FILE" +``` + +If `jq` is not installed, fall back to skipping the JSONL write and warn +the user to install jq for autoplan aggregation. Never hand-roll JSONL. 
+ +If zero tasks were identified in this review, still touch the JSONL file +(`: > "$TASKS_FILE"`) so the aggregator sees that the phase produced output +this run (an empty file means "ran, no findings" — distinct from "didn't run"). + + ### Unresolved Decisions If any AskUserQuestion goes unanswered, note here. Never silently default. diff --git a/plan-devex-review/SKILL.md.tmpl b/plan-devex-review/SKILL.md.tmpl index 5b68c2ea5..f95b8835f 100644 --- a/plan-devex-review/SKILL.md.tmpl +++ b/plan-devex-review/SKILL.md.tmpl @@ -776,6 +776,8 @@ DX IMPLEMENTATION CHECKLIST [ ] Community channel exists and is monitored ``` +{{TASKS_SECTION_EMIT:devex-review}} + ### Unresolved Decisions If any AskUserQuestion goes unanswered, note here. Never silently default. diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 881455550..9ba4def39 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -1423,6 +1423,78 @@ Format: `Lane A: step1 → step2 (sequential, shared models/)` / `Lane B: step3 4. **Conflict flags** — if two parallel lanes touch the same module directory, flag it: "Lanes X and Y both touch module/ — potential merge conflict. Consider sequential execution or careful coordination." +## Implementation Tasks + +Before closing this review, synthesize the findings above into a flat list of +build-actionable tasks. Each task derives from a specific finding — no padding. +Emit the markdown section AND write a JSONL artifact that `/autoplan` can +aggregate across phases. + +### Markdown section (always emit) + +```markdown +## Implementation Tasks +Synthesized from this review's findings. Each task derives from a specific +finding above. Run with Claude Code or Codex; checkbox as you ship. + +- [ ] **T1 (P1, human: ~2h / CC: ~15min)** — + - Surfaced by:
+ - Files: + - Verify: +- [ ] **T2 (P2, human: ~30min / CC: ~5min)** — ... +``` + +Rules: +- P1 blocks ship; P2 should land same branch; P3 is a follow-up TODO. +- If a finding produced no actionable task, do not invent one. +- If a section had zero findings, emit `_No new tasks from
._` +- Effort uses the AI-compression table from CLAUDE.md. + +### JSONL artifact (always write, even if zero tasks) + +`/autoplan` reads this file to aggregate across phases. Build each line with +`jq -nc` so titles and source findings containing quotes, newlines, or +backslashes serialize cleanly — never use hand-rolled `echo` / `printf`. + +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="${HOME}/.gstack/projects/${SLUG:-unknown}" +mkdir -p "$TASKS_DIR" +TASKS_FILE="$TASKS_DIR/tasks-eng-review-$(date +%Y%m%d-%H%M%S).jsonl" +COMMIT=$(git rev-parse HEAD 2>/dev/null || echo unknown) +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-$$" + +# Repeat ONE jq invocation per task identified during this review. +# Substitute the placeholders inline with shell variables you set per task: +# TASK_ID (T1, T2, ...), PRIORITY (P1/P2/P3), COMPONENT, TITLE, +# SOURCE_FINDING, EFFORT_HUMAN, EFFORT_CC, FILES_JSON (a JSON array literal +# like '["browse/src/sanitize.ts","browse/src/server.ts"]'). +jq -nc \ + --arg phase 'eng-review' \ + --arg run_id "$RUN_ID" \ + --arg branch "$BRANCH" \ + --arg commit "$COMMIT" \ + --arg id "$TASK_ID" \ + --arg priority "$PRIORITY" \ + --arg component "$COMPONENT" \ + --arg effort_human "$EFFORT_HUMAN" \ + --arg effort_cc "$EFFORT_CC" \ + --arg title "$TITLE" \ + --arg source_finding "$SOURCE_FINDING" \ + --argjson files "$FILES_JSON" \ + '{phase:$phase, run_id:$run_id, branch:$branch, commit:$commit, id:$id, priority:$priority, component:$component, files:$files, effort_human:$effort_human, effort_cc:$effort_cc, title:$title, source_finding:$source_finding}' \ + >> "$TASKS_FILE" +``` + +If `jq` is not installed, fall back to skipping the JSONL write and warn +the user to install jq for autoplan aggregation. Never hand-roll JSONL. 
+ +If zero tasks were identified in this review, still touch the JSONL file +(`: > "$TASKS_FILE"`) so the aggregator sees that the phase produced output +this run (an empty file means "ran, no findings" — distinct from "didn't run"). + + ### Completion summary At the end of the review, fill in and display this summary so the user can see all findings at a glance: - Step 0: Scope Challenge — ___ (scope accepted as-is / scope reduced per recommendation) diff --git a/plan-eng-review/SKILL.md.tmpl b/plan-eng-review/SKILL.md.tmpl index a950c060d..fea0ea328 100644 --- a/plan-eng-review/SKILL.md.tmpl +++ b/plan-eng-review/SKILL.md.tmpl @@ -264,6 +264,8 @@ Format: `Lane A: step1 → step2 (sequential, shared models/)` / `Lane B: step3 4. **Conflict flags** — if two parallel lanes touch the same module directory, flag it: "Lanes X and Y both touch module/ — potential merge conflict. Consider sequential execution or careful coordination." +{{TASKS_SECTION_EMIT:eng-review}} + ### Completion summary At the end of the review, fill in and display this summary so the user can see all findings at a glance: - Step 0: Scope Challenge — ___ (scope accepted as-is / scope reduced per recommendation) diff --git a/scripts/resolvers/index.ts b/scripts/resolvers/index.ts index d96b729dc..78d592b0e 100644 --- a/scripts/resolvers/index.ts +++ b/scripts/resolvers/index.ts @@ -22,6 +22,7 @@ import { generateModelOverlay } from './model-overlay'; import { generateGBrainContextLoad, generateGBrainSaveResults } from './gbrain'; import { generateQuestionPreferenceCheck, generateQuestionLog, generateInlineTuneFeedback } from './question-tuning'; import { generateMakePdfSetup } from './make-pdf'; +import { generateTasksSectionEmit, generateTasksSectionAggregate } from './tasks-section'; export const RESOLVERS: Record = { SLUG_EVAL: generateSlugEval, @@ -77,4 +78,6 @@ export const RESOLVERS: Record = { QUESTION_LOG: generateQuestionLog, INLINE_TUNE_FEEDBACK: generateInlineTuneFeedback, MAKE_PDF_SETUP: 
generateMakePdfSetup, + TASKS_SECTION_EMIT: generateTasksSectionEmit, + TASKS_SECTION_AGGREGATE: generateTasksSectionAggregate, }; diff --git a/scripts/resolvers/tasks-section.ts b/scripts/resolvers/tasks-section.ts new file mode 100644 index 000000000..2f86f588b --- /dev/null +++ b/scripts/resolvers/tasks-section.ts @@ -0,0 +1,168 @@ +/** + * Resolvers for the Implementation Tasks emission (#1454). + * + * {{TASKS_SECTION_EMIT:}} — per-skill task emission + JSONL write + * {{TASKS_SECTION_AGGREGATE}} — autoplan aggregation across all phases + * + * Schema for the JSONL artifact lives in scripts/task-emission-schema.ts. + */ + +import type { TemplateContext, ResolverFn } from './types'; + +const VALID_PHASES = new Set(['ceo-review', 'design-review', 'eng-review', 'devex-review']); + +export const generateTasksSectionEmit: ResolverFn = (_ctx: TemplateContext, args?: string[]) => { + const phase = args?.[0]; + if (!phase || !VALID_PHASES.has(phase)) { + throw new Error(`TASKS_SECTION_EMIT requires one of ${[...VALID_PHASES].join(', ')} — got ${phase}`); + } + + return `## Implementation Tasks + +Before closing this review, synthesize the findings above into a flat list of +build-actionable tasks. Each task derives from a specific finding — no padding. +Emit the markdown section AND write a JSONL artifact that \`/autoplan\` can +aggregate across phases. + +### Markdown section (always emit) + +\`\`\`markdown +## Implementation Tasks +Synthesized from this review's findings. Each task derives from a specific +finding above. Run with Claude Code or Codex; checkbox as you ship. + +- [ ] **T1 (P1, human: ~2h / CC: ~15min)** — + - Surfaced by:
+ - Files: + - Verify: +- [ ] **T2 (P2, human: ~30min / CC: ~5min)** — ... +\`\`\` + +Rules: +- P1 blocks ship; P2 should land same branch; P3 is a follow-up TODO. +- If a finding produced no actionable task, do not invent one. +- If a section had zero findings, emit \`_No new tasks from
._\` +- Effort uses the AI-compression table from CLAUDE.md. + +### JSONL artifact (always write, even if zero tasks) + +\`/autoplan\` reads this file to aggregate across phases. Build each line with +\`jq -nc\` so titles and source findings containing quotes, newlines, or +backslashes serialize cleanly — never use hand-rolled \`echo\` / \`printf\`. + +\`\`\`bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="\${HOME}/.gstack/projects/\${SLUG:-unknown}" +mkdir -p "$TASKS_DIR" +TASKS_FILE="$TASKS_DIR/tasks-${phase}-$(date +%Y%m%d-%H%M%S).jsonl" +COMMIT=$(git rev-parse HEAD 2>/dev/null || echo unknown) +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)-$$" + +# Repeat ONE jq invocation per task identified during this review. +# Substitute the placeholders inline with shell variables you set per task: +# TASK_ID (T1, T2, ...), PRIORITY (P1/P2/P3), COMPONENT, TITLE, +# SOURCE_FINDING, EFFORT_HUMAN, EFFORT_CC, FILES_JSON (a JSON array literal +# like '["browse/src/sanitize.ts","browse/src/server.ts"]'). +jq -nc \\ + --arg phase '${phase}' \\ + --arg run_id "$RUN_ID" \\ + --arg branch "$BRANCH" \\ + --arg commit "$COMMIT" \\ + --arg id "$TASK_ID" \\ + --arg priority "$PRIORITY" \\ + --arg component "$COMPONENT" \\ + --arg effort_human "$EFFORT_HUMAN" \\ + --arg effort_cc "$EFFORT_CC" \\ + --arg title "$TITLE" \\ + --arg source_finding "$SOURCE_FINDING" \\ + --argjson files "$FILES_JSON" \\ + '{phase:$phase, run_id:$run_id, branch:$branch, commit:$commit, id:$id, priority:$priority, component:$component, files:$files, effort_human:$effort_human, effort_cc:$effort_cc, title:$title, source_finding:$source_finding}' \\ + >> "$TASKS_FILE" +\`\`\` + +If \`jq\` is not installed, fall back to skipping the JSONL write and warn +the user to install jq for autoplan aggregation. Never hand-roll JSONL. 
+ +If zero tasks were identified in this review, still touch the JSONL file +(\`: > "$TASKS_FILE"\`) so the aggregator sees that the phase produced output +this run (an empty file means "ran, no findings" — distinct from "didn't run"). +`; +}; + +export const generateTasksSectionAggregate: ResolverFn = (_ctx: TemplateContext) => { + return `## Implementation Tasks aggregator + +Before rendering the Final Approval Gate output block below, aggregate the +per-phase task lists each review skill wrote. + +\`\`\`bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" +TASKS_DIR="\${HOME}/.gstack/projects/\${SLUG:-unknown}" +BRANCH=$(git branch --show-current 2>/dev/null || echo unknown) +# Commit window: last 5 commits on this branch. Drops stale standalone reviews. +COMMITS_RECENT=$(git log --format=%H -n 5 2>/dev/null | tr '\\n' '|' | sed 's/|$//') + +AGGREGATED_TASKS="" +if command -v jq >/dev/null 2>&1; then + # Collect entries from all 4 phases, scoped to current branch + commit window. + # For each phase, keep only the latest run_id. Within the surviving set, + # dedupe by (component, sorted(files), title) — exact match only. + # Sort by priority (P1 > P2 > P3) then by phase order. + ALL_JSONL=$(mktemp -t autoplan-tasks.XXXXXXXX) + for phase in ceo-review design-review eng-review devex-review; do + # Use find instead of glob expansion — zsh nomatch errors otherwise when + # a phase produced no JSONL files. Sorting by name keeps the order stable. + while IFS= read -r f; do + [ -f "$f" ] || continue + # Filter to current branch + recent commits, then keep records for the + # latest run_id only. (Single phase may have multiple files if the user + # re-ran the review; aggregator takes the newest.) 
+ jq -c --arg branch "$BRANCH" --arg commits "$COMMITS_RECENT" \\ + 'select(.branch == $branch and ($commits | split("|") | index(.commit) != null))' \\ + "$f" 2>/dev/null >> "$ALL_JSONL" || true + done < <(find "$TASKS_DIR" -maxdepth 1 -name "tasks-$phase-*.jsonl" 2>/dev/null | sort) + # Reduce to latest run_id per phase + if [ -s "$ALL_JSONL" ]; then + jq -sc --arg phase "$phase" \\ + '[.[] | select(.phase == $phase)] | (max_by(.run_id) // null) as $latest_run | if $latest_run then map(select(.run_id == $latest_run.run_id)) else [] end | .[]' \\ + "$ALL_JSONL" > "$ALL_JSONL.phase" 2>/dev/null || true + # Replace with reduced version for this phase, accumulating others + jq -c --arg phase "$phase" 'select(.phase != $phase)' "$ALL_JSONL" > "$ALL_JSONL.other" 2>/dev/null || true + cat "$ALL_JSONL.other" "$ALL_JSONL.phase" > "$ALL_JSONL" + rm -f "$ALL_JSONL.phase" "$ALL_JSONL.other" + fi + done + + # Exact-match dedup by (component, sorted(files), title). Non-matches kept + # separately with a possible-duplicate marker injected by the renderer. + AGGREGATED_TASKS=$(jq -s \\ + 'group_by([.component, (.files | sort), .title]) + | map( + # Take the highest-priority entry per group; tie-break by phase order + sort_by({P1:0,P2:1,P3:2}[.priority] // 99, {"ceo-review":0,"design-review":1,"eng-review":2,"devex-review":3}[.phase] // 99) | .[0] + ) + | sort_by({P1:0,P2:1,P3:2}[.priority] // 99, {"ceo-review":0,"design-review":1,"eng-review":2,"devex-review":3}[.phase] // 99) + | if length == 0 then "_No actionable tasks emitted from any phase._" else + map("- [ ] **\\(.id) (\\(.priority), human: \\(.effort_human) / CC: \\(.effort_cc)) — \\(.component)** — \\(.title)\\n - Surfaced by: \\(.phase) — \\(.source_finding)\\n - Files: \\(.files | join(", "))") | join("\\n") + end' "$ALL_JSONL" 2>/dev/null | sed 's/^"//;s/"$//;s/\\\\n/\\n/g') + rm -f "$ALL_JSONL" +else + AGGREGATED_TASKS="_jq not installed — install jq to aggregate per-phase task lists. 
 Skipping._" +fi +\`\`\` + +Inside the Final Approval Gate output template below, render the aggregated +markdown in the \`### Implementation Tasks (aggregated across phases)\` section. +Substitute the contents of \`$AGGREGATED_TASKS\` (the bash variable set above) +before printing the message to the user. This is NOT a template placeholder +— the agent does the substitution at runtime, not gen-skill-docs at build time. + +If \`$AGGREGATED_TASKS\` is empty (no JSONL files found — none of the review +skills ran in this session), render: + +\`_No per-phase task lists found in $TASKS_DIR for branch $BRANCH. Each review +skill writes its own; if you ran one of them but no list appears here, check +that jq is installed and the tasks-<phase>-*.jsonl files exist._\` +`; +}; diff --git a/scripts/task-emission-schema.ts b/scripts/task-emission-schema.ts new file mode 100644 index 000000000..131b39044 --- /dev/null +++ b/scripts/task-emission-schema.ts @@ -0,0 +1,61 @@ +/** + * Schema reference for the per-skill Implementation Tasks JSONL artifact (#1454). + * + * Each review skill (plan-ceo-review, plan-design-review, plan-eng-review, + * plan-devex-review) writes one JSONL line per task during its synthesis step + * to `~/.gstack/projects/$SLUG/tasks-{phase}-{datetime}.jsonl`. + * + * `/autoplan`'s Phase 4 aggregator reads ALL phase JSONL files, scopes them + * by branch + commit window, dedupes by exact (component, sorted(files), title), + * and renders an `## Implementation Tasks (aggregated across phases)` section + * inside the Final Approval Gate output. + * + * Wire format: one JSON object per line. Build via `jq -nc` from bash — never + * by hand-rolled echo/printf, because task titles and source findings may + * contain quotes, newlines, and backslashes. + */ + +export type TaskPhase = 'ceo-review' | 'design-review' | 'eng-review' | 'devex-review'; +export type TaskPriority = 'P1' | 'P2' | 'P3'; + +/** + * One row in tasks-{phase}-{datetime}.jsonl. 
All fields required unless noted. + */ +export interface ImplementationTask { + /** Which review phase produced this task. */ + phase: TaskPhase; + /** Unique run identifier for this phase invocation (timestamp + pid suffix). */ + run_id: string; + /** Branch the review ran on. Aggregator filters by this. */ + branch: string; + /** HEAD commit at review time. Aggregator filters by commit-window proximity. */ + commit: string; + /** Short task id, unique within a single run_id (T1, T2, ...). */ + id: string; + priority: TaskPriority; + /** Coarse component label (e.g., `browse/sanitizer`, `auth/login`). */ + component: string; + /** Files the task touches. Aggregator sorts this and uses it in the dedup key. */ + files: string[]; + /** Human-team effort estimate (e.g., "2h", "1 day"). */ + effort_human: string; + /** CC+gstack effort estimate (e.g., "15min"). */ + effort_cc: string; + /** Action-oriented title in imperative form ("Add commandResult-level sanitization"). */ + title: string; + /** Free-text reference to the finding that motivated this task. */ + source_finding: string; +} + +/** + * Dedup key for the aggregator. Two tasks collapse into one ONLY when this + * tuple is identical (per `D13 finding 9`). Near-duplicates surface as + * separate tasks with a `possible-duplicate-of: <id>` note. + */ +export function dedupKey(t: Pick<ImplementationTask, 'component' | 'files' | 'title'>): string { + return JSON.stringify({ + component: t.component, + files: [...t.files].sort(), + title: t.title, + }); +} diff --git a/test/artifacts-init-migration.test.ts b/test/artifacts-init-migration.test.ts new file mode 100644 index 000000000..e2f27f444 --- /dev/null +++ b/test/artifacts-init-migration.test.ts @@ -0,0 +1,203 @@ +// Unit tests for gstack-upgrade/migrations/v1.38.1.0.sh (#1452). +// Verifies idempotent in-place repair of .brain-allowlist, +// .brain-privacy-map.json, and .gitattributes. 
+ +import { describe, expect, test, beforeEach } from 'bun:test'; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; + +const REPO_ROOT = new URL('..', import.meta.url).pathname; +const MIGRATION = join(REPO_ROOT, 'gstack-upgrade', 'migrations', 'v1.38.1.0.sh'); + +function setupFakeHome(): string { + const dir = mkdtempSync(join(tmpdir(), 'mig-v1340-')); + mkdirSync(join(dir, '.gstack'), { recursive: true }); + return dir; +} + +function runMigration(fakeHome: string): { code: number; stdout: string; stderr: string } { + const proc = Bun.spawnSync({ + cmd: ['bash', MIGRATION], + env: { ...process.env, HOME: fakeHome }, + stdout: 'pipe', + stderr: 'pipe', + }); + return { + code: proc.exitCode ?? -1, + stdout: new TextDecoder().decode(proc.stdout), + stderr: new TextDecoder().decode(proc.stderr), + }; +} + +describe('v1.38.1.0 migration', () => { + test('adds patterns to allowlist before USER ADDITIONS marker', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-allowlist'), [ + 'projects/*/learnings.jsonl', + 'projects/*/designs/*.md', + '# ---- USER ADDITIONS BELOW ---- (survives re-init; above is managed)', + 'projects/*/my-custom.txt', + ].join('\n') + '\n'); + + const r = runMigration(home); + expect(r.code).toBe(0); + + const content = readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + expect(content).toContain('projects/*/*-design-*.md'); + expect(content).toContain('projects/*/*-test-plan-*.md'); + // New patterns above the user marker + const designIdx = content.indexOf('projects/*/*-design-*.md'); + const markerIdx = content.indexOf('# ---- USER ADDITIONS BELOW'); + expect(designIdx).toBeLessThan(markerIdx); + // User customizations below the marker preserved + expect(content).toContain('projects/*/my-custom.txt'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + 
test('adds entries to privacy-map.json via jq (preserves JSON validity)', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-privacy-map.json'), JSON.stringify([ + { pattern: 'projects/*/learnings.jsonl', class: 'artifact' }, + { pattern: 'projects/*/designs/*.md', class: 'artifact' }, + ], null, 2)); + + const r = runMigration(home); + expect(r.code).toBe(0); + + const raw = readFileSync(join(home, '.gstack', '.brain-privacy-map.json'), 'utf-8'); + // Valid JSON (would throw if jq emitted malformed output) + const parsed = JSON.parse(raw); + expect(Array.isArray(parsed)).toBe(true); + const patterns = parsed.map((e: any) => e.pattern); + expect(patterns).toContain('projects/*/*-design-*.md'); + expect(patterns).toContain('projects/*/*-test-plan-*.md'); + // Class preserved on new entries + expect(parsed.find((e: any) => e.pattern === 'projects/*/*-design-*.md').class).toBe('artifact'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('adds union-merge rules to gitattributes', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.gitattributes'), [ + '*.jsonl merge=jsonl-append', + 'projects/*/designs/**/*.md merge=union', + ].join('\n') + '\n'); + + const r = runMigration(home); + expect(r.code).toBe(0); + + const content = readFileSync(join(home, '.gstack', '.gitattributes'), 'utf-8'); + expect(content).toContain('projects/*/*-design-*.md merge=union'); + expect(content).toContain('projects/*/*-test-plan-*.md merge=union'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('is idempotent: re-running on already-patched files is a no-op', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-allowlist'), [ + 'projects/*/learnings.jsonl', + '# ---- USER ADDITIONS BELOW', + ].join('\n') + '\n'); + writeFileSync(join(home, '.gstack', '.brain-privacy-map.json'), JSON.stringify([ + { 
pattern: 'projects/*/learnings.jsonl', class: 'artifact' }, + ])); + writeFileSync(join(home, '.gstack', '.gitattributes'), '*.jsonl merge=jsonl-append\n'); + + runMigration(home); + // Remove the done marker so re-run actually executes + rmSync(join(home, '.gstack', '.migrations'), { recursive: true, force: true }); + + const beforeAllowlist = readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + const beforePrivacy = readFileSync(join(home, '.gstack', '.brain-privacy-map.json'), 'utf-8'); + const beforeAttrs = readFileSync(join(home, '.gstack', '.gitattributes'), 'utf-8'); + + runMigration(home); + + const afterAllowlist = readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + const afterPrivacy = readFileSync(join(home, '.gstack', '.brain-privacy-map.json'), 'utf-8'); + const afterAttrs = readFileSync(join(home, '.gstack', '.gitattributes'), 'utf-8'); + + expect(afterAllowlist).toBe(beforeAllowlist); + // jq may re-emit JSON with different whitespace but the parsed content + // must be identical + expect(JSON.parse(afterPrivacy)).toEqual(JSON.parse(beforePrivacy)); + expect(afterAttrs).toBe(beforeAttrs); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('repairs privacy-map even when allowlist is missing (per-file independence)', () => { + const home = setupFakeHome(); + try { + // No .brain-allowlist; only privacy-map present + writeFileSync(join(home, '.gstack', '.brain-privacy-map.json'), JSON.stringify([ + { pattern: 'projects/*/learnings.jsonl', class: 'artifact' }, + ])); + + const r = runMigration(home); + expect(r.code).toBe(0); + + // Privacy-map still patched + const parsed = JSON.parse(readFileSync(join(home, '.gstack', '.brain-privacy-map.json'), 'utf-8')); + const patterns = parsed.map((e: any) => e.pattern); + expect(patterns).toContain('projects/*/*-design-*.md'); + // Allowlist remains absent (we don't create files that weren't there) + expect(existsSync(join(home, '.gstack', 
'.brain-allowlist'))).toBe(false); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('migration marker prevents re-running', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-allowlist'), '# ---- USER ADDITIONS BELOW\n'); + runMigration(home); + // Confirm marker file exists + expect(existsSync(join(home, '.gstack', '.migrations', 'v1.38.1.0.done'))).toBe(true); + + // Modify allowlist so we can detect if the migration would re-run + writeFileSync(join(home, '.gstack', '.brain-allowlist'), '# minimal\n'); + + runMigration(home); + + // With the marker present, the migration short-circuits, so the file + // we just wrote stays unmodified + expect(readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8')).toBe('# minimal\n'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('handles allowlist without USER ADDITIONS marker (fallback to append)', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-allowlist'), [ + 'projects/*/learnings.jsonl', + 'projects/*/designs/*.md', + // no USER ADDITIONS marker + ].join('\n') + '\n'); + + const r = runMigration(home); + expect(r.code).toBe(0); + + const content = readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + expect(content).toContain('projects/*/*-design-*.md'); + expect(content).toContain('projects/*/*-test-plan-*.md'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); +});