diff --git a/CHANGELOG.md b/CHANGELOG.md index a91c9d0de..a8320798d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,79 @@ # Changelog +## [1.40.0.0] - 2026-05-16 + +## **gbrain sync stops biting users across the install path, slug algorithm, federation queue, and `.env.local` footgun.** +## **Eight community-filed bugs land as one consolidated wave with a centralized spawn surface and an upgrade migration that actually reaches existing installs.** + +The eight highest-volume gbrain-sync bugs in the backlog ship as one consolidated release. Conductor sibling worktrees stop stomping each other's per-worktree pin because `.gbrain-source` now lands in the consumer repo's `.gitignore` on every successful sync. Cross-machine federation stops colliding because the source-id hash folds hostname into its key — and existing users get a migration path that renames in place when gbrain supports it, falls back to register-new-then-remove-old when not. Slugs stop truncating mid-word (`skill` → `kill`). `DATABASE_URL` no longer leaks from a host project's `.env.local` into gbrain's auth, at both the parent `gstack-gbrain-sync` and the `gstack-memory-ingest` grandchild. The brain-allowlist finally picks up `/plan-eng-review` test plans alongside `/office-hours` design docs from v1.38.1.0 — with an idempotent migration that runs on top of v1.38.1.0's done-marker so existing users aren't orphaned. The gbrain probe stops shelling through a bash builtin. Windows MSYS/MINGW installs stop crashing on bun postinstall, with a post-install subcommand probe that flags missing native artifacts before they bite at sync time. + +### The numbers that matter + +Source: `bun test test/gstack-gbrain-sync.test.ts test/build-gbrain-env.test.ts test/gbrain-exec-invariant.test.ts test/gbrain-source-gitignore.test.ts test/artifacts-init-migration.test.ts test/gstack-memory-ingest.test.ts` — 100+ unit tests, all green. 
+ +| Surface | Before | After | +|---|---|---| +| `/sync-gbrain` inside a Next.js / Prisma / Rails project with `DATABASE_URL` in `.env.local` | Code stage crashes with "source registration failed: gbrain not configured"; memory stage crashes with "password authentication failed for user 'postgres'"; only brain-sync git push survives | All three stages run. Parent process AND the bun grandchild that runs `gbrain import` both see DATABASE_URL seeded from gbrain's own config | +| Two machines with identical home-dir layouts (chezmoi, ansible) syncing a shared brain | Same source id collides; last-writer-wins on `local_path`; loser's queries return cryptic "Not a git repository" errors | Distinct source ids (`sha1("${hostname}::${path}")`). Existing users with the path-only-hash form get rename-in-place (preserves pages) when gbrain supports `sources rename`, or register-new-then-remove-old after sync verifies (no data-loss window) when it doesn't | +| Conductor sibling worktrees of the same repo | `.gbrain-source` gets committed in worktree A, clobbers worktree B's pin on next `git pull`, semantic search routes to the wrong source | `.gbrain-source` now lands in the consumer repo's `.gitignore` on every successful sync. 
Idempotent re-runs | +| `gstack-code-drummerms-av-sow-wiz-skill-270c0001` (long repo name forced truncation) | `gstack-code-kill-270c0001-c32152` (mid-word cut from `skill` → `kill`) | `gstack-code-270c0001-050d83` (whole-token cut on hyphen boundaries; `repo-only-hostpathhash` retry when org prefix forces overflow) | +| `https://github.com/foo/bar.git` HTTPS remote (#1357) | Slugs could carry through periods, failing gbrain's 1-32 alnum-hyphen validator | Period-free slugs guaranteed; explicit regression test pinned at `test/gstack-gbrain-sync.test.ts` | +| Federation sync allowlist (existing user upgrading from v1.38.1.0) | `projects/*/*-eng-review-test-plan-*.md` orphaned by v1.38.1.0's done-marker; `/plan-eng-review` test plans silently dropped | v1.40.0.0 migration idempotently patches `.brain-allowlist`, `.brain-privacy-map.json`, `.gitattributes` on top of v1.38.1.0 state | +| `bun install` for gbrain on Windows MSYS / MINGW / Git Bash | Postinstall script aborts with non-zero exit; `gstack-gbrain-install` fails the whole flow | `--ignore-scripts` on Windows shells; post-install probe of `gbrain sources --help` flags any missing native artifacts before they bite at sync time | +| Spawning `gbrain` from gstack | 17+ direct `spawnSync("gbrain"`/`spawn("gbrain"`/`execFileSync("gbrain"` sites across the codebase, each one a missed-env-threading risk | Two hot-path files (`bin/gstack-gbrain-sync.ts`, `bin/gstack-memory-ingest.ts`) route every gbrain spawn through `lib/gbrain-exec.ts`. Static-source invariant test fails the build on direct call sites | + +### What this means for builders + +If you `/sync-gbrain` inside a framework project (Next.js, Prisma, Rails, etc.), the code AND memory stages now work — no more sourcing `~/.zshrc` first or unsetting `DATABASE_URL`. 
If you sync across multiple machines (chezmoi-managed dotfiles, ansible-provisioned VMs), your source ids stay distinct and your upgrade either renames pages in place or re-indexes once and cleans up the orphan. If you run Conductor sibling worktrees, your `.gbrain-source` pin stops accidentally committing. If you ship long repo names, slugs read cleanly. Run `/gstack-upgrade` to pick up the brain-allowlist migration; everything else is automatic on next sync. + +### Itemized changes + +#### Added + +- `lib/gbrain-exec.ts` (new, ~175 lines) — single source of truth for gbrain CLI invocation. `buildGbrainEnv` seeds DATABASE_URL from `${GBRAIN_HOME:-$HOME/.gbrain}/config.json`, with `GSTACK_RESPECT_ENV_DATABASE_URL=1` opt-out for the rare case where the brain intentionally lives in the project's local DB. `spawnGbrain` / `execGbrainJson` / `execGbrainText` / `spawnGbrainAsync` wrappers always inject the seeded env. Returns a fresh env object every call (no mutable identity leak). +- `bin/gstack-gbrain-sync.ts`: `derivePathOnlyHashLegacyId`, `gbrainSupportsSourcesRename` (exact-command feature check), `sourceLocalPath`, `planHostnameFoldMigration`, `removeOrphanedSource`. Hostname-fold migration: detect old form → probe path-drift → rename in place (if supported) → fall back to register-new + sync-OK + remove-old. +- `gstack-upgrade/migrations/v1.40.0.0.sh` — idempotent jq-based migration for `.brain-allowlist`, `.brain-privacy-map.json`, `.gitattributes` to add `projects/*/*-eng-review-test-plan-*.md`. Targeted in-place repair; never `git commit + push`. +- `test/build-gbrain-env.test.ts` (10 tests) — covers seed/override/escape-hatch/missing/unparseable/no-database_url/GBRAIN_HOME/object-identity/preservation/idempotent-when-matches. +- `test/gbrain-exec-invariant.test.ts` (2 tests) — static-source check that fails the build if `bin/gstack-gbrain-sync.ts` or `bin/gstack-memory-ingest.ts` adds a direct gbrain spawn outside the helper. 
+- `test/gbrain-source-gitignore.test.ts` (6 tests) — covers create / append / idempotent / whitespace / read-only checkout. +- `test/gstack-gbrain-sync.test.ts` — 15+ new tests for migration paths, path-drift, hyphen-boundary truncation, HTTPS slug period regression (#1357), and the centralized helper plumbing. +- `test/artifacts-init-migration.test.ts` — 5 new tests for v1.40.0.0 migration on top of installed v1.38.1.0 state. + +#### Changed + +- `bin/gstack-gbrain-sync.ts` — `deriveCodeSourceId` folds hostname into the pathhash AND retries with `repo-only-hostpathhash` when the full slug forces truncation. `constrainSourceId` cuts on hyphen boundaries (no more mid-word `skill` → `kill`). `runCodeImport` now runs the hostname-fold migration after the v1.x legacy cleanup, threads the seeded env through every gbrain spawn, and defers the orphan-source removal until AFTER sync verifies pages exist (closes the data-loss window codex review #2 flagged). `ensureGbrainSourceGitignored` appends `.gbrain-source` to the consumer repo's `.gitignore` after a successful attach. `if (import.meta.main)` guard added so the file is importable for unit tests. +- `bin/gstack-memory-ingest.ts` — routes `gbrain --help` probe and `gbrain import` streaming spawn through the helper. The bun grandchild now inherits a seeded env from `gstack-gbrain-sync`; defense-in-depth seeding inside memory-ingest itself for standalone invocations. +- `bin/gstack-artifacts-init` — adds `projects/*/*-eng-review-test-plan-*.md` to `.brain-allowlist`, `.brain-privacy-map.json` (class `artifact`), and `.gitattributes` (`merge=union`). +- `bin/gstack-gbrain-install` — Windows MSYS/MINGW/Cygwin shells get `bun install --ignore-scripts`. Post-install probe of `gbrain sources --help` flags missing native artifacts with a clear Windows-specific remediation message. +- `lib/gbrain-sources.ts` — `gbrain sources list --json` timeout bumped 10s → 30s for slow Supabase round-trips. 
+- `lib/gbrain-local-status.ts` — `gbrain --version` and `gbrain sources list --json` probes use `spawnSync` directly (no `command -v` shelling). + +#### Fixed + +- Hostname-fold migration data-loss window (codex review #2): the previous "register new, remove old" sequence could wipe pages if the new-source sync failed mid-flight. Now: register new → sync exits 0 → page_count > 0 → only THEN remove old. +- Hostname-fold path-drift (codex review #3): if the old source's `local_path` differs from the current repo root (user moved the repo, or two machines share a hash slot), migration is skipped with a clear warning instead of blindly renaming/removing the wrong source. +- `.gbrain-source` per-worktree pin breaking on commit (#1384): four contributors independently submitted fixes for this bug. PR #1521's exported-helper shape was selected; PR #1501 and PR #1464 closed as superseded. +- Cross-machine source-id collision when two hosts share a path layout (#1414). +- Mid-word slug truncation when long repo names force the 32-char cap. +- HTTPS-with-`.git` remotes producing period-laden source ids (#1357) — closed with explicit regression test. +- Federation queue dropping `/plan-eng-review` test plans on existing installs (#1452 follow-on). +- gbrain CLI probe failing on Windows shells where `command -v` is not a real binary (#1386 — partial; Windows ingest at scale remains separate work). +- `bun install` aborting on Windows MSYS/MINGW shells during gbrain installation (#1271 follow-on). + +#### NOT fixed by this wave (deferred; carry-overs for the next gbrain wave) + +- #1346 — `gstack-memory-ingest` calls `put_page` on gbrain ≥0.18 which renamed the subcommand. This wave routes the probe and stream through `lib/gbrain-exec.ts` but does NOT change the `put_page` call shape. Users on gbrain ≥0.18 still see memory ingest break with "unknown subcommand: put_page" — a separate API adapter pass owns that fix. 
+- #1435 — PgBouncer transaction-mode pooler breaks the `/sync-gbrain` capability check. v1.40.0.0's timeout bump (10s → 30s) is partial mitigation, not a fix. Needs pooler-mode detection. +- #1301 — `/setup-gbrain` picks port 6543 (transaction pooler) but new Supabase projects only listen on 5432 (session pooler). Provisioning-logic change. +- #1348 — `gstack-brain-init` defaults to SSH remote, fails for HTTPS-configured `gh`. Init-logic change. + +#### For contributors + +- Every new gbrain spawn from `bin/gstack-gbrain-sync.ts` or `bin/gstack-memory-ingest.ts` MUST go through `lib/gbrain-exec.ts`'s `spawnGbrain` / `execGbrainJson` / `execGbrainText` / `spawnGbrainAsync`. The invariant test `test/gbrain-exec-invariant.test.ts` fails the build on direct call sites. This guards against silently regressing the DATABASE_URL fix when a future contributor adds a quick `spawnSync("gbrain", ...)` without env threading. +- `GSTACK_RESPECT_ENV_DATABASE_URL=1` is the documented escape hatch when the brain intentionally lives in the project's local DB (e.g., a developer running a personal brain pointed at the same Postgres their Next.js app uses). The default is "seed from gbrain's config, override the caller's `.env.local`." +- The hostname-fold migration ships in `bin/gstack-gbrain-sync.ts` itself, not as a separate `gstack-upgrade/migrations/v1.40.0.0.sh` step. The trigger is "first sync after upgrade," not "migration runner sweep." It's idempotent — repeat invocations are no-ops because the legacy id either gets renamed/removed on the first run or path-drift skip persists across runs. +- The wave is credited per commit: 0xDevNinja (hostname fold #1468), drummerms (hyphen-boundary cut #1481), Jayesh Betala (probe CLI #1485), Jason Shultz (DATABASE_URL seeding #1508 + timeout #1507), genisis0x (consumer gitignore #1521, allowlist eng-review pattern #1465, Windows postinstall #1487). 
NikhileshNanduri (#1501) and realcarsonterry (#1464) submitted independent fixes for the gitignore bug — credited in conversation but not in commits (one canonical implementation landed). Thank you. + ## [1.39.2.0] - 2026-05-15 ## **Conductor workspaces wire `GSTACK_*` keys straight into gbrain embeddings and paid evals.** diff --git a/VERSION b/VERSION index 939a56892..895062404 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.39.2.0 +1.40.0.0 diff --git a/bin/gstack-artifacts-init b/bin/gstack-artifacts-init index 3dcb339ca..b8bfe830c 100755 --- a/bin/gstack-artifacts-init +++ b/bin/gstack-artifacts-init @@ -227,8 +227,18 @@ projects/*/ceo-plans/*.md projects/*/ceo-plans/*/*.md projects/*/designs/*.md projects/*/designs/*/*.md +# Project-root design / test-plan artifacts written by /office-hours, +# /plan-eng-review, and /autoplan. The skills emit +# `{user}-{branch}-design-{datetime}.md`, +# `{user}-{branch}-test-plan-{datetime}.md`, and +# `{user}-{branch}-eng-review-test-plan-{datetime}.md` at the project +# root (not under designs/), so the existing `designs/*.md` patterns +# miss them. Without these the cross-machine pull on machine B gets +# the referencing CEO plan but not the underlying design / test plan +# (#1452). 
projects/*/*-design-*.md projects/*/*-test-plan-*.md +projects/*/*-eng-review-test-plan-*.md projects/*/timeline.jsonl retros/*.md developer-profile.json @@ -256,6 +266,7 @@ cat > "$GSTACK_HOME/.brain-privacy-map.json" <<'EOF' {"pattern": "projects/*/designs/*/*.md", "class": "artifact"}, {"pattern": "projects/*/*-design-*.md", "class": "artifact"}, {"pattern": "projects/*/*-test-plan-*.md", "class": "artifact"}, + {"pattern": "projects/*/*-eng-review-test-plan-*.md", "class": "artifact"}, {"pattern": "retros/*.md", "class": "artifact"}, {"pattern": "builder-journey.md", "class": "artifact"}, {"pattern": "projects/*/timeline.jsonl", "class": "behavioral"}, diff --git a/bin/gstack-gbrain-install b/bin/gstack-gbrain-install index c247ff2df..d9c30396b 100755 --- a/bin/gstack-gbrain-install +++ b/bin/gstack-gbrain-install @@ -131,9 +131,24 @@ if $DRY_RUN; then fi # --- install + link --- +# On Windows MSYS/Cygwin shells, bun's postinstall scripts (notably gbrain's +# native-bindings setup) fail to parse path arguments correctly and abort +# `bun install` with a non-zero exit. The package itself installs fine +# without scripts, so detect Windows and pass --ignore-scripts there. The +# `bun link` step below is unaffected. +IS_WINDOWS=0 +case "$(uname -s)" in + MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;; +esac + if ! 
$VALIDATE_ONLY; then - log "running bun install in $INSTALL_DIR" - ( cd "$INSTALL_DIR" && bun install --silent ) + if [ "$IS_WINDOWS" -eq 1 ]; then + log "running bun install --ignore-scripts in $INSTALL_DIR (Windows shell detected)" + ( cd "$INSTALL_DIR" && bun install --silent --ignore-scripts ) + else + log "running bun install in $INSTALL_DIR" + ( cd "$INSTALL_DIR" && bun install --silent ) + fi log "running bun link in $INSTALL_DIR" ( cd "$INSTALL_DIR" && bun link --silent ) fi @@ -179,5 +194,27 @@ if [ "$actual_norm" != "$expected_norm" ]; then fi log "installed gbrain $actual_version from $INSTALL_DIR" + +# v1.40.0.0 post-install validation (T6 / codex review #19): --ignore-scripts +# may skip artifacts gbrain needs at runtime, especially on Windows +# MSYS/MINGW where we DID pass --ignore-scripts. `gbrain --version` above +# already confirmed the binary runs; this second probe checks that the +# subcommand surface is reachable (`sources` is the entry point the sync +# stage hits first). If the probe fails, we warn but don't exit non-zero — +# the user may still be able to use other commands. +if ! gbrain sources --help >/dev/null 2>&1; then + echo "" >&2 + echo "gstack-gbrain-install: WARNING — gbrain installed but 'gbrain sources --help' did not exit 0." >&2 + if [ "$IS_WINDOWS" -eq 1 ]; then + echo " Windows shells skip bun postinstall scripts; some gbrain features may need native build tools." >&2 + echo " If /sync-gbrain fails to find subcommands, install gbrain from a non-MSYS shell," >&2 + echo " or run: cd $INSTALL_DIR && bun install (without --ignore-scripts)" >&2 + else + echo " This may be a transient gbrain CLI issue or a missing native dependency." 
>&2 + echo " If /sync-gbrain fails, re-run: cd $INSTALL_DIR && bun install" >&2 + fi + echo "" >&2 +fi + echo "" echo "Next: gbrain init --pglite (or run /setup-gbrain for the full setup flow)" diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index 4fc658ac4..61d9e677f 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -32,13 +32,14 @@ import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs"; import { join, dirname } from "path"; import { execSync, spawnSync } from "child_process"; -import { homedir } from "os"; +import { homedir, hostname } from "os"; import { createHash } from "crypto"; import "../lib/conductor-env-shim"; import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers"; import { ensureSourceRegistered, sourcePageCount } from "../lib/gbrain-sources"; import { localEngineStatus, type LocalEngineStatus } from "../lib/gbrain-local-status"; +import { buildGbrainEnv, spawnGbrain, execGbrainJson } from "../lib/gbrain-exec"; // ── Types ────────────────────────────────────────────────────────────────── @@ -161,30 +162,42 @@ function originUrl(): string | null { } /** - * Derive a worktree-aware source id for the cwd code corpus. + * Derive a host- and worktree-aware source id for the cwd code corpus. * - * Pattern: `gstack-code-<slug>-<pathhash8>` where slug comes from origin - * (org/repo) and pathhash8 is the first 8 hex chars of sha1(absolute repo - * path). The pathhash8 is what makes Conductor worktrees of the same repo - * coexist as separate sources in the same gbrain DB instead of stomping on - * each other. + * Pattern: `gstack-code-<slug>-<hostpathhash8>` where slug comes from origin + * (org/repo) and hostpathhash8 is the first 8 hex chars of + * sha1(`${hostname}::${absolute repo path}`).
/**
 * Build the host- and worktree-aware source id for the code corpus at
 * `repoPath`.
 *
 * Shape: `gstack-code-<slug>-<hostpathhash8>`, where `<hostpathhash8>` is the
 * first 8 hex chars of sha1(`${host}::${repoPath}`) and `<slug>` is the last
 * two segments of the canonicalized origin remote joined with a hyphen.
 * Hashing host and path together keeps sibling worktrees on one machine
 * distinct, and keeps two machines with the same absolute layout distinct.
 *
 * `GSTACK_HOSTNAME`, when set and non-empty, replaces `os.hostname()` so tests
 * can pin the host.
 *
 * When the org+repo form does not come back from `constrainSourceId` with the
 * hash suffix intact, the org segment is dropped and the id is rebuilt from
 * the repo name alone. With no origin remote, the last `/`-separated segment
 * of `repoPath` is used as the slug.
 *
 * @param repoPath Absolute path of the repository root.
 * @returns A source id produced by `constrainSourceId`.
 */
function deriveCodeSourceId(repoPath: string): string {
  const machine = process.env.GSTACK_HOSTNAME || hostname();
  const hasher = createHash("sha1");
  hasher.update(`${machine}::${repoPath}`);
  const hostPathHash8 = hasher.digest("hex").slice(0, 8);

  const canonical = canonicalizeRemote(originUrl());
  if (!canonical) {
    // Local-only repo: fall back to the directory name.
    const dirName = repoPath.split("/").pop() || "repo";
    return constrainSourceId("gstack-code", `${dirName}-${hostPathHash8}`);
  }

  const parts = canonical.split("/").filter(Boolean);
  const orgAndRepo = parts.slice(-2).join("-");
  const candidate = constrainSourceId("gstack-code", `${orgAndRepo}-${hostPathHash8}`);
  // Suffix preserved means the org+repo+hash form fit as-is.
  if (candidate.endsWith(`-${hostPathHash8}`)) {
    return candidate;
  }

  // Otherwise retry without the org prefix so the repo name stays readable.
  const repoName = parts[parts.length - 1] || "repo";
  return constrainSourceId("gstack-code", `${repoName}-${hostPathHash8}`);
}

/**
 * Rebuild the path-only-hash source id used before hostname was folded into
 * the hash key. Kept only so the hostname-fold migration can find the old
 * source.
 *
 * Shape: `gstack-code-<slug>-<pathhash8>`, where `<pathhash8>` is the first 8
 * hex chars of sha1(`repoPath`) and `<slug>` is derived the same way as in
 * `deriveCodeSourceId` (origin org/repo, else the directory name). This is a
 * different legacy form from the one `deriveLegacyCodeSourceId` produces.
 *
 * NOTE(review): the id is built with the current `constrainSourceId`. If that
 * function's truncation differs from the one in the release that registered
 * the source, over-long slugs may yield an id that does not match the
 * registered one — confirm against older releases.
 *
 * @param repoPath Absolute path of the repository root.
 * @returns The legacy path-only-hash source id.
 */
export function derivePathOnlyHashLegacyId(repoPath: string): string {
  const pathHash8 = createHash("sha1").update(repoPath).digest("hex").slice(0, 8);
  const canonical = canonicalizeRemote(originUrl());
  const label = canonical
    ? canonical.split("/").filter(Boolean).slice(-2).join("-")
    : (repoPath.split("/").pop() || "repo");
  return constrainSourceId("gstack-code", `${label}-${pathHash8}`);
}
/**
 * Memoized result of the `gbrain sources rename` feature check.
 * `null` means "not probed yet in this process".
 */
let _gbrainSupportsRenameCache: boolean | null = null;

/** Clear the memoized feature-check result so the next call probes again. */
export function _resetGbrainSupportsRenameCache(): void {
  _gbrainSupportsRenameCache = null;
}

/**
 * Feature-check whether the installed gbrain CLI accepts
 * `sources rename <old> <new>`.
 *
 * Runs `gbrain sources rename --help` (5s timeout) and reports support only
 * when the command exits 0 AND its combined stdout/stderr shows the
 * two-argument usage shape: either two angle-bracket placeholders
 * (`sources rename <old> <new>`, any placeholder names) or the literal
 * `sources rename OLD NEW`. A bare mention of `rename` is not enough.
 *
 * The result is cached for the lifetime of the process; any exception from the
 * probe is treated as "not supported".
 *
 * @param env Environment forwarded to the helper as `baseEnv`.
 * @returns `true` when rename-in-place can be attempted.
 */
function gbrainSupportsSourcesRename(env?: NodeJS.ProcessEnv): boolean {
  if (_gbrainSupportsRenameCache !== null) return _gbrainSupportsRenameCache;

  // The placeholder pattern requires literal angle brackets around both
  // arguments; without them it would match any "sources rename" followed by
  // whitespace and defeat the exact-shape check.
  const usageShapes: readonly RegExp[] = [
    /sources\s+rename\s+<[^<>\s]+>\s+<[^<>\s]+>/i,
    /sources\s+rename\s+OLD\s+NEW/,
  ];

  let supported = false;
  try {
    const r = spawnGbrain(["sources", "rename", "--help"], {
      timeout: 5_000,
      baseEnv: env,
    });
    const out = `${r.stdout || ""}\n${r.stderr || ""}`;
    supported = r.status === 0 && usageShapes.some((shape) => shape.test(out));
  } catch {
    // Probe could not run at all — fall back to the non-rename migration path.
    supported = false;
  }
  _gbrainSupportsRenameCache = supported;
  return supported;
}

/**
 * Look up a source's `local_path` from `gbrain sources list --json`.
 *
 * @param sourceId Id of the source to find.
 * @param env Environment forwarded to the helper as `baseEnv`; tests can use
 *   it to point PATH at a gbrain shim.
 * @returns The source's `local_path`, or `null` when the listing is missing,
 *   is not an array, does not contain `sourceId`, or the entry has no
 *   `local_path`.
 */
export function sourceLocalPath(sourceId: string, env?: NodeJS.ProcessEnv): string | null {
  // NOTE(review): element shape inferred from the two fields read below —
  // confirm against gbrain's JSON output.
  const list = execGbrainJson<Array<{ id: string; local_path?: string | null }>>(
    ["sources", "list", "--json"],
    { baseEnv: env },
  );
  // Guard against a null result or a non-array payload before calling find().
  if (!Array.isArray(list)) return null;
  const found = list.find((s) => s.id === sourceId);
  return found?.local_path ?? null;
}
/**
 * Outcome of `planHostnameFoldMigration`, used by the caller to decide what
 * to do next.
 *
 * - `none`: ids are identical, or no legacy source is registered.
 * - `skipped-path-drift`: the legacy source points at a different path.
 * - `renamed`: the legacy source was renamed in place to the new id.
 * - `pending-cleanup`: the legacy source still exists and is left for the
 *   caller to remove later.
 */
export type HostnameFoldMigration =
  | { kind: "none"; reason: "ids-match" | "no-legacy-source" }
  | { kind: "skipped-path-drift"; oldId: string; oldPath: string; currentPath: string }
  | { kind: "renamed"; oldId: string; newId: string }
  | { kind: "pending-cleanup"; oldId: string };

/**
 * Decide how to move from the legacy path-only-hash source id to the
 * hostname-fold id.
 *
 * Decision order:
 *  1. Legacy id equals new id → `none` / `ids-match`.
 *  2. Legacy source has no `local_path` in the listing → `none` /
 *     `no-legacy-source`.
 *  3. Legacy `local_path` differs from `currentRoot` → `skipped-path-drift`;
 *     nothing is renamed or removed.
 *  4. `gbrain sources rename` is supported and the rename exits 0 → `renamed`.
 *  5. Otherwise (unsupported, or rename failed) → `pending-cleanup`. This
 *     function removes nothing; deletion is left to the caller.
 *
 * @param currentRoot Absolute path of the current repo root.
 * @param newSourceId Id the current code derives for this repo.
 * @param legacyPathHashId Id the path-only-hash scheme would have produced.
 * @param env Environment forwarded to every gbrain spawn.
 * @returns The migration outcome.
 */
export function planHostnameFoldMigration(
  currentRoot: string,
  newSourceId: string,
  legacyPathHashId: string,
  env?: NodeJS.ProcessEnv,
): HostnameFoldMigration {
  if (legacyPathHashId === newSourceId) {
    return { kind: "none", reason: "ids-match" };
  }

  const registeredPath = sourceLocalPath(legacyPathHashId, env);
  if (registeredPath === null) {
    return { kind: "none", reason: "no-legacy-source" };
  }
  if (registeredPath !== currentRoot) {
    return {
      kind: "skipped-path-drift",
      oldId: legacyPathHashId,
      oldPath: registeredPath,
      currentPath: currentRoot,
    };
  }

  // Short-circuit: the rename is only spawned when the feature check passes.
  const renamedInPlace =
    gbrainSupportsSourcesRename(env) &&
    spawnGbrain(["sources", "rename", legacyPathHashId, newSourceId], { baseEnv: env }).status === 0;

  // An unsupported or failed rename both land on the deferred-cleanup path.
  return renamedInPlace
    ? { kind: "renamed", oldId: legacyPathHashId, newId: newSourceId }
    : { kind: "pending-cleanup", oldId: legacyPathHashId };
}

/**
 * Remove a source by id via `gbrain sources remove <id> --confirm-destructive`.
 *
 * The function performs no verification of its own; callers are expected to
 * confirm the replacement source is populated before calling it.
 *
 * @param oldId Id of the source to remove.
 * @param env Environment forwarded to the gbrain spawn.
 * @returns `true` when the remove command exits 0.
 */
export function removeOrphanedSource(oldId: string, env?: NodeJS.ProcessEnv): boolean {
  const removeArgs = ["sources", "remove", oldId, "--confirm-destructive"];
  const { status } = spawnGbrain(removeArgs, { baseEnv: env });
  return status === 0;
}
+ * + * Truncation cuts on hyphen boundaries (whole-word units) from the right, never + * mid-word. Inputs like "drummerms-av-sow-wiz-skill-270c0001" produce + * "${prefix}-270c0001-<hash>", not "${prefix}-kill-270c0001-<hash>". */ function constrainSourceId(prefix: string, raw: string): string { const MAX = 32; @@ -230,17 +395,21 @@ function constrainSourceId(prefix: string, raw: string): string { // Total budget: prefix + "-" + tail + "-" + hash const tailBudget = MAX - prefix.length - 2 - hash.length; if (tailBudget < 1) return `${prefix}-${hash}`; - const tail = slug.slice(-tailBudget).replace(/^-+|-+$/g, ""); - return tail ? `${prefix}-${tail}-${hash}` : `${prefix}-${hash}`; -} - -function gbrainAvailable(): boolean { - try { - execSync("command -v gbrain", { stdio: "ignore" }); - return true; - } catch { - return false; + // Cut on hyphen boundaries instead of mid-word. Walk tokens from the right, + // accumulating until adding the next token would exceed tailBudget. This + // preserves readable suffixes (pathhash, repo name) and avoids embarrassing + // mid-word artifacts like "skill" → "kill". + const tokens = slug.split("-").filter(Boolean); + const kept: string[] = []; + let len = 0; + for (let i = tokens.length - 1; i >= 0; i--) { + const add = kept.length === 0 ? tokens[i].length : tokens[i].length + 1; + if (len + add > tailBudget) break; + kept.unshift(tokens[i]); + len += add; } + const tail = kept.join("-"); + return tail ?
`${prefix}-${tail}-${hash}` : `${prefix}-${hash}`; } // ── Lock file (D1) ───────────────────────────────────────────────────────── @@ -334,9 +503,6 @@ async function runCodeImport(args: CliArgs): Promise { if (!root) { return { name: "code", ran: false, ok: true, duration_ms: 0, summary: "skipped (not in git repo)" }; } - if (!gbrainAvailable()) { - return { name: "code", ran: false, ok: false, duration_ms: 0, summary: "skipped (gbrain CLI not in PATH)" }; - } const sourceId = deriveCodeSourceId(root); @@ -365,31 +531,52 @@ async function runCodeImport(args: CliArgs): Promise { return skipStageForLocalStatus("code", localStatus, t0); } - // Step 0: Best-effort cleanup of pre-pathhash legacy source. + // Step 0a: Best-effort cleanup of pre-pathhash legacy source (v1.x form). // Earlier /sync-gbrain versions registered `gstack-code-` (no path // suffix). On a multi-worktree repo, those collapsed onto a single id // with last-sync-wins. Federated search would return stale duplicate // hits forever if we left the orphan in place. Remove the legacy id once // here so users don't accumulate orphans. // Failure is non-fatal — we still register the new id below. + // gbrainEnv seeds DATABASE_URL from gbrain's config so this stage works + // inside Next.js / Prisma / Rails projects with their own .env.local + // (codex review #7 — bug fix is wider than #1508 as filed). + const gbrainEnv = buildGbrainEnv({ announce: !args.quiet }); const legacyId = deriveLegacyCodeSourceId(root); let legacyRemoved = false; if (legacyId !== sourceId) { - const rm = spawnSync("gbrain", ["sources", "remove", legacyId, "--confirm-destructive"], { - encoding: "utf-8", + const rm = spawnGbrain(["sources", "remove", legacyId, "--confirm-destructive"], { timeout: 30_000, - stdio: ["ignore", "pipe", "pipe"], + baseEnv: gbrainEnv, }); // Treat absent-source as success (clean state). gbrain emits "not found" on // missing id; treat any non-zero exit without "not found" as a soft fail. 
if (rm.status === 0) legacyRemoved = true; } + // Step 0b: Hostname-fold migration (#1414). + // Before #1468 the source id hashed only the absolute repo path. After the + // hostname fold, every existing user has a legacy id that no longer matches + // what deriveCodeSourceId produces. Try rename-in-place first (preserves + // pages); fall back to register-new → sync-OK → remove-old. Path-drift + // (user moved the repo, etc.) skips migration with a warning. + const pathOnlyHashLegacyId = derivePathOnlyHashLegacyId(root); + const migration = planHostnameFoldMigration(root, sourceId, pathOnlyHashLegacyId, gbrainEnv); + if (migration.kind === "skipped-path-drift" && !args.quiet) { + console.error( + `[sync:code] hostname-fold migration skipped: legacy source ${migration.oldId} ` + + `points at ${migration.oldPath}, current repo is ${migration.currentPath}. ` + + `Clean up manually with: gbrain sources remove ${migration.oldId} --confirm-destructive`, + ); + } else if (migration.kind === "renamed" && !args.quiet) { + console.error(`[sync:code] hostname-fold migration: renamed ${migration.oldId} → ${migration.newId} (pages preserved)`); + } + // Step 1: Ensure source registered (idempotent). Single source of truth in lib — // no synchronous duplicate here (per /codex review #12). let registered = false; try { - const result = await ensureSourceRegistered(sourceId, root, { federated: true }); + const result = await ensureSourceRegistered(sourceId, root, { federated: true, env: gbrainEnv }); registered = result.changed; } catch (err) { return { @@ -407,9 +594,10 @@ async function runCodeImport(args: CliArgs): Promise { ? ["reindex-code", "--source", sourceId, "--yes"] : ["sync", "--strategy", "code", "--source", sourceId]; - const syncResult = spawnSync("gbrain", syncArgs, { + const syncResult = spawnGbrain(syncArgs, { stdio: args.quiet ? 
["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"], timeout: 35 * 60 * 1000, + baseEnv: gbrainEnv, }); if (syncResult.status !== 0) { @@ -432,14 +620,32 @@ async function runCodeImport(args: CliArgs): Promise { // the wrong/default source. Treat it as a stage failure (ok=false) so the // verdict block surfaces ERR and the user knows to retry rather than // trusting stale results. - const attach = spawnSync("gbrain", ["sources", "attach", sourceId], { - encoding: "utf-8", + const attach = spawnGbrain(["sources", "attach", sourceId], { timeout: 10_000, cwd: root, - stdio: ["ignore", "pipe", "pipe"], + baseEnv: gbrainEnv, }); - const pageCount = sourcePageCount(sourceId); - const legacyNote = legacyRemoved ? `, removed legacy ${legacyId}` : ""; + const pageCount = sourcePageCount(sourceId, gbrainEnv); + + // Step 4: Deferred hostname-fold cleanup. + // Only remove the pre-#1468 path-only-hash source NOW that the new source + // has registered + synced + has pages. Removing before sync would create a + // data-loss window if sync failed; removing without a page-count check would + // wipe pages when sync silently no-op'd. This is the codex-review-flagged + // safety: register → sync → verify → THEN delete. + let hostnameLegacyRemoved = false; + if (migration.kind === "pending-cleanup" && pageCount !== null && pageCount > 0) { + hostnameLegacyRemoved = removeOrphanedSource(migration.oldId, gbrainEnv); + if (hostnameLegacyRemoved && !args.quiet) { + console.error(`[sync:code] hostname-fold migration: removed legacy ${migration.oldId} after new source sync verified (page_count=${pageCount})`); + } + } + + const legacyParts: string[] = []; + if (legacyRemoved) legacyParts.push(`removed legacy ${legacyId}`); + if (migration.kind === "renamed") legacyParts.push(`renamed ${migration.oldId}→${migration.newId}`); + if (hostnameLegacyRemoved) legacyParts.push(`removed pre-hostname-fold ${migration.kind === "pending-cleanup" ? 
migration.oldId : ""}`); + const legacyNote = legacyParts.length > 0 ? `, ${legacyParts.join(", ")}` : ""; const baseSummary = `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"}${legacyNote})`; if (attach.status !== 0) { @@ -460,6 +666,13 @@ async function runCodeImport(args: CliArgs): Promise { }; } + // v1.29.0.0 changelog promised the per-worktree pin would be ignored in the + // consuming repo, but the change actually only added .gbrain-source to + // gstack's own .gitignore. Without the consumer-side entry, the pin gets + // committed and breaks the per-worktree promise: Conductor sibling worktrees + // step on each other's pin every time anyone commits (#1384). + ensureGbrainSourceGitignored(root); + return { name: "code", ran: true, @@ -476,6 +689,39 @@ async function runCodeImport(args: CliArgs): Promise { }; } +/** + * Ensure `.gbrain-source` is listed in the consumer repo's `.gitignore`. + * + * Idempotent: only appends when the entry is not already present (matched on + * trimmed lines so a leading/trailing whitespace difference doesn't add a + * second copy). Wraps writes in try/catch so a read-only checkout or weird + * perms logs a warning and lets the rest of the sync continue. + */ +export function ensureGbrainSourceGitignored(root: string): void { + const gitignorePath = join(root, ".gitignore"); + try { + let existing = ""; + try { + existing = readFileSync(gitignorePath, "utf-8"); + } catch { + // No .gitignore yet — we'll create it. + } + const alreadyIgnored = existing + .split("\n") + .some((line) => line.trim() === ".gbrain-source"); + if (alreadyIgnored) { + return; + } + const sep = existing.length > 0 && !existing.endsWith("\n") ? "\n" : ""; + writeFileSync(gitignorePath, existing + sep + ".gbrain-source\n"); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + console.warn( + `[sync:code] could not add .gbrain-source to ${gitignorePath}: ${msg}`, + ); + } +} + function runMemoryIngest(args: CliArgs): StageResult { const t0 = Date.now(); @@ -498,9 +744,14 @@ function runMemoryIngest(args: CliArgs): StageResult { else ingestArgs.push("--incremental"); if (args.quiet) ingestArgs.push("--quiet"); + // Thread the seeded env into the bun grandchild (codex review #7 — the + // .env.local footgun affects gstack-memory-ingest.ts too, not just the + // direct gbrain spawns in this file). The grandchild calls gbrain import + // internally and must see the DATABASE_URL from gbrain's own config. const result = spawnSync("bun", ingestArgs, { encoding: "utf-8", timeout: 35 * 60 * 1000, + env: buildGbrainEnv({ announce: false }), }); // D6: parse [memory-ingest] lines from the child's stderr. ERR-prefixed @@ -675,8 +926,10 @@ async function main(): Promise { process.exit(exitCode); } -main().catch((err) => { - console.error(`gstack-gbrain-sync fatal: ${err instanceof Error ? err.message : String(err)}`); - releaseLock(); - process.exit(1); -}); +if (import.meta.main) { + main().catch((err) => { + console.error(`gstack-gbrain-sync fatal: ${err instanceof Error ? 
err.message : String(err)}`); + releaseLock(); + process.exit(1); + }); +} diff --git a/bin/gstack-memory-ingest.ts b/bin/gstack-memory-ingest.ts index b1169ae69..88fdbc7e4 100644 --- a/bin/gstack-memory-ingest.ts +++ b/bin/gstack-memory-ingest.ts @@ -54,7 +54,7 @@ import { rmSync, } from "fs"; import { join, basename, dirname } from "path"; -import { execSync, execFileSync, spawnSync, spawn, type ChildProcess } from "child_process"; +import { execFileSync, spawnSync, spawn, type ChildProcess } from "child_process"; import { homedir } from "os"; import { createHash } from "crypto"; @@ -64,6 +64,7 @@ import { detectEngineTier, withErrorContext, } from "../lib/gstack-memory-helpers"; +import { execGbrainText, spawnGbrainAsync } from "../lib/gbrain-exec"; // ── Types ────────────────────────────────────────────────────────────────── @@ -809,16 +810,14 @@ let _gbrainAvailability: boolean | null = null; function gbrainAvailable(): boolean { if (_gbrainAvailability !== null) return _gbrainAvailability; try { - execSync("command -v gbrain", { stdio: "ignore" }); // Probe `--help` for the `import` subcommand. gbrain v0.20.0+ ships // `import ` (batch markdown import via path-authoritative slugs). // If absent, we surface a single clean error here rather than failing // the whole stage with a confusing usage message from gbrain itself. - const help = execFileSync("gbrain", ["--help"], { - encoding: "utf-8", - timeout: 5000, - stdio: ["ignore", "pipe", "pipe"], - }); + // `gbrain --help` probes only CLI availability, not DB connectivity, so + // it doesn't strictly need DATABASE_URL. But routing through the helper + // keeps the invariant test from chasing exceptions per call site. 
+ const help = execGbrainText(["--help"], { timeout: 5000 }); _gbrainAvailability = /^\s+import\s/m.test(help); } catch { _gbrainAvailability = false; @@ -1317,11 +1316,11 @@ function runGbrainImport( ): Promise<{ status: number | null; stdout: string; stderr: string }> { installSignalForwarder(); return new Promise((resolve) => { - const child = spawn( - "gbrain", - ["import", stagingDir, "--no-embed", "--json"], - { stdio: ["ignore", "pipe", "pipe"] }, - ); + // Seed DATABASE_URL from gbrain's own config so this stage works + // inside Next.js / Prisma / Rails projects with their own + // .env.local (codex review #7 — defense in depth on top of the + // parent gstack-gbrain-sync seeding the bun grandchild's env). + const child = spawnGbrainAsync(["import", stagingDir, "--no-embed", "--json"]); _activeImportChild = child; let stdout = ""; let stderr = ""; diff --git a/gstack-upgrade/migrations/v1.40.0.0.sh b/gstack-upgrade/migrations/v1.40.0.0.sh new file mode 100755 index 000000000..d21c18ba3 --- /dev/null +++ b/gstack-upgrade/migrations/v1.40.0.0.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# Migration: v1.40.0.0 — add eng-review-test-plan project-root pattern to +# .brain-allowlist, .brain-privacy-map.json, and .gitattributes (#1452 follow-on). +# +# Why a second migration: v1.38.1.0 shipped two of three filenames for #1452 +# (`*-design-*.md` and `*-test-plan-*.md`) but missed `/plan-eng-review`'s +# actual filename: `*-eng-review-test-plan-*.md`. The v1.38.1.0 migration has +# a done-marker, so a "fix v1.38.1.0 and re-run" approach silently no-ops on +# existing users. v1.40.0.0 needs its own migration to patch installs that +# already ran v1.38.1.0. +# +# Per-file independent — if one file is missing we still repair the others. +# +# Idempotent: each insertion is gated on `not already present` so re-running +# the migration is a no-op. 
+ +set -u + +GSTACK_HOME="${HOME}/.gstack" +ALLOWLIST="${GSTACK_HOME}/.brain-allowlist" +PRIVACY="${GSTACK_HOME}/.brain-privacy-map.json" +GITATTRS="${GSTACK_HOME}/.gitattributes" + +MIGRATION_DIR="${GSTACK_HOME}/.migrations" +DONE="${MIGRATION_DIR}/v1.40.0.0.done" + +mkdir -p "${MIGRATION_DIR}" 2>/dev/null || true +if [ -f "${DONE}" ]; then + exit 0 +fi + +NEW_PATTERNS=( + 'projects/*/*-eng-review-test-plan-*.md' +) + +added_any=0 + +# ----- .brain-allowlist --------------------------------------------------- +if [ -f "${ALLOWLIST}" ]; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + if ! grep -Fq -- "${PATTERN}" "${ALLOWLIST}" 2>/dev/null; then + if grep -q '^# ---- USER ADDITIONS BELOW' "${ALLOWLIST}" 2>/dev/null; then + sed -i.bak "/^# ---- USER ADDITIONS BELOW/i\\ +${PATTERN} +" "${ALLOWLIST}" && rm -f "${ALLOWLIST}.bak" + added_any=1 + else + printf '%s\n' "${PATTERN}" >> "${ALLOWLIST}" + added_any=1 + fi + fi + done +fi + +# ----- .brain-privacy-map.json ------------------------------------------- +if [ -f "${PRIVACY}" ]; then + if command -v jq >/dev/null 2>&1; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + if ! jq -e --arg p "${PATTERN}" 'map(select(.pattern == $p)) | length > 0' "${PRIVACY}" >/dev/null 2>&1; then + if jq --arg p "${PATTERN}" '. += [{"pattern": $p, "class": "artifact"}]' "${PRIVACY}" > "${PRIVACY}.tmp" 2>/dev/null; then + mv "${PRIVACY}.tmp" "${PRIVACY}" + added_any=1 + else + rm -f "${PRIVACY}.tmp" + echo " [v1.40.0.0] WARN: jq failed to patch ${PRIVACY}; skipping pattern ${PATTERN}." >&2 + fi + fi + done + else + echo " [v1.40.0.0] WARN: jq not found; skipping privacy-map repair. Install jq and re-run gstack-upgrade, or run gstack-artifacts-init manually." >&2 + fi +fi + +# ----- .gitattributes ----------------------------------------------------- +if [ -f "${GITATTRS}" ]; then + for PATTERN in "${NEW_PATTERNS[@]}"; do + RULE="${PATTERN} merge=union" + if ! 
grep -Fq -- "${RULE}" "${GITATTRS}" 2>/dev/null; then + printf '%s\n' "${RULE}" >> "${GITATTRS}" + added_any=1 + fi + done +fi + +# Mark done even if no patches needed — a fresh-init user's +# bin/gstack-artifacts-init now writes the pattern directly, so re-runs +# should no-op. The touchfile keeps the migration runner from looping. +touch "${DONE}" + +if [ "${added_any}" = "1" ]; then + echo " [v1.40.0.0] allowlist/privacy-map/gitattributes patched for /plan-eng-review test plans (idempotent)" >&2 +fi + +# NEVER `git commit + push` from this migration. The user controls when the +# patches ship into their federated artifacts repo. + +exit 0 diff --git a/lib/gbrain-exec.ts b/lib/gbrain-exec.ts new file mode 100644 index 000000000..5b768749f --- /dev/null +++ b/lib/gbrain-exec.ts @@ -0,0 +1,174 @@ +/** + * Centralized gbrain CLI invocation. + * + * Every `gbrain ...` spawn from `bin/gstack-gbrain-sync.ts` and + * `bin/gstack-memory-ingest.ts` MUST go through `spawnGbrain` (or + * `execGbrainJson`), and the invariant test + * `test/gbrain-exec-invariant.test.ts` enforces this with a static-source + * grep. The helper layer guarantees three properties: + * + * 1. **DATABASE_URL is seeded from gbrain's own config**, not from the + * caller's `.env.local`. gbrain auto-loads `.env.local` via dotenv on + * startup. When `/sync-gbrain` runs inside a Next.js / Prisma / Rails + * project with its own `DATABASE_URL`, gbrain reads that one and not + * its own `${GBRAIN_HOME:-$HOME/.gbrain}/config.json`. Auth fails; + * code + memory stages crash; only brain-sync's git push survives. + * + * 2. **Bun-aware env passing.** Mutating `process.env.DATABASE_URL` does + * NOT propagate to children of `child_process.spawnSync`/`spawn` in + * Bun — the child gets the original startup env. So we cannot just + * set process.env; we must thread an explicit `env:` dict to every + * spawn. This is the central bug the helper exists to prevent + * regressing on. + * + * 3. 
**`GBRAIN_HOME` honored consistently.** Other gstack helpers + * (`detectEngineTier`) already honor `GBRAIN_HOME`. `buildGbrainEnv` + * reads from `${GBRAIN_HOME:-$HOME/.gbrain}/config.json` so all + * gstack-side gbrain calls agree on which config file matters. + * + * **Escape hatch:** `GSTACK_RESPECT_ENV_DATABASE_URL=1` returns the + * caller's env unchanged. Use only when the brain intentionally lives in + * the project's local DB (rare). + */ + +import { existsSync, readFileSync } from "fs"; +import { join } from "path"; +import { homedir } from "os"; +import { spawnSync, spawn, execFileSync, type SpawnSyncReturns, type ChildProcess, type SpawnOptions } from "child_process"; + +interface GbrainConfig { + database_url?: string; +} + +export interface BuildGbrainEnvOptions { + /** + * Caller env to extend. Defaults to `process.env`. Tests inject a + * synthetic env so the helper can be exercised without polluting the + * real process env. + */ + baseEnv?: NodeJS.ProcessEnv; + /** + * When true, announce on stderr that we overrode the caller's + * DATABASE_URL. Suppressed for the `--quiet` sync flow. + */ + announce?: boolean; +} + +/** + * Build an env dict with DATABASE_URL seeded from + * `${GBRAIN_HOME:-$HOME/.gbrain}/config.json`. Returns the base env + * unchanged when: + * - `GSTACK_RESPECT_ENV_DATABASE_URL=1` (intentional opt-out), + * - the config file is missing or unparseable, + * - the config has no `database_url`, + * - the caller already set DATABASE_URL to the same value. + * + * Always returns a fresh object — mutating the returned env never + * affects the caller's env. Tests assert on effective values, not + * object identity. 
+ */ +export function buildGbrainEnv(opts: BuildGbrainEnvOptions = {}): NodeJS.ProcessEnv { + const baseEnv = opts.baseEnv || process.env; + const out: NodeJS.ProcessEnv = { ...baseEnv }; + if (baseEnv.GSTACK_RESPECT_ENV_DATABASE_URL === "1") return out; + + const homeBase = baseEnv.HOME || homedir(); + const gbrainHome = baseEnv.GBRAIN_HOME || join(homeBase, ".gbrain"); + const configPath = join(gbrainHome, "config.json"); + if (!existsSync(configPath)) return out; + + let cfg: GbrainConfig = {}; + try { + cfg = JSON.parse(readFileSync(configPath, "utf-8")) as GbrainConfig; + } catch { + return out; + } + if (!cfg.database_url) return out; + if (baseEnv.DATABASE_URL === cfg.database_url) return out; + + const hadCaller = baseEnv.DATABASE_URL !== undefined; + out.DATABASE_URL = cfg.database_url; + if (opts.announce) { + const note = hadCaller ? " (overrode value from caller env / .env.local)" : ""; + process.stderr.write(`[gbrain-exec] seeded DATABASE_URL from ${configPath}${note}\n`); + } + return out; +} + +export interface SpawnGbrainOptions { + /** Timeout in milliseconds. Defaults to 30s. */ + timeout?: number; + /** Working directory for the child process. */ + cwd?: string; + /** Stdio configuration. Defaults to capturing both stdout and stderr. */ + stdio?: "inherit" | "pipe" | "ignore" | Array<"inherit" | "pipe" | "ignore">; + /** + * Base env to extend before seeding DATABASE_URL. Defaults to + * `process.env`. Tests inject a synthetic env so the spawn picks up a + * gbrain shim on PATH and a fake `~/.gbrain/config.json`. + */ + baseEnv?: NodeJS.ProcessEnv; + /** Whether to announce DATABASE_URL seeding on stderr. */ + announce?: boolean; +} + +/** + * Spawn `gbrain ` with the seeded env. Returns the raw + * `SpawnSyncReturns` so callers can inspect `status`, `stdout`, + * `stderr` exactly as they would with `spawnSync` directly. 
+ */ +export function spawnGbrain(args: string[], opts: SpawnGbrainOptions = {}): SpawnSyncReturns { + return spawnSync("gbrain", args, { + encoding: "utf-8", + timeout: opts.timeout ?? 30_000, + cwd: opts.cwd, + stdio: opts.stdio || ["ignore", "pipe", "pipe"], + env: buildGbrainEnv({ baseEnv: opts.baseEnv, announce: opts.announce }), + }); +} + +/** + * Run `gbrain ` and parse stdout as JSON. Returns `null` on + * non-zero exit, parse failure, or timeout. Useful for `gbrain sources + * list --json` and similar. + */ +export function execGbrainJson(args: string[], opts: SpawnGbrainOptions = {}): T | null { + const r = spawnGbrain(args, opts); + if (r.status !== 0) return null; + try { + return JSON.parse(r.stdout || "null") as T; + } catch { + return null; + } +} + +/** + * Async streaming variant for callers that need to attach stdout/stderr + * listeners (e.g., `gbrain import` in `gstack-memory-ingest.ts`). Always + * injects the seeded env. Returns the raw `ChildProcess` so the caller + * can wire up its own promise around exit/timeout/signal handling. + */ +export function spawnGbrainAsync( + args: string[], + opts: { stdio?: SpawnOptions["stdio"]; cwd?: string; baseEnv?: NodeJS.ProcessEnv } = {}, +): ChildProcess { + return spawn("gbrain", args, { + stdio: opts.stdio || ["ignore", "pipe", "pipe"], + cwd: opts.cwd, + env: buildGbrainEnv({ baseEnv: opts.baseEnv, announce: false }), + }); +} + +/** + * Run `gbrain ` via execFileSync. Throws on non-zero exit. Useful + * for callers that want to surface gbrain's stderr as the error message. + */ +export function execGbrainText(args: string[], opts: SpawnGbrainOptions = {}): string { + return execFileSync("gbrain", args, { + encoding: "utf-8", + timeout: opts.timeout ?? 
30_000, + cwd: opts.cwd, + stdio: opts.stdio || ["ignore", "pipe", "pipe"], + env: buildGbrainEnv({ baseEnv: opts.baseEnv, announce: opts.announce }), + }); +} diff --git a/lib/gbrain-local-status.ts b/lib/gbrain-local-status.ts index e646abd61..f546a93bc 100644 --- a/lib/gbrain-local-status.ts +++ b/lib/gbrain-local-status.ts @@ -101,13 +101,13 @@ export function resolveGbrainBin(env?: NodeJS.ProcessEnv): string | null { if (_gbrainBinCache.has(key)) return _gbrainBinCache.get(key)!; let result: string | null = null; try { - const out = execFileSync("sh", ["-c", "command -v gbrain"], { + execFileSync("gbrain", ["--version"], { encoding: "utf-8", timeout: 2_000, - stdio: ["ignore", "pipe", "ignore"], + stdio: ["ignore", "ignore", "ignore"], env: e, }); - result = out.trim() || null; + result = "gbrain"; } catch { result = null; } @@ -266,4 +266,3 @@ export function localEngineStatus(opts: ClassifyOptions = {}): LocalEngineStatus writeCache(fresh, key); return fresh; } - diff --git a/lib/gbrain-sources.ts b/lib/gbrain-sources.ts index 6cf219554..c8ffbad5a 100644 --- a/lib/gbrain-sources.ts +++ b/lib/gbrain-sources.ts @@ -53,7 +53,7 @@ export function probeSource(id: string, env?: NodeJS.ProcessEnv): SourceState { try { stdout = execFileSync("gbrain", ["sources", "list", "--json"], { encoding: "utf-8", - timeout: 10_000, + timeout: 30_000, stdio: ["ignore", "pipe", "pipe"], env, }); @@ -164,7 +164,7 @@ export function sourcePageCount(id: string, env?: NodeJS.ProcessEnv): number | n try { stdout = execFileSync("gbrain", ["sources", "list", "--json"], { encoding: "utf-8", - timeout: 10_000, + timeout: 30_000, stdio: ["ignore", "pipe", "pipe"], env, }); diff --git a/package.json b/package.json index 592493d5e..3851a78bd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "1.39.2.0", + "version": "1.40.0.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. 
One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/test/artifacts-init-migration.test.ts b/test/artifacts-init-migration.test.ts index e2f27f444..c09affffd 100644 --- a/test/artifacts-init-migration.test.ts +++ b/test/artifacts-init-migration.test.ts @@ -201,3 +201,133 @@ describe('v1.38.1.0 migration', () => { } }); }); + +// ────────────────────────────────────────────────────────────────────────── +// v1.40.0.0 — `projects/*/*-eng-review-test-plan-*.md` follow-on for #1452. +// v1.38.1.0 shipped the design + test-plan patterns but missed +// /plan-eng-review's filename. Codex review #5 flagged that +// v1.38.1.0's done-marker prevents users who already upgraded from picking +// up #1465's allowlist edit, so v1.40.0.0 needs its own migration. +// ────────────────────────────────────────────────────────────────────────── +const MIGRATION_V1_40 = join(REPO_ROOT, 'gstack-upgrade', 'migrations', 'v1.40.0.0.sh'); + +function runMigrationV140(fakeHome: string): { code: number; stdout: string; stderr: string } { + const proc = Bun.spawnSync({ + cmd: ['bash', MIGRATION_V1_40], + env: { ...process.env, HOME: fakeHome }, + stdout: 'pipe', + stderr: 'pipe', + }); + return { + code: proc.exitCode ?? -1, + stdout: new TextDecoder().decode(proc.stdout), + stderr: new TextDecoder().decode(proc.stderr), + }; +} + +describe('v1.40.0.0 migration', () => { + test('adds eng-review-test-plan pattern to allowlist on top of an installed v1.38.1.0 state', () => { + const home = setupFakeHome(); + try { + // Simulate post-v1.38.1.0 state: design + test-plan patterns present, + // done-marker set so the v1.38.1.0 migration wouldn't re-run. 
+ mkdirSync(join(home, '.gstack', '.migrations'), { recursive: true }); + writeFileSync(join(home, '.gstack', '.migrations', 'v1.38.1.0.done'), ''); + writeFileSync(join(home, '.gstack', '.brain-allowlist'), [ + 'projects/*/learnings.jsonl', + 'projects/*/designs/*.md', + 'projects/*/*-design-*.md', + 'projects/*/*-test-plan-*.md', + '# ---- USER ADDITIONS BELOW ---- (survives re-init; above is managed)', + 'projects/*/my-custom.txt', + ].join('\n') + '\n'); + + const r = runMigrationV140(home); + expect(r.code).toBe(0); + + const content = readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + expect(content).toContain('projects/*/*-eng-review-test-plan-*.md'); + // New pattern above the user marker. + const engRevIdx = content.indexOf('projects/*/*-eng-review-test-plan-*.md'); + const markerIdx = content.indexOf('# ---- USER ADDITIONS BELOW'); + expect(engRevIdx).toBeLessThan(markerIdx); + // User customizations below the marker preserved. + expect(content).toContain('projects/*/my-custom.txt'); + // v1.40.0.0 done-marker created. 
+ expect(existsSync(join(home, '.gstack', '.migrations', 'v1.40.0.0.done'))).toBe(true); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('adds eng-review-test-plan entry to privacy-map.json via jq', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-privacy-map.json'), JSON.stringify([ + { pattern: 'projects/*/*-design-*.md', class: 'artifact' }, + { pattern: 'projects/*/*-test-plan-*.md', class: 'artifact' }, + ], null, 2)); + + const r = runMigrationV140(home); + expect(r.code).toBe(0); + + const parsed = JSON.parse(readFileSync(join(home, '.gstack', '.brain-privacy-map.json'), 'utf-8')); + const patterns = parsed.map((e: any) => e.pattern); + expect(patterns).toContain('projects/*/*-eng-review-test-plan-*.md'); + expect(parsed.find((e: any) => e.pattern === 'projects/*/*-eng-review-test-plan-*.md').class).toBe('artifact'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('adds union-merge rule to gitattributes', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.gitattributes'), [ + 'projects/*/*-design-*.md merge=union', + 'projects/*/*-test-plan-*.md merge=union', + ].join('\n') + '\n'); + + const r = runMigrationV140(home); + expect(r.code).toBe(0); + + const content = readFileSync(join(home, '.gstack', '.gitattributes'), 'utf-8'); + expect(content).toContain('projects/*/*-eng-review-test-plan-*.md merge=union'); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('is idempotent: re-running is a no-op', () => { + const home = setupFakeHome(); + try { + writeFileSync(join(home, '.gstack', '.brain-allowlist'), + 'projects/*/*-eng-review-test-plan-*.md\n# ---- USER ADDITIONS BELOW ---- (survives re-init; above is managed)\n'); + + const r1 = runMigrationV140(home); + expect(r1.code).toBe(0); + + const r2 = runMigrationV140(home); + expect(r2.code).toBe(0); + + const content = 
readFileSync(join(home, '.gstack', '.brain-allowlist'), 'utf-8'); + const occurrences = content.match(/projects\/\*\/\*-eng-review-test-plan-\*\.md/g) || []; + expect(occurrences.length).toBe(1); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); + + test('writes done-marker even when files are missing', () => { + const home = setupFakeHome(); + try { + // No allowlist / privacy-map / gitattributes — fresh-init users with + // no federated artifacts yet. Migration should still mark itself done. + const r = runMigrationV140(home); + expect(r.code).toBe(0); + expect(existsSync(join(home, '.gstack', '.migrations', 'v1.40.0.0.done'))).toBe(true); + } finally { + rmSync(home, { recursive: true, force: true }); + } + }); +}); diff --git a/test/build-gbrain-env.test.ts b/test/build-gbrain-env.test.ts new file mode 100644 index 000000000..4066126d0 --- /dev/null +++ b/test/build-gbrain-env.test.ts @@ -0,0 +1,120 @@ +/** + * Unit tests for `buildGbrainEnv` in lib/gbrain-exec.ts. + * + * The helper is the single source of truth for "what DATABASE_URL does + * gbrain see when spawned from gstack." The bug it prevents: gbrain's + * dotenv autoload pulls a host project's `.env.local` `DATABASE_URL` + * instead of gbrain's own `~/.gbrain/config.json`. Every helper test + * asserts on the **effective value** of the returned env, never object + * identity — Codex review #11 flagged that returning the same mutable + * object can leak later mutation. 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; + +import { buildGbrainEnv } from "../lib/gbrain-exec"; + +describe("buildGbrainEnv", () => { + let home: string; + let gbrainHome: string; + + beforeEach(() => { + home = mkdtempSync(join(tmpdir(), "gstack-build-env-")); + gbrainHome = join(home, ".gbrain"); + mkdirSync(gbrainHome, { recursive: true }); + }); + + afterEach(() => { + rmSync(home, { recursive: true, force: true }); + }); + + it("seeds DATABASE_URL from ~/.gbrain/config.json when caller env has no DATABASE_URL", () => { + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv = { HOME: home }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://gbrain/db"); + }); + + it("overrides caller's DATABASE_URL when config differs", () => { + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv = { HOME: home, DATABASE_URL: "postgresql://app-local/wrong" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://gbrain/db"); + }); + + it("leaves DATABASE_URL untouched when GSTACK_RESPECT_ENV_DATABASE_URL=1", () => { + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv = { + HOME: home, + DATABASE_URL: "postgresql://intentional/app-db", + GSTACK_RESPECT_ENV_DATABASE_URL: "1", + }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://intentional/app-db"); + }); + + it("returns caller env unchanged when config file is missing", () => { + // No config.json written. 
+ const baseEnv = { HOME: home, DATABASE_URL: "postgresql://app/db" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://app/db"); + }); + + it("returns caller env unchanged when config file is unparseable", () => { + writeFileSync(join(gbrainHome, "config.json"), "{not json"); + const baseEnv = { HOME: home, DATABASE_URL: "postgresql://app/db" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://app/db"); + }); + + it("returns caller env unchanged when config has no database_url field", () => { + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ engine: "pglite" })); + const baseEnv = { HOME: home, DATABASE_URL: "postgresql://app/db" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://app/db"); + }); + + it("honors GBRAIN_HOME when set (config aligned with detectEngineTier)", () => { + // Move the config to an alternate dir; set GBRAIN_HOME to point at it. + const altGbrainHome = join(home, "alt-gbrain"); + mkdirSync(altGbrainHome, { recursive: true }); + writeFileSync(join(altGbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://alt/db" })); + // No file at the default ~/.gbrain location. + const baseEnv = { HOME: home, GBRAIN_HOME: altGbrainHome }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://alt/db"); + }); + + it("returns a fresh env object — never the caller's env by identity", () => { + // Codex review #11: object-identity equality lets later mutation of the + // returned env leak back into the caller's view. The helper MUST clone. + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv: NodeJS.ProcessEnv = { HOME: home, FOO: "bar" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result).not.toBe(baseEnv); + // Mutating result must not affect baseEnv. 
+ result.FOO = "changed"; + expect(baseEnv.FOO).toBe("bar"); + }); + + it("preserves unrelated env vars from the base env", () => { + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv = { HOME: home, PATH: "/usr/bin", FOO: "bar" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.PATH).toBe("/usr/bin"); + expect(result.FOO).toBe("bar"); + expect(result.HOME).toBe(home); + }); + + it("does not modify DATABASE_URL when caller's value already matches config", () => { + // Subtle: helper should be a no-op when caller already has the right value. + // Lets us skip the stderr announce on idempotent re-invocation. + writeFileSync(join(gbrainHome, "config.json"), JSON.stringify({ database_url: "postgresql://gbrain/db" })); + const baseEnv = { HOME: home, DATABASE_URL: "postgresql://gbrain/db" }; + const result = buildGbrainEnv({ baseEnv }); + expect(result.DATABASE_URL).toBe("postgresql://gbrain/db"); + }); +}); diff --git a/test/gbrain-exec-invariant.test.ts b/test/gbrain-exec-invariant.test.ts new file mode 100644 index 000000000..a0d962b4a --- /dev/null +++ b/test/gbrain-exec-invariant.test.ts @@ -0,0 +1,80 @@ +/** + * Static-source invariant: every gbrain CLI invocation in the hot-path + * sync code MUST route through `lib/gbrain-exec.ts` (or accept env via + * the existing `lib/gbrain-sources.ts` opts surface). A future contributor + * who adds a `spawnSync("gbrain", ...)` call directly in + * `bin/gstack-gbrain-sync.ts` or `bin/gstack-memory-ingest.ts` silently + * regresses the DATABASE_URL fix from #1508 + codex review #7 — gbrain's + * dotenv autoload pulls a host project's `.env.local` value instead of + * gbrain's own config. + * + * This test reads each source file directly and asserts zero direct + * `spawnSync("gbrain"`, `spawn("gbrain"`, `execFileSync("gbrain"`, or + * `execSync(...gbrain` matches. 
Bun runs TS directly so there is no + * compiled artifact to grep — the .ts source is the truth. + * + * The check is intentionally narrow: only the two files where the bug + * actually hurts users are guarded. Other gbrain spawn sites + * (`lib/gbrain-sources.ts`, `lib/gbrain-local-status.ts`, + * `lib/gstack-memory-helpers.ts`, `bin/gstack-brain-context-load.ts`) + * either already accept env from callers or run probes that don't need + * DATABASE_URL. Expanding the invariant to those files is a follow-up. + */ + +import { describe, it, expect } from "bun:test"; +import { readFileSync } from "fs"; +import { join } from "path"; + +const ROOT = join(import.meta.dir, ".."); + +const GUARDED_FILES = [ + "bin/gstack-gbrain-sync.ts", + "bin/gstack-memory-ingest.ts", +]; + +// Patterns that would bypass lib/gbrain-exec.ts. Match the literal `"gbrain"` +// as the first argument since these helpers are the failure mode. +const BANNED_PATTERNS: Array<{ name: string; regex: RegExp }> = [ + { name: 'spawnSync("gbrain", ...)', regex: /spawnSync\s*\(\s*["']gbrain["']/g }, + { name: 'spawn("gbrain", ...)', regex: /\bspawn\s*\(\s*["']gbrain["']/g }, + { name: 'execFileSync("gbrain", ...)', regex: /execFileSync\s*\(\s*["']gbrain["']/g }, + { name: 'execSync("...gbrain...")', regex: /execSync\s*\(\s*["'`][^"'`]*\bgbrain\b/g }, +]; + +describe("gbrain-exec invariant", () => { + for (const relpath of GUARDED_FILES) { + it(`${relpath} routes every gbrain spawn through lib/gbrain-exec.ts`, () => { + const source = readFileSync(join(ROOT, relpath), "utf-8"); + // Strip block comments and line comments before scanning — a + // documentation reference like `// spawnSync("gbrain", ...)` in a + // comment shouldn't trip the invariant. The strip is approximate + // (sufficient for the patterns we care about); production code + // should match cleanly. 
+ const stripped = source + .replace(/\/\*[\s\S]*?\*\//g, (m) => m.replace(/[^\n]/g, "")) + .replace(/\/\/.*$/gm, ""); + + for (const { name, regex } of BANNED_PATTERNS) { + const matches = stripped.match(regex) || []; + if (matches.length > 0) { + // Line numbers match the real file: comment newlines are kept above. + const lines = stripped.split("\n"); + const hits: string[] = []; + for (let i = 0; i < lines.length; i++) { + if (new RegExp(regex.source).test(lines[i])) { + hits.push(` ${relpath}:${i + 1}: ${lines[i].trim()}`); + } + } + throw new Error( + `Found ${matches.length} direct gbrain invocation(s) in ${relpath} matching \`${name}\`:\n${hits.join("\n")}\n\n` + + `Route every gbrain spawn through \`spawnGbrain\`/\`execGbrainJson\`/\`execGbrainText\` ` + + `in lib/gbrain-exec.ts so DATABASE_URL is seeded from gbrain's config.`, + ); + } + } + + // Positive assertion: the file should import from lib/gbrain-exec. + expect(source).toMatch(/from\s+["']\.\.\/lib\/gbrain-exec["']/); + }); + } +}); diff --git a/test/gbrain-local-status.test.ts b/test/gbrain-local-status.test.ts index 272a99289..90744bb2c 100644 --- a/test/gbrain-local-status.test.ts +++ b/test/gbrain-local-status.test.ts @@ -21,6 +21,7 @@ import { describe, it, expect, beforeEach, afterEach } from "bun:test"; import { mkdtempSync, writeFileSync, + readFileSync, mkdirSync, rmSync, chmodSync, @@ -160,6 +161,16 @@ describe("lib/gbrain-local-status — five status cases", () => { restoreEnv = null; }); + it("probes the gbrain executable directly instead of shelling through command -v", () => { + const source = readFileSync( + join(import.meta.dir, "..", "lib", "gbrain-local-status.ts"), + "utf-8", + ); + + expect(source).not.toContain('command -v gbrain'); + expect(source).toContain('execFileSync("gbrain", ["--version"]'); + }); + it("returns 'no-cli' when gbrain is not on PATH", () => { env = makeEnv({ withGbrain: false }); restoreEnv = applyEnv(env); diff --git a/test/gbrain-source-gitignore.test.ts b/test/gbrain-source-gitignore.test.ts 
new file mode 100644 index 000000000..1fd1db05e --- /dev/null +++ b/test/gbrain-source-gitignore.test.ts @@ -0,0 +1,96 @@ +/** + * Unit tests for the `.gbrain-source` gitignore append done by + * `runCodeImport` after a successful `gbrain sources attach`. + * + * Covers #1384: v1.29.0.0 changelog promised the per-worktree pin would be + * ignored in the consuming repo, but the change actually only added + * `.gbrain-source` to gstack's own `.gitignore`. Without the consumer-side + * entry, Conductor sibling worktrees commit the pin and clobber each other. + */ + +import { describe, it, expect, beforeEach, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, chmodSync, statSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; + +import { ensureGbrainSourceGitignored } from "../bin/gstack-gbrain-sync"; + +describe("ensureGbrainSourceGitignored", () => { + let root: string; + + beforeEach(() => { + root = mkdtempSync(join(tmpdir(), "gstack-gbrain-gitignore-")); + }); + + afterEach(() => { + rmSync(root, { recursive: true, force: true }); + }); + + it("creates .gitignore with the pin entry when none exists", () => { + const gitignorePath = join(root, ".gitignore"); + expect(existsSync(gitignorePath)).toBe(false); + + ensureGbrainSourceGitignored(root); + + expect(existsSync(gitignorePath)).toBe(true); + expect(readFileSync(gitignorePath, "utf-8")).toBe(".gbrain-source\n"); + }); + + it("appends the pin entry to an existing .gitignore without trailing newline", () => { + const gitignorePath = join(root, ".gitignore"); + writeFileSync(gitignorePath, "node_modules\n.env"); + + ensureGbrainSourceGitignored(root); + + expect(readFileSync(gitignorePath, "utf-8")).toBe( + "node_modules\n.env\n.gbrain-source\n", + ); + }); + + it("appends the pin entry to an existing .gitignore with trailing newline", () => { + const gitignorePath = join(root, ".gitignore"); + writeFileSync(gitignorePath, 
"node_modules\n.env\n"); + + ensureGbrainSourceGitignored(root); + + expect(readFileSync(gitignorePath, "utf-8")).toBe( + "node_modules\n.env\n.gbrain-source\n", + ); + }); + + it("is idempotent: does not duplicate the pin entry on a second call", () => { + const gitignorePath = join(root, ".gitignore"); + writeFileSync(gitignorePath, "node_modules\n.gbrain-source\n.env\n"); + + ensureGbrainSourceGitignored(root); + ensureGbrainSourceGitignored(root); + + const lines = readFileSync(gitignorePath, "utf-8").split("\n"); + const hits = lines.filter((line) => line.trim() === ".gbrain-source"); + expect(hits.length).toBe(1); + }); + + it("recognizes the entry even when it has surrounding whitespace", () => { + const gitignorePath = join(root, ".gitignore"); + writeFileSync(gitignorePath, "node_modules\n .gbrain-source \n"); + + ensureGbrainSourceGitignored(root); + + const lines = readFileSync(gitignorePath, "utf-8").split("\n"); + const hits = lines.filter((line) => line.trim() === ".gbrain-source"); + expect(hits.length).toBe(1); + }); + + it("does not throw when the .gitignore is read-only", () => { + const gitignorePath = join(root, ".gitignore"); + writeFileSync(gitignorePath, "node_modules\n"); + const originalMode = statSync(gitignorePath).mode; + chmodSync(gitignorePath, 0o444); + try { + // Must not throw — sync stage continues on write failure. + expect(() => ensureGbrainSourceGitignored(root)).not.toThrow(); + } finally { + chmodSync(gitignorePath, originalMode); + } + }); +}); diff --git a/test/gstack-gbrain-sync.test.ts b/test/gstack-gbrain-sync.test.ts index 528d6deed..0f1edec21 100644 --- a/test/gstack-gbrain-sync.test.ts +++ b/test/gstack-gbrain-sync.test.ts @@ -7,12 +7,19 @@ * preview + state file lifecycle + flag composition. 
*/ -import { describe, it, expect } from "bun:test"; -import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync } from "fs"; +import { describe, it, expect, beforeEach, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync, chmodSync } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import { spawnSync } from "child_process"; +import { + derivePathOnlyHashLegacyId, + planHostnameFoldMigration, + sourceLocalPath, + _resetGbrainSupportsRenameCache, +} from "../bin/gstack-gbrain-sync"; + const SCRIPT = join(import.meta.dir, "..", "bin", "gstack-gbrain-sync.ts"); function makeTestHome(): string { @@ -48,6 +55,13 @@ describe("gstack-gbrain-sync CLI", () => { expect(r.stderr).toContain("Unknown argument: --bogus"); }); + it("uses the shared local gbrain status classifier instead of shelling through command -v", () => { + const source = readFileSync(SCRIPT, "utf-8"); + + expect(source).not.toContain('command -v gbrain'); + expect(source).toContain("localEngineStatus"); + }); + it("--dry-run with --code-only reports the code import preview only", () => { const home = makeTestHome(); const gstackHome = join(home, ".gstack"); @@ -215,6 +229,62 @@ describe("gstack-gbrain-sync CLI", () => { rmSync(home, { recursive: true, force: true }); }); + it("derives distinct source ids for the same absolute path on different hosts", () => { + // Issue #1414: two machines with identical home-dir layouts (chezmoi-managed + // dotfiles, ansible-provisioned VMs) collide on the same source id when + // federated against a shared gbrain DB, because the pre-fix `pathHash` was + // sha1(absolute path) only — host-agnostic. Folding hostname into the hash + // key keeps them distinct. `GSTACK_HOSTNAME` env var is the test-only knob; + // production uses `os.hostname()`. 
+ const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const repo = mkdtempSync(join(tmpdir(), "gstack-host-collide-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + spawnSync("git", ["remote", "add", "origin", "https://github.com/example/multihost.git"], { cwd: repo }); + + // Dry-run still gates the code stage on finding `gbrain` on PATH. Drop a no-op + // shim on PATH so the stage runs (we only assert the preview line, never + // invoke gbrain itself). + const bindir = mkdtempSync(join(tmpdir(), "gstack-host-collide-bin-")); + const shim = join(bindir, "gbrain"); + writeFileSync(shim, "#!/bin/sh\nexit 0\n"); + chmodSync(shim, 0o755); + const PATH = `${bindir}:${process.env.PATH || ""}`; + + const runAs = (host: string) => + spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], { + encoding: "utf-8", + timeout: 60000, + cwd: repo, + env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome, GSTACK_HOSTNAME: host, PATH }, + }); + + const a = runAs("machine-a"); + const b = runAs("machine-b"); + expect(a.status).toBe(0); + expect(b.status).toBe(0); + const idA = (a.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + const idB = (b.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(idA).toBeTruthy(); + expect(idB).toBeTruthy(); + expect(idA).not.toBe(idB); + // Both still gbrain-valid. + const VALID_ID = /^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/; + expect(idA!).toMatch(VALID_ID); + expect(idB!).toMatch(VALID_ID); + + // Same host + same path stays stable across invocations. 
+ const a2 = runAs("machine-a"); + expect(a2.status).toBe(0); + const idA2 = (a2.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(idA2).toBe(idA); + + rmSync(repo, { recursive: true, force: true }); + rmSync(home, { recursive: true, force: true }); + rmSync(bindir, { recursive: true, force: true }); + }); + it("dry-run does NOT acquire the lock file (lock is for write paths only)", () => { const home = makeTestHome(); const gstackHome = join(home, ".gstack"); @@ -476,3 +546,295 @@ describe("gstack-gbrain-sync CLI", () => { rmSync(home, { recursive: true, force: true }); }); }); + +// ────────────────────────────────────────────────────────────────────────── +// Hostname-fold migration (v1.40.0.0) +// +// Tests for `derivePathOnlyHashLegacyId` and `planHostnameFoldMigration`, +// which together let an existing user's pre-#1468 path-only-hash source +// transition to the new hostname-folded id without orphaning pages or +// creating a data-loss window. See bin/gstack-gbrain-sync.ts and the +// gbrain-sync-hardening plan. +// ────────────────────────────────────────────────────────────────────────── + +/** + * Build a gbrain shim that responds to specific subcommands with canned + * output, then return PATH-prepend value. Lets us run helpers in-process + * (which spawn `gbrain` from PATH) without a real gbrain CLI. + */ +function makeShim(bindir: string, responses: Record<string, { stdout?: string; stderr?: string; exit?: number }>): string { + const shim = join(bindir, "gbrain"); + const cases = Object.entries(responses).map(([key, r]) => { + const exit = r.exit ?? 0; + const stdout = (r.stdout || "").replace(/'/g, "'\\''"); + const stderr = (r.stderr || "").replace(/'/g, "'\\''"); + // Patterns with spaces MUST be double-quoted in sh case statements, + // otherwise the shell parses the second word as the start of the next + // pattern and errors out. 
+ return ` "${key}") printf '%s' '${stdout}'; printf '%s' '${stderr}' >&2; exit ${exit} ;;`; + }).join("\n"); + // Match on the full argument string, joined with literal spaces. + const script = `#!/bin/sh\nARGS="$*"\ncase "$ARGS" in\n${cases}\n *) echo "shim: no match for [$ARGS]" >&2; exit 1 ;;\nesac\n`; + writeFileSync(shim, script); + chmodSync(shim, 0o755); + return shim; +} + +describe("derivePathOnlyHashLegacyId", () => { + it("returns the pre-#1468 form (path-only sha1, no hostname)", () => { + // Pure function — no subprocess. The same repoPath must yield the same + // legacy id regardless of $GSTACK_HOSTNAME, because the pre-#1468 hash + // didn't include hostname. + const repo = mkdtempSync(join(tmpdir(), "gstack-legacy-id-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + spawnSync("git", ["remote", "add", "origin", "https://github.com/example/legacy-test.git"], { cwd: repo }); + + const cwd = process.cwd(); + try { + process.chdir(repo); + const a = derivePathOnlyHashLegacyId(repo); + process.env.GSTACK_HOSTNAME = "machine-a"; + const b = derivePathOnlyHashLegacyId(repo); + process.env.GSTACK_HOSTNAME = "machine-b"; + const c = derivePathOnlyHashLegacyId(repo); + expect(a).toBe(b); + expect(b).toBe(c); + expect(a.startsWith("gstack-code-")).toBe(true); + expect(a.length).toBeLessThanOrEqual(32); + } finally { + delete process.env.GSTACK_HOSTNAME; + process.chdir(cwd); + rmSync(repo, { recursive: true, force: true }); + } + }); + + it("produces a different id than the new hostname-folded form", () => { + // The whole point of the migration: the path-only-hash legacy id and the + // host-fold id must differ for any non-empty hostname, so the migration + // can detect + clean up the orphan. 
+ const repo = mkdtempSync(join(tmpdir(), "gstack-legacy-id-distinct-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + spawnSync("git", ["remote", "add", "origin", "https://github.com/example/distinct.git"], { cwd: repo }); + + const cwd = process.cwd(); + try { + process.chdir(repo); + process.env.GSTACK_HOSTNAME = "machine-x"; + const legacy = derivePathOnlyHashLegacyId(repo); + // Drive the new id through the CLI so we use the same code path users hit. + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const bindir = mkdtempSync(join(tmpdir(), "gstack-legacy-id-distinct-bin-")); + makeShim(bindir, { "--help": { stdout: "gbrain\n" } }); + const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], { + encoding: "utf-8", + timeout: 60000, + cwd: repo, + env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome, GSTACK_HOSTNAME: "machine-x", PATH: `${bindir}:${process.env.PATH || ""}` }, + }); + const newId = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(newId).toBeTruthy(); + expect(newId).not.toBe(legacy); + rmSync(home, { recursive: true, force: true }); + rmSync(bindir, { recursive: true, force: true }); + } finally { + delete process.env.GSTACK_HOSTNAME; + process.chdir(cwd); + rmSync(repo, { recursive: true, force: true }); + } + }); +}); + +/** + * Build an env dict that prepends `bindir` to PATH. Bun's spawnSync does NOT + * pick up runtime mutations of `process.env.PATH` — the env must be passed + * explicitly to each spawn for the override to take effect. 
+ */ +function envWithBindir(bindir: string): NodeJS.ProcessEnv { + return { ...process.env, PATH: `${bindir}:${process.env.PATH || ""}` }; +} + +describe("planHostnameFoldMigration", () => { + let bindir: string; + + beforeEach(() => { + bindir = mkdtempSync(join(tmpdir(), "gstack-mig-plan-bin-")); + _resetGbrainSupportsRenameCache(); + }); + afterEach(() => { + rmSync(bindir, { recursive: true, force: true }); + _resetGbrainSupportsRenameCache(); + }); + + it("returns ids-match when legacy == new (degenerate case)", () => { + const result = planHostnameFoldMigration("/repo/path", "gstack-code-same-abc12345", "gstack-code-same-abc12345"); + expect(result).toEqual({ kind: "none", reason: "ids-match" }); + }); + + it("returns no-legacy-source when sources list does not include the legacy id", () => { + makeShim(bindir, { + "sources list --json": { stdout: "[]" }, + }); + const result = planHostnameFoldMigration("/repo/path", "new-id", "legacy-id", envWithBindir(bindir)); + expect(result).toEqual({ kind: "none", reason: "no-legacy-source" }); + }); + + it("returns skipped-path-drift when old source local_path differs from current repo root", () => { + makeShim(bindir, { + "sources list --json": { + stdout: JSON.stringify([{ id: "legacy-id", local_path: "/some/other/repo" }]), + }, + }); + const result = planHostnameFoldMigration("/repo/here", "new-id", "legacy-id", envWithBindir(bindir)); + expect(result.kind).toBe("skipped-path-drift"); + if (result.kind === "skipped-path-drift") { + expect(result.oldId).toBe("legacy-id"); + expect(result.oldPath).toBe("/some/other/repo"); + expect(result.currentPath).toBe("/repo/here"); + } + }); + + it("returns renamed when rename is supported and exits 0", () => { + makeShim(bindir, { + "sources list --json": { + stdout: JSON.stringify([{ id: "legacy-id", local_path: "/repo/here" }]), + }, + "sources rename --help": { + stdout: "Usage: gbrain sources rename \n", + }, + "sources rename legacy-id new-id": { exit: 0 }, + }); + const 
result = planHostnameFoldMigration("/repo/here", "new-id", "legacy-id", envWithBindir(bindir)); + expect(result).toEqual({ kind: "renamed", oldId: "legacy-id", newId: "new-id" }); + }); + + it("returns pending-cleanup when rename is unsupported (current gbrain 0.35.0.0)", () => { + makeShim(bindir, { + "sources list --json": { + stdout: JSON.stringify([{ id: "legacy-id", local_path: "/repo/here" }]), + }, + // No `sources rename --help` match → shim falls into the catch-all and exits 1. + }); + const result = planHostnameFoldMigration("/repo/here", "new-id", "legacy-id", envWithBindir(bindir)); + expect(result).toEqual({ kind: "pending-cleanup", oldId: "legacy-id" }); + }); + + it("returns pending-cleanup when rename is supported but the rename call itself fails", () => { + makeShim(bindir, { + "sources list --json": { + stdout: JSON.stringify([{ id: "legacy-id", local_path: "/repo/here" }]), + }, + "sources rename --help": { + stdout: "Usage: gbrain sources rename \n", + }, + "sources rename legacy-id new-id": { exit: 1, stderr: "rename failed: db locked" }, + }); + const result = planHostnameFoldMigration("/repo/here", "new-id", "legacy-id", envWithBindir(bindir)); + expect(result).toEqual({ kind: "pending-cleanup", oldId: "legacy-id" }); + }); +}); + +describe("constrainSourceId truncation (hyphen-boundary cut)", () => { + // PR #1481 (Drummerms): the old slug.slice(-tailBudget) cut mid-word when + // the boundary fell inside a token. For a long repo like + // `drummerms-av-sow-wiz-skill-270c0001` the truncated tail used to end in + // `kill-270c0001` (from `skill`). The new tokenized cut walks hyphen + // boundaries from the right and only keeps whole tokens. + // + // Exercised via the dry-run preview (`gbrain sources add gstack-code-…`), + // since constrainSourceId is module-private. 
+ it("never produces mid-word truncation artifacts like `kill` (from `skill`)", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const repo = mkdtempSync(join(tmpdir(), "gstack-hyphen-cut-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + // Remote chosen to be long enough that constrainSourceId truncates and + // the boundary lands inside the word `skill`. + spawnSync("git", ["remote", "add", "origin", "https://github.com/drummerms-av-sow-wiz/skill-270c0001.git"], { cwd: repo }); + + const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], { + encoding: "utf-8", + timeout: 60000, + cwd: repo, + env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome }, + }); + expect(r.status).toBe(0); + const id = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(id).toBeTruthy(); + // The id must not contain the mid-word fragment `kill` (left over from + // slicing inside `skill`). Tokens that survive truncation must be whole. + expect(id).not.toMatch(/(^|-)kill(-|$)/); + // Still gbrain-valid. + expect(id!.length).toBeLessThanOrEqual(32); + expect(id!).toMatch(/^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/); + + rmSync(repo, { recursive: true, force: true }); + rmSync(home, { recursive: true, force: true }); + }); + + // Closes #1357: HTTPS remotes ending in `.git` used to pass periods through + // to the source id. canonicalizeRemote strips the `.git` suffix; the + // sanitizer also strips any residual non-alnum. Test asserts the source id + // is period-free for the exact case from the issue. 
+ it("produces a period-free source id for HTTPS remotes ending in .git (#1357)", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const repo = mkdtempSync(join(tmpdir(), "gstack-https-period-")); + spawnSync("git", ["init", "--quiet", "-b", "main"], { cwd: repo }); + spawnSync("git", ["remote", "add", "origin", "https://github.com/foo/bar.git"], { cwd: repo }); + + const r = spawnSync("bun", [SCRIPT, "--dry-run", "--code-only", "--quiet"], { + encoding: "utf-8", + timeout: 60000, + cwd: repo, + env: { ...process.env, HOME: home, GSTACK_HOME: gstackHome }, + }); + expect(r.status).toBe(0); + const id = (r.stdout || "").match(/gbrain sources add (\S+)/)?.[1]; + expect(id).toBeTruthy(); + expect(id).not.toContain("."); + expect(id!).toMatch(/^[a-z0-9](?:[a-z0-9-]{0,30}[a-z0-9])?$/); + + rmSync(repo, { recursive: true, force: true }); + rmSync(home, { recursive: true, force: true }); + }); +}); + +describe("sourceLocalPath", () => { + let bindir: string; + beforeEach(() => { + bindir = mkdtempSync(join(tmpdir(), "gstack-source-lp-bin-")); + }); + afterEach(() => { + rmSync(bindir, { recursive: true, force: true }); + }); + + it("returns local_path when the source exists", () => { + makeShim(bindir, { + "sources list --json": { + stdout: JSON.stringify([ + { id: "other-source", local_path: "/x" }, + { id: "target-id", local_path: "/repo/match" }, + ]), + }, + }); + expect(sourceLocalPath("target-id", envWithBindir(bindir))).toBe("/repo/match"); + }); + + it("returns null when the source is missing", () => { + makeShim(bindir, { + "sources list --json": { stdout: "[]" }, + }); + expect(sourceLocalPath("missing-id", envWithBindir(bindir))).toBeNull(); + }); + + it("returns null when gbrain exits non-zero or returns malformed JSON", () => { + makeShim(bindir, { + "sources list --json": { exit: 2, stderr: "db unreachable" }, + }); + expect(sourceLocalPath("any-id", 
envWithBindir(bindir))).toBeNull(); + }); +}); diff --git a/test/gstack-memory-ingest.test.ts b/test/gstack-memory-ingest.test.ts index 638a2a6d5..fef9070c4 100644 --- a/test/gstack-memory-ingest.test.ts +++ b/test/gstack-memory-ingest.test.ts @@ -421,6 +421,16 @@ esac } describe("gstack-memory-ingest writer (gbrain v0.20+ batch `import` interface)", () => { + it("probes the gbrain executable directly instead of shelling through command -v", () => { + const source = readFileSync(SCRIPT, "utf-8"); + + expect(source).not.toContain('command -v gbrain'); + // v1.40.0.0: probe routes through lib/gbrain-exec.ts's execGbrainText helper + // (codex review #4 — centralized gbrain spawn surface). Pre-v1.40 the call + // was a direct `execFileSync("gbrain", ["--help"], ...)` inline. + expect(source).toContain('execGbrainText(["--help"]'); + }); + it("invokes `gbrain import --no-embed --json` exactly once with hierarchical staging", () => { const home = makeTestHome(); const gstackHome = join(home, ".gstack");