diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index e2ce7a4b..884e7565 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -4,29 +4,38 @@ * * Orchestrates three storage tiers per plan §"Storage tiering": * - * 1. Code (current repo) → gbrain import (Supabase or local PGLite) + * 1. Code (current repo) → `gbrain sources add` (idempotent via + * lib/gbrain-sources.ts) + `gbrain sync + * --strategy code` (incremental) or + * `gbrain reindex-code --yes` (--full). + * NEVER `gbrain import` (markdown only). * 2. Transcripts + curated memory → gstack-memory-ingest (typed put_page) * 3. Curated artifacts to git → gstack-brain-sync (existing pipeline) * * Modes: * --incremental (default) — mtime fast-path; runs all 3 stages with cache hits - * --full — first-run; full walk + import; honest budget per ED2 - * --dry-run — preview what would sync; no writes + * --full — first-run; full walk + reindex; honest budget per ED2 + * --dry-run — preview what would sync; no writes anywhere (incl. state file) * - * --watch (V1.5 P0 TODO): file-watcher daemon. Deferred per Codex F3 ("no daemon" - * invariant). For V1, continuous sync rides the preamble-boundary hook only. + * Concurrency safety per /plan-eng-review D1: + * - Lock file at ~/.gstack/.sync-gbrain.lock (PID + start ts). + * - Stale-lock takeover after 5 min (process death). + * - State file written via tmp+rename for atomicity. + * - Lock released in finally; SIGINT/SIGTERM trapped for cleanup. * - * Cross-repo TODO (V1.5): when gbrain CLI ships `put_file` + `restore-from-sync`, - * this helper picks them up via version probe (Codex F6 + D9) and routes - * code/transcripts to Supabase Storage instead of put_page. + * --watch (V1.5 P0 TODO): file-watcher daemon. 
NOTE: gbrain v0.25.1 already + * ships `gbrain sync --watch [--interval N]` and `gbrain sync --install-cron`; + * when revisited, /sync-gbrain --watch wires through to the gbrain CLI rather + * than building a gstack-side daemon. */ -import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync } from "fs"; +import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs"; import { join, dirname } from "path"; -import { execSync, spawnSync } from "child_process"; +import { execSync, execFileSync, spawnSync } from "child_process"; import { homedir } from "os"; -import { detectEngineTier, withErrorContext } from "../lib/gstack-memory-helpers"; +import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers"; +import { sourcePageCount } from "../lib/gbrain-sources"; // ── Types ────────────────────────────────────────────────────────────────── @@ -41,12 +50,22 @@ interface CliArgs { codeOnly: boolean; } +interface CodeStageDetail { + source_id?: string; + source_path?: string; + page_count?: number | null; + last_imported?: string; + status?: "ok" | "skipped" | "failed"; +} + interface StageResult { name: string; ran: boolean; ok: boolean; duration_ms: number; summary: string; + /** Stage-specific structured detail. Code stage carries source_id + page_count. */ + detail?: CodeStageDetail; } // ── Constants ────────────────────────────────────────────────────────────── @@ -54,6 +73,8 @@ interface StageResult { const HOME = homedir(); const GSTACK_HOME = process.env.GSTACK_HOME || join(HOME, ".gstack"); const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json"); +const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock"); +const STALE_LOCK_MS = 5 * 60 * 1000; // ── CLI ──────────────────────────────────────────────────────────────────── @@ -62,18 +83,18 @@ function printUsage(): void { Modes: --incremental Default. mtime fast-path; ~50ms steady-state. 
- --full First-run; full walk + import. Honest ~25-35 min for big Macs (ED2). - --dry-run Preview what would sync; no writes. + --full First-run; full walk + reindex. Honest ~25-35 min for big Macs (ED2). + --dry-run Preview what would sync; no writes anywhere. Options: --quiet Suppress per-stage output. - --no-code Skip the gbrain import (current repo) stage. + --no-code Skip the cwd code-import stage. --no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts). --no-brain-sync Skip the gstack-brain-sync git pipeline stage. - --code-only Only run the gbrain import stage (alias for --no-memory --no-brain-sync). + --code-only Only run the code-import stage (alias for --no-memory --no-brain-sync). --help This text. -Stages run in order: code import → memory ingest → curated git push. +Stages run in order: code → memory ingest → curated git push. Each stage failure is non-fatal; subsequent stages still run. `); } @@ -116,7 +137,7 @@ function parseArgs(): CliArgs { return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly }; } -// ── Stage runners ────────────────────────────────────────────────────────── +// ── Helpers ──────────────────────────────────────────────────────────────── function repoRoot(): string | null { try { @@ -127,6 +148,32 @@ function repoRoot(): string | null { } } +function originUrl(): string | null { + try { + const out = execSync("git remote get-url origin", { encoding: "utf-8", timeout: 2000 }); + return out.trim(); + } catch { + return null; + } +} + +/** + * Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`, + * where <slug> comes from canonicalizeRemote() then `/` → `-` (e.g., + * `github.com/garrytan/gstack` → `gstack-code-github-com-garrytan-gstack`). + * + * Falls back to `gstack-code-<basename>` when there is no origin (local repo). 
+ */ +function deriveCodeSourceId(repoPath: string): string { + const remote = canonicalizeRemote(originUrl()); + if (remote) { + return `gstack-code-${remote.replace(/[\/\s]+/g, "-").replace(/-+/g, "-")}`; + } + // Fallback for repos without a remote. + const base = repoPath.split("/").pop() || "repo"; + return `gstack-code-${base.toLowerCase().replace(/[^a-z0-9-]+/g, "-").replace(/-+/g, "-")}`; +} + function gbrainAvailable(): boolean { try { execSync("command -v gbrain", { stdio: "ignore" }); @@ -136,6 +183,55 @@ function gbrainAvailable(): boolean { } } +// ── Lock file (D1) ───────────────────────────────────────────────────────── + +interface LockInfo { + pid: number; + started_at: string; +} + +function acquireLock(): boolean { + mkdirSync(GSTACK_HOME, { recursive: true }); + if (existsSync(LOCK_PATH)) { + // Check if stale. + try { + const stat = statSync(LOCK_PATH); + const ageMs = Date.now() - stat.mtimeMs; + if (ageMs > STALE_LOCK_MS) { + // Stale; take over. + unlinkSync(LOCK_PATH); + } else { + return false; + } + } catch { + // Cannot stat; bail conservatively. + return false; + } + } + const info: LockInfo = { pid: process.pid, started_at: new Date().toISOString() }; + try { + writeFileSync(LOCK_PATH, JSON.stringify(info), { encoding: "utf-8", flag: "wx" }); + return true; + } catch { + return false; + } +} + +function releaseLock(): void { + try { + if (!existsSync(LOCK_PATH)) return; + const raw = readFileSync(LOCK_PATH, "utf-8"); + const info = JSON.parse(raw) as LockInfo; + if (info.pid === process.pid) { + unlinkSync(LOCK_PATH); + } + } catch { + // Best-effort cleanup. 
+ } +} + +// ── Stage runners ────────────────────────────────────────────────────────── + function runCodeImport(args: CliArgs): StageResult { const t0 = Date.now(); const root = repoRoot(); @@ -145,42 +241,135 @@ function runCodeImport(args: CliArgs): StageResult { if (!gbrainAvailable()) { return { name: "code", ran: false, ok: false, duration_ms: 0, summary: "skipped (gbrain CLI not in PATH)" }; } + + const sourceId = deriveCodeSourceId(root); + if (args.mode === "dry-run") { - return { name: "code", ran: false, ok: true, duration_ms: 0, summary: `would: gbrain import ${root} --no-embed` }; - } - - const importArgs = ["import", root, "--no-embed"]; - if (args.mode === "incremental") { - // gbrain import is itself idempotent on re-import; --incremental flag if it supports - importArgs.push("--incremental"); - } - - try { - spawnSync("gbrain", importArgs, { - stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"], - timeout: 5 * 60 * 1000, - }); - // Trigger background embedding catch-up - spawnSync("gbrain", ["embed", "--stale"], { - stdio: ["ignore", "ignore", "ignore"], - timeout: 1000, // background spawn; don't wait - }); return { name: "code", - ran: true, + ran: false, ok: true, - duration_ms: Date.now() - t0, - summary: `imported ${root}`, + duration_ms: 0, + summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}`, + detail: { source_id: sourceId, source_path: root, status: "skipped" }, }; + } + + // Step 1: Ensure source registered (idempotent). + let registered = false; + try { + // ensureSourceRegistered is async — but we're in a sync stage runner. Use a deasync pattern. + // Bun supports top-level await in main(), but stage runners are sync per orchestrator contract. + // Workaround: run as a child Bun script for the registration probe. + // Simpler: call gbrain CLI directly via the sync helpers in lib/gbrain-sources.ts probeSource. 
+ // For symmetry, we duplicate the small ensureSourceRegistered logic synchronously here using + // execFileSync. (The lib helper is preferred for async callers; sync helpers below.) + registered = ensureSourceRegisteredSync(sourceId, root); } catch (err) { return { name: "code", ran: true, ok: false, duration_ms: Date.now() - t0, - summary: `gbrain import failed: ${(err as Error).message}`, + summary: `source registration failed: ${(err as Error).message}`, + detail: { source_id: sourceId, source_path: root, status: "failed" }, }; } + + // Step 2: Run sync or reindex. + const syncArgs = args.mode === "full" + ? ["reindex-code", "--source", sourceId, "--yes"] + : ["sync", "--strategy", "code", "--source", sourceId]; + + const syncResult = spawnSync("gbrain", syncArgs, { + stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"], + timeout: 35 * 60 * 1000, + }); + + if (syncResult.status !== 0) { + return { + name: "code", + ran: true, + ok: false, + duration_ms: Date.now() - t0, + summary: `gbrain ${syncArgs.join(" ")} exited ${syncResult.status}`, + detail: { source_id: sourceId, source_path: root, status: "failed" }, + }; + } + + // Step 3: Read page_count from gbrain sources list. + const pageCount = sourcePageCount(sourceId); + + return { + name: "code", + ran: true, + ok: true, + duration_ms: Date.now() - t0, + summary: `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"})`, + detail: { + source_id: sourceId, + source_path: root, + page_count: pageCount, + last_imported: new Date().toISOString(), + status: "ok", + }, + }; +} + +/** + * Synchronous mirror of ensureSourceRegistered for use inside the synchronous + * stage runner. Returns true if registration changed (added or re-added). + */ +function ensureSourceRegisteredSync(id: string, path: string): boolean { + // Probe. 
+ let probeOut: string; + try { + probeOut = execFileSync("gbrain", ["sources", "list", "--json"], { + encoding: "utf-8", + timeout: 10_000, + stdio: ["ignore", "pipe", "pipe"], + }); + } catch (err) { + const e = err as NodeJS.ErrnoException & { stderr?: Buffer }; + const stderr = e.stderr?.toString() || ""; + if (e.code === "ENOENT") throw new Error("gbrain CLI not on PATH"); + if (stderr.includes("Cannot connect to database") || stderr.includes("config.json")) { + throw new Error("gbrain not configured (run /setup-gbrain)"); + } + throw err; + } + + let parsed: { sources?: Array<{ id?: string; local_path?: string }> }; + try { + parsed = JSON.parse(probeOut); + } catch (err) { + throw new Error(`gbrain sources list returned non-JSON: ${(err as Error).message}`); + } + const sources = parsed.sources || []; + const match = sources.find((s) => s.id === id); + + if (match && match.local_path === path) { + return false; // no-op + } + + if (match && match.local_path !== path) { + const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], { + encoding: "utf-8", + timeout: 30_000, + }); + if (rm.status !== 0) { + throw new Error(`gbrain sources remove ${id} failed: ${rm.stderr || rm.stdout || `exit ${rm.status}`}`); + } + } + + const add = spawnSync("gbrain", ["sources", "add", id, "--path", path, "--federated"], { + encoding: "utf-8", + timeout: 30_000, + }); + if (add.status !== 0) { + throw new Error(`gbrain sources add ${id} failed: ${add.stderr || add.stdout || `exit ${add.status}`}`); + } + return true; } function runMemoryIngest(args: CliArgs): StageResult { @@ -198,7 +387,7 @@ function runMemoryIngest(args: CliArgs): StageResult { const result = spawnSync("bun", ingestArgs, { encoding: "utf-8", - timeout: 35 * 60 * 1000, // honest 35-min ceiling per ED2 + timeout: 35 * 60 * 1000, }); const summary = (result.stderr || "").split("\n").filter((l) => l.includes("[memory-ingest]")).slice(-1)[0] || "ingest pass complete"; @@ -224,7 +413,6 @@ function 
runBrainSyncPush(args: CliArgs): StageResult { return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" }; } - // Discover new artifacts then drain queue spawnSync(brainSyncPath, ["--discover-new"], { stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"], timeout: 60 * 1000, @@ -243,7 +431,7 @@ function runBrainSyncPush(args: CliArgs): StageResult { }; } -// ── State file (records last sync timestamp + stage outcomes) ────────────── +// ── State file ───────────────────────────────────────────────────────────── interface SyncState { schema_version: 1; @@ -266,10 +454,16 @@ function loadSyncState(): SyncState { return { schema_version: 1, last_writer: "gstack-gbrain-sync" }; } +/** + * Atomic state file write per /plan-eng-review D1: write tmp file then rename. + * rename(2) is atomic on POSIX filesystems. + */ function saveSyncState(state: SyncState): void { try { mkdirSync(dirname(STATE_PATH), { recursive: true }); - writeFileSync(STATE_PATH, JSON.stringify(state, null, 2), "utf-8"); + const tmp = `${STATE_PATH}.tmp.${process.pid}`; + writeFileSync(tmp, JSON.stringify(state, null, 2), "utf-8"); + renameSync(tmp, STATE_PATH); } catch { // non-fatal } @@ -293,40 +487,67 @@ async function main(): Promise { console.error(`[gbrain-sync] mode=${args.mode} engine=${engine.engine}`); } - const state = loadSyncState(); - const stages: StageResult[] = []; - - if (!args.noCode) { - stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync")); - } - if (!args.noMemory) { - stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync")); - } - if (!args.noBrainSync) { - stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync")); + // Acquire lock (skip on dry-run since dry-run never writes). 
+ const needsLock = args.mode !== "dry-run"; + let haveLock = false; + if (needsLock) { + haveLock = acquireLock(); + if (!haveLock) { + console.error( + `[gbrain-sync] another /sync-gbrain is running (lock at ${LOCK_PATH}). ` + + `If that process died, the lock auto-clears after 5 min, or remove it manually.` + ); + process.exit(2); + } } - // Persist state (skip on dry-run) - if (args.mode !== "dry-run") { - state.last_sync = new Date().toISOString(); - if (args.mode === "full") state.last_full_sync = state.last_sync; - state.last_stages = stages; - saveSyncState(state); + const cleanup = () => { + if (haveLock) releaseLock(); + }; + process.on("SIGINT", () => { cleanup(); process.exit(130); }); + process.on("SIGTERM", () => { cleanup(); process.exit(143); }); + + let exitCode = 0; + try { + const state = loadSyncState(); + const stages: StageResult[] = []; + + if (!args.noCode) { + stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync")); + } + if (!args.noMemory) { + stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync")); + } + if (!args.noBrainSync) { + stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync")); + } + + if (args.mode !== "dry-run") { + state.last_sync = new Date().toISOString(); + if (args.mode === "full") state.last_full_sync = state.last_sync; + state.last_stages = stages; + saveSyncState(state); + } + + if (!args.quiet || args.mode === "dry-run") { + console.log(`\ngstack-gbrain-sync (${args.mode}):`); + for (const s of stages) console.log(formatStage(s)); + const okCount = stages.filter((s) => s.ok).length; + const errCount = stages.filter((s) => !s.ok && s.ran).length; + console.log(`\n ${okCount} ok, ${errCount} error, ${stages.length - okCount - errCount} skipped`); + } + + const anyError = stages.some((s) => s.ran && !s.ok); + exitCode = anyError ? 
1 : 0; + } finally { + cleanup(); } - if (!args.quiet || args.mode === "dry-run") { - console.log(`\ngstack-gbrain-sync (${args.mode}):`); - for (const s of stages) console.log(formatStage(s)); - const okCount = stages.filter((s) => s.ok).length; - const errCount = stages.filter((s) => !s.ok && s.ran).length; - console.log(`\n ${okCount} ok, ${errCount} error, ${stages.length - okCount - errCount} skipped`); - } - - const anyError = stages.some((s) => s.ran && !s.ok); - process.exit(anyError ? 1 : 0); + process.exit(exitCode); } main().catch((err) => { console.error(`gstack-gbrain-sync fatal: ${err instanceof Error ? err.message : String(err)}`); + releaseLock(); process.exit(1); }); diff --git a/lib/gbrain-sources.ts b/lib/gbrain-sources.ts new file mode 100644 index 00000000..6cf21955 --- /dev/null +++ b/lib/gbrain-sources.ts @@ -0,0 +1,184 @@ +/** + * gbrain-sources — TypeScript helper for idempotent gbrain federated source registration. + * + * Mirrors the bash logic in bin/gstack-gbrain-source-wireup:204-310 but in a form + * importable by other TS callers (currently bin/gstack-gbrain-sync.ts; future + * callers welcome). gbrain has no `sources update` — drift recovery is + * `sources remove` followed by `sources add`. + * + * Per /plan-eng-review D3 (DRY extraction). + */ + +import { execFileSync, spawnSync } from "child_process"; +import { withErrorContext } from "./gstack-memory-helpers"; + +export interface SourceState { + /** "absent" — id not registered. "match" — id at expected path. "drift" — id at different path. */ + status: "absent" | "match" | "drift"; + /** Path gbrain has registered for this id. Only set when status !== "absent". */ + registered_path?: string; +} + +export interface EnsureResult { + /** True if registration state changed (added or re-registered). False on no-op. */ + changed: boolean; + /** Final source state after the call. 
*/ + state: SourceState; +} + +export interface EnsureOptions { + /** Pass --federated to `gbrain sources add`. Default false. */ + federated?: boolean; + /** When status=drift, force a remove+add to update the registered path. Default true. */ + reregister_on_drift?: boolean; + /** + * Optional env override for the spawned `gbrain` calls. Production callers + * leave this unset (inherit process.env). Tests pass a custom env to point + * at a fake `gbrain` on PATH (Bun's execFileSync does not respect runtime + * mutations of process.env.PATH unless env is passed explicitly). + */ + env?: NodeJS.ProcessEnv; +} + +/** + * Probe the registration state of a source by id. + * + * Errors: + * - "gbrain CLI not on PATH" (exit 127) — caller should treat as absent + skip stage. + * - "gbrain DB connection failed" — caller should treat as absent + skip stage. + * - JSON parse error — propagate via withErrorContext caller. + */ +export function probeSource(id: string, env?: NodeJS.ProcessEnv): SourceState { + let stdout: string; + try { + stdout = execFileSync("gbrain", ["sources", "list", "--json"], { + encoding: "utf-8", + timeout: 10_000, + stdio: ["ignore", "pipe", "pipe"], + env, + }); + } catch (err) { + const e = err as NodeJS.ErrnoException & { stderr?: Buffer }; + const stderr = e.stderr?.toString() || ""; + if (e.code === "ENOENT" || stderr.includes("command not found")) { + throw new Error("gbrain CLI not on PATH"); + } + if (stderr.includes("Cannot connect to database") || stderr.includes("config.json")) { + throw new Error("gbrain not configured (run /setup-gbrain)"); + } + throw err; + } + + let parsed: { sources?: Array<{ id?: string; local_path?: string }> }; + try { + parsed = JSON.parse(stdout); + } catch (err) { + throw new Error(`gbrain sources list returned non-JSON output: ${(err as Error).message}`); + } + + const sources = parsed.sources || []; + const match = sources.find((s) => s.id === id); + if (!match) return { status: "absent" }; + return { + 
status: "match", + registered_path: match.local_path, + }; +} + +/** + * Ensure source <id> is registered at <path>. Idempotent. + * + * Behavior: + * - status=absent → `gbrain sources add <id> --path <path> [--federated]`, returns changed=true. + * - status=match + same path → no-op, returns changed=false. + * - status=match + different path → `sources remove` + `sources add`, returns changed=true. + * (Skip when reregister_on_drift=false; returns changed=false.) + * + * Caller is responsible for catching errors. The function uses withErrorContext for + * forensic logging to ~/.gstack/.gbrain-errors.jsonl. + */ +export async function ensureSourceRegistered( + id: string, + path: string, + options: EnsureOptions = {} +): Promise { + const federated = options.federated ?? false; + const reregister_on_drift = options.reregister_on_drift ?? true; + const env = options.env; + + return withErrorContext(`ensureSourceRegistered:${id}`, () => { + const probed = probeSource(id, env); + + // Disambiguate match-but-different-path + let state: SourceState = probed; + if (probed.status === "match" && probed.registered_path !== path) { + state = { status: "drift", registered_path: probed.registered_path }; + } + + if (state.status === "match") { + return { changed: false, state }; + } + + if (state.status === "drift" && !reregister_on_drift) { + return { changed: false, state }; + } + + // For drift, remove first. + if (state.status === "drift") { + const rm = spawnSync("gbrain", ["sources", "remove", id, "--yes"], { + encoding: "utf-8", + timeout: 30_000, + env, + }); + if (rm.status !== 0) { + throw new Error(`gbrain sources remove ${id} failed: ${rm.stderr || rm.stdout || `exit ${rm.status}`}`); + } + } + + // Add. 
+ const addArgs = ["sources", "add", id, "--path", path]; + if (federated) addArgs.push("--federated"); + const add = spawnSync("gbrain", addArgs, { + encoding: "utf-8", + timeout: 30_000, + env, + }); + if (add.status !== 0) { + throw new Error(`gbrain sources add ${id} failed: ${add.stderr || add.stdout || `exit ${add.status}`}`); + } + + return { + changed: true, + state: { status: "match", registered_path: path }, + }; + }, "gbrain-sources"); +} + +/** + * Get page_count for a registered source. Returns null if source is absent or if + * page_count is missing/invalid in the JSON. Used by the verdict block + preamble + * variant selection. + */ +export function sourcePageCount(id: string, env?: NodeJS.ProcessEnv): number | null { + let stdout: string; + try { + stdout = execFileSync("gbrain", ["sources", "list", "--json"], { + encoding: "utf-8", + timeout: 10_000, + stdio: ["ignore", "pipe", "pipe"], + env, + }); + } catch { + return null; + } + + try { + const parsed = JSON.parse(stdout) as { sources?: Array<{ id?: string; page_count?: number }> }; + const match = (parsed.sources || []).find((s) => s.id === id); + if (!match) return null; + if (typeof match.page_count !== "number") return null; + return match.page_count; + } catch { + return null; + } +} diff --git a/test/gbrain-sources.test.ts b/test/gbrain-sources.test.ts new file mode 100644 index 00000000..9486eb06 --- /dev/null +++ b/test/gbrain-sources.test.ts @@ -0,0 +1,219 @@ +/** + * Unit tests for lib/gbrain-sources.ts (per /plan-eng-review D3 DRY extraction). + * + * The helper shells out to the real `gbrain` CLI. To test idempotency + * deterministically without a live brain, we put a fake `gbrain` binary on + * PATH that emits canned `sources list --json` output and records its + * invocations. The same trick `test/gstack-gbrain-source-wireup.test.ts` uses. 
+ */ + +import { describe, it, expect } from "bun:test"; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, mkdirSync, rmSync, chmodSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; + +import { ensureSourceRegistered, probeSource, sourcePageCount } from "../lib/gbrain-sources"; + +interface FakeGbrainSetup { + bindir: string; + statePath: string; + logPath: string; + /** + * Env to pass to helper calls. Bun's execFileSync does NOT respect runtime + * mutations of process.env.PATH; we have to pass env explicitly. Production + * callers leave this unset (inherit process.env) — the helper signature has + * an optional `env` param specifically for tests. + */ + env: NodeJS.ProcessEnv; + cleanup: () => void; +} + +/** + * Build a temp dir with a fake `gbrain` shell script on PATH. The fake honors: + * gbrain sources list --json → cat $STATE_PATH + * gbrain sources add <id> --path <path> [--federated] → append to state, log + * gbrain sources remove <id> --yes → drop from state, log + * gbrain --version → echo "gbrain 0.25.1" + * Anything else exits 1. + */ +function makeFakeGbrain(initialState: { sources: Array<{ id: string; local_path: string; federated?: boolean; page_count?: number }> }): FakeGbrainSetup { + const tmp = mkdtempSync(join(tmpdir(), "gbrain-sources-test-")); + const bindir = join(tmp, "bin"); + mkdirSync(bindir, { recursive: true }); + const statePath = join(tmp, "state.json"); + const logPath = join(tmp, "calls.log"); + writeFileSync(statePath, JSON.stringify(initialState)); + writeFileSync(logPath, ""); + + const fake = `#!/bin/sh +echo "$@" >> "${logPath}" +case "$1 $2" in + "--version ") + echo "gbrain 0.25.1" + exit 0 + ;; + "sources list") + cat "${statePath}" + exit 0 + ;; + "sources add") + ID="$3" + shift 3 + PATH_VAL="" + FED="false" + while [ $# -gt 0 ]; do + case "$1" in + --path) PATH_VAL="$2"; shift 2 ;; + --federated) FED="true"; shift ;; + *) shift ;; + esac + done + NEW=$(jq --arg id "$ID" --arg path "$PATH_VAL" --argjson fed "$FED" \ + '.sources += [{id: $id, local_path: $path, federated: $fed, page_count: 0}]' "${statePath}") + echo "$NEW" > "${statePath}" + exit 0 + ;; + "sources remove") + ID="$3" + NEW=$(jq --arg id "$ID" '.sources = (.sources | map(select(.id != $id)))' "${statePath}") + echo "$NEW" > "${statePath}" + exit 0 + ;; +esac +echo "fake gbrain: unknown command: $@" >&2 +exit 1 +`; + const fakePath = join(bindir, "gbrain"); + writeFileSync(fakePath, fake); + chmodSync(fakePath, 0o755); + + // Build the env override we'll pass to helper calls. We do NOT mutate + // process.env globally because Bun's execFileSync caches PATH at process + // start; explicit env is the only reliable way to redirect spawn-time PATH. 
+ const env: NodeJS.ProcessEnv = { ...process.env, PATH: `${bindir}:${process.env.PATH || ""}` }; + + return { + bindir, + statePath, + logPath, + env, + cleanup: () => { + rmSync(tmp, { recursive: true, force: true }); + }, + }; +} + +describe("probeSource", () => { + it("returns absent when source id is not in the list", () => { + const fake = makeFakeGbrain({ sources: [{ id: "other-source", local_path: "/x" }] }); + const state = probeSource("gstack-code-foo", fake.env); + expect(state.status).toBe("absent"); + expect(state.registered_path).toBeUndefined(); + fake.cleanup(); + }); + + it("returns match when source id is registered (path included)", () => { + const fake = makeFakeGbrain({ + sources: [{ id: "gstack-code-foo", local_path: "/Users/me/repo" }], + }); + const state = probeSource("gstack-code-foo", fake.env); + expect(state.status).toBe("match"); + expect(state.registered_path).toBe("/Users/me/repo"); + fake.cleanup(); + }); +}); + +describe("ensureSourceRegistered", () => { + it("adds source when absent, returns changed=true", async () => { + const fake = makeFakeGbrain({ sources: [] }); + const result = await ensureSourceRegistered("gstack-code-foo", "/Users/me/repo", { + federated: true, + env: fake.env, + }); + expect(result.changed).toBe(true); + expect(result.state.status).toBe("match"); + expect(result.state.registered_path).toBe("/Users/me/repo"); + + const log = readFileSync(fake.logPath, "utf-8"); + expect(log).toContain("sources add gstack-code-foo --path /Users/me/repo --federated"); + expect(log).not.toContain("sources remove"); + fake.cleanup(); + }); + + it("is a no-op when source is already at the correct path, returns changed=false", async () => { + const fake = makeFakeGbrain({ + sources: [{ id: "gstack-code-foo", local_path: "/Users/me/repo" }], + }); + const result = await ensureSourceRegistered("gstack-code-foo", "/Users/me/repo", { env: fake.env }); + expect(result.changed).toBe(false); + expect(result.state.status).toBe("match"); 
+ + const log = readFileSync(fake.logPath, "utf-8"); + expect(log).toContain("sources list --json"); + expect(log).not.toContain("sources add"); + expect(log).not.toContain("sources remove"); + fake.cleanup(); + }); + + it("recreates source when path differs (gbrain has no `sources update`), returns changed=true", async () => { + const fake = makeFakeGbrain({ + sources: [{ id: "gstack-code-foo", local_path: "/old/path" }], + }); + const result = await ensureSourceRegistered("gstack-code-foo", "/new/path", { + federated: true, + env: fake.env, + }); + expect(result.changed).toBe(true); + expect(result.state.status).toBe("match"); + expect(result.state.registered_path).toBe("/new/path"); + + const log = readFileSync(fake.logPath, "utf-8"); + expect(log).toContain("sources remove gstack-code-foo --yes"); + expect(log).toContain("sources add gstack-code-foo --path /new/path --federated"); + fake.cleanup(); + }); + + it("when reregister_on_drift=false and source is at different path, returns changed=false", async () => { + const fake = makeFakeGbrain({ + sources: [{ id: "gstack-code-foo", local_path: "/old/path" }], + }); + const result = await ensureSourceRegistered("gstack-code-foo", "/new/path", { + reregister_on_drift: false, + env: fake.env, + }); + expect(result.changed).toBe(false); + expect(result.state.status).toBe("drift"); + expect(result.state.registered_path).toBe("/old/path"); + + const log = readFileSync(fake.logPath, "utf-8"); + expect(log).not.toContain("sources remove"); + expect(log).not.toContain("sources add"); + fake.cleanup(); + }); +}); + +describe("sourcePageCount", () => { + it("returns the page_count when the source is registered", () => { + const fake = makeFakeGbrain({ + sources: [ + { id: "gstack-code-foo", local_path: "/x", page_count: 1247 }, + { id: "other-source", local_path: "/y", page_count: 99 }, + ], + }); + expect(sourcePageCount("gstack-code-foo", fake.env)).toBe(1247); + expect(sourcePageCount("other-source", fake.env)).toBe(99); 
+ fake.cleanup(); + }); + + it("returns null when the source is absent", () => { + const fake = makeFakeGbrain({ sources: [{ id: "other", local_path: "/x", page_count: 5 }] }); + expect(sourcePageCount("missing", fake.env)).toBeNull(); + fake.cleanup(); + }); + + it("returns null when page_count is missing from the source object", () => { + const fake = makeFakeGbrain({ sources: [{ id: "no-count", local_path: "/x" } as { id: string; local_path: string }] }); + expect(sourcePageCount("no-count", fake.env)).toBeNull(); + fake.cleanup(); + }); +}); diff --git a/test/gstack-gbrain-sync.test.ts b/test/gstack-gbrain-sync.test.ts index c8841268..100d2e27 100644 --- a/test/gstack-gbrain-sync.test.ts +++ b/test/gstack-gbrain-sync.test.ts @@ -55,7 +55,11 @@ describe("gstack-gbrain-sync CLI", () => { const r = runScript(["--dry-run", "--code-only", "--quiet"], { HOME: home, GSTACK_HOME: gstackHome }); expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("would: gbrain import"); + // Code stage now uses native code surface: sources add + sync --strategy code + // (NOT gbrain import — that's the markdown-only path that was rejected post-codex). 
+ expect(r.stdout).toContain("would: gbrain sources add"); + expect(r.stdout).toContain("gbrain sync --strategy code"); + expect(r.stdout).not.toContain("gbrain import"); // memory + brain-sync stages should not appear expect(r.stdout).not.toContain("gstack-memory-ingest --probe"); expect(r.stdout).not.toContain("gstack-brain-sync --discover-new"); @@ -69,7 +73,8 @@ describe("gstack-gbrain-sync CLI", () => { const r = runScript(["--dry-run"], { HOME: home, GSTACK_HOME: gstackHome }); expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("would: gbrain import"); + expect(r.stdout).toContain("would: gbrain sources add"); + expect(r.stdout).toContain("gbrain sync --strategy code"); expect(r.stdout).toContain("would: gstack-memory-ingest"); expect(r.stdout).toContain("would: gstack-brain-sync"); rmSync(home, { recursive: true, force: true }); @@ -82,11 +87,84 @@ describe("gstack-gbrain-sync CLI", () => { const r = runScript(["--dry-run", "--no-code"], { HOME: home, GSTACK_HOME: gstackHome }); expect(r.exitCode).toBe(0); - expect(r.stdout).not.toContain("would: gbrain import"); + expect(r.stdout).not.toContain("would: gbrain sources add"); expect(r.stdout).toContain("would: gstack-memory-ingest"); rmSync(home, { recursive: true, force: true }); }); + it("dry-run derives a stable source id from the canonical git remote", () => { + // The source id pattern is `gstack-code-<canonical-remote-slug>`. For this + // repo (github.com/garrytan/gstack), the slug should appear in the dry-run + // preview line. We don't pin the exact slug — just verify the prefix + + // that the preview command would target a source with id gstack-code-*. 
+ const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + + const r = runScript(["--dry-run", "--code-only", "--quiet"], { HOME: home, GSTACK_HOME: gstackHome }); + expect(r.exitCode).toBe(0); + expect(r.stdout).toMatch(/gbrain sources add gstack-code-[a-z0-9-]+/); + expect(r.stdout).toMatch(/gbrain sync --strategy code --source gstack-code-[a-z0-9-]+/); + rmSync(home, { recursive: true, force: true }); + }); + + it("dry-run does NOT acquire the lock file (lock is for write paths only)", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + + const r = runScript(["--dry-run"], { HOME: home, GSTACK_HOME: gstackHome }); + expect(r.exitCode).toBe(0); + // Lock file should not exist after a dry-run (it's a write-only safety primitive). + const lockPath = join(gstackHome, ".sync-gbrain.lock"); + expect(existsSync(lockPath)).toBe(false); + rmSync(home, { recursive: true, force: true }); + }); + + it("a stale lock file (older than 5 min) is taken over, not blocking", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + + // Plant a stale lock file (mtime 6 min ago). + const lockPath = join(gstackHome, ".sync-gbrain.lock"); + writeFileSync(lockPath, JSON.stringify({ pid: 99999, started_at: new Date(Date.now() - 6 * 60 * 1000).toISOString() })); + const sixMinAgo = (Date.now() - 6 * 60 * 1000) / 1000; + // Set mtime explicitly via Bun's fs.utimes + const fs = require("fs"); + fs.utimesSync(lockPath, sixMinAgo, sixMinAgo); + + // Run with all stages disabled so we don't actually invoke anything heavy. + const r = runScript(["--incremental", "--no-code", "--no-memory", "--no-brain-sync", "--quiet"], { + HOME: home, + GSTACK_HOME: gstackHome, + }); + expect(r.exitCode).toBe(0); + // Lock should be cleared after the run (we took it over and released). 
+ expect(existsSync(lockPath)).toBe(false); + rmSync(home, { recursive: true, force: true }); + }); + + it("a fresh lock file (less than 5 min old) blocks a second invocation with exit 2", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + + // Plant a fresh lock file (mtime now). + const lockPath = join(gstackHome, ".sync-gbrain.lock"); + writeFileSync(lockPath, JSON.stringify({ pid: 99999, started_at: new Date().toISOString() })); + + const r = runScript(["--incremental", "--no-code", "--no-memory", "--no-brain-sync", "--quiet"], { + HOME: home, + GSTACK_HOME: gstackHome, + }); + expect(r.exitCode).toBe(2); + expect(r.stderr).toContain("another /sync-gbrain is running"); + // Lock should still be there — the second invocation didn't take it over. + expect(existsSync(lockPath)).toBe(true); + rmSync(home, { recursive: true, force: true }); + }); + it("writes a state file with schema_version: 1 after a non-dry run", () => { const home = makeTestHome(); const gstackHome = join(home, ".gstack"); diff --git a/test/skill-e2e-memory-pipeline.test.ts b/test/skill-e2e-memory-pipeline.test.ts index c919315c..c0f40f61 100644 --- a/test/skill-e2e-memory-pipeline.test.ts +++ b/test/skill-e2e-memory-pipeline.test.ts @@ -183,7 +183,10 @@ describe("V1 /gbrain-sync orchestrator E2E", () => { const r = runBun(SYNC, ["--dry-run"], env); expect(r.exitCode).toBe(0); - expect(r.stdout).toContain("would: gbrain import"); + // Code stage uses native gbrain code surfaces (sources add + sync --strategy code) + // post-codex review; NOT `gbrain import` (markdown-only path). + expect(r.stdout).toContain("would: gbrain sources add"); + expect(r.stdout).toContain("gbrain sync --strategy code"); expect(r.stdout).toContain("would: gstack-memory-ingest"); expect(r.stdout).toContain("would: gstack-brain-sync");