feat: native gbrain code-surface orchestrator + ensureSourceRegistered helper

Replaces gbrain import (markdown only) with gbrain sources add + sync
--strategy code (or reindex-code on --full). Adds lib/gbrain-sources.ts
exporting ensureSourceRegistered/probeSource/sourcePageCount, plus lock
file + tmp-rename atomicity + dry-run write skip in the orchestrator.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-05-03 20:42:28 -07:00
parent bf65487162
commit fed5b91e4e
5 changed files with 780 additions and 75 deletions

View File

@@ -4,29 +4,38 @@
*
* Orchestrates three storage tiers per plan §"Storage tiering":
*
* 1. Code (current repo) → gbrain import (Supabase or local PGLite)
* 1. Code (current repo) → `gbrain sources add` (idempotent via
* lib/gbrain-sources.ts) + `gbrain sync
* --strategy code` (incremental) or
* `gbrain reindex-code --yes` (--full).
* NEVER `gbrain import` (markdown only).
* 2. Transcripts + curated memory → gstack-memory-ingest (typed put_page)
* 3. Curated artifacts to git → gstack-brain-sync (existing pipeline)
*
* Modes:
* --incremental (default) — mtime fast-path; runs all 3 stages with cache hits
* --full — first-run; full walk + import; honest budget per ED2
* --dry-run — preview what would sync; no writes
* --full — first-run; full walk + reindex; honest budget per ED2
* --dry-run — preview what would sync; no writes anywhere (incl. state file)
*
* --watch (V1.5 P0 TODO): file-watcher daemon. Deferred per Codex F3 ("no daemon"
* invariant). For V1, continuous sync rides the preamble-boundary hook only.
* Concurrency safety per /plan-eng-review D1:
* - Lock file at ~/.gstack/.sync-gbrain.lock (PID + start ts).
* - Stale-lock takeover after 5 min (process death).
* - State file written via tmp+rename for atomicity.
* - Lock released in finally; SIGINT/SIGTERM trapped for cleanup.
*
* Cross-repo TODO (V1.5): when gbrain CLI ships `put_file` + `restore-from-sync`,
* this helper picks them up via version probe (Codex F6 + D9) and routes
* code/transcripts to Supabase Storage instead of put_page.
* --watch (V1.5 P0 TODO): file-watcher daemon. NOTE: gbrain v0.25.1 already
* ships `gbrain sync --watch [--interval N]` and `gbrain sync --install-cron`;
* when revisited, /sync-gbrain --watch wires through to the gbrain CLI rather
* than building a gstack-side daemon.
*/
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync } from "fs";
import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, renameSync } from "fs";
import { join, dirname } from "path";
import { execSync, spawnSync } from "child_process";
import { execSync, execFileSync, spawnSync } from "child_process";
import { homedir } from "os";
import { detectEngineTier, withErrorContext } from "../lib/gstack-memory-helpers";
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
import { sourcePageCount } from "../lib/gbrain-sources";
// ── Types ──────────────────────────────────────────────────────────────────
@@ -41,12 +50,22 @@ interface CliArgs {
codeOnly: boolean;
}
/**
 * Structured detail the code stage attaches to its StageResult.
 * Every field is optional; the code stage fills in what it knows at the point
 * it returns.
 */
interface CodeStageDetail {
// gbrain source id the code stage targeted.
source_id?: string;
// Repo root path used as the source's path.
source_path?: string;
// Page count read back from `gbrain sources list` after a successful sync; null when it could not be read.
page_count?: number | null;
// ISO-8601 timestamp taken when the code stage reports success.
last_imported?: string;
// "skipped" is reported for the dry-run preview; "failed" for registration or sync errors.
status?: "ok" | "skipped" | "failed";
}
/**
 * Outcome of one orchestrator stage. main() collects these to print the
 * summary, compute the exit code, and persist them in the state file.
 */
interface StageResult {
// Stage identifier, e.g. "code" or "brain-sync".
name: string;
// False when the stage was skipped or only previewed (dry-run).
ran: boolean;
// A stage with ran=true and ok=false counts as an error and makes the process exit non-zero.
ok: boolean;
// Elapsed time in milliseconds; stages that did not run report 0.
duration_ms: number;
// One-line human-readable outcome.
summary: string;
/** Stage-specific structured detail. Code stage carries source_id + page_count. */
detail?: CodeStageDetail;
}
// ── Constants ──────────────────────────────────────────────────────────────
@@ -54,6 +73,8 @@ interface StageResult {
// Current user's home directory; base of the default gstack state directory.
const HOME = homedir();
// gstack state directory. The GSTACK_HOME env var overrides the default ~/.gstack
// (an empty value also falls back to the default because of `||`).
const GSTACK_HOME = process.env.GSTACK_HOME || join(HOME, ".gstack");
// JSON file holding the last sync timestamps and stage outcomes.
const STATE_PATH = join(GSTACK_HOME, ".gbrain-sync-state.json");
// Lock file guarding against concurrent non-dry-run invocations.
const LOCK_PATH = join(GSTACK_HOME, ".sync-gbrain.lock");
// Minimum lock-file age (5 minutes, in ms) before acquireLock considers taking the lock over.
const STALE_LOCK_MS = 5 * 60 * 1000;
// ── CLI ────────────────────────────────────────────────────────────────────
@@ -62,18 +83,18 @@ function printUsage(): void {
Modes:
--incremental Default. mtime fast-path; ~50ms steady-state.
--full First-run; full walk + import. Honest ~25-35 min for big Macs (ED2).
--dry-run Preview what would sync; no writes.
--full First-run; full walk + reindex. Honest ~25-35 min for big Macs (ED2).
--dry-run Preview what would sync; no writes anywhere.
Options:
--quiet Suppress per-stage output.
--no-code Skip the gbrain import (current repo) stage.
--no-code Skip the cwd code-import stage.
--no-memory Skip the gstack-memory-ingest stage (transcripts + artifacts).
--no-brain-sync Skip the gstack-brain-sync git pipeline stage.
--code-only Only run the gbrain import stage (alias for --no-memory --no-brain-sync).
--code-only Only run the code-import stage (alias for --no-memory --no-brain-sync).
--help This text.
Stages run in order: code import → memory ingest → curated git push.
Stages run in order: code → memory ingest → curated git push.
Each stage failure is non-fatal; subsequent stages still run.
`);
}
@@ -116,7 +137,7 @@ function parseArgs(): CliArgs {
return { mode, quiet, noCode, noMemory, noBrainSync, codeOnly };
}
// ── Stage runners ──────────────────────────────────────────────────────────
// ── Helpers ────────────────────────────────────────────────────────────────
function repoRoot(): string | null {
try {
@@ -127,6 +148,32 @@ function repoRoot(): string | null {
}
}
/**
 * Read the URL of the `origin` remote for the git repository containing the
 * current working directory.
 *
 * @returns The trimmed remote URL, or null when git fails for any reason (not a
 *   repository, no `origin` remote, git not installed, 2s timeout) or prints
 *   nothing.
 */
function originUrl(): string | null {
  try {
    const url = execSync("git remote get-url origin", {
      encoding: "utf-8",
      timeout: 2000,
      // Discard git's stderr: without this, "fatal: not a git repository" and
      // similar diagnostics are written straight to our own stderr.
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
    return url.length > 0 ? url : null;
  } catch {
    return null;
  }
}
/**
 * Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`,
 * where <slug> is the canonicalizeRemote() result lowercased with every run of
 * characters outside `[a-z0-9]` collapsed to a single `-` and leading/trailing
 * dashes trimmed (e.g. `github.com/garrytan/gstack` →
 * `gstack-code-github-com-garrytan-gstack`).
 *
 * Falls back to `gstack-code-<basename(repo)>` (slugified the same way) when there
 * is no usable origin, and to `gstack-code-repo` when even the basename yields an
 * empty slug.
 *
 * @param repoPath Repo root path; only used for the no-remote fallback.
 * @returns An id made only of lowercase letters, digits, and single hyphens.
 */
function deriveCodeSourceId(repoPath: string): string {
  // One sanitizer for both branches so remote-derived and path-derived ids
  // share the same character set.
  const slugify = (raw: string): string =>
    raw
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, "-")
      .replace(/^-+|-+$/g, "");

  const remote = canonicalizeRemote(originUrl());
  const remoteSlug = remote ? slugify(remote) : "";
  if (remoteSlug) {
    return `gstack-code-${remoteSlug}`;
  }

  // Fallback for repos without a remote. Split on either separator and drop
  // empty segments so a trailing slash does not produce an empty basename.
  const base = repoPath.split(/[\\/]+/).filter(Boolean).pop() ?? "";
  return `gstack-code-${slugify(base) || "repo"}`;
}
function gbrainAvailable(): boolean {
try {
execSync("command -v gbrain", { stdio: "ignore" });
@@ -136,6 +183,55 @@ function gbrainAvailable(): boolean {
}
}
// ── Lock file (D1) ─────────────────────────────────────────────────────────
/** JSON payload stored in the lock file. */
interface LockInfo {
// PID of the process that created the lock; releaseLock only removes a lock whose pid matches its own.
pid: number;
// ISO-8601 timestamp recorded when the lock was created.
started_at: string;
}
/**
 * Report whether the process recorded in the existing lock file still exists.
 *
 * Returns false when the lock cannot be read or parsed, when it carries no
 * usable pid, or when the pid equals our own (we have not acquired the lock
 * yet, so a matching pid can only be a leftover from a recycled pid). In all
 * of those cases the caller falls back to the age-only staleness rule.
 */
function lockHolderIsAlive(): boolean {
  let pid: unknown;
  try {
    const recorded = JSON.parse(readFileSync(LOCK_PATH, "utf-8")) as Partial<LockInfo> | null;
    pid = recorded?.pid;
  } catch {
    return false;
  }
  if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) return false;
  if (pid === process.pid) return false;
  try {
    // Signal 0 delivers nothing; it only checks that the pid exists.
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM: the process exists but belongs to another user — still alive.
    return (err as NodeJS.ErrnoException).code === "EPERM";
  }
}

/**
 * Try to take the sync lock by creating LOCK_PATH exclusively.
 *
 * An existing lock is taken over only when it is older than STALE_LOCK_MS AND
 * its recorded owner process no longer exists. Age alone is not sufficient:
 * a single stage may legitimately run far longer than the stale threshold, and
 * nothing refreshes the lock's mtime while it does.
 *
 * Caveat: if the dead owner's pid has been recycled by an unrelated process the
 * lock is not auto-cleared and must be removed manually.
 *
 * @returns true when this process now owns the lock; false when another live
 *   run holds it, the lock is still fresh, or the lock file could not be
 *   inspected/created.
 */
function acquireLock(): boolean {
  mkdirSync(GSTACK_HOME, { recursive: true });

  if (existsSync(LOCK_PATH)) {
    try {
      const ageMs = Date.now() - statSync(LOCK_PATH).mtimeMs;
      if (ageMs <= STALE_LOCK_MS || lockHolderIsAlive()) {
        return false;
      }
      // Stale and ownerless; take over.
      unlinkSync(LOCK_PATH);
    } catch {
      // Cannot stat/unlink (e.g. another process raced us); bail conservatively.
      return false;
    }
  }

  const info: LockInfo = { pid: process.pid, started_at: new Date().toISOString() };
  try {
    // "wx" fails if the file already exists, so two racing processes cannot
    // both believe they created the lock.
    writeFileSync(LOCK_PATH, JSON.stringify(info), { encoding: "utf-8", flag: "wx" });
    return true;
  } catch {
    return false;
  }
}
/**
 * Remove the lock file, but only when it records this process's pid.
 * Any failure (missing file, unreadable or malformed JSON, unlink error) is
 * swallowed: release is best-effort and must never throw.
 */
function releaseLock(): void {
  try {
    if (existsSync(LOCK_PATH)) {
      const owner = JSON.parse(readFileSync(LOCK_PATH, "utf-8")) as LockInfo;
      const ownedByUs = owner.pid === process.pid;
      if (ownedByUs) unlinkSync(LOCK_PATH);
    }
  } catch {
    // Best-effort cleanup.
  }
}
// ── Stage runners ──────────────────────────────────────────────────────────
function runCodeImport(args: CliArgs): StageResult {
const t0 = Date.now();
const root = repoRoot();
@@ -145,42 +241,135 @@ function runCodeImport(args: CliArgs): StageResult {
if (!gbrainAvailable()) {
return { name: "code", ran: false, ok: false, duration_ms: 0, summary: "skipped (gbrain CLI not in PATH)" };
}
const sourceId = deriveCodeSourceId(root);
if (args.mode === "dry-run") {
return { name: "code", ran: false, ok: true, duration_ms: 0, summary: `would: gbrain import ${root} --no-embed` };
}
const importArgs = ["import", root, "--no-embed"];
if (args.mode === "incremental") {
// gbrain import is itself idempotent on re-import; --incremental flag if it supports
importArgs.push("--incremental");
}
try {
spawnSync("gbrain", importArgs, {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 5 * 60 * 1000,
});
// Trigger background embedding catch-up
spawnSync("gbrain", ["embed", "--stale"], {
stdio: ["ignore", "ignore", "ignore"],
timeout: 1000, // background spawn; don't wait
});
return {
name: "code",
ran: true,
ran: false,
ok: true,
duration_ms: Date.now() - t0,
summary: `imported ${root}`,
duration_ms: 0,
summary: `would: gbrain sources add ${sourceId} --path ${root} --federated; gbrain sync --strategy code --source ${sourceId}`,
detail: { source_id: sourceId, source_path: root, status: "skipped" },
};
}
// Step 1: Ensure source registered (idempotent).
let registered = false;
try {
// ensureSourceRegistered is async — but we're in a sync stage runner. Use a deasync pattern.
// Bun supports top-level await in main(), but stage runners are sync per orchestrator contract.
// Workaround: run as a child Bun script for the registration probe.
// Simpler: call gbrain CLI directly via the sync helpers in lib/gbrain-sources.ts probeSource.
// For symmetry, we duplicate the small ensureSourceRegistered logic synchronously here using
// execFileSync. (The lib helper is preferred for async callers; sync helpers below.)
registered = ensureSourceRegisteredSync(sourceId, root);
} catch (err) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `gbrain import failed: ${(err as Error).message}`,
summary: `source registration failed: ${(err as Error).message}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
// Step 2: Run sync or reindex.
const syncArgs = args.mode === "full"
? ["reindex-code", "--source", sourceId, "--yes"]
: ["sync", "--strategy", "code", "--source", sourceId];
const syncResult = spawnSync("gbrain", syncArgs, {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 35 * 60 * 1000,
});
if (syncResult.status !== 0) {
return {
name: "code",
ran: true,
ok: false,
duration_ms: Date.now() - t0,
summary: `gbrain ${syncArgs.join(" ")} exited ${syncResult.status}`,
detail: { source_id: sourceId, source_path: root, status: "failed" },
};
}
// Step 3: Read page_count from gbrain sources list.
const pageCount = sourcePageCount(sourceId);
return {
name: "code",
ran: true,
ok: true,
duration_ms: Date.now() - t0,
summary: `${registered ? "registered + " : ""}synced ${sourceId} (page_count=${pageCount ?? "unknown"})`,
detail: {
source_id: sourceId,
source_path: root,
page_count: pageCount,
last_imported: new Date().toISOString(),
status: "ok",
},
};
}
/**
 * Blocking counterpart of ensureSourceRegistered, for the synchronous stage
 * runner. Lists the registered sources, then:
 *   - id registered at `path`          → nothing to do, returns false;
 *   - id registered at another path    → `sources remove` then `sources add`;
 *   - id not registered                → `sources add`.
 *
 * @param id   gbrain source id to ensure.
 * @param path Path the source must be registered at.
 * @returns true when a registration was added or re-added, false on no-op.
 * @throws Error when the gbrain CLI is missing, is not configured, prints
 *   non-JSON, or when a remove/add command exits non-zero.
 */
function ensureSourceRegisteredSync(id: string, path: string): boolean {
  // Run a mutating `gbrain sources <action>` command and fail loudly on a non-zero exit.
  const runSourcesCommand = (action: "remove" | "add", cliArgs: string[]): void => {
    const res = spawnSync("gbrain", cliArgs, {
      encoding: "utf-8",
      timeout: 30_000,
    });
    if (res.status !== 0) {
      throw new Error(`gbrain sources ${action} ${id} failed: ${res.stderr || res.stdout || `exit ${res.status}`}`);
    }
  };

  // Probe the current registrations.
  let listing: string;
  try {
    listing = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err) {
    const failure = err as NodeJS.ErrnoException & { stderr?: Buffer };
    if (failure.code === "ENOENT") {
      throw new Error("gbrain CLI not on PATH");
    }
    const errText = failure.stderr?.toString() || "";
    const looksUnconfigured =
      errText.includes("Cannot connect to database") || errText.includes("config.json");
    if (looksUnconfigured) {
      throw new Error("gbrain not configured (run /setup-gbrain)");
    }
    throw err;
  }

  let decoded: { sources?: Array<{ id?: string; local_path?: string }> };
  try {
    decoded = JSON.parse(listing);
  } catch (err) {
    throw new Error(`gbrain sources list returned non-JSON: ${(err as Error).message}`);
  }

  const existing = (decoded.sources || []).find((entry) => entry.id === id);
  if (existing) {
    if (existing.local_path === path) {
      return false; // already registered where we want it
    }
    // Registered elsewhere: drop the old registration before re-adding.
    runSourcesCommand("remove", ["sources", "remove", id, "--yes"]);
  }

  runSourcesCommand("add", ["sources", "add", id, "--path", path, "--federated"]);
  return true;
}
function runMemoryIngest(args: CliArgs): StageResult {
@@ -198,7 +387,7 @@ function runMemoryIngest(args: CliArgs): StageResult {
const result = spawnSync("bun", ingestArgs, {
encoding: "utf-8",
timeout: 35 * 60 * 1000, // honest 35-min ceiling per ED2
timeout: 35 * 60 * 1000,
});
const summary = (result.stderr || "").split("\n").filter((l) => l.includes("[memory-ingest]")).slice(-1)[0] || "ingest pass complete";
@@ -224,7 +413,6 @@ function runBrainSyncPush(args: CliArgs): StageResult {
return { name: "brain-sync", ran: false, ok: true, duration_ms: 0, summary: "skipped (gstack-brain-sync not installed)" };
}
// Discover new artifacts then drain queue
spawnSync(brainSyncPath, ["--discover-new"], {
stdio: args.quiet ? ["ignore", "ignore", "ignore"] : ["ignore", "inherit", "inherit"],
timeout: 60 * 1000,
@@ -243,7 +431,7 @@ function runBrainSyncPush(args: CliArgs): StageResult {
};
}
// ── State file (records last sync timestamp + stage outcomes) ──────────────
// ── State file ─────────────────────────────────────────────────────────────
interface SyncState {
schema_version: 1;
@@ -266,10 +454,16 @@ function loadSyncState(): SyncState {
return { schema_version: 1, last_writer: "gstack-gbrain-sync" };
}
/**
* Atomic state file write per /plan-eng-review D1: write tmp file then rename.
* rename(2) is atomic on POSIX filesystems.
*/
function saveSyncState(state: SyncState): void {
try {
mkdirSync(dirname(STATE_PATH), { recursive: true });
writeFileSync(STATE_PATH, JSON.stringify(state, null, 2), "utf-8");
const tmp = `${STATE_PATH}.tmp.${process.pid}`;
writeFileSync(tmp, JSON.stringify(state, null, 2), "utf-8");
renameSync(tmp, STATE_PATH);
} catch {
// non-fatal
}
@@ -293,40 +487,67 @@ async function main(): Promise<void> {
console.error(`[gbrain-sync] mode=${args.mode} engine=${engine.engine}`);
}
const state = loadSyncState();
const stages: StageResult[] = [];
if (!args.noCode) {
stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync"));
}
if (!args.noMemory) {
stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync"));
}
if (!args.noBrainSync) {
stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync"));
// Acquire lock (skip on dry-run since dry-run never writes).
const needsLock = args.mode !== "dry-run";
let haveLock = false;
if (needsLock) {
haveLock = acquireLock();
if (!haveLock) {
console.error(
`[gbrain-sync] another /sync-gbrain is running (lock at ${LOCK_PATH}). ` +
`If that process died, the lock auto-clears after 5 min, or remove it manually.`
);
process.exit(2);
}
}
// Persist state (skip on dry-run)
if (args.mode !== "dry-run") {
state.last_sync = new Date().toISOString();
if (args.mode === "full") state.last_full_sync = state.last_sync;
state.last_stages = stages;
saveSyncState(state);
const cleanup = () => {
if (haveLock) releaseLock();
};
process.on("SIGINT", () => { cleanup(); process.exit(130); });
process.on("SIGTERM", () => { cleanup(); process.exit(143); });
let exitCode = 0;
try {
const state = loadSyncState();
const stages: StageResult[] = [];
if (!args.noCode) {
stages.push(await withErrorContext("sync:code", () => runCodeImport(args), "gstack-gbrain-sync"));
}
if (!args.noMemory) {
stages.push(await withErrorContext("sync:memory", () => runMemoryIngest(args), "gstack-gbrain-sync"));
}
if (!args.noBrainSync) {
stages.push(await withErrorContext("sync:brain-sync", () => runBrainSyncPush(args), "gstack-gbrain-sync"));
}
if (args.mode !== "dry-run") {
state.last_sync = new Date().toISOString();
if (args.mode === "full") state.last_full_sync = state.last_sync;
state.last_stages = stages;
saveSyncState(state);
}
if (!args.quiet || args.mode === "dry-run") {
console.log(`\ngstack-gbrain-sync (${args.mode}):`);
for (const s of stages) console.log(formatStage(s));
const okCount = stages.filter((s) => s.ok).length;
const errCount = stages.filter((s) => !s.ok && s.ran).length;
console.log(`\n ${okCount} ok, ${errCount} error, ${stages.length - okCount - errCount} skipped`);
}
const anyError = stages.some((s) => s.ran && !s.ok);
exitCode = anyError ? 1 : 0;
} finally {
cleanup();
}
if (!args.quiet || args.mode === "dry-run") {
console.log(`\ngstack-gbrain-sync (${args.mode}):`);
for (const s of stages) console.log(formatStage(s));
const okCount = stages.filter((s) => s.ok).length;
const errCount = stages.filter((s) => !s.ok && s.ran).length;
console.log(`\n ${okCount} ok, ${errCount} error, ${stages.length - okCount - errCount} skipped`);
}
const anyError = stages.some((s) => s.ran && !s.ok);
process.exit(anyError ? 1 : 0);
process.exit(exitCode);
}
// Last-resort handler: report the failure, drop our lock if we hold one, exit 1.
main().catch((err: unknown) => {
  const reason = err instanceof Error ? err.message : String(err);
  console.error(`gstack-gbrain-sync fatal: ${reason}`);
  releaseLock();
  process.exit(1);
});

184
lib/gbrain-sources.ts Normal file
View File

@@ -0,0 +1,184 @@
/**
* gbrain-sources — TypeScript helper for idempotent gbrain federated source registration.
*
* Mirrors the bash logic in bin/gstack-gbrain-source-wireup:204-310 but in a form
* importable by other TS callers (currently bin/gstack-gbrain-sync.ts; future
* callers welcome). gbrain has no `sources update` — drift recovery is
* `sources remove` followed by `sources add`.
*
* Per /plan-eng-review D3 (DRY extraction).
*/
import { execFileSync, spawnSync } from "child_process";
import { withErrorContext } from "./gstack-memory-helpers";
/**
 * Registration state of a gbrain source id.
 *
 * probeSource() only reports "absent" or "match" — it is not told the expected
 * path. ensureSourceRegistered() refines "match" into "drift" when the
 * registered path differs from the path it was asked to ensure.
 */
export interface SourceState {
/** "absent" — id not registered. "match" — id at expected path. "drift" — id at different path. */
status: "absent" | "match" | "drift";
/** Path gbrain has registered for this id. Only set when status !== "absent". */
registered_path?: string;
}
/** Result of ensureSourceRegistered(). */
export interface EnsureResult {
/** True if registration state changed (added or re-registered). False on no-op. */
changed: boolean;
/**
 * Final source state after the call. Reports "drift" (with the still-registered
 * path) when a path mismatch was detected but reregister_on_drift was false.
 */
state: SourceState;
}
/** Optional knobs for ensureSourceRegistered(). All fields may be omitted. */
export interface EnsureOptions {
/** Pass --federated to `gbrain sources add`. Default false. */
federated?: boolean;
/** When status=drift, force a remove+add to update the registered path. Default true. */
reregister_on_drift?: boolean;
/**
* Optional env override for the spawned `gbrain` calls. Production callers
* leave this unset (inherit process.env). Tests pass a custom env to point
* at a fake `gbrain` on PATH (Bun's execFileSync does not respect runtime
* mutations of process.env.PATH unless env is passed explicitly).
*/
env?: NodeJS.ProcessEnv;
}
/**
 * Probe the registration state of a source by id via `gbrain sources list --json`.
 *
 * This function is not told the expected path, so it only ever returns
 * "absent" or "match"; `registered_path` carries whatever path gbrain reports
 * (undefined when the entry has no string `local_path`).
 *
 * @param id  Source id to look up.
 * @param env Optional environment for the spawned CLI (inherits process.env when omitted).
 * @returns The registration state for `id`.
 * @throws Error "gbrain CLI not on PATH" — the binary could not be spawned (ENOENT)
 *   or stderr reports "command not found".
 * @throws Error "gbrain not configured (run /setup-gbrain)" — stderr mentions a
 *   database connection failure or config.json.
 * @throws Error when stdout is not JSON, or is JSON but not an object.
 * @throws Any other spawn failure (non-zero exit, timeout) is rethrown unchanged.
 */
export function probeSource(id: string, env?: NodeJS.ProcessEnv): SourceState {
  let stdout: string;
  try {
    stdout = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });
  } catch (err) {
    // With an encoding set, stderr arrives as a string; toString() covers both shapes.
    const e = err as NodeJS.ErrnoException & { stderr?: Buffer | string };
    const stderr = e.stderr?.toString() || "";
    if (e.code === "ENOENT" || stderr.includes("command not found")) {
      throw new Error("gbrain CLI not on PATH");
    }
    if (stderr.includes("Cannot connect to database") || stderr.includes("config.json")) {
      throw new Error("gbrain not configured (run /setup-gbrain)");
    }
    throw err;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch (err) {
    throw new Error(`gbrain sources list returned non-JSON output: ${(err as Error).message}`);
  }

  // Validate the shape before touching it so malformed-but-valid JSON (null,
  // a bare number, null list entries) yields a clear error or is skipped
  // instead of surfacing as an opaque TypeError.
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("gbrain sources list returned unexpected JSON (expected an object)");
  }
  const rawSources = (parsed as { sources?: unknown }).sources;
  const sources = Array.isArray(rawSources)
    ? rawSources.filter(
        (entry): entry is { id?: unknown; local_path?: unknown } =>
          typeof entry === "object" && entry !== null
      )
    : [];

  const match = sources.find((s) => s.id === id);
  if (!match) return { status: "absent" };
  return {
    status: "match",
    registered_path: typeof match.local_path === "string" ? match.local_path : undefined,
  };
}
/**
 * Make sure source <id> is registered at <path>. Safe to call repeatedly.
 *
 * Outcomes:
 *   - id not registered → `gbrain sources add <id> --path <path> [--federated]`; changed=true.
 *   - id registered at <path> → nothing happens; changed=false.
 *   - id registered elsewhere ("drift") → `sources remove` followed by `sources add`;
 *     changed=true. With reregister_on_drift=false nothing happens and the
 *     returned state stays "drift"; changed=false.
 *
 * The work runs inside withErrorContext; errors from the probe or from a
 * failing remove/add command propagate to the caller.
 *
 * @param id      gbrain source id.
 * @param path    Path the source should be registered at.
 * @param options See EnsureOptions.
 */
export async function ensureSourceRegistered(
  id: string,
  path: string,
  options: EnsureOptions = {}
): Promise<EnsureResult> {
  const wantFederated = options.federated ?? false;
  const fixDrift = options.reregister_on_drift ?? true;
  const childEnv = options.env;

  return withErrorContext(`ensureSourceRegistered:${id}`, () => {
    // Run a mutating `gbrain sources <action>` command; throw on non-zero exit.
    const runSourcesCommand = (action: "remove" | "add", cliArgs: string[]): void => {
      const res = spawnSync("gbrain", cliArgs, {
        encoding: "utf-8",
        timeout: 30_000,
        env: childEnv,
      });
      if (res.status !== 0) {
        throw new Error(`gbrain sources ${action} ${id} failed: ${res.stderr || res.stdout || `exit ${res.status}`}`);
      }
    };

    const probed = probeSource(id, childEnv);

    // The probe cannot tell "match" from "drift"; compare paths here.
    const pathDiffers = probed.status === "match" && probed.registered_path !== path;
    const state: SourceState = pathDiffers
      ? { status: "drift", registered_path: probed.registered_path }
      : probed;

    const nothingToDo = state.status === "match" || (state.status === "drift" && !fixDrift);
    if (nothingToDo) {
      return { changed: false, state };
    }

    // Drift: the old registration has to go before the id can be re-added.
    if (state.status === "drift") {
      runSourcesCommand("remove", ["sources", "remove", id, "--yes"]);
    }

    const addArgs = ["sources", "add", id, "--path", path, ...(wantFederated ? ["--federated"] : [])];
    runSourcesCommand("add", addArgs);

    return {
      changed: true,
      state: { status: "match", registered_path: path },
    };
  }, "gbrain-sources");
}
/**
 * Look up page_count for a source in `gbrain sources list --json`.
 *
 * @param id  Source id to look up.
 * @param env Optional environment for the spawned CLI.
 * @returns The numeric page_count, or null when the CLI call fails, the output
 *   cannot be parsed, the id is not listed, or page_count is not a number.
 */
export function sourcePageCount(id: string, env?: NodeJS.ProcessEnv): number | null {
  try {
    const raw = execFileSync("gbrain", ["sources", "list", "--json"], {
      encoding: "utf-8",
      timeout: 10_000,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });
    const listing = JSON.parse(raw) as { sources?: Array<{ id?: string; page_count?: number }> };
    const count = (listing.sources || []).find((entry) => entry.id === id)?.page_count;
    return typeof count === "number" ? count : null;
  } catch {
    // Spawn failure or malformed output: the count is simply unknown.
    return null;
  }
}

219
test/gbrain-sources.test.ts Normal file
View File

@@ -0,0 +1,219 @@
/**
* Unit tests for lib/gbrain-sources.ts (per /plan-eng-review D3 DRY extraction).
*
* The helper shells out to the real `gbrain` CLI. To test idempotency
* deterministically without a live brain, we put a fake `gbrain` binary on
* PATH that emits canned `sources list --json` output and records its
* invocations. The same trick `test/gstack-gbrain-source-wireup.test.ts` uses.
*/
import { describe, it, expect } from "bun:test";
import { mkdtempSync, writeFileSync, readFileSync, existsSync, mkdirSync, rmSync, chmodSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { ensureSourceRegistered, probeSource, sourcePageCount } from "../lib/gbrain-sources";
/** Handles returned by makeFakeGbrain() for driving and inspecting the fake CLI. */
interface FakeGbrainSetup {
// Directory containing the fake `gbrain` executable; prepended to PATH in `env`.
bindir: string;
// JSON file the fake prints for `sources list` and rewrites on add/remove.
statePath: string;
// File the fake appends its arguments to, one invocation per line.
logPath: string;
/**
* Env to pass to helper calls. Bun's execFileSync does NOT respect runtime
* mutations of process.env.PATH; we have to pass env explicitly. Production
* callers leave this unset (inherit process.env) — the helper signature has
* an optional `env` param specifically for tests.
*/
env: NodeJS.ProcessEnv;
// Recursively deletes the temp directory holding all of the above.
cleanup: () => void;
}
/**
 * Create a throwaway directory holding a fake `gbrain` shell script, a JSON
 * state file seeded with `initialState`, and an (initially empty) call log.
 * Every invocation of the fake appends its arguments to the call log. It
 * understands:
 *   gbrain sources list --json                          → prints the state file
 *   gbrain sources add <id> --path <p> [--federated]    → appends an entry (via jq)
 *   gbrain sources remove <id> --yes                    → drops the entry (via jq)
 *   gbrain --version                                    → prints "gbrain 0.25.1"
 * Any other command prints an error to stderr and exits 1.
 *
 * The returned `env` is process.env with the fake's directory prepended to PATH;
 * process.env itself is left untouched.
 */
function makeFakeGbrain(initialState: { sources: Array<{ id: string; local_path: string; federated?: boolean; page_count?: number }> }): FakeGbrainSetup {
  const sandbox = mkdtempSync(join(tmpdir(), "gbrain-sources-test-"));
  const bindir = join(sandbox, "bin");
  const statePath = join(sandbox, "state.json");
  const logPath = join(sandbox, "calls.log");

  mkdirSync(bindir, { recursive: true });
  writeFileSync(statePath, JSON.stringify(initialState));
  writeFileSync(logPath, "");

  // NOTE: the script text is kept flush-left on purpose; it is written to disk verbatim.
  const script = `#!/bin/sh
echo "$@" >> "${logPath}"
case "$1 $2" in
"--version ")
echo "gbrain 0.25.1"
exit 0
;;
"sources list")
cat "${statePath}"
exit 0
;;
"sources add")
ID="$3"
shift 3
PATH_VAL=""
FED="false"
while [ $# -gt 0 ]; do
case "$1" in
--path) PATH_VAL="$2"; shift 2 ;;
--federated) FED="true"; shift ;;
*) shift ;;
esac
done
NEW=$(jq --arg id "$ID" --arg path "$PATH_VAL" --argjson fed "$FED" \
'.sources += [{id: $id, local_path: $path, federated: $fed, page_count: 0}]' "${statePath}")
echo "$NEW" > "${statePath}"
exit 0
;;
"sources remove")
ID="$3"
NEW=$(jq --arg id "$ID" '.sources = (.sources | map(select(.id != $id)))' "${statePath}")
echo "$NEW" > "${statePath}"
exit 0
;;
esac
echo "fake gbrain: unknown command: $@" >&2
exit 1
`;
  const executable = join(bindir, "gbrain");
  writeFileSync(executable, script);
  chmodSync(executable, 0o755);

  // Hand callers an explicit env instead of mutating process.env: passing env
  // is the reliable way to redirect PATH lookup for the spawned CLI.
  const inheritedPath = process.env.PATH || "";
  const env: NodeJS.ProcessEnv = { ...process.env, PATH: `${bindir}:${inheritedPath}` };

  const cleanup = (): void => {
    rmSync(sandbox, { recursive: true, force: true });
  };

  return { bindir, statePath, logPath, env, cleanup };
}
// probeSource: reports whether an id appears in `gbrain sources list --json`.
// Each test tears its fake CLI down in `finally` so a failing assertion does
// not leave the temp directory behind.
describe("probeSource", () => {
  it("returns absent when source id is not in the list", () => {
    const fake = makeFakeGbrain({ sources: [{ id: "other-source", local_path: "/x" }] });
    try {
      const state = probeSource("gstack-code-foo", fake.env);
      expect(state.status).toBe("absent");
      expect(state.registered_path).toBeUndefined();
    } finally {
      fake.cleanup();
    }
  });

  it("returns match when source id is registered (path included)", () => {
    const fake = makeFakeGbrain({
      sources: [{ id: "gstack-code-foo", local_path: "/Users/me/repo" }],
    });
    try {
      const state = probeSource("gstack-code-foo", fake.env);
      expect(state.status).toBe("match");
      expect(state.registered_path).toBe("/Users/me/repo");
    } finally {
      fake.cleanup();
    }
  });
});
// ensureSourceRegistered: add / no-op / remove+add / drift-without-reregister.
// The fake CLI's call log is inspected to prove which commands were issued.
// Each test tears its fake CLI down in `finally` so a failing assertion does
// not leave the temp directory behind.
describe("ensureSourceRegistered", () => {
  it("adds source when absent, returns changed=true", async () => {
    const fake = makeFakeGbrain({ sources: [] });
    try {
      const result = await ensureSourceRegistered("gstack-code-foo", "/Users/me/repo", {
        federated: true,
        env: fake.env,
      });
      expect(result.changed).toBe(true);
      expect(result.state.status).toBe("match");
      expect(result.state.registered_path).toBe("/Users/me/repo");
      const log = readFileSync(fake.logPath, "utf-8");
      expect(log).toContain("sources add gstack-code-foo --path /Users/me/repo --federated");
      expect(log).not.toContain("sources remove");
    } finally {
      fake.cleanup();
    }
  });

  it("is a no-op when source is already at the correct path, returns changed=false", async () => {
    const fake = makeFakeGbrain({
      sources: [{ id: "gstack-code-foo", local_path: "/Users/me/repo" }],
    });
    try {
      const result = await ensureSourceRegistered("gstack-code-foo", "/Users/me/repo", { env: fake.env });
      expect(result.changed).toBe(false);
      expect(result.state.status).toBe("match");
      const log = readFileSync(fake.logPath, "utf-8");
      expect(log).toContain("sources list --json");
      expect(log).not.toContain("sources add");
      expect(log).not.toContain("sources remove");
    } finally {
      fake.cleanup();
    }
  });

  it("recreates source when path differs (gbrain has no `sources update`), returns changed=true", async () => {
    const fake = makeFakeGbrain({
      sources: [{ id: "gstack-code-foo", local_path: "/old/path" }],
    });
    try {
      const result = await ensureSourceRegistered("gstack-code-foo", "/new/path", {
        federated: true,
        env: fake.env,
      });
      expect(result.changed).toBe(true);
      expect(result.state.status).toBe("match");
      expect(result.state.registered_path).toBe("/new/path");
      const log = readFileSync(fake.logPath, "utf-8");
      expect(log).toContain("sources remove gstack-code-foo --yes");
      expect(log).toContain("sources add gstack-code-foo --path /new/path --federated");
    } finally {
      fake.cleanup();
    }
  });

  it("when reregister_on_drift=false and source is at different path, returns changed=false", async () => {
    const fake = makeFakeGbrain({
      sources: [{ id: "gstack-code-foo", local_path: "/old/path" }],
    });
    try {
      const result = await ensureSourceRegistered("gstack-code-foo", "/new/path", {
        reregister_on_drift: false,
        env: fake.env,
      });
      expect(result.changed).toBe(false);
      expect(result.state.status).toBe("drift");
      expect(result.state.registered_path).toBe("/old/path");
      const log = readFileSync(fake.logPath, "utf-8");
      expect(log).not.toContain("sources remove");
      expect(log).not.toContain("sources add");
    } finally {
      fake.cleanup();
    }
  });
});
// sourcePageCount: reads page_count for one id out of `gbrain sources list --json`.
// Each test tears its fake CLI down in `finally` so a failing assertion does
// not leave the temp directory behind.
describe("sourcePageCount", () => {
  it("returns the page_count when the source is registered", () => {
    const fake = makeFakeGbrain({
      sources: [
        { id: "gstack-code-foo", local_path: "/x", page_count: 1247 },
        { id: "other-source", local_path: "/y", page_count: 99 },
      ],
    });
    try {
      expect(sourcePageCount("gstack-code-foo", fake.env)).toBe(1247);
      expect(sourcePageCount("other-source", fake.env)).toBe(99);
    } finally {
      fake.cleanup();
    }
  });

  it("returns null when the source is absent", () => {
    const fake = makeFakeGbrain({ sources: [{ id: "other", local_path: "/x", page_count: 5 }] });
    try {
      expect(sourcePageCount("missing", fake.env)).toBeNull();
    } finally {
      fake.cleanup();
    }
  });

  it("returns null when page_count is missing from the source object", () => {
    // page_count is optional in makeFakeGbrain's parameter type, so no cast is needed.
    const fake = makeFakeGbrain({ sources: [{ id: "no-count", local_path: "/x" }] });
    try {
      expect(sourcePageCount("no-count", fake.env)).toBeNull();
    } finally {
      fake.cleanup();
    }
  });
});

View File

@@ -55,7 +55,11 @@ describe("gstack-gbrain-sync CLI", () => {
const r = runScript(["--dry-run", "--code-only", "--quiet"], { HOME: home, GSTACK_HOME: gstackHome });
expect(r.exitCode).toBe(0);
expect(r.stdout).toContain("would: gbrain import");
// Code stage now uses native code surface: sources add + sync --strategy code
// (NOT gbrain import — that's the markdown-only path that was rejected post-codex).
expect(r.stdout).toContain("would: gbrain sources add");
expect(r.stdout).toContain("gbrain sync --strategy code");
expect(r.stdout).not.toContain("gbrain import");
// memory + brain-sync stages should not appear
expect(r.stdout).not.toContain("gstack-memory-ingest --probe");
expect(r.stdout).not.toContain("gstack-brain-sync --discover-new");
@@ -69,7 +73,8 @@ describe("gstack-gbrain-sync CLI", () => {
const r = runScript(["--dry-run"], { HOME: home, GSTACK_HOME: gstackHome });
expect(r.exitCode).toBe(0);
expect(r.stdout).toContain("would: gbrain import");
expect(r.stdout).toContain("would: gbrain sources add");
expect(r.stdout).toContain("gbrain sync --strategy code");
expect(r.stdout).toContain("would: gstack-memory-ingest");
expect(r.stdout).toContain("would: gstack-brain-sync");
rmSync(home, { recursive: true, force: true });
@@ -82,11 +87,84 @@ describe("gstack-gbrain-sync CLI", () => {
const r = runScript(["--dry-run", "--no-code"], { HOME: home, GSTACK_HOME: gstackHome });
expect(r.exitCode).toBe(0);
expect(r.stdout).not.toContain("would: gbrain import");
expect(r.stdout).not.toContain("would: gbrain sources add");
expect(r.stdout).toContain("would: gstack-memory-ingest");
rmSync(home, { recursive: true, force: true });
});
it("dry-run derives a stable source id from the canonical git remote", () => {
  // The source id pattern is `gstack-code-<canonicalized-remote>`. For this
  // repo (github.com/garrytan/gstack), the slug should appear in the dry-run
  // preview line. We don't pin the exact slug — just verify the prefix +
  // that the preview command would target a source with id gstack-code-*.
  const home = makeTestHome();
  try {
    const gstackHome = join(home, ".gstack");
    mkdirSync(gstackHome, { recursive: true });
    const r = runScript(["--dry-run", "--code-only", "--quiet"], { HOME: home, GSTACK_HOME: gstackHome });
    expect(r.exitCode).toBe(0);
    expect(r.stdout).toMatch(/gbrain sources add gstack-code-[a-z0-9-]+/);
    expect(r.stdout).toMatch(/gbrain sync --strategy code --source gstack-code-[a-z0-9-]+/);
  } finally {
    // Remove the temp home even when an assertion fails, so failing runs
    // do not accumulate stray directories.
    rmSync(home, { recursive: true, force: true });
  }
});
it("dry-run does NOT acquire the lock file (lock is for write paths only)", () => {
  const home = makeTestHome();
  try {
    const gstackHome = join(home, ".gstack");
    mkdirSync(gstackHome, { recursive: true });
    const r = runScript(["--dry-run"], { HOME: home, GSTACK_HOME: gstackHome });
    expect(r.exitCode).toBe(0);
    // Lock file should not exist after a dry-run (it's a write-only safety primitive).
    const lockPath = join(gstackHome, ".sync-gbrain.lock");
    expect(existsSync(lockPath)).toBe(false);
  } finally {
    // Remove the temp home even when an assertion fails.
    rmSync(home, { recursive: true, force: true });
  }
});
it("a stale lock file (older than 5 min) is taken over, not blocking", () => {
  const home = makeTestHome();
  try {
    const gstackHome = join(home, ".gstack");
    mkdirSync(gstackHome, { recursive: true });
    // Plant a stale lock file. Compute the "6 min ago" instant once so the
    // recorded started_at and the file mtime describe the same moment.
    const staleMs = Date.now() - 6 * 60 * 1000;
    const lockPath = join(gstackHome, ".sync-gbrain.lock");
    writeFileSync(lockPath, JSON.stringify({ pid: 99999, started_at: new Date(staleMs).toISOString() }));
    // Backdate atime/mtime explicitly; fs.utimesSync takes numeric times in
    // seconds since the epoch, hence the division by 1000.
    const staleSec = staleMs / 1000;
    const fs = require("fs");
    fs.utimesSync(lockPath, staleSec, staleSec);
    // Run with all stages disabled so we don't actually invoke anything heavy.
    const r = runScript(["--incremental", "--no-code", "--no-memory", "--no-brain-sync", "--quiet"], {
      HOME: home,
      GSTACK_HOME: gstackHome,
    });
    expect(r.exitCode).toBe(0);
    // Lock should be cleared after the run (we took it over and released).
    expect(existsSync(lockPath)).toBe(false);
  } finally {
    // Remove the temp home even when an assertion fails.
    rmSync(home, { recursive: true, force: true });
  }
});
it("a fresh lock file (less than 5 min old) blocks a second invocation with exit 2", () => {
  const home = makeTestHome();
  try {
    const gstackHome = join(home, ".gstack");
    mkdirSync(gstackHome, { recursive: true });
    // Plant a fresh lock file (mtime now).
    const lockPath = join(gstackHome, ".sync-gbrain.lock");
    writeFileSync(lockPath, JSON.stringify({ pid: 99999, started_at: new Date().toISOString() }));
    // All stages are disabled, so the only thing under test is lock handling.
    const r = runScript(["--incremental", "--no-code", "--no-memory", "--no-brain-sync", "--quiet"], {
      HOME: home,
      GSTACK_HOME: gstackHome,
    });
    expect(r.exitCode).toBe(2);
    expect(r.stderr).toContain("another /sync-gbrain is running");
    // Lock should still be there — the second invocation didn't take it over.
    expect(existsSync(lockPath)).toBe(true);
  } finally {
    // Remove the temp home (and the planted lock) even when an assertion fails.
    rmSync(home, { recursive: true, force: true });
  }
});
it("writes a state file with schema_version: 1 after a non-dry run", () => {
const home = makeTestHome();
const gstackHome = join(home, ".gstack");

View File

@@ -183,7 +183,10 @@ describe("V1 /gbrain-sync orchestrator E2E", () => {
const r = runBun(SYNC, ["--dry-run"], env);
expect(r.exitCode).toBe(0);
expect(r.stdout).toContain("would: gbrain import");
// Code stage uses native gbrain code surfaces (sources add + sync --strategy code)
// post-codex review; NOT `gbrain import` (markdown-only path).
expect(r.stdout).toContain("would: gbrain sources add");
expect(r.stdout).toContain("gbrain sync --strategy code");
expect(r.stdout).toContain("would: gstack-memory-ingest");
expect(r.stdout).toContain("would: gstack-brain-sync");