From 465d44875b3cd701ff20042b4b21598a61fc92c6 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 6 May 2026 10:59:00 -0700 Subject: [PATCH] test+fix(memory-ingest): strengthen regression tests, fix inject for malformed-close frontmatter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imports the shim-based regression tests from PR #1341 (Alex Medina) and strengthens them to assert title, type, and tags actually arrive in put stdin — not just `agent: claude-code`. Asserting the metadata fields matches the regression class that's caused this fix wave: writers can "succeed" while metadata is silently lost. The original PR #1341 tests would have passed even with title/type/tags missing. Strengthening the test surfaced a deeper issue. buildTranscriptPage joins frontmatter array elements with "\n" and does not append a trailing newline, so the close fence is "\n---" directly, not "\n---\n". PR #1328's inject branch searched for "\n---\n" and never matched — which means even with PR #1328 alone, transcript pages were landing in gbrain with no title/type/tags. Two-line fix: search for "\n---" only, since the inject lands before the close fence regardless of what follows it. Also imports PR #1341's V1.5 NOTE doc-block update and the section comment refresh so the prose stays accurate against the new writer shape. Co-Authored-By: Alex Medina --- bin/gstack-memory-ingest.ts | 7 +- test/gstack-memory-ingest.test.ts | 143 +++++++++++++++++++++++++++++- 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/bin/gstack-memory-ingest.ts b/bin/gstack-memory-ingest.ts index 1913df32..5d3401e0 100644 --- a/bin/gstack-memory-ingest.ts +++ b/bin/gstack-memory-ingest.ts @@ -786,7 +786,12 @@ function gbrainPutPage(page: PageRecord): { ok: boolean; error?: string } { // land in gbrain with empty title/type/tags. 
let body = page.body; if (body.startsWith("---\n")) { - const end = body.indexOf("\n---\n", 4); + // Locate the closing --- delimiter. buildTranscriptPage joins with "\n" + // and does not append a trailing newline, so the close fence looks like + // "...\n---" followed directly by body content (no "\n---\n" pattern). + // Match the close on "\n---" only — the inject lands BEFORE the close + // fence, inside the frontmatter block, regardless of what follows it. + const end = body.indexOf("\n---", 4); if (end > 0) { const inject = [ `title: ${JSON.stringify(page.title)}`, diff --git a/test/gstack-memory-ingest.test.ts b/test/gstack-memory-ingest.test.ts index e9c45f73..5fb6ebbf 100644 --- a/test/gstack-memory-ingest.test.ts +++ b/test/gstack-memory-ingest.test.ts @@ -15,7 +15,7 @@ */ import { describe, it, expect, beforeEach, afterEach } from "bun:test"; -import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync, statSync } from "fs"; +import { mkdtempSync, writeFileSync, readFileSync, existsSync, rmSync, mkdirSync, statSync, chmodSync } from "fs"; import { tmpdir } from "os"; import { join } from "path"; import { spawnSync } from "child_process"; @@ -265,3 +265,144 @@ describe("gstack-memory-ingest --limit", () => { expect(r.stderr).toContain("--limit requires a positive integer"); }); }); + +// ── Writer regression: gbrain v0.27+ uses `put`, not `put_page` ─────────── + +/** + * Stand up a fake `gbrain` shim on PATH that: + * - advertises `put` in `--help` output (so gbrainAvailable() passes) + * - records `put <slug>` invocations + their stdin to a log + * - rejects `put_page` with a non-zero exit, mimicking real gbrain v0.27+ + * + * If the writer ever regresses to the legacy flag-form, the bulk pass will + * report 0 writes and the assertion on `Wrote: 1` will fail loudly.
+ */ +function installFakeGbrain(home: string): { binDir: string; logFile: string; stdinFile: string } { + const binDir = join(home, "fake-bin"); + mkdirSync(binDir, { recursive: true }); + const logFile = join(home, "gbrain-calls.log"); + const stdinFile = join(home, "gbrain-stdin.log"); + const script = `#!/usr/bin/env bash +set -euo pipefail +LOG="${logFile}" +STDIN_LOG="${stdinFile}" +case "\${1:-}" in + --help|-h) + cat <<EOF +Usage: gbrain <command> [options] + +Commands: + put Write a page (content via stdin, YAML frontmatter for metadata) + search Keyword search across pages + ask Hybrid semantic + keyword query +EOF + exit 0 + ;; + put) + if [ "\${2:-}" = "--help" ]; then + echo "Usage: gbrain put <slug>" + exit 0 + fi + echo "put \${2:-}" >> "\$LOG" + { + echo "--- slug=\${2:-} ---" + cat + echo + } >> "\$STDIN_LOG" + exit 0 + ;; + put_page|put-page) + echo "Unknown command: \$1" >&2 + exit 2 + ;; + *) + echo "Unknown command: \${1:-}" >&2 + exit 2 + ;; +esac +`; + const binPath = join(binDir, "gbrain"); + writeFileSync(binPath, script, "utf-8"); + chmodSync(binPath, 0o755); + return { binDir, logFile, stdinFile }; +} + +describe("gstack-memory-ingest writer (gbrain v0.27+ `put` interface)", () => { + it("invokes `gbrain put <slug>` with stdin body, not legacy `put_page`", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + const { binDir, logFile, stdinFile } = installFakeGbrain(home); + + // Single Claude Code session fixture. --include-unattributed lets it write + // even though there's no resolvable git remote in /tmp.
+ const session = + `{"type":"user","message":{"role":"user","content":"hi"},"timestamp":"2026-05-01T00:00:00Z","cwd":"/tmp/foo"}\n` + + `{"type":"assistant","message":{"role":"assistant","content":"hello"},"timestamp":"2026-05-01T00:00:01Z"}\n`; + writeClaudeCodeSession(home, "tmp-foo", "abc123", session); + + const r = runScript(["--bulk", "--include-unattributed", "--quiet"], { + HOME: home, + GSTACK_HOME: gstackHome, + PATH: `${binDir}:${process.env.PATH || ""}`, + }); + + expect(r.exitCode).toBe(0); + expect(existsSync(logFile)).toBe(true); + + const calls = readFileSync(logFile, "utf-8"); + expect(calls).toContain("put "); + expect(calls).not.toContain("put_page"); + + // Body should ride stdin and carry frontmatter that gbrain can parse. + // The transcript builder prepends its own frontmatter (agent, session_id, + // etc.) but does NOT include title/type/tags — the writer injects those + // into the existing frontmatter so gbrain pages list/search/filter + // actually surface the page. Asserting all three guards against the + // exact regression that landed in v1.26.0.0 (writer ignored these fields + // entirely; pages landed empty-titled, un-typed, un-tagged). + const stdin = readFileSync(stdinFile, "utf-8"); + expect(stdin).toContain("---"); + expect(stdin).toMatch(/agent:\s+claude-code/); + expect(stdin).toMatch(/title:\s/); + expect(stdin).toMatch(/type:\s+transcript/); + expect(stdin).toMatch(/tags:/); + + rmSync(home, { recursive: true, force: true }); + }); + + it("fails fast when gbrain CLI is missing the `put` subcommand", () => { + const home = makeTestHome(); + const gstackHome = join(home, ".gstack"); + mkdirSync(gstackHome, { recursive: true }); + + // Fake gbrain that ONLY advertises legacy `put_page` (no `put`). 
+ const binDir = join(home, "legacy-bin"); + mkdirSync(binDir, { recursive: true }); + const script = `#!/usr/bin/env bash +case "\${1:-}" in + --help|-h) echo "Commands:"; echo " put_page Write a page (legacy)"; exit 0 ;; + *) echo "Unknown command: \$1" >&2; exit 2 ;; +esac +`; + const binPath = join(binDir, "gbrain"); + writeFileSync(binPath, script, "utf-8"); + chmodSync(binPath, 0o755); + + const session = + `{"type":"user","message":{"role":"user","content":"hi"},"timestamp":"2026-05-01T00:00:00Z","cwd":"/tmp/bar"}\n`; + writeClaudeCodeSession(home, "tmp-bar", "def456", session); + + const r = runScript(["--bulk", "--include-unattributed"], { + HOME: home, + GSTACK_HOME: gstackHome, + PATH: `${binDir}:${process.env.PATH || ""}`, + }); + + // Bulk completes (the script is per-page tolerant), but every page + // surfaces the missing-`put` error rather than the old "Unknown command". + expect(r.stderr + r.stdout).toMatch(/missing `put` subcommand|gbrain CLI not in PATH/); + + rmSync(home, { recursive: true, force: true }); + }); +});