mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-13 07:53:04 +08:00
Merge remote-tracking branch 'origin/main' into garrytan/gstack-upgrade-2026-05-03
# Conflicts: # CHANGELOG.md # VERSION
This commit is contained in:
@@ -33,6 +33,7 @@ import { existsSync, statSync, mkdirSync, writeFileSync, readFileSync, unlinkSyn
|
||||
import { join, dirname } from "path";
|
||||
import { execSync, execFileSync, spawnSync } from "child_process";
|
||||
import { homedir } from "os";
|
||||
import { createHash } from "crypto";
|
||||
|
||||
import { detectEngineTier, withErrorContext, canonicalizeRemote } from "../lib/gstack-memory-helpers";
|
||||
import { sourcePageCount } from "../lib/gbrain-sources";
|
||||
@@ -158,20 +159,51 @@ function originUrl(): string | null {
|
||||
}
|
||||
|
||||
/**
 * Derive a stable source id for the cwd code corpus. Pattern: `gstack-code-<slug>`.
 *
 * gbrain enforces source ids to be 1-32 lowercase alnum chars with optional interior
 * hyphens. We use the last two segments of the canonical remote (org/repo) and skip
 * the host — `github.com` etc. is the same for nearly every user and just eats budget.
 * For example, a canonical remote of `github.com/garrytan/gstack` contributes the
 * slug source `garrytan-gstack`. Sanitizing and the 32-char length cap are delegated
 * to constrainSourceId().
 *
 * Falls back to the repo basename when there is no origin (local repo).
 *
 * @param repoPath Path of the repository; only its last non-empty segment is used,
 *   and only when no origin remote is available.
 * @returns The source id produced by constrainSourceId() with the `gstack-code` prefix.
 */
function deriveCodeSourceId(repoPath: string): string {
  const remote = canonicalizeRemote(originUrl());
  if (remote) {
    // Keep only the trailing org/repo pair; filter(Boolean) drops empty segments
    // produced by leading, trailing, or doubled slashes.
    const segs = remote.split("/").filter(Boolean);
    const slugSource = segs.slice(-2).join("-");
    return constrainSourceId("gstack-code", slugSource);
  }
  // Fallback for repos without a remote. Split on both separator styles and drop
  // empty segments so a trailing slash ("/work/repo/") still yields "repo-name"
  // rather than the generic "repo" placeholder.
  const base = repoPath.split(/[\\/]+/).filter(Boolean).pop() ?? "repo";
  return constrainSourceId("gstack-code", base);
}
|
||||
|
||||
/**
 * Build a gbrain-valid source id (1-32 lowercase alnum + interior hyphens) of the
 * form `<prefix>-<slug>`.
 *
 * `raw` is lowercased, every run of non-alphanumeric characters collapses to a single
 * hyphen, and leading/trailing hyphens are stripped. When the combined id would exceed
 * 32 chars, the tail of the slug (the most distinctive end) is kept and a 6-hex-char
 * SHA-1 digest of the full slug is appended for collision resistance.
 *
 * @param prefix Id prefix. Used verbatim (it is NOT sanitized); it is only truncated
 *   when even `<prefix>-<hash>` would not fit in 32 chars.
 * @param raw Unsanitized slug source (e.g. `org-repo` or a directory basename).
 * @returns A deterministic id of at most 32 characters.
 */
function constrainSourceId(prefix: string, raw: string): string {
  const MAX = 32;
  const HASH_LEN = 6;

  const shortHash = (input: string): string =>
    createHash("sha1").update(input).digest("hex").slice(0, HASH_LEN);

  // Hash-only form. Truncates an over-long prefix so the result never exceeds MAX,
  // and drops the separator entirely when no prefix survives, so the id never
  // starts with a hyphen.
  const hashOnly = (hash: string): string => {
    const room = MAX - 1 - hash.length;
    const head = prefix.length > room ? prefix.slice(0, room).replace(/-+$/, "") : prefix;
    return head ? `${head}-${hash}` : hash;
  };

  const slug = raw.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");

  // Empty slug after sanitize (e.g. raw was all non-alnum like "___") would
  // produce "${prefix}-" which fails gbrain's validator on the trailing
  // hyphen. Fall back to a deterministic hash of the original input so the
  // result is stable across runs of the same repo.
  if (!slug) return hashOnly(shortHash(raw || "_empty"));

  const full = `${prefix}-${slug}`;
  if (full.length <= MAX) return full;

  const hash = shortHash(slug);
  // Total budget: prefix + "-" + tail + "-" + hash
  const tailBudget = MAX - prefix.length - 2 - hash.length;
  if (tailBudget < 1) return hashOnly(hash);

  // Slicing can land on a hyphen boundary; strip it so hyphens stay interior.
  const tail = slug.slice(-tailBudget).replace(/^-+|-+$/g, "");
  return tail ? `${prefix}-${tail}-${hash}` : hashOnly(hash);
}
|
||||
|
||||
function gbrainAvailable(): boolean {
|
||||
|
||||
@@ -34,8 +34,9 @@
|
||||
* keep V1 ship-tight. See TODOS.md.
|
||||
*
|
||||
* V1.5 NOTE: When `gbrain put_file` ships in the gbrain CLI (cross-repo P0 TODO),
|
||||
* transcripts will route to Supabase Storage instead of put_page. Until then, all
|
||||
* content rides put_page; gbrain's native dedup keys on session_id.
|
||||
* transcripts will route to Supabase Storage instead of the page-write path.
|
||||
* Until then, all content rides `gbrain put <slug>` (stdin, YAML frontmatter for
|
||||
* title/type/tags); gbrain's native dedup keys on session_id.
|
||||
*/
|
||||
|
||||
import {
|
||||
@@ -745,14 +746,25 @@ function buildArtifactPage(path: string, type: MemoryType): PageRecord {
|
||||
};
|
||||
}
|
||||
|
||||
// ── Writer (calls gbrain put_page) ─────────────────────────────────────────
|
||||
// ── Writer (calls `gbrain put`) ────────────────────────────────────────────
|
||||
|
||||
let _gbrainAvailability: boolean | null = null;
|
||||
function gbrainAvailable(): boolean {
|
||||
if (_gbrainAvailability !== null) return _gbrainAvailability;
|
||||
try {
|
||||
execSync("command -v gbrain", { stdio: "ignore" });
|
||||
_gbrainAvailability = true;
|
||||
// gbrain v0.27 retired the legacy `put_page` flag-form for `put <slug>`
|
||||
// (content via stdin, metadata as YAML frontmatter). Probe `--help` for
|
||||
// the `put` subcommand so we surface a single clean error here rather
|
||||
// than failing every page with "Unknown command: put_page". The regex
|
||||
// anchors on the indented subcommand format gbrain's help actually uses
|
||||
// (" put ..."), not any whitespace-bordered "put" word in prose.
|
||||
const help = execFileSync("gbrain", ["--help"], {
|
||||
encoding: "utf-8",
|
||||
timeout: 5000,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
_gbrainAvailability = /^\s+put\s/m.test(help);
|
||||
} catch {
|
||||
_gbrainAvailability = false;
|
||||
}
|
||||
@@ -761,25 +773,63 @@ function gbrainAvailable(): boolean {
|
||||
|
||||
/**
 * Write one page to gbrain by piping its body to `gbrain put <slug>` on stdin.
 *
 * gbrain v0.27+ uses `put <slug>` (positional, content via stdin) instead of the
 * legacy `put_page` flag form, so title/type/tags ride as YAML frontmatter:
 *   - When the page body already starts with a frontmatter block (transcripts),
 *     the fields are injected just before its closing fence.
 *   - Otherwise (raw artifacts: design-docs, learnings, builder-profile-entries)
 *     the body is wrapped in a fresh frontmatter block. Without this, artifact
 *     pages would land in gbrain with empty title/type/tags.
 *
 * @param page Page record; `slug`, `title`, `type`, `tags` and `body` are read.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }` where
 *   `error` is the first non-empty line of gbrain's output (capped at 300 chars).
 *   Never throws for CLI failures.
 */
function gbrainPutPage(page: PageRecord): { ok: boolean; error?: string } {
  if (!gbrainAvailable()) {
    return { ok: false, error: "gbrain CLI not in PATH or missing `put` subcommand" };
  }

  // One shared rendering for both branches. Title and tags are JSON-quoted so
  // values containing ":" / "#" / quotes stay valid YAML, and an empty tag list
  // renders as `[]` rather than a bare `tags:` key.
  const fields = [
    `title: ${JSON.stringify(page.title)}`,
    `type: ${page.type}`,
    `tags: [${page.tags.map((t) => JSON.stringify(t)).join(", ")}]`,
  ];

  // Locate the closing --- delimiter. buildTranscriptPage joins with "\n"
  // and does not append a trailing newline, so the close fence looks like
  // "...\n---" followed directly by body content (no "\n---\n" pattern).
  // Match the close on "\n---" only. Searching from index 3 (the newline that
  // ends the opening fence) also catches an empty frontmatter block "---\n---".
  const closeFence = page.body.startsWith("---\n") ? page.body.indexOf("\n---", 3) : -1;

  // No frontmatter, or an opening fence that is never closed: wrap with a fresh
  // block so the metadata is never silently dropped.
  const body =
    closeFence !== -1
      ? page.body.slice(0, closeFence) + "\n" + fields.join("\n") + page.body.slice(closeFence)
      : ["---", ...fields, "---", "", page.body].join("\n");

  try {
    execFileSync("gbrain", ["put", page.slug], {
      input: body,
      encoding: "utf-8",
      // Bumped from 30s: auto-link reconciliation on dense transcripts hits
      // 30s once the brain has a few hundred existing pages.
      timeout: 60000,
      // Bumped from default 1MB: without this, gbrain's actual stderr gets
      // truncated and callers see only "Command failed:" with no detail.
      maxBuffer: 16 * 1024 * 1024,
      stdio: ["pipe", "pipe", "pipe"],
    });
    return { ok: true };
  } catch (err: unknown) {
    // execFileSync attaches the child's captured stderr/stdout to the thrown
    // error; prefer those over the generic "Command failed" message.
    const streams: { stderr?: unknown; stdout?: unknown } =
      typeof err === "object" && err !== null ? err : {};
    const asText = (value: unknown): string => (value == null ? "" : String(value));
    const detail =
      asText(streams.stderr) ||
      asText(streams.stdout) ||
      (err instanceof Error ? err.message : String(err));
    // Skip leading blank lines so the reported error is never an empty string.
    const firstLine = detail.split("\n").find((line) => line.trim() !== "") ?? "gbrain put failed";
    return { ok: false, error: firstLine.slice(0, 300) };
  }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user