Bläddra i källkod

fix(install): prune old version bundles instead of piling them up (#1074) (#1075)

install.sh kept each release in its own versions/<v> dir (~50 MB with the
vendored Node runtime) and only moved the `current` symlink, so old versions
accumulated forever across upgrades. Keep only the just-installed version and
delete the rest; `codegraph upgrade` re-runs install.sh, so this covers
upgrades too. The npm-shim self-heal cache (~/.codegraph/bundles/) prunes the
same way. Windows installs overwrite a single dir in place and were never
affected.

Validated real-world on macOS, Linux (Docker/dash), and Windows (VM): a
v1.1.2 -> v1.1.4 install leaves only the latest behind.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 dag sedan
förälder
incheckning
31a58070c8
5 ändrade filer med 209 tillägg och 2 borttagningar
  1. 1 0
      CHANGELOG.md
  2. 113 0
      __tests__/install-sh-prune.test.ts
  3. 45 0
      __tests__/npm-shim.test.ts
  4. 27 1
      install.sh
  5. 23 1
      scripts/npm-shim.js

+ 1 - 0
CHANGELOG.md

@@ -11,6 +11,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- The standalone installer (`install.sh`) no longer leaves old versions piling up on disk. Each upgrade installed the new release into its own directory and re-pointed the launcher at it, but never removed the previous ones — so on macOS and Linux a full vendored Node runtime (tens of MB per version) accumulated with every update. The installer now keeps only the version it just installed and removes the older ones automatically (the npm installer's download-fallback cache prunes the same way). Windows installs already replaced a single directory in place, so they were never affected. Directories left behind under `~/.codegraph/versions` by earlier upgrades are removed by the next install or upgrade; until then, every one except the version `~/.codegraph/current` points to is safe to delete by hand. Thanks @lalanbv for the report. (#1074)
 - `codegraph index` can now rebuild an existing oversized index from an older version, instead of hanging until the watchdog kills it. The previous fix (#1065) stopped *new* indexes from sweeping in a gitignored corpus of nested repos, but a project that had already built the multi-gigabyte graph before upgrading couldn't recover: `codegraph index` is meant to rebuild from scratch, yet it cleared the old graph by deleting every row one at a time, and on a graph of well over a million symbols that took longer than the 60-second responsiveness watchdog allows — so the command was killed before indexing even started, leaving the bad index in place. A full re-index now discards the old database outright and starts fresh, which is near-instant regardless of the old size and also frees the disk the bloated database was holding. Thanks @AriaShishegaran for the detailed follow-up report. (#1067)
 
 ## [1.1.5] - 2026-06-30

+ 113 - 0
__tests__/install-sh-prune.test.ts

@@ -0,0 +1,113 @@
+/**
+ * install.sh version-prune tests (issue #1074).
+ *
+ * The standalone installer keeps each release in its own `versions/<v>` dir and
+ * — before this fix — never removed the old ones, so they piled up (~50 MB of
+ * vendored Node runtime each) across upgrades. `install.sh` now prunes every
+ * `versions/*` dir except the one it just installed.
+ *
+ * Rather than duplicate the shell (which would drift from the shipped script),
+ * these tests extract the REAL prune block from `install.sh` — between its
+ * `CODEGRAPH_PRUNE_OLD_VERSIONS` markers — and run it under `sh` against a temp
+ * fixture, with `$INSTALL_DIR` / `$dest` injected. No network, no download.
+ *
+ * POSIX only: the block is `/bin/sh`. Windows installs overwrite a single dir in
+ * place (install.ps1) and never reach this code, so there's nothing to prune.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { spawnSync } from 'child_process';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+
+const INSTALL_SH = path.join(__dirname, '..', 'install.sh');
+const START = '# >>> CODEGRAPH_PRUNE_OLD_VERSIONS';
+const END = '# <<< CODEGRAPH_PRUNE_OLD_VERSIONS';
+
+/** Pull the exact prune block out of the shipped install.sh (no duplication). */
+function extractPruneBlock(): string {
+  const lines = fs.readFileSync(INSTALL_SH, 'utf8').split('\n');
+  const i = lines.findIndex((l) => l.trim() === START);
+  const j = lines.findIndex((l) => l.trim() === END);
+  if (i < 0 || j < 0 || j <= i) {
+    throw new Error('CODEGRAPH_PRUNE_OLD_VERSIONS markers not found in install.sh');
+  }
+  return lines.slice(i + 1, j).join('\n');
+}
+
+/** Single-quote a path for safe interpolation into the sh script. */
+function shq(s: string): string {
+  return `'${s.replace(/'/g, `'\\''`)}'`;
+}
+
+/** Run the real prune block with INSTALL_DIR/dest set, return code + stdout. */
+function runPrune(installDir: string, dest: string): { code: number; stdout: string } {
+  const script = `set -eu\nINSTALL_DIR=${shq(installDir)}\ndest=${shq(dest)}\n${extractPruneBlock()}\n`;
+  const r = spawnSync('sh', ['-c', script], { encoding: 'utf8' });
+  return { code: r.status ?? -1, stdout: r.stdout ?? '' };
+}
+
+/** Create a versions/<v>/bin dir with a dummy launcher, like a real bundle. */
+function seedVersion(installDir: string, version: string): string {
+  const dir = path.join(installDir, 'versions', version);
+  fs.mkdirSync(path.join(dir, 'bin'), { recursive: true });
+  fs.writeFileSync(path.join(dir, 'bin', 'codegraph'), '#!/bin/sh\n');
+  return dir;
+}
+
+describe.skipIf(process.platform === 'win32')('install.sh version prune (#1074)', () => {
+  let installDir: string;
+
+  beforeEach(() => {
+    installDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-prune-'));
+  });
+  afterEach(() => {
+    fs.rmSync(installDir, { recursive: true, force: true });
+  });
+
+  it('removes older version dirs and keeps only the just-installed one', () => {
+    seedVersion(installDir, 'v1.1.2');
+    seedVersion(installDir, 'v1.1.3');
+    const dest = seedVersion(installDir, 'v1.1.4');
+    fs.symlinkSync(dest, path.join(installDir, 'current'));
+
+    const { code, stdout } = runPrune(installDir, dest);
+
+    expect(code).toBe(0);
+    const remaining = fs.readdirSync(path.join(installDir, 'versions')).sort();
+    expect(remaining).toEqual(['v1.1.4']);
+    expect(stdout).toContain('Removed    2 older version(s)');
+    // The `current` symlink (outside versions/) is never globbed → untouched.
+    expect(fs.existsSync(path.join(installDir, 'current'))).toBe(true);
+    expect(fs.realpathSync(path.join(installDir, 'current'))).toBe(fs.realpathSync(dest));
+  });
+
+  it('is a silent no-op when the just-installed version is the only one', () => {
+    const dest = seedVersion(installDir, 'v1.1.4');
+
+    const { code, stdout } = runPrune(installDir, dest);
+
+    expect(code).toBe(0);
+    expect(fs.readdirSync(path.join(installDir, 'versions'))).toEqual(['v1.1.4']);
+    expect(stdout).not.toContain('Removed');
+  });
+
+  it('does not error when there is no versions/ dir yet', () => {
+    const dest = path.join(installDir, 'versions', 'v1.1.4'); // never created
+    const { code, stdout } = runPrune(installDir, dest);
+    expect(code).toBe(0);
+    expect(stdout).not.toContain('Removed');
+  });
+
+  it('reports the count when several older versions are present', () => {
+    for (const v of ['v1.0.0', 'v1.1.0', 'v1.1.1', 'v1.1.2', 'v1.1.3']) seedVersion(installDir, v);
+    const dest = seedVersion(installDir, 'v1.1.4');
+
+    const { code, stdout } = runPrune(installDir, dest);
+
+    expect(code).toBe(0);
+    expect(fs.readdirSync(path.join(installDir, 'versions'))).toEqual(['v1.1.4']);
+    expect(stdout).toContain('Removed    5 older version(s)');
+  });
+});

+ 45 - 0
__tests__/npm-shim.test.ts

@@ -103,6 +103,33 @@ describe.skipIf(isWindows)('npm-shim launcher', () => {
     expect(r.stderr).toBe('');
   });
 
+  it('prunes older cached bundles for this target, keeping the current one (#1074)', async () => {
+    const pkg = makePkg('2.0.0-keep');
+    const cache = mkTmp('cache');
+    const bundles = path.join(cache, 'bundles');
+    // current (matches pkg version) + an older bundle for the same target
+    writeLauncher(path.join(bundles, `${target}-2.0.0-keep`, 'bin'));
+    writeLauncher(path.join(bundles, `${target}-1.0.0-old`, 'bin'));
+    // a different platform's bundle and an in-flight staging dir must survive
+    const otherTarget = target === 'linux-x64' ? 'darwin-arm64' : 'linux-x64';
+    writeLauncher(path.join(bundles, `${otherTarget}-1.0.0`, 'bin'));
+    fs.mkdirSync(path.join(bundles, '.dl-inflight'), { recursive: true });
+
+    const r = await runShim(pkg, ['--probe-prune'], {
+      CODEGRAPH_INSTALL_DIR: cache,
+      CODEGRAPH_NO_DOWNLOAD: '1',
+    });
+
+    expect(r.status).toBe(0);
+    expect(r.stdout).toContain('FAKE_BUNDLE_RAN');
+    // older same-target bundle pruned; current kept
+    expect(fs.existsSync(path.join(bundles, `${target}-1.0.0-old`))).toBe(false);
+    expect(fs.existsSync(path.join(bundles, `${target}-2.0.0-keep`))).toBe(true);
+    // unrelated target + staging dir untouched
+    expect(fs.existsSync(path.join(bundles, `${otherTarget}-1.0.0`))).toBe(true);
+    expect(fs.existsSync(path.join(bundles, '.dl-inflight'))).toBe(true);
+  });
+
   it('prints actionable guidance and exits 1 when disabled with no bundle', async () => {
     const pkg = makePkg();
     const r = await runShim(pkg, ['--version'], {
@@ -182,6 +209,24 @@ describe.skipIf(!CAN_NET)('npm-shim download fallback (local HTTPS)', () => {
     expect(fs.existsSync(path.join(cache, 'bundles', `${target}-5.0.0-net`, 'bin', 'codegraph'))).toBe(true);
   }, 20000);
 
+  it('prunes older cached bundles after downloading a new one (#1074)', async () => {
+    sumsBody = `${fixtureSha}  ${asset}\n`;
+    const pkg = makePkg('6.0.0-new');
+    const cache = mkTmp('cache');
+    const bundles = path.join(cache, 'bundles');
+    // a stale bundle from a previous version (same target) left by an earlier run
+    writeLauncher(path.join(bundles, `${target}-5.0.0-stale`, 'bin'));
+
+    const r = await runShim(pkg, ['--probe-newdl'], netEnv(cache));
+
+    expect(r.status).toBe(0);
+    expect(r.stderr).toContain('downloading');
+    expect(r.stdout).toContain('FAKE_BUNDLE_RAN');
+    // freshly downloaded version present, stale one pruned
+    expect(fs.existsSync(path.join(bundles, `${target}-6.0.0-new`, 'bin', 'codegraph'))).toBe(true);
+    expect(fs.existsSync(path.join(bundles, `${target}-5.0.0-stale`))).toBe(false);
+  }, 20000);
+
   it('aborts (exit 1) on a checksum mismatch and caches nothing', async () => {
     sumsBody = `${'0'.repeat(64)}  ${asset}\n`;
     const pkg = makePkg('5.0.0-bad');

+ 27 - 1
install.sh

@@ -84,7 +84,33 @@ ln -sfn "$dest" "$INSTALL_DIR/current"
 echo "Installed to $dest"
 echo "Linked     $BIN_DIR/codegraph"
 
-# 5. PATH sanity. Two ways this install can fail to be the codegraph that runs:
+# 5. Prune older bundles so they don't pile up across upgrades (issue #1074).
+# Each release lives in its own versions/<v> dir (~50 MB with the vendored Node
+# runtime). `codegraph upgrade` re-runs this script, which drops in a new dir
+# and re-points `current` + the launcher — but it never removed the old dirs, so
+# they accumulated indefinitely. Keep only what we just installed ($dest) and
+# delete the rest. Mostly safe even if a daemon is still executing an older
+# bundle: on POSIX, files it already has open or mapped survive until released;
+# only later lazy loads fail. (Windows installs overwrite a single dir in place and
+# never reach this.) The markers below let a unit test run this exact block.
+# >>> CODEGRAPH_PRUNE_OLD_VERSIONS
+pruned=0
+if [ -d "$INSTALL_DIR/versions" ]; then
+  for d in "$INSTALL_DIR/versions"/*; do
+    [ -d "$d" ] || continue
+    if [ "$d" != "$dest" ]; then
+      if rm -rf "$d"; then
+        pruned=$((pruned + 1))
+      fi
+    fi
+  done
+fi
+if [ "$pruned" -gt 0 ]; then
+  echo "Removed    $pruned older version(s)"
+fi
+# <<< CODEGRAPH_PRUNE_OLD_VERSIONS
+
+# 6. PATH sanity. Two ways this install can fail to be the codegraph that runs:
 #   1. $BIN_DIR isn't on PATH at all.
 #   2. A *different* codegraph sits earlier on PATH and shadows ours — most
 #      often a stale `npm i -g @colbymchenry/codegraph`, whose launcher keeps

+ 23 - 1
scripts/npm-shim.js

@@ -105,7 +105,7 @@ async function selfHealBundle() {
   // Already downloaded by a previous run? Use it even when downloads are
   // disabled — CODEGRAPH_NO_DOWNLOAD blocks fetching, not a cached bundle.
   var cached = launcherIn(dest);
-  if (cached) return cached;
+  if (cached) { pruneOldBundles(bundlesDir, dest); return cached; }
 
   if (process.env.CODEGRAPH_NO_DOWNLOAD) {
     fail('the network fallback is disabled (CODEGRAPH_NO_DOWNLOAD is set).');
@@ -149,6 +149,7 @@ async function selfHealBundle() {
 
   var ready = launcherIn(dest);
   if (!ready) fail('downloaded bundle is missing its launcher under ' + dest + '.');
+  pruneOldBundles(bundlesDir, dest);
   process.stderr.write('codegraph: bundle ready.\n');
   return ready;
 }
@@ -230,6 +231,27 @@ function rmrf(p) {
   try { fs.rmSync(p, { recursive: true, force: true }); } catch (e) { /* best effort */ }
 }
 
+// Drop sibling bundles for OTHER versions of this same platform target, keeping
+// only keepDir. The self-heal cache otherwise accumulates a full ~50 MB bundle
+// per version forever (issue #1074). Best-effort: a locked/busy dir (a
+// concurrent run still mapping an older node.exe on Windows) just stays — rmrf
+// already swallows its own errors, and the readdir is guarded — so cleanup can
+// never break a working command. Only entries whose name starts with "<target>-"
+// are touched; other platforms' bundles and the ".dl-*" staging dirs are left
+// alone (a target whose name merely extends this one would match too).
+function pruneOldBundles(bundlesDir, keepDir) {
+  var keep = path.basename(keepDir);
+  try {
+    var names = fs.readdirSync(bundlesDir);
+    for (var i = 0; i < names.length; i++) {
+      var name = names[i];
+      if (name === keep) continue;
+      if (name.indexOf(target + '-') !== 0) continue;
+      rmrf(path.join(bundlesDir, name));
+    }
+  } catch (e) { /* best effort — never break a working run over cleanup */ }
+}
+
 function fail(reason) {
   process.stderr.write(
     'codegraph: no prebuilt bundle for ' + target + '.\n' +