Explorar el Código

fix(installer,cli): refuse to index $HOME / filesystem root (#860)

Running the installer or `codegraph init`/`index` from $HOME auto-indexed the
entire home tree (installer indexes process.cwd() with no guard), producing a
multi-GB ~/.codegraph/codegraph.db; the install dir sharing the ~/.codegraph
name then made every home subdir resolve its root to $HOME. On pre-1.0 macOS the
per-file watcher over that tree exhausted kern.maxfiles and crashed the machine
(#845; the fd blowup was fixed in 1.0.0, this fixes the root cause).

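Add unsafeIndexRootReason() and refuse the home dir, a parent of home, and
filesystem roots at the installer auto-index, `init`, and `index`. `init` and
`index` can be overridden with --force; the installer has no override and simply
skips its auto-index (the install itself still completes). Closes #845.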
Colby Mchenry hace 1 semana
padre
commit
2472508549
Se han modificado 5 ficheros con 136 adiciones y 3 borrados
  1. 1 0
      CHANGELOG.md
  2. 52 0
      __tests__/unsafe-index-root.test.ts
  3. 23 2
      src/bin/codegraph.ts
  4. 47 0
      src/directory.ts
  5. 13 1
      src/installer/index.ts

+ 1 - 0
CHANGELOG.md

@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Fixes
 
 - The CodeGraph MCP server no longer risks getting stuck at 100% CPU after an unexpected internal error. Previously such an error was logged but the process was left running in a broken state, where it could spin a CPU core indefinitely and had to be killed by hand. The server now logs the error and exits cleanly, so a fresh one starts on the next request. Thanks @songhlc. (#850)
+- CodeGraph no longer indexes your entire home directory by accident. Running the installer — or `codegraph init` / `codegraph index` — from your home folder, a parent of it, or a filesystem root would index everything underneath it (caches, `Library`, every other project), producing a multi-gigabyte index and constant file-watching churn. CodeGraph now refuses these roots and points you at a specific project instead; pass `--force` to `init` or `index` if you genuinely mean to index one. (Combined with the macOS file-descriptor fix already in 1.0.0, this closes the report of a runaway watcher exhausting the system file limit.) Thanks @ligson. (#845)
 
 
 ## [1.0.0] - 2026-06-12

+ 52 - 0
__tests__/unsafe-index-root.test.ts

@@ -0,0 +1,52 @@
+import { describe, it, expect, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { unsafeIndexRootReason } from '../src/directory';
+
+/**
+ * Guard for #845: the installer / `init` / `index` must refuse the home directory
+ * and filesystem roots, which would otherwise index the entire tree (multi-GB
+ * index, watcher churn, pre-1.0 macOS fd exhaustion that crashed the machine).
+ * Classic trigger: running the installer from `$HOME`. Cases read the real `os.homedir()`.
+ */
+describe('unsafeIndexRootReason', () => {
+  const tmpDirs: string[] = []; // temp dirs created by a case, removed in afterEach
+  afterEach(() => {
+    for (const d of tmpDirs.splice(0)) { // splice(0) empties the list and yields its former contents
+      try { fs.rmSync(d, { recursive: true, force: true }); } catch { /* best-effort cleanup */ }
+    }
+  });
+
+  it('flags the home directory', () => {
+    const reason = unsafeIndexRootReason(os.homedir());
+    expect(reason).toBeTruthy();
+    expect(reason).toContain('home');
+  });
+
+  it('flags a parent of the home directory (broader than home)', () => {
+    // dirname(home) is either an ancestor of home or, for a root-level home such
+    // as `/root`, the filesystem root; either way a non-null reason is expected.
+    expect(unsafeIndexRootReason(path.dirname(os.homedir()))).toBeTruthy();
+  });
+
+  it.runIf(process.platform !== 'win32')('flags the POSIX filesystem root', () => {
+    expect(unsafeIndexRootReason('/')).toContain('filesystem root');
+  });
+
+  it('allows a normal project directory', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-unsafe-'));
+    tmpDirs.push(dir);
+    expect(unsafeIndexRootReason(dir)).toBeNull();
+    // A nested subdirectory of that temp project must be allowed as well.
+    const nested = path.join(dir, 'packages', 'app');
+    fs.mkdirSync(nested, { recursive: true });
+    expect(unsafeIndexRootReason(nested)).toBeNull();
+  });
+
+  it('matches the home directory case-insensitively on macOS/Windows', () => {
+    if (process.platform !== 'darwin' && process.platform !== 'win32') return;
+    // Upper-cased home must still flag. NOTE(review): elsewhere this returns early and reports as passed; it.runIf would skip.
+    expect(unsafeIndexRootReason(os.homedir().toUpperCase())).toBeTruthy();
+  });
+});

+ 23 - 2
src/bin/codegraph.ts

@@ -26,7 +26,7 @@
 import { Command } from 'commander';
 import * as path from 'path';
 import * as fs from 'fs';
-import { getCodeGraphDir, isInitialized } from '../directory';
+import { getCodeGraphDir, isInitialized, unsafeIndexRootReason } from '../directory';
 import { detectWorktreeIndexMismatch, worktreeMismatchWarning } from '../sync/worktree';
 import { createShimmerProgress } from '../ui/shimmer-progress';
 import { getGlyphs } from '../ui/glyphs';
@@ -455,14 +455,27 @@ program
   .command('init [path]')
   .description('Initialize CodeGraph in a project directory and build the initial index')
   .option('-i, --index', 'Deprecated: indexing now runs by default; flag accepted for backward compatibility')
+  .option('-f, --force', 'Initialize even if the path looks like your home directory or a filesystem root')
   .option('-v, --verbose', 'Show detailed worker lifecycle and memory info')
-  .action(async (pathArg: string | undefined, options: { index?: boolean; verbose?: boolean }) => {
+  .action(async (pathArg: string | undefined, options: { index?: boolean; force?: boolean; verbose?: boolean }) => {
     const projectPath = path.resolve(pathArg || process.cwd());
     const clack = await importESM('@clack/prompts');
 
     clack.intro('Initializing CodeGraph');
 
     try {
+      // Refuse to index your home directory / a filesystem root — it pulls in
+      // caches, other projects, and your whole tree (a multi-GB index + watcher
+      // churn, and on pre-1.0 macOS a machine-crashing fd blowup, #845).
+      const unsafe = unsafeIndexRootReason(projectPath);
+      if (unsafe && !options.force) {
+        clack.log.error(`Refusing to initialize in ${projectPath} — it looks like ${unsafe}.`);
+        clack.log.info('Run this inside a specific project directory, or pass --force if you really mean to index everything under it.');
+        clack.outro('');
+        process.exitCode = 1;
+        return;
+      }
+
       if (isInitialized(projectPath)) {
         clack.log.warn(`Already initialized in ${projectPath}`);
         clack.log.info('Use "codegraph index" to re-index or "codegraph sync" to update');
@@ -585,6 +598,14 @@ program
     const projectPath = resolveProjectPath(pathArg);
 
     try {
+      // Don't (re)index your home directory / a filesystem root (#845). --force
+      // (already "force full re-index") doubles as the override.
+      const unsafe = unsafeIndexRootReason(projectPath);
+      if (unsafe && !options.force) {
+        error(`Refusing to index ${projectPath} — it looks like ${unsafe}. Pass --force to override.`);
+        process.exit(1);
+      }
+
       if (!isInitialized(projectPath)) {
         error(`CodeGraph not initialized in ${projectPath}`);
         info('Run "codegraph init" first');

+ 47 - 0
src/directory.ts

@@ -5,6 +5,7 @@
  */
 
 import * as fs from 'fs';
+import * as os from 'os';
 import * as path from 'path';
 
 /** The default per-project data directory name. */
@@ -108,6 +109,52 @@ export function isInitialized(projectRoot: string): boolean {
  * @param startPath - Directory to start searching from
  * @returns The project root containing .codegraph/, or null if not found
  */
+/**
+ * Reason a directory is unsafe to use as an index ROOT, or null when it's fine.
+ *
+ * Flags a filesystem root, the home directory, and any ancestor of home. Indexing
+ * one drags in caches, `Library`, every other project — a multi-GB index, watcher
+ * churn, and (pre-1.0 on macOS) an fd blowup that exhausted `kern.maxfiles` (#845).
+ * Paths are compared after `fs.realpathSync`, falling back to `path.resolve`.
+ *
+ * @param projectRoot - Candidate root; `path.resolve` makes relative paths absolute.
+ * @returns A phrase that slots into "… looks like {reason}", or null when safe.
+ *
+ * NOTE(review): the JSDoc tail just above (`@param startPath`) appears to document
+ * `findNearestCodeGraphRoot` below; this insertion separated it from that function.
+ */
+export function unsafeIndexRootReason(projectRoot: string): string | null {
+  const resolve = (p: string): string => {
+    try {
+      return fs.realpathSync(path.resolve(p));
+    } catch { // realpath failed: fall back to the lexical absolute path
+      return path.resolve(p);
+    }
+  };
+  const resolved = resolve(projectRoot);
+
+  // A path equal to its own parsed root is a filesystem root (`/`, or a drive root like `C:\`).
+  if (path.parse(resolved).root === resolved) {
+    return 'the filesystem root';
+  }
+
+  const home = resolve(os.homedir());
+  // Lower-case both paths on darwin/win32 so the comparison below ignores case.
+  const norm = (p: string): string =>
+    process.platform === 'darwin' || process.platform === 'win32' ? p.toLowerCase() : p;
+  const r = norm(resolved);
+  const h = norm(home);
+
+  if (r === h) {
+    return 'your home directory';
+  }
+  // Ancestor of home (e.g. `/Users`, `/home`); the appended separator keeps `/home/al` from matching `/home/alice`.
+  if (h.startsWith(r + path.sep)) {
+    return 'a parent of your home directory';
+  }
+  return null;
+}
+
 export function findNearestCodeGraphRoot(startPath: string): string | null {
   let current = path.resolve(startPath);
   const root = path.parse(current).root;

+ 13 - 1
src/installer/index.ts

@@ -28,7 +28,7 @@ import { getGlyphs } from '../ui/glyphs';
 // installer must stay importable even when native modules can't load).
 import { watchDisabledReason } from '../sync/watch-policy';
 import { isGitRepo, isSyncHookInstalled, installGitSyncHook } from '../sync/git-hooks';
-import { getCodeGraphDir, codeGraphDirName } from '../directory';
+import { getCodeGraphDir, codeGraphDirName, unsafeIndexRootReason } from '../directory';
 import { getTelemetry, recordIndexEvent, TELEMETRY_DOCS } from '../telemetry';
 
 // Backwards-compat: keep these named exports — downstream code may
@@ -501,6 +501,18 @@ async function initializeLocalProject(
 ): Promise<void> {
   const projectPath = process.cwd();
 
+  // Never auto-index the home directory or a filesystem root. Running the
+  // installer from `$HOME` would otherwise index the entire home tree — a
+  // multi-GB index, constant watcher churn, and (pre-1.0 on macOS) fd
+  // exhaustion that crashed the machine (#845). The install itself still
+  // completes; we just skip the auto-index and point them at a real project.
+  const unsafe = unsafeIndexRootReason(projectPath);
+  if (unsafe) {
+    clack.log.warn(`Skipping automatic indexing — ${projectPath} looks like ${unsafe}.`);
+    clack.log.info('Indexing it would pull in caches, other projects, and your whole tree. Run "codegraph init" inside a specific project instead.');
+    return;
+  }
+
   let CodeGraph: typeof import('../index').default;
   try {
     CodeGraph = (await import('../index')).default;