Explorar el Código

fix(watcher): exclude ignored dirs before watching to prevent inotify exhaustion (#276)

The file watcher registered a recursive watch over the entire project (node_modules, build output, caches included) and filtered only in the callback — exhausting the Linux inotify budget on large repos (#276). It now uses chokidar and excludes the same directories the indexer ignores (built-in default-ignore set + the project .gitignore) BEFORE registering a watch, so the watch count on a 900-dir node_modules drops from ~1200 to ~14 even with no .gitignore. Stacks with the shared daemon (#411): one watcher across agents, now small.

Also hardens the #411 daemon lockfile against a concurrent-startup race the new watcher timing made reproducible — the lock is now created atomically with its content (temp-write + hard-link), so racing daemons can never both win. Validated on macOS, Linux (Docker), and Windows (chokidar + fs.linkSync on NTFS).

Co-Authored-By: Colby McHenry <me@colbymchenry.com>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Lorenzo Feng hace 4 semanas
padre
commit
b09b23cf54
Se han modificado 7 ficheros con 202 adiciones y 84 borrados
  1. 14 0
      CHANGELOG.md
  2. 30 0
      __tests__/watcher.test.ts
  3. 31 2
      package-lock.json
  4. 1 0
      package.json
  5. 1 1
      src/extraction/index.ts
  6. 40 34
      src/mcp/daemon.ts
  7. 85 47
      src/sync/watcher.ts

+ 14 - 0
CHANGELOG.md

@@ -34,6 +34,20 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   already attached to the old daemon keep using it while new sessions run
   standalone until it idles out — they never mix versions over the socket.
 
+### Fixed
+- **The file watcher no longer exhausts the OS file-watch budget on large
+  repos (#276).** It used to register a recursive watch over the *entire*
+  project — `node_modules/`, build output, caches and all — and filter only
+  after the fact. On Linux that meant hundreds of thousands of inotify watches
+  per project; enough that a second project, or codegraph alongside your editor
+  / `next dev`, could hit the per-user ceiling and fail with "OS file watch
+  limit reached." The watcher now excludes the same directories the indexer
+  ignores (the built-in default-ignore set **plus** your `.gitignore`) *before*
+  registering a watch — so on a repo with a 900-directory `node_modules` the
+  watch count drops from ~1,200 to ~14, even when the project has no
+  `.gitignore`. (Stacks with the shared daemon from #411: one watcher across
+  agents, and now that watcher is small.)
+
 ## [0.9.5] - 2026-05-25
 
 ### Fixed

+ 30 - 0
__tests__/watcher.test.ts

@@ -166,6 +166,36 @@ describe('FileWatcher', () => {
 
       watcher.stop();
     });
+
+    it('should not watch node_modules even without a .gitignore (#276/#417)', async () => {
+      // No .gitignore in testDir — exclusion relies on the built-in
+      // default-ignore set the indexer uses (buildDefaultIgnore), which a
+      // .gitignore-only filter would miss.
+      fs.mkdirSync(path.join(testDir, 'node_modules', 'dep', 'lib'), { recursive: true });
+      fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'index.ts'), 'export const dep = 1;');
+
+      const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 });
+      const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 });
+      watcher.start();
+
+      // Let the watcher settle past any residual crawl events.
+      await new Promise((r) => setTimeout(r, 400));
+      syncFn.mockClear();
+
+      // A source-extension edit INSIDE node_modules must NOT trigger a sync —
+      // the directory was never watched.
+      fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'lib', 'extra.ts'), 'export const e = 2;');
+      await new Promise((r) => setTimeout(r, 600));
+      expect(syncFn).not.toHaveBeenCalled();
+
+      // Positive control: a real source edit still triggers sync, proving the
+      // watcher is live (not merely inert).
+      fs.writeFileSync(path.join(testDir, 'src', 'live.ts'), 'export const live = 3;');
+      await waitFor(() => syncFn.mock.calls.length > 0, 5000);
+      expect(syncFn).toHaveBeenCalled();
+
+      watcher.stop();
+    });
   });
 
   describe('callbacks', () => {

+ 31 - 2
package-lock.json

@@ -1,15 +1,16 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.9.3",
+  "version": "0.9.4",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@colbymchenry/codegraph",
-      "version": "0.9.3",
+      "version": "0.9.4",
       "license": "MIT",
       "dependencies": {
         "@clack/prompts": "^1.3.0",
+        "chokidar": "^4.0.3",
         "commander": "^14.0.2",
         "fast-string-width": "^3.0.2",
         "fast-wrap-ansi": "^0.2.0",
@@ -1004,6 +1005,21 @@
         "node": ">= 16"
       }
     },
+    "node_modules/chokidar": {
+      "version": "4.0.3",
+      "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
+      "integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==",
+      "license": "MIT",
+      "dependencies": {
+        "readdirp": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 14.16.0"
+      },
+      "funding": {
+        "url": "https://paulmillr.com/funding/"
+      }
+    },
     "node_modules/commander": {
       "version": "14.0.3",
       "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz",
@@ -1269,6 +1285,19 @@
         "node": "^10 || ^12 || >=14"
       }
     },
+    "node_modules/readdirp": {
+      "version": "4.1.2",
+      "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
+      "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 14.18.0"
+      },
+      "funding": {
+        "type": "individual",
+        "url": "https://paulmillr.com/funding/"
+      }
+    },
     "node_modules/rollup": {
       "version": "4.57.1",
       "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz",

+ 1 - 0
package.json

@@ -33,6 +33,7 @@
   "license": "MIT",
   "dependencies": {
     "@clack/prompts": "^1.3.0",
+    "chokidar": "^4.0.3",
     "commander": "^14.0.2",
     "fast-string-width": "^3.0.2",
     "fast-wrap-ansi": "^0.2.0",

+ 1 - 1
src/extraction/index.ts

@@ -166,7 +166,7 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [
  * the defaults apply to tracked files too (committing a dependency dir doesn't make
  * it project code; the explicit `.gitignore` negation is the only opt-in).
  */
-function buildDefaultIgnore(rootDir: string): Ignore {
+export function buildDefaultIgnore(rootDir: string): Ignore {
   const ig = ignore().add(DEFAULT_IGNORE_PATTERNS);
   try {
     const rootGitignore = path.join(rootDir, '.gitignore');

+ 40 - 34
src/mcp/daemon.ts

@@ -280,20 +280,24 @@ export type AcquireResult =
   | { kind: 'taken'; existing: DaemonLockInfo | null; pidPath: string };
 
 /**
- * Atomically create the daemon pidfile AND write its full record in the same
- * call. Returns either an `acquired` result (the caller is now the daemon-elect
- * and may construct a {@link Daemon}) or a `taken` result.
+ * Atomically create the daemon pidfile with its full record already in place.
+ * Returns either an `acquired` result (the caller is the daemon-elect and may
+ * construct a {@link Daemon}) or a `taken` result.
  *
- * must-fix 1 (issue #411 review): the original implementation created the
- * pidfile empty under an `O_EXCL` fd and only wrote the body later, after
- * `server.listen` resolved. A second candidate that read the pidfile during
- * that millisecond-wide window saw an empty file, decoded it as `null`, treated
- * it as stale, and `unlink`'d the lock the first daemon still held — producing
- * two daemons (two watchers, two writers) on concurrent startup, exactly the
- * multi-agent scenario the feature targets. Writing the complete record before
- * returning the handle closes that window: a concurrent reader always sees a
- * valid pid+version+socketPath, never an empty file. The socket path is
- * deterministic from the project root, so it's known here.
+ * must-fix 1 (issue #411 review): the lockfile must appear in ONE atomic step,
+ * already complete — never empty, even momentarily. The first attempt at this
+ * (`O_EXCL` create then a separate `writeSync`) left a microsecond window where
+ * the file existed but was empty; under concurrent daemon startup a third
+ * candidate could read that empty file, decode it as `null`, and `unlink` the
+ * winner's lock → two daemons (two watchers, two writers). The window was
+ * normally too small to hit, but the chokidar watcher's extra startup time made
+ * concurrent daemons overlap enough to reproduce it reliably.
+ *
+ * The fix writes the complete record to a private temp file, then hard-links it
+ * into place: `link()` is atomic AND exclusive (EEXIST if the target exists), so
+ * the pidfile becomes visible in one step already containing a full record.
+ * Whoever links first wins; everyone else gets EEXIST and reads a complete file.
+ * There is no empty-file window at all.
  */
 export function tryAcquireDaemonLock(projectRoot: string): AcquireResult {
   const pidPath = getDaemonPidPath(projectRoot);
@@ -301,34 +305,36 @@ export function tryAcquireDaemonLock(projectRoot: string): AcquireResult {
   // thing to touch it on a fresh-clone-but-already-initialized checkout.
   fs.mkdirSync(path.dirname(pidPath), { recursive: true });
 
+  const info: DaemonLockInfo = {
+    pid: process.pid,
+    version: CodeGraphPackageVersion,
+    socketPath: getDaemonSocketPath(projectRoot),
+    startedAt: Date.now(),
+  };
+
+  // Temp name is pid-scoped so racing candidates never collide on it.
+  const tmp = `${pidPath}.${process.pid}.tmp`;
+  let acquired = false;
   try {
-    // `wx` = O_CREAT | O_EXCL | O_WRONLY: atomic "create only if absent".
-    const fd = fs.openSync(pidPath, 'wx', 0o600);
-    const info: DaemonLockInfo = {
-      pid: process.pid,
-      version: CodeGraphPackageVersion,
-      socketPath: getDaemonSocketPath(projectRoot),
-      startedAt: Date.now(),
-    };
+    fs.writeFileSync(tmp, encodeLockInfo(info), { mode: 0o600 });
     try {
-      // Synchronous write immediately after the create — no await in between —
-      // so the empty-file window is a single fs.writeSync, not an I/O-bound
-      // `server.listen`. Combined with the pid-verified `clearStaleDaemonLock`
-      // below, concurrent candidates can never delete a live daemon's lock.
-      fs.writeSync(fd, encodeLockInfo(info));
-    } finally {
-      fs.closeSync(fd);
+      fs.linkSync(tmp, pidPath); // atomic + exclusive
+      acquired = true;
+    } catch (err: unknown) {
+      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err;
     }
-    return { kind: 'acquired', pidPath, info };
-  } catch (err: unknown) {
-    const e = err as NodeJS.ErrnoException;
-    if (e.code !== 'EEXIST') throw err;
+  } finally {
+    try { fs.unlinkSync(tmp); } catch { /* temp already gone */ }
   }
 
+  if (acquired) return { kind: 'acquired', pidPath, info };
+
+  // Taken. Because the pidfile was link'd atomically it always holds a complete
+  // record — `existing` is null only for a genuinely corrupt leftover, never a
+  // mid-write race.
   let existing: DaemonLockInfo | null = null;
   try {
-    const raw = fs.readFileSync(pidPath, 'utf8');
-    existing = decodeLockInfo(raw);
+    existing = decodeLockInfo(fs.readFileSync(pidPath, 'utf8'));
   } catch { /* unreadable lockfile — treat as malformed */ }
   return { kind: 'taken', existing, pidPath };
 }

+ 85 - 47
src/sync/watcher.ts

@@ -1,15 +1,24 @@
 /**
  * File Watcher
  *
- * Watches the project directory for file changes and triggers
- * debounced sync operations to keep the code graph up-to-date.
+ * Watches the project directory for file changes and triggers debounced sync
+ * operations to keep the code graph up-to-date.
  *
- * Uses Node.js native fs.watch with recursive mode (macOS FSEvents,
- * Windows ReadDirectoryChangesW, Linux inotify on Node 19+).
+ * Uses chokidar, whose `ignored` callback filters directories BEFORE they are
+ * watched — so we never register inotify watches on excluded trees like
+ * node_modules/, dist/, .git/ (fixes #276: recursive fs.watch exhausted the
+ * kernel watch budget on large repos). The ignore decision reuses the indexer's
+ * `buildDefaultIgnore` (built-in default-ignore dirs + the project's .gitignore)
+ * so the watcher watches exactly the set the indexer indexes — in particular,
+ * node_modules/build/cache dirs are excluded even when the repo has no
+ * .gitignore (#407), which a .gitignore-only filter would miss.
  */
 
-import * as fs from 'fs';
-import { isSourceFile } from '../extraction';
+import * as path from 'path';
+import type { Stats } from 'fs';
+import chokidar, { FSWatcher } from 'chokidar';
+import type { Ignore } from 'ignore';
+import { isSourceFile, buildDefaultIgnore } from '../extraction';
 import { logDebug, logWarn } from '../errors';
 import { normalizePath } from '../utils';
 import { watchDisabledReason } from './watch-policy';
@@ -41,17 +50,22 @@ export interface WatchOptions {
  * debounced sync operations via a provided callback.
  *
  * Design goals:
- * - Minimal resource usage (native OS file events, no polling)
+ * - Minimal resource usage (chokidar filters excluded directories before
+ *   registering an inotify watch — see module docs / #276)
  * - Debounced to avoid thrashing on rapid saves
  * - Filters to supported source files by extension
- * - Ignores .codegraph/ directory changes
+ * - Ignores .codegraph/ and .git/ regardless of .gitignore
  */
 export class FileWatcher {
-  private watcher: fs.FSWatcher | null = null;
+  private watcher: FSWatcher | null = null;
   private debounceTimer: ReturnType<typeof setTimeout> | null = null;
   private hasChanges = false;
   private syncing = false;
   private stopped = false;
+  // The shared ignore matcher (built-in defaults + project .gitignore), built
+  // once at start(). Same source of truth the indexer uses, so watcher scope
+  // can never diverge from index scope.
+  private ignoreMatcher: Ignore | null = null;
 
   private readonly projectRoot: string;
   private readonly debounceMs: number;
@@ -79,61 +93,84 @@ export class FileWatcher {
     if (this.watcher) return true; // Already watching
     this.stopped = false;
 
-    // Some environments make recursive fs.watch unusable — most notably WSL2
-    // /mnt/ drives, where setup blocks long enough to break MCP startup
-    // handshakes (issue #199). Skip watching there; callers fall back to
-    // manual `codegraph sync` or the git sync hooks.
+    // Some environments make filesystem watching unusable — most notably
+    // WSL2 /mnt/ drives, where the underlying fs.watch calls block long
+    // enough to break MCP startup handshakes (issue #199). Skip watching
+    // there; callers fall back to manual `codegraph sync` or git sync hooks.
     const disabledReason = watchDisabledReason(this.projectRoot);
     if (disabledReason) {
       logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot });
       return false;
     }
 
+    // Reuse the indexer's ignore set so the watcher and indexer agree on scope.
+    // chokidar only registers an inotify watch on directories that pass this
+    // filter — that's the #276 fix.
+    this.ignoreMatcher = buildDefaultIgnore(this.projectRoot);
+
     try {
-      this.watcher = fs.watch(
-        this.projectRoot,
-        { recursive: true },
-        (_eventType, filename) => {
-          if (!filename || this.stopped) return;
-
-          // Normalize path separators
-          const normalized = normalizePath(filename);
-
-          // Ignore .codegraph/ directory changes (our own DB writes)
-          if (
-            normalized === '.codegraph' ||
-            normalized.startsWith('.codegraph/') ||
-            normalized.startsWith('.codegraph\\')
-          ) {
-            return;
-          }
-
-          // Only sync changes to files we can actually parse.
-          if (!isSourceFile(normalized)) {
-            return;
-          }
-
-          logDebug('File change detected', { file: normalized });
-          this.hasChanges = true;
-          this.scheduleSync();
-        }
-      );
-
-      // Handle watcher errors gracefully
-      this.watcher.on('error', (err) => {
+      this.watcher = chokidar.watch(this.projectRoot, {
+        // chokidar calls this for every path it encounters and only watches
+        // those that pass — so excluded trees (node_modules/, dist/, .git/, …)
+        // never get an inotify watch in the first place.
+        ignored: (testPath: string, stats?: Stats) => this.shouldIgnore(testPath, stats),
+      });
+
+      // chokidar emits 'all' for every event type; we only sync source files.
+      this.watcher.on('all', (_event: string, filePath: string) => {
+        if (this.stopped) return;
+
+        const normalized = normalizePath(path.relative(this.projectRoot, filePath));
+
+        // Defense in depth: `ignored` should already keep these out, but events
+        // can still arrive during setup or via symlink traversal.
+        if (this.isAlwaysIgnored(normalized)) return;
+        if (!isSourceFile(normalized)) return;
+
+        logDebug('File change detected', { file: normalized });
+        this.hasChanges = true;
+        this.scheduleSync();
+      });
+
+      // Handle watcher errors gracefully — don't crash, the user can restart.
+      this.watcher.on('error', (err: unknown) => {
         logWarn('File watcher error', { error: String(err) });
-        // Don't crash — watcher may recover or user can restart
       });
 
       logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs });
       return true;
     } catch (err) {
-      // Recursive watch not supported (e.g., Linux < Node 19)
-      logWarn('Could not start file watcher — recursive fs.watch not supported on this platform', { error: String(err) });
+      // Watcher setup failed (e.g., permission denied, missing directory).
+      logWarn('Could not start file watcher', { error: String(err) });
       return false;
     }
   }
 
+  /** Our own dirs are always ignored, regardless of .gitignore. */
+  private isAlwaysIgnored(rel: string): boolean {
+    return (
+      rel === '.codegraph' || rel.startsWith('.codegraph/') ||
+      rel === '.git' || rel.startsWith('.git/')
+    );
+  }
+
+  /**
+   * chokidar `ignored` predicate — true for any path that should NOT be watched.
+   * Uses chokidar's provided `stats` to decide directory-vs-file so a dir-only
+   * rule like `build/` matches, without an extra `statSync` per path.
+   */
+  private shouldIgnore(testPath: string, stats?: Stats): boolean {
+    const rel = normalizePath(path.relative(this.projectRoot, testPath));
+    if (!rel || rel === '.' || rel.startsWith('..')) return false; // root / outside
+    if (this.isAlwaysIgnored(rel)) return true;
+    if (!this.ignoreMatcher) return false;
+    if (stats) {
+      return this.ignoreMatcher.ignores(stats.isDirectory() ? rel + '/' : rel);
+    }
+    // Stats unknown: test both forms so a directory match isn't missed.
+    return this.ignoreMatcher.ignores(rel) || this.ignoreMatcher.ignores(rel + '/');
+  }
+
   /**
    * Stop watching for file changes.
    */
@@ -151,6 +188,7 @@ export class FileWatcher {
     }
 
     this.hasChanges = false;
+    this.ignoreMatcher = null;
     logDebug('File watcher stopped');
   }