Kaynağa Gözat

fix(watcher): exclude ignored dirs before watching to prevent inotify exhaustion (#276)

The file watcher registered a recursive watch over the entire project (node_modules, build output, caches included) and filtered only in the callback — exhausting the Linux inotify budget on large repos (#276). It now uses chokidar and excludes the same directories the indexer ignores (built-in default-ignore set + the project .gitignore) BEFORE registering a watch, so the watch count on a 900-dir node_modules drops from ~1200 to ~14 even with no .gitignore. Stacks with the shared daemon (#411): one watcher across agents, now small.

Also hardens the #411 daemon lockfile against a concurrent-startup race the new watcher timing made reproducible — the lock is now created atomically with its content (temp-write + hard-link), so racing daemons can never both win. Validated on macOS, Linux (Docker), and Windows (chokidar + fs.linkSync on NTFS).

Co-Authored-By: Colby McHenry <me@colbymchenry.com>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Lorenzo Feng 4 hafta önce
ebeveyn
işleme
b09b23cf54
7 değiştirilmiş dosya ile 202 ekleme ve 84 silme
  1. 14 0
      CHANGELOG.md
  2. 30 0
      __tests__/watcher.test.ts
  3. 31 2
      package-lock.json
  4. 1 0
      package.json
  5. 1 1
      src/extraction/index.ts
  6. 40 34
      src/mcp/daemon.ts
  7. 85 47
      src/sync/watcher.ts

+ 14 - 0
CHANGELOG.md

@@ -34,6 +34,20 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   already attached to the old daemon keep using it while new sessions run
   already attached to the old daemon keep using it while new sessions run
   standalone until it idles out — they never mix versions over the socket.
   standalone until it idles out — they never mix versions over the socket.
 
 
+### Fixed
+- **The file watcher no longer exhausts the OS file-watch budget on large
+  repos (#276).** It used to register a recursive watch over the *entire*
+  project — `node_modules/`, build output, caches and all — and filter only
+  after the fact. On Linux that meant hundreds of thousands of inotify watches
+  per project; enough that a second project, or codegraph alongside your editor
+  / `next dev`, could hit the per-user ceiling and fail with "OS file watch
+  limit reached." The watcher now excludes the same directories the indexer
+  ignores (the built-in default-ignore set **plus** your `.gitignore`) *before*
+  registering a watch — so on a repo with a 900-directory `node_modules` the
+  watch count drops from ~1,200 to ~14, even when the project has no
+  `.gitignore`. (Stacks with the shared daemon from #411: one watcher across
+  agents, and now that watcher is small.)
+
 ## [0.9.5] - 2026-05-25
 ## [0.9.5] - 2026-05-25
 
 
 ### Fixed
 ### Fixed

+ 30 - 0
__tests__/watcher.test.ts

@@ -166,6 +166,36 @@ describe('FileWatcher', () => {
 
 
       watcher.stop();
       watcher.stop();
     });
     });
+
+    it('should not watch node_modules even without a .gitignore (#276/#417)', async () => {
+      // No .gitignore in testDir — exclusion relies on the built-in
+      // default-ignore set the indexer uses (buildDefaultIgnore), which a
+      // .gitignore-only filter would miss.
+      fs.mkdirSync(path.join(testDir, 'node_modules', 'dep', 'lib'), { recursive: true });
+      fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'index.ts'), 'export const dep = 1;');
+
+      const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 });
+      const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 });
+      watcher.start();
+
+      // Let the watcher settle past any residual crawl events.
+      await new Promise((r) => setTimeout(r, 400));
+      syncFn.mockClear();
+
+      // A source-extension edit INSIDE node_modules must NOT trigger a sync —
+      // the directory was never watched.
+      fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'lib', 'extra.ts'), 'export const e = 2;');
+      await new Promise((r) => setTimeout(r, 600));
+      expect(syncFn).not.toHaveBeenCalled();
+
+      // Positive control: a real source edit still triggers sync, proving the
+      // watcher is live (not merely inert).
+      fs.writeFileSync(path.join(testDir, 'src', 'live.ts'), 'export const live = 3;');
+      await waitFor(() => syncFn.mock.calls.length > 0, 5000);
+      expect(syncFn).toHaveBeenCalled();
+
+      watcher.stop();
+    });
   });
   });
 
 
   describe('callbacks', () => {
   describe('callbacks', () => {

+ 31 - 2
package-lock.json

@@ -1,15 +1,16 @@
 {
 {
   "name": "@colbymchenry/codegraph",
   "name": "@colbymchenry/codegraph",
-  "version": "0.9.3",
+  "version": "0.9.4",
   "lockfileVersion": 3,
   "lockfileVersion": 3,
   "requires": true,
   "requires": true,
   "packages": {
   "packages": {
     "": {
     "": {
       "name": "@colbymchenry/codegraph",
       "name": "@colbymchenry/codegraph",
-      "version": "0.9.3",
+      "version": "0.9.4",
       "license": "MIT",
       "license": "MIT",
       "dependencies": {
       "dependencies": {
         "@clack/prompts": "^1.3.0",
         "@clack/prompts": "^1.3.0",
+        "chokidar": "^4.0.3",
         "commander": "^14.0.2",
         "commander": "^14.0.2",
         "fast-string-width": "^3.0.2",
         "fast-string-width": "^3.0.2",
         "fast-wrap-ansi": "^0.2.0",
         "fast-wrap-ansi": "^0.2.0",
@@ -1004,6 +1005,21 @@
         "node": ">= 16"
         "node": ">= 16"
       }
       }
     },
     },
+    "node_modules/chokidar": {
+      "version": "4.0.3",
+      "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
+      "integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==",
+      "license": "MIT",
+      "dependencies": {
+        "readdirp": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 14.16.0"
+      },
+      "funding": {
+        "url": "https://paulmillr.com/funding/"
+      }
+    },
     "node_modules/commander": {
     "node_modules/commander": {
       "version": "14.0.3",
       "version": "14.0.3",
       "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz",
       "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz",
@@ -1269,6 +1285,19 @@
         "node": "^10 || ^12 || >=14"
         "node": "^10 || ^12 || >=14"
       }
       }
     },
     },
+    "node_modules/readdirp": {
+      "version": "4.1.2",
+      "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
+      "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 14.18.0"
+      },
+      "funding": {
+        "type": "individual",
+        "url": "https://paulmillr.com/funding/"
+      }
+    },
     "node_modules/rollup": {
     "node_modules/rollup": {
       "version": "4.57.1",
       "version": "4.57.1",
       "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz",
       "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz",

+ 1 - 0
package.json

@@ -33,6 +33,7 @@
   "license": "MIT",
   "license": "MIT",
   "dependencies": {
   "dependencies": {
     "@clack/prompts": "^1.3.0",
     "@clack/prompts": "^1.3.0",
+    "chokidar": "^4.0.3",
     "commander": "^14.0.2",
     "commander": "^14.0.2",
     "fast-string-width": "^3.0.2",
     "fast-string-width": "^3.0.2",
     "fast-wrap-ansi": "^0.2.0",
     "fast-wrap-ansi": "^0.2.0",

+ 1 - 1
src/extraction/index.ts

@@ -166,7 +166,7 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [
  * the defaults apply to tracked files too (committing a dependency dir doesn't make
  * the defaults apply to tracked files too (committing a dependency dir doesn't make
  * it project code; the explicit `.gitignore` negation is the only opt-in).
  * it project code; the explicit `.gitignore` negation is the only opt-in).
  */
  */
-function buildDefaultIgnore(rootDir: string): Ignore {
+export function buildDefaultIgnore(rootDir: string): Ignore {
   const ig = ignore().add(DEFAULT_IGNORE_PATTERNS);
   const ig = ignore().add(DEFAULT_IGNORE_PATTERNS);
   try {
   try {
     const rootGitignore = path.join(rootDir, '.gitignore');
     const rootGitignore = path.join(rootDir, '.gitignore');

+ 40 - 34
src/mcp/daemon.ts

@@ -280,20 +280,24 @@ export type AcquireResult =
   | { kind: 'taken'; existing: DaemonLockInfo | null; pidPath: string };
   | { kind: 'taken'; existing: DaemonLockInfo | null; pidPath: string };
 
 
 /**
 /**
- * Atomically create the daemon pidfile AND write its full record in the same
- * call. Returns either an `acquired` result (the caller is now the daemon-elect
- * and may construct a {@link Daemon}) or a `taken` result.
+ * Atomically create the daemon pidfile with its full record already in place.
+ * Returns either an `acquired` result (the caller is the daemon-elect and may
+ * construct a {@link Daemon}) or a `taken` result.
  *
  *
- * must-fix 1 (issue #411 review): the original implementation created the
- * pidfile empty under an `O_EXCL` fd and only wrote the body later, after
- * `server.listen` resolved. A second candidate that read the pidfile during
- * that millisecond-wide window saw an empty file, decoded it as `null`, treated
- * it as stale, and `unlink`'d the lock the first daemon still held — producing
- * two daemons (two watchers, two writers) on concurrent startup, exactly the
- * multi-agent scenario the feature targets. Writing the complete record before
- * returning the handle closes that window: a concurrent reader always sees a
- * valid pid+version+socketPath, never an empty file. The socket path is
- * deterministic from the project root, so it's known here.
+ * must-fix 1 (issue #411 review): the lockfile must appear in ONE atomic step,
+ * already complete — never empty, even momentarily. The first attempt at this
+ * (`O_EXCL` create then a separate `writeSync`) left a microsecond window where
+ * the file existed but was empty; under concurrent daemon startup a third
+ * candidate could read that empty file, decode it as `null`, and `unlink` the
+ * winner's lock → two daemons (two watchers, two writers). The window was
+ * normally too small to hit, but the chokidar watcher's extra startup time made
+ * concurrent daemons overlap enough to reproduce it reliably.
+ *
+ * The fix writes the complete record to a private temp file, then hard-links it
+ * into place: `link()` is atomic AND exclusive (EEXIST if the target exists), so
+ * the pidfile becomes visible in one step already containing a full record.
+ * Whoever links first wins; everyone else gets EEXIST and reads a complete file.
+ * There is no empty-file window at all.
  */
  */
 export function tryAcquireDaemonLock(projectRoot: string): AcquireResult {
 export function tryAcquireDaemonLock(projectRoot: string): AcquireResult {
   const pidPath = getDaemonPidPath(projectRoot);
   const pidPath = getDaemonPidPath(projectRoot);
@@ -301,34 +305,36 @@ export function tryAcquireDaemonLock(projectRoot: string): AcquireResult {
   // thing to touch it on a fresh-clone-but-already-initialized checkout.
   // thing to touch it on a fresh-clone-but-already-initialized checkout.
   fs.mkdirSync(path.dirname(pidPath), { recursive: true });
   fs.mkdirSync(path.dirname(pidPath), { recursive: true });
 
 
+  const info: DaemonLockInfo = {
+    pid: process.pid,
+    version: CodeGraphPackageVersion,
+    socketPath: getDaemonSocketPath(projectRoot),
+    startedAt: Date.now(),
+  };
+
+  // Temp name is pid-scoped so racing candidates never collide on it.
+  const tmp = `${pidPath}.${process.pid}.tmp`;
+  let acquired = false;
   try {
   try {
-    // `wx` = O_CREAT | O_EXCL | O_WRONLY: atomic "create only if absent".
-    const fd = fs.openSync(pidPath, 'wx', 0o600);
-    const info: DaemonLockInfo = {
-      pid: process.pid,
-      version: CodeGraphPackageVersion,
-      socketPath: getDaemonSocketPath(projectRoot),
-      startedAt: Date.now(),
-    };
+    fs.writeFileSync(tmp, encodeLockInfo(info), { mode: 0o600 });
     try {
     try {
-      // Synchronous write immediately after the create — no await in between —
-      // so the empty-file window is a single fs.writeSync, not an I/O-bound
-      // `server.listen`. Combined with the pid-verified `clearStaleDaemonLock`
-      // below, concurrent candidates can never delete a live daemon's lock.
-      fs.writeSync(fd, encodeLockInfo(info));
-    } finally {
-      fs.closeSync(fd);
+      fs.linkSync(tmp, pidPath); // atomic + exclusive
+      acquired = true;
+    } catch (err: unknown) {
+      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err;
     }
     }
-    return { kind: 'acquired', pidPath, info };
-  } catch (err: unknown) {
-    const e = err as NodeJS.ErrnoException;
-    if (e.code !== 'EEXIST') throw err;
+  } finally {
+    try { fs.unlinkSync(tmp); } catch { /* temp already gone */ }
   }
   }
 
 
+  if (acquired) return { kind: 'acquired', pidPath, info };
+
+  // Taken. Because the pidfile was link'd atomically it always holds a complete
+  // record — `existing` is null only for a genuinely corrupt leftover, never a
+  // mid-write race.
   let existing: DaemonLockInfo | null = null;
   let existing: DaemonLockInfo | null = null;
   try {
   try {
-    const raw = fs.readFileSync(pidPath, 'utf8');
-    existing = decodeLockInfo(raw);
+    existing = decodeLockInfo(fs.readFileSync(pidPath, 'utf8'));
   } catch { /* unreadable lockfile — treat as malformed */ }
   } catch { /* unreadable lockfile — treat as malformed */ }
   return { kind: 'taken', existing, pidPath };
   return { kind: 'taken', existing, pidPath };
 }
 }

+ 85 - 47
src/sync/watcher.ts

@@ -1,15 +1,24 @@
 /**
 /**
  * File Watcher
  * File Watcher
  *
  *
- * Watches the project directory for file changes and triggers
- * debounced sync operations to keep the code graph up-to-date.
+ * Watches the project directory for file changes and triggers debounced sync
+ * operations to keep the code graph up-to-date.
  *
  *
- * Uses Node.js native fs.watch with recursive mode (macOS FSEvents,
- * Windows ReadDirectoryChangesW, Linux inotify on Node 19+).
+ * Uses chokidar, whose `ignored` callback filters directories BEFORE they are
+ * watched — so we never register inotify watches on excluded trees like
+ * node_modules/, dist/, .git/ (fixes #276: recursive fs.watch exhausted the
+ * kernel watch budget on large repos). The ignore decision reuses the indexer's
+ * `buildDefaultIgnore` (built-in default-ignore dirs + the project's .gitignore)
+ * so the watcher watches exactly the set the indexer indexes — in particular,
+ * node_modules/build/cache dirs are excluded even when the repo has no
+ * .gitignore (#407), which a .gitignore-only filter would miss.
  */
  */
 
 
-import * as fs from 'fs';
-import { isSourceFile } from '../extraction';
+import * as path from 'path';
+import type { Stats } from 'fs';
+import chokidar, { FSWatcher } from 'chokidar';
+import type { Ignore } from 'ignore';
+import { isSourceFile, buildDefaultIgnore } from '../extraction';
 import { logDebug, logWarn } from '../errors';
 import { logDebug, logWarn } from '../errors';
 import { normalizePath } from '../utils';
 import { normalizePath } from '../utils';
 import { watchDisabledReason } from './watch-policy';
 import { watchDisabledReason } from './watch-policy';
@@ -41,17 +50,22 @@ export interface WatchOptions {
  * debounced sync operations via a provided callback.
  * debounced sync operations via a provided callback.
  *
  *
  * Design goals:
  * Design goals:
- * - Minimal resource usage (native OS file events, no polling)
+ * - Minimal resource usage (chokidar filters excluded directories before
+ *   registering an inotify watch — see module docs / #276)
  * - Debounced to avoid thrashing on rapid saves
  * - Debounced to avoid thrashing on rapid saves
  * - Filters to supported source files by extension
  * - Filters to supported source files by extension
- * - Ignores .codegraph/ directory changes
+ * - Ignores .codegraph/ and .git/ regardless of .gitignore
  */
  */
 export class FileWatcher {
 export class FileWatcher {
-  private watcher: fs.FSWatcher | null = null;
+  private watcher: FSWatcher | null = null;
   private debounceTimer: ReturnType<typeof setTimeout> | null = null;
   private debounceTimer: ReturnType<typeof setTimeout> | null = null;
   private hasChanges = false;
   private hasChanges = false;
   private syncing = false;
   private syncing = false;
   private stopped = false;
   private stopped = false;
+  // The shared ignore matcher (built-in defaults + project .gitignore), built
+  // once at start(). Same source of truth the indexer uses, so watcher scope
+  // can never diverge from index scope.
+  private ignoreMatcher: Ignore | null = null;
 
 
   private readonly projectRoot: string;
   private readonly projectRoot: string;
   private readonly debounceMs: number;
   private readonly debounceMs: number;
@@ -79,61 +93,84 @@ export class FileWatcher {
     if (this.watcher) return true; // Already watching
     if (this.watcher) return true; // Already watching
     this.stopped = false;
     this.stopped = false;
 
 
-    // Some environments make recursive fs.watch unusable — most notably WSL2
-    // /mnt/ drives, where setup blocks long enough to break MCP startup
-    // handshakes (issue #199). Skip watching there; callers fall back to
-    // manual `codegraph sync` or the git sync hooks.
+    // Some environments make filesystem watching unusable — most notably
+    // WSL2 /mnt/ drives, where the underlying fs.watch calls block long
+    // enough to break MCP startup handshakes (issue #199). Skip watching
+    // there; callers fall back to manual `codegraph sync` or git sync hooks.
     const disabledReason = watchDisabledReason(this.projectRoot);
     const disabledReason = watchDisabledReason(this.projectRoot);
     if (disabledReason) {
     if (disabledReason) {
       logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot });
       logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot });
       return false;
       return false;
     }
     }
 
 
+    // Reuse the indexer's ignore set so the watcher and indexer agree on scope.
+    // chokidar only registers an inotify watch on directories that pass this
+    // filter — that's the #276 fix.
+    this.ignoreMatcher = buildDefaultIgnore(this.projectRoot);
+
     try {
     try {
-      this.watcher = fs.watch(
-        this.projectRoot,
-        { recursive: true },
-        (_eventType, filename) => {
-          if (!filename || this.stopped) return;
-
-          // Normalize path separators
-          const normalized = normalizePath(filename);
-
-          // Ignore .codegraph/ directory changes (our own DB writes)
-          if (
-            normalized === '.codegraph' ||
-            normalized.startsWith('.codegraph/') ||
-            normalized.startsWith('.codegraph\\')
-          ) {
-            return;
-          }
-
-          // Only sync changes to files we can actually parse.
-          if (!isSourceFile(normalized)) {
-            return;
-          }
-
-          logDebug('File change detected', { file: normalized });
-          this.hasChanges = true;
-          this.scheduleSync();
-        }
-      );
-
-      // Handle watcher errors gracefully
-      this.watcher.on('error', (err) => {
+      this.watcher = chokidar.watch(this.projectRoot, {
+        // chokidar calls this for every path it encounters and only watches
+        // those that pass — so excluded trees (node_modules/, dist/, .git/, …)
+        // never get an inotify watch in the first place.
+        ignored: (testPath: string, stats?: Stats) => this.shouldIgnore(testPath, stats),
+      });
+
+      // chokidar emits 'all' for every event type; we only sync source files.
+      this.watcher.on('all', (_event: string, filePath: string) => {
+        if (this.stopped) return;
+
+        const normalized = normalizePath(path.relative(this.projectRoot, filePath));
+
+        // Defense in depth: `ignored` should already keep these out, but events
+        // can still arrive during setup or via symlink traversal.
+        if (this.isAlwaysIgnored(normalized)) return;
+        if (!isSourceFile(normalized)) return;
+
+        logDebug('File change detected', { file: normalized });
+        this.hasChanges = true;
+        this.scheduleSync();
+      });
+
+      // Handle watcher errors gracefully — don't crash, the user can restart.
+      this.watcher.on('error', (err: unknown) => {
         logWarn('File watcher error', { error: String(err) });
         logWarn('File watcher error', { error: String(err) });
-        // Don't crash — watcher may recover or user can restart
       });
       });
 
 
       logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs });
       logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs });
       return true;
       return true;
     } catch (err) {
     } catch (err) {
-      // Recursive watch not supported (e.g., Linux < Node 19)
-      logWarn('Could not start file watcher — recursive fs.watch not supported on this platform', { error: String(err) });
+      // Watcher setup failed (e.g., permission denied, missing directory).
+      logWarn('Could not start file watcher', { error: String(err) });
       return false;
       return false;
     }
     }
   }
   }
 
 
+  /** Our own dirs are always ignored, regardless of .gitignore. */
+  private isAlwaysIgnored(rel: string): boolean {
+    return (
+      rel === '.codegraph' || rel.startsWith('.codegraph/') ||
+      rel === '.git' || rel.startsWith('.git/')
+    );
+  }
+
+  /**
+   * chokidar `ignored` predicate — true for any path that should NOT be watched.
+   * Uses chokidar's provided `stats` to decide directory-vs-file so a dir-only
+   * rule like `build/` matches, without an extra `statSync` per path.
+   */
+  private shouldIgnore(testPath: string, stats?: Stats): boolean {
+    const rel = normalizePath(path.relative(this.projectRoot, testPath));
+    if (!rel || rel === '.' || rel.startsWith('..')) return false; // root / outside
+    if (this.isAlwaysIgnored(rel)) return true;
+    if (!this.ignoreMatcher) return false;
+    if (stats) {
+      return this.ignoreMatcher.ignores(stats.isDirectory() ? rel + '/' : rel);
+    }
+    // Stats unknown: test both forms so a directory match isn't missed.
+    return this.ignoreMatcher.ignores(rel) || this.ignoreMatcher.ignores(rel + '/');
+  }
+
   /**
   /**
    * Stop watching for file changes.
    * Stop watching for file changes.
    */
    */
@@ -151,6 +188,7 @@ export class FileWatcher {
     }
     }
 
 
     this.hasChanges = false;
     this.hasChanges = false;
+    this.ignoreMatcher = null;
     logDebug('File watcher stopped');
     logDebug('File watcher stopped');
   }
   }