소스 검색

fix(watcher): warn (don't degrade) on Linux inotify watch exhaustion (ENOSPC) (#893)

On the Linux per-directory watch path, hitting fs.inotify.max_user_watches
surfaces as ENOSPC — which the degrade logic added for #876 (EMFILE/ENFILE
only) did not catch, so it fell through to the silent "skip this directory"
branch: a large repo got a partial watch set with no hint why edits in
unwatched directories stopped auto-syncing.

ENOSPC is non-fatal — the watches already installed keep working, and raising
the limit (then restarting) lets the rest of the tree be watched — so it now
warns ONCE, naming the exact knob (fs.inotify.max_user_watches, with the sysctl
to set it), instead of degrading. It also stops attempting further doomed
watches for the session (every inotify_add_watch would fail too). Installed
watches keep firing; `codegraph sync` / git sync hooks cover the remainder.

Validated on macOS (forced per-directory path) and real Linux (Docker) — the
new test asserts a single warning naming fs.inotify.max_user_watches, no
degrade, and a live partial watch.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 주 전
부모
커밋
ab107b325a
3개의 파일 변경, 116줄 추가, 2줄 삭제
  1. 1 0
      CHANGELOG.md
  2. 57 0
      __tests__/watcher.test.ts
  3. 58 2
      src/sync/watcher.ts

+ 1 - 0
CHANGELOG.md

@@ -12,6 +12,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Fixes
 
 - The file watcher that auto-syncs the graph now fails cleanly when live watching can no longer be trusted, instead of looking healthy while the index quietly goes stale. If the operating system runs out of file-watch resources, or another process holds the write lock far longer than a normal save, CodeGraph now disables auto-sync once — with a single clear message telling you to run `codegraph sync` (or rely on the git sync hooks) to refresh — rather than retrying forever or repeating the same error on a loop. And while auto-sync is disabled, CodeGraph's tool responses (and `codegraph status`) now say so plainly, so your AI agent knows to read files directly instead of trusting a frozen index. This mostly matters for long-running MCP/daemon sessions, which could otherwise keep serving stale results while appearing to work. Thanks @thismilktea. (#876)
+- On Linux, hitting the kernel's inotify watch limit on a large project no longer silently leaves half the tree unwatched. CodeGraph now tells you once — naming the exact setting to raise (`fs.inotify.max_user_watches`, e.g. `sudo sysctl fs.inotify.max_user_watches=1048576`) — and keeps live-watching the directories it could register while `codegraph sync` (or the git sync hooks) covers the rest. (#876)
 
 
 ## [1.0.1] - 2026-06-13

+ 57 - 0
__tests__/watcher.test.ts

@@ -205,6 +205,63 @@ describe('FileWatcher', () => {
       expect(watcher.getDegradedReason()).toBeNull();
       watcher.stop();
     });
+
+    it('warns once (NOT degrade) when Linux inotify watches are exhausted (ENOSPC)', () => {
+      // ENOSPC only arises on the Linux per-directory path; force it so the test
+      // runs the per-directory branch on any host. Synchronous test, restored in
+      // finally — no await window for another test to observe the override.
+      const realPlatform = process.platform;
+      Object.defineProperty(process, 'platform', { value: 'linux', configurable: true });
+      try {
+        // Empty-but-for-one-subdir temp dir: the root watch succeeds, then the
+        // child watch hits the (simulated) inotify budget — the realistic
+        // "partial watch installed, then exhausted" shape.
+        const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-inotify-'));
+        fs.mkdirSync(path.join(dir, 'sub'));
+        const onDegraded = vi.fn();
+        const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+        const emitter = new EventEmitter();
+        let calls = 0;
+        const okWatcher = {
+          on: (event: string, handler: (...a: unknown[]) => void) => {
+            emitter.on(event, handler);
+            return okWatcher;
+          },
+          close: () => {},
+        } as unknown as fs.FSWatcher;
+        __setFsWatchForTests(() => {
+          calls += 1;
+          if (calls === 1) return okWatcher; // root dir watch succeeds
+          const err = new Error('ENOSPC: System limit for number of file watchers reached') as NodeJS.ErrnoException;
+          err.code = 'ENOSPC';
+          throw err; // every subsequent dir exhausts the inotify budget
+        });
+        const watcher = new FileWatcher(
+          dir,
+          vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }),
+          { debounceMs: 100, onDegraded }
+        );
+
+        try {
+          // NON-fatal: the watcher starts (partial watch on the root), does NOT
+          // degrade, and warns exactly once with the actionable sysctl remedy.
+          expect(watcher.start()).toBe(true);
+          expect(watcher.isActive()).toBe(true);
+          expect(watcher.isDegraded()).toBe(false);
+          expect(onDegraded).not.toHaveBeenCalled();
+          const inotifyWarnings = warnSpy.mock.calls.filter(
+            (c) => typeof c[0] === 'string' && c[0].includes('inotify watch limit')
+          );
+          expect(inotifyWarnings).toHaveLength(1);
+          expect(String(inotifyWarnings[0]![0])).toContain('fs.inotify.max_user_watches');
+        } finally {
+          watcher.stop();
+          fs.rmSync(dir, { recursive: true, force: true });
+        }
+      } finally {
+        Object.defineProperty(process, 'platform', { value: realPlatform, configurable: true });
+      }
+    });
   });
 
   describe('lock contention degradation (#876)', () => {

+ 58 - 2
src/sync/watcher.ts

@@ -53,6 +53,19 @@ const EXHAUSTION_REASON =
   'OS watch/file limit exhausted; auto-sync disabled. Run `codegraph sync` ' +
   '(or install git sync hooks) to refresh the graph after changes.';
 
+/**
+ * Actionable, NON-fatal warning for Linux inotify watch-count exhaustion.
+ * Unlike {@link EXHAUSTION_REASON} this does not disable the watcher — the
+ * watches already installed keep working — so it names the exact kernel knob to
+ * raise instead.
+ */
+const INOTIFY_LIMIT_REASON =
+  'Linux inotify watch limit reached (fs.inotify.max_user_watches); live ' +
+  'watching now covers only part of the project, so edits in unwatched ' +
+  'directories will not auto-sync. Raise the limit (e.g. `sudo sysctl ' +
+  'fs.inotify.max_user_watches=1048576`, persisted in /etc/sysctl.d) and ' +
+  'restart, or run `codegraph sync` (or install git sync hooks) to refresh.';
+
 /**
  * True when an error is OS watch/file-descriptor exhaustion (EMFILE/ENFILE).
  * Prefers the structured `err.code`; falls back to message matching ONLY when
@@ -67,6 +80,17 @@ function isWatchResourceExhaustion(err: unknown): boolean {
   return false;
 }
 
+/**
+ * True when an error is Linux inotify *watch-count* exhaustion. `fs.watch`
+ * surfaces a hit `fs.inotify.max_user_watches` as ENOSPC ("no space" = no watch
+ * descriptors left, NOT disk space). This only arises on the Linux
+ * per-directory path; it is non-fatal (already-installed watches keep working),
+ * so callers warn rather than degrade.
+ */
+function isInotifyWatchExhaustion(err: unknown): boolean {
+  return (err as NodeJS.ErrnoException | undefined)?.code === 'ENOSPC';
+}
+
 /**
  * Native recursive `fs.watch` is only reliable on macOS and Windows; on Linux
  * (and AIX) it throws `ERR_FEATURE_UNAVAILABLE_ON_PLATFORM`. We branch on this
@@ -215,6 +239,14 @@ export class FileWatcher {
   private dirWatchers = new Map<string, fs.FSWatcher>();
   /** Set once the per-directory watch cap is hit, so we log only once. */
   private dirCapWarned = false;
+  /**
+   * Set once the Linux inotify watch limit (ENOSPC) is hit. Double duty: we
+   * warn only once, AND we stop attempting new directory watches for the rest
+   * of the session — once the kernel budget is exhausted every further
+   * `inotify_add_watch` fails too, so trying the rest of the tree is pure
+   * waste. NON-fatal (does not degrade): installed watches keep working.
+   */
+  private inotifyLimitWarned = false;
   /**
    * One-way latch: the reason live watching was permanently disabled at runtime
    * (watch-resource exhaustion, or lock contention past the retry budget), or
@@ -401,8 +433,10 @@ export class FileWatcher {
   private watchTree(dir: string, markExisting: boolean): void {
     // A degrade() mid-walk (exhaustion on an earlier directory) calls stop(),
     // which sets `stopped`; bail so the recursion unwinds without adding more
-    // watches to a watcher that is shutting down.
-    if (this.stopped || this.degradedReason) return;
+    // watches to a watcher that is shutting down. `inotifyLimitWarned` does the
+    // same after ENOSPC — the kernel budget is gone, so stop trying the rest of
+    // the tree (every add would fail) while keeping the watches already set.
+    if (this.stopped || this.degradedReason || this.inotifyLimitWarned) return;
     if (this.dirWatchers.has(dir)) return;
     if (this.dirWatchers.size >= maxDirWatches()) {
       if (!this.dirCapWarned) {
@@ -425,6 +459,10 @@ export class FileWatcher {
       // limping along with a partial watch set.
       if (isWatchResourceExhaustion(err)) {
         this.degrade(EXHAUSTION_REASON, { error: String(err), dir });
+      } else if (isInotifyWatchExhaustion(err)) {
+        // ENOSPC = inotify watch budget exhausted. NON-fatal: keep the watches
+        // we have and tell the user the knob to raise (warn once).
+        this.warnInotifyLimit({ error: String(err), dir });
       }
       // ENOENT / EACCES on a single directory stays non-fatal: skip it quietly.
       return;
@@ -434,6 +472,9 @@ export class FileWatcher {
         this.degrade(EXHAUSTION_REASON, { error: String(err), dir });
         return;
       }
+      if (isInotifyWatchExhaustion(err)) {
+        this.warnInotifyLimit({ error: String(err), dir });
+      }
       this.unwatchDir(dir);
     });
     this.dirWatchers.set(dir, w);
@@ -560,6 +601,20 @@ export class FileWatcher {
     this.stop();
   }
 
+  /**
+   * Warn ONCE that the Linux inotify watch budget is exhausted (ENOSPC), and
+   * stop adding new watches for the rest of this session — every further
+   * `inotify_add_watch` would fail too, so walking the rest of the tree is
+   * waste. Unlike {@link degrade} this is NON-fatal: the watches already
+   * installed keep firing, and `codegraph sync` covers the unwatched remainder.
+   * The message names the kernel knob to raise (`fs.inotify.max_user_watches`).
+   */
+  private warnInotifyLimit(context: Record<string, unknown> = {}): void {
+    if (this.inotifyLimitWarned) return;
+    this.inotifyLimitWarned = true;
+    logWarn(INOTIFY_LIMIT_REASON, { watchedDirs: this.dirWatchers.size, ...context });
+  }
+
   /**
    * Whether live watching has degraded permanently (until the next start()).
    * Distinct from {@link isActive}: a degraded watcher is inactive, but an
@@ -603,6 +658,7 @@ export class FileWatcher {
     }
     this.dirWatchers.clear();
     this.dirCapWarned = false;
+    this.inotifyLimitWarned = false;
     this.lockRetryCount = 0;
     // NB: degradedReason is intentionally NOT reset here — it must survive the
     // stop() that degrade() triggers so isDegraded() stays true. start() clears it.