|
|
@@ -39,6 +39,34 @@ import { normalizePath } from '../utils';
|
|
|
import { isCodeGraphDataDir } from '../directory';
|
|
|
import { watchDisabledReason } from './watch-policy';
|
|
|
|
|
|
/**
 * Lock-contention budget for watcher-triggered syncs. The flush path counts
 * consecutive `LockUnavailableError` failures and degrades auto-sync once the
 * count exceeds this value; any successful sync resets the count.
 */
const MAX_LOCK_RETRIES = 5;

/** Ceiling, in milliseconds, applied to the exponential lock-retry delay. */
const MAX_LOCK_RETRY_DELAY_MS = 30 * 1_000;

/**
 * Human-readable, actionable reason reported when the watcher degrades because
 * of OS watch-resource exhaustion. Every exhaustion path passes this exact text.
 */
const EXHAUSTION_REASON = [
  'OS watch/file limit exhausted; auto-sync disabled. Run `codegraph sync` ',
  '(or install git sync hooks) to refresh the graph after changes.',
].join('');
|
|
|
+
|
|
|
/**
 * True when a value thrown or emitted by `fs.watch` signals OS watch-resource
 * exhaustion, i.e. a condition that will keep failing for every further watch.
 *
 * Recognised error codes:
 *  - `EMFILE` / `ENFILE`: the per-process / system-wide descriptor limit.
 *  - `ENOSPC`: what Linux inotify returns when the per-user watch limit is
 *    reached (Node words it "System limit for number of file watchers
 *    reached"). Without this, the most common Linux exhaustion is not detected.
 *
 * The structured `code` is authoritative. Message matching is a fallback used
 * ONLY when no code is present (some platforms surface a bare Error), so an
 * unrelated coded error whose message merely mentions EMFILE is not matched.
 *
 * @param err Any caught or emitted value; non-object values never match.
 * @returns `true` when the error represents watch-resource exhaustion.
 */
function isWatchResourceExhaustion(err: unknown): boolean {
  if (typeof err !== 'object' || err === null) return false;

  // Structural read instead of a blind ErrnoException cast: both fields stay
  // `unknown` until checked.
  const { code, message } = err as { code?: unknown; message?: unknown };

  if (code === 'EMFILE' || code === 'ENFILE' || code === 'ENOSPC') return true;

  // A code that is present but not one of the above wins over the message.
  if (code) return false;

  return (
    typeof message === 'string' &&
    /EMFILE|ENFILE|ENOSPC|too many open files|limit for number of file watchers/i.test(message)
  );
}
|
|
|
+
|
|
|
/**
|
|
|
* Native recursive `fs.watch` is only reliable on macOS and Windows; on Linux
|
|
|
* (and AIX) it throws `ERR_FEATURE_UNAVAILABLE_ON_PLATFORM`. We branch on this
|
|
|
@@ -48,6 +76,20 @@ function supportsRecursiveWatch(): boolean {
|
|
|
return process.platform === 'darwin' || process.platform === 'win32';
|
|
|
}
|
|
|
|
|
|
/**
 * Seam in front of `fs.watch`. Watch-resource exhaustion cannot be provoked
 * reliably against the real OS, and `fs.watch` is a non-configurable property
 * that cannot be spied, so tests swap in a fake that throws or emits
 * `EMFILE`/`ENFILE` on demand. Outside tests this always points at the real
 * `fs.watch`.
 */
type WatchFn = typeof fs.watch;
let watchImpl: WatchFn = fs.watch;

/**
 * @internal Test-only hook: install a fake `fs.watch`, or pass `null` to
 * restore the real implementation.
 */
export function __setFsWatchForTests(fn: WatchFn | null): void {
  if (fn == null) {
    watchImpl = fs.watch;
    return;
  }
  watchImpl = fn;
}
|
|
|
+
|
|
|
/**
|
|
|
* Upper bound on simultaneously-watched directories on the Linux per-directory
|
|
|
* path. Each is one inotify watch; the kernel's `fs.inotify.max_user_watches`
|
|
|
@@ -98,6 +140,15 @@ export interface WatchOptions {
|
|
|
*/
|
|
|
onSyncError?: (error: Error) => void;
|
|
|
|
|
|
+ /**
|
|
|
+ * Callback fired ONCE when live watching degrades permanently and auto-sync
|
|
|
+ * is disabled — OS watch-resource exhaustion (EMFILE/ENFILE), or a write lock
|
|
|
+ * held past the retry budget. The string is an actionable, human-readable
|
|
|
+ * reason. Lets a host (MCP server, daemon, CLI) tell the user that the index
|
|
|
+ * will no longer auto-update instead of silently serving stale results.
|
|
|
+ */
|
|
|
+ onDegraded?: (reason: string) => void;
|
|
|
+
|
|
|
/**
|
|
|
* Test-only. When true, `start()` installs NO OS-level fs.watch — the
|
|
|
* watcher is "inert" and only the {@link __emitWatchEventForTests} /
|
|
|
@@ -164,6 +215,14 @@ export class FileWatcher {
|
|
|
private dirWatchers = new Map<string, fs.FSWatcher>();
|
|
|
/** Set once the per-directory watch cap is hit, so we log only once. */
|
|
|
private dirCapWarned = false;
|
|
|
+ /**
|
|
|
+ * One-way latch: the reason live watching was permanently disabled at runtime
|
|
|
+ * (watch-resource exhaustion, or lock contention past the retry budget), or
|
|
|
+ * null while healthy. Set by {@link degrade}; cleared only by a fresh start().
|
|
|
+ */
|
|
|
+ private degradedReason: string | null = null;
|
|
|
+ /** Consecutive lock-contention retries for watcher-triggered syncs. */
|
|
|
+ private lockRetryCount = 0;
|
|
|
/** Test-only inert mode: started, but with no OS watcher installed. */
|
|
|
private inert = false;
|
|
|
private debounceTimer: ReturnType<typeof setTimeout> | null = null;
|
|
|
@@ -211,6 +270,7 @@ export class FileWatcher {
|
|
|
private readonly syncFn: () => Promise<{ filesChanged: number; durationMs: number }>;
|
|
|
private readonly onSyncComplete?: WatchOptions['onSyncComplete'];
|
|
|
private readonly onSyncError?: WatchOptions['onSyncError'];
|
|
|
+ private readonly onDegraded?: WatchOptions['onDegraded'];
|
|
|
private readonly inertForTests: boolean;
|
|
|
|
|
|
constructor(
|
|
|
@@ -223,6 +283,7 @@ export class FileWatcher {
|
|
|
this.debounceMs = options.debounceMs ?? 2000;
|
|
|
this.onSyncComplete = options.onSyncComplete;
|
|
|
this.onSyncError = options.onSyncError;
|
|
|
+ this.onDegraded = options.onDegraded;
|
|
|
this.inertForTests = options.inertForTests ?? false;
|
|
|
}
|
|
|
|
|
|
@@ -233,6 +294,8 @@ export class FileWatcher {
|
|
|
start(): boolean {
|
|
|
if (this.recursiveWatcher || this.dirWatchers.size > 0 || this.inert) return true; // Already watching
|
|
|
this.stopped = false;
|
|
|
+ this.degradedReason = null;
|
|
|
+ this.lockRetryCount = 0;
|
|
|
|
|
|
// Some environments make filesystem watching unusable — most notably
|
|
|
// WSL2 /mnt/ drives, where the underlying fs.watch calls block long
|
|
|
@@ -257,6 +320,12 @@ export class FileWatcher {
|
|
|
this.startPerDirectory();
|
|
|
}
|
|
|
|
|
|
+ // The per-directory (Linux) path catches watch-resource exhaustion inside
|
|
|
+ // watchTree and degrades synchronously rather than throwing, so it never
|
|
|
+ // reaches the catch below. Surface that as a failed start here so both
|
|
|
+ // strategies report exhaustion identically (start() === false).
|
|
|
+ if (this.degradedReason) return false;
|
|
|
+
|
|
|
// No async crawl to wait on: as soon as the watch set is installed we
|
|
|
// have a clean baseline (pendingFiles is only populated by post-start
|
|
|
// events). Clear defensively and flip ready.
|
|
|
@@ -274,9 +343,16 @@ export class FileWatcher {
|
|
|
});
|
|
|
return true;
|
|
|
} catch (err) {
|
|
|
- // Watcher setup failed (e.g., permission denied, missing directory).
|
|
|
- logWarn('Could not start file watcher', { error: String(err) });
|
|
|
- this.stop();
|
|
|
+ // Watcher setup failed. Watch-resource exhaustion (EMFILE/ENFILE on the
|
|
|
+ // recursive path) is terminal — degrade cleanly with one actionable
|
|
|
+ // warning instead of leaving a half-broken watcher. Everything else
|
|
|
+ // (permission denied, missing directory) keeps the prior quiet-stop.
|
|
|
+ if (isWatchResourceExhaustion(err)) {
|
|
|
+ this.degrade(EXHAUSTION_REASON, { error: String(err) });
|
|
|
+ } else {
|
|
|
+ logWarn('Could not start file watcher', { error: String(err) });
|
|
|
+ this.stop();
|
|
|
+ }
|
|
|
return false;
|
|
|
}
|
|
|
}
|
|
|
@@ -287,7 +363,7 @@ export class FileWatcher {
|
|
|
* it maps straight to a project-relative path.
|
|
|
*/
|
|
|
private startRecursive(): void {
|
|
|
- this.recursiveWatcher = fs.watch(
|
|
|
+ this.recursiveWatcher = watchImpl(
|
|
|
this.projectRoot,
|
|
|
{ recursive: true, persistent: true },
|
|
|
(_event, filename) => {
|
|
|
@@ -296,6 +372,10 @@ export class FileWatcher {
|
|
|
}
|
|
|
);
|
|
|
this.recursiveWatcher.on('error', (err: unknown) => {
|
|
|
+ if (isWatchResourceExhaustion(err)) {
|
|
|
+ this.degrade(EXHAUSTION_REASON, { error: String(err) });
|
|
|
+ return;
|
|
|
+ }
|
|
|
logWarn('File watcher error', { error: String(err) });
|
|
|
});
|
|
|
}
|
|
|
@@ -319,6 +399,10 @@ export class FileWatcher {
|
|
|
* sync owns the baseline).
|
|
|
*/
|
|
|
private watchTree(dir: string, markExisting: boolean): void {
|
|
|
+ // A degrade() mid-walk (exhaustion on an earlier directory) calls stop(),
|
|
|
+ // which sets `stopped`; bail so the recursion unwinds without adding more
|
|
|
+ // watches to a watcher that is shutting down.
|
|
|
+ if (this.stopped || this.degradedReason) return;
|
|
|
if (this.dirWatchers.has(dir)) return;
|
|
|
if (this.dirWatchers.size >= maxDirWatches()) {
|
|
|
if (!this.dirCapWarned) {
|
|
|
@@ -332,14 +416,26 @@ export class FileWatcher {
|
|
|
|
|
|
let w: fs.FSWatcher;
|
|
|
try {
|
|
|
- w = fs.watch(dir, { persistent: true }, (_event, filename) =>
|
|
|
+ w = watchImpl(dir, { persistent: true }, (_event, filename) =>
|
|
|
this.handleDirEvent(dir, filename)
|
|
|
);
|
|
|
- } catch {
|
|
|
- // ENOENT / EACCES / too-many-open-files — skip this directory quietly.
|
|
|
+ } catch (err) {
|
|
|
+ // EMFILE/ENFILE means the PROCESS is out of descriptors — every further
|
|
|
+ // directory would fail too, so degrade the whole watcher rather than
|
|
|
+ // limping along with a partial watch set.
|
|
|
+ if (isWatchResourceExhaustion(err)) {
|
|
|
+ this.degrade(EXHAUSTION_REASON, { error: String(err), dir });
|
|
|
+ }
|
|
|
+ // ENOENT / EACCES on a single directory stays non-fatal: skip it quietly.
|
|
|
return;
|
|
|
}
|
|
|
- w.on('error', () => this.unwatchDir(dir));
|
|
|
+ w.on('error', (err: unknown) => {
|
|
|
+ if (isWatchResourceExhaustion(err)) {
|
|
|
+ this.degrade(EXHAUSTION_REASON, { error: String(err), dir });
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ this.unwatchDir(dir);
|
|
|
+ });
|
|
|
this.dirWatchers.set(dir, w);
|
|
|
|
|
|
let entries: fs.Dirent[];
|
|
|
@@ -450,6 +546,35 @@ export class FileWatcher {
|
|
|
return this.ignoreMatcher.ignores(rel + '/');
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Permanently disable live watching after a terminal runtime failure
|
|
|
+ * (watch-resource exhaustion, or lock contention past the retry budget).
|
|
|
+ * Idempotent: logs one actionable warning, fires {@link WatchOptions.onDegraded}
|
|
|
+ * once, and stops the watcher. A subsequent start() clears the latch.
|
|
|
+ */
|
|
|
+ private degrade(reason: string, context: Record<string, unknown> = {}): void {
|
|
|
+ if (this.degradedReason) return;
|
|
|
+ this.degradedReason = reason;
|
|
|
+ logWarn('File watcher disabled', { projectRoot: this.projectRoot, reason, ...context });
|
|
|
+ this.onDegraded?.(reason);
|
|
|
+ this.stop();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Whether live watching has degraded permanently (until the next start()).
|
|
|
+ * Distinct from {@link isActive}: a degraded watcher is inactive, but an
|
|
|
+ * inactive watcher is not necessarily degraded (it may simply be stopped or
|
|
|
+ * never started). Hosts use this to tell the user auto-sync is off.
|
|
|
+ */
|
|
|
+ isDegraded(): boolean {
|
|
|
+ return this.degradedReason !== null;
|
|
|
+ }
|
|
|
+
|
|
|
+ /** The reason live watching degraded, or null if it is healthy. */
|
|
|
+ getDegradedReason(): string | null {
|
|
|
+ return this.degradedReason;
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Stop watching for file changes.
|
|
|
*/
|
|
|
@@ -478,6 +603,9 @@ export class FileWatcher {
|
|
|
}
|
|
|
this.dirWatchers.clear();
|
|
|
this.dirCapWarned = false;
|
|
|
+ this.lockRetryCount = 0;
|
|
|
+ // NB: degradedReason is intentionally NOT reset here — it must survive the
|
|
|
+ // stop() that degrade() triggers so isDegraded() stays true. start() clears it.
|
|
|
this.inert = false;
|
|
|
|
|
|
this.pendingFiles.clear();
|
|
|
@@ -528,7 +656,7 @@ export class FileWatcher {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Schedule a debounced sync.
|
|
|
+ * Schedule a normal debounced sync after a source edit.
|
|
|
*/
|
|
|
private scheduleSync(): void {
|
|
|
if (this.debounceTimer) {
|
|
|
@@ -540,6 +668,21 @@ export class FileWatcher {
|
|
|
}, this.debounceMs);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Schedule a retry after a recoverable sync failure (lock contention). Kept
|
|
|
+ * separate from {@link scheduleSync} so prolonged contention backs off
|
|
|
+ * exponentially instead of hammering the lock every debounce cycle.
|
|
|
+ */
|
|
|
+ private scheduleRetrySync(delayMs: number): void {
|
|
|
+ if (this.debounceTimer) {
|
|
|
+ clearTimeout(this.debounceTimer);
|
|
|
+ }
|
|
|
+ this.debounceTimer = setTimeout(() => {
|
|
|
+ this.debounceTimer = null;
|
|
|
+ this.flush();
|
|
|
+ }, delayMs);
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Flush pending changes by running sync.
|
|
|
*
|
|
|
@@ -561,6 +704,7 @@ export class FileWatcher {
|
|
|
|
|
|
try {
|
|
|
const result = await this.syncFn();
|
|
|
+ this.lockRetryCount = 0; // a clean sync clears any contention backoff
|
|
|
// Remove entries whose most recent event predates this sync — those
|
|
|
// edits are now in the DB. Entries with lastSeenMs > syncStartedMs
|
|
|
// arrived mid-sync; whether the in-flight sync captured them depends
|
|
|
@@ -576,13 +720,26 @@ export class FileWatcher {
|
|
|
this.onSyncComplete?.(result);
|
|
|
} catch (err) {
|
|
|
if (err instanceof LockUnavailableError) {
|
|
|
+ this.lockRetryCount += 1;
|
|
|
// Lock-failure no-op (another writer holds the lock). pendingFiles
|
|
|
- // stays intact and the `finally` block reschedules. Debug-only —
|
|
|
- // a long external index would otherwise spam stderr every cycle.
|
|
|
+ // stays intact and the `finally` block reschedules with backoff. Keep
|
|
|
+ // brief contention quiet (debug-only — a long external index would
|
|
|
+ // otherwise spam stderr every cycle), but stop retrying forever: once a
|
|
|
+ // writer holds the lock past the budget, degrade auto-sync explicitly.
|
|
|
logDebug('Watch sync skipped: file lock unavailable', {
|
|
|
pendingFiles: this.pendingFiles.size,
|
|
|
+ retryCount: this.lockRetryCount,
|
|
|
});
|
|
|
+ if (this.lockRetryCount > MAX_LOCK_RETRIES) {
|
|
|
+ this.degrade(
|
|
|
+ 'CodeGraph file lock held by another process past the retry budget; ' +
|
|
|
+ 'auto-sync disabled. Run `codegraph sync` once the other writer finishes ' +
|
|
|
+ '(or install git sync hooks) to refresh the graph.',
|
|
|
+ { pendingFiles: this.pendingFiles.size, retryCount: this.lockRetryCount }
|
|
|
+ );
|
|
|
+ }
|
|
|
} else {
|
|
|
+ this.lockRetryCount = 0; // a non-lock failure isn't contention; reset backoff
|
|
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
|
logWarn('Watch sync failed', { error: error.message });
|
|
|
this.onSyncError?.(error);
|
|
|
@@ -593,9 +750,21 @@ export class FileWatcher {
|
|
|
this.syncing = false;
|
|
|
|
|
|
// If pending files remain (mid-sync events, or this sync failed),
|
|
|
- // schedule another pass.
|
|
|
+ // schedule another pass. After lock contention, back off exponentially
|
|
|
+ // (debounceMs · 2^(n-1), capped) instead of retrying at the normal
|
|
|
+ // debounce cadence; a clean sync resets lockRetryCount so normal edits
|
|
|
+ // keep the fast debounce. A degrade() above already set `stopped`, so
|
|
|
+ // this won't reschedule a watcher that has given up.
|
|
|
if (this.pendingFiles.size > 0 && !this.stopped) {
|
|
|
- this.scheduleSync();
|
|
|
+ if (this.lockRetryCount > 0) {
|
|
|
+ const retryDelayMs = Math.min(
|
|
|
+ this.debounceMs * 2 ** Math.max(0, this.lockRetryCount - 1),
|
|
|
+ MAX_LOCK_RETRY_DELAY_MS
|
|
|
+ );
|
|
|
+ this.scheduleRetrySync(retryDelayMs);
|
|
|
+ } else {
|
|
|
+ this.scheduleSync();
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|