watcher.ts 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871
  1. /**
  2. * File Watcher
  3. *
  4. * Watches the project directory for file changes and triggers debounced sync
  5. * operations to keep the code graph up-to-date.
  6. *
  7. * Uses Node's built-in `fs.watch` directly (no third-party watcher, no native
  8. * addon) with a per-platform strategy chosen to keep the open-descriptor /
  9. * kernel-watch cost BOUNDED rather than growing with the number of files:
  10. *
  11. * - macOS / Windows: a SINGLE recursive `fs.watch(root, {recursive:true})`.
  12. * libuv maps this to one FSEvents stream (macOS) / one
  13. * ReadDirectoryChangesW handle (Windows), so it costs O(1) descriptors no
  14. * matter how large the tree. This is the fix for the macOS file-table
  15. * exhaustion (#644 / #496 / #555 / #628): the previous watcher held one
  16. * open fd PER WATCHED FILE on macOS (tens of thousands of REG fds), which
  17. * exhausted `kern.maxfiles` and crashed unrelated processes system-wide.
  18. *
  19. * - Linux: recursive `fs.watch` is unsupported, so we watch each (non-ignored)
  20. * DIRECTORY with one inotify watch — O(directories), NOT O(files). New
  21. * directories are picked up dynamically and an overall watch cap bounds
  22. * inotify usage on pathological monorepos (#579). A single inotify watch on
  23. * a directory already reports create/modify/delete for its children, so
  24. * per-file watches are never needed.
  25. *
  26. * Excluded trees (node_modules/, dist/, .git/, …) are filtered via the
  27. * indexer's `buildScopeIgnore` (built-in default-ignore dirs + the project's
  28. * .gitignore) — on Linux they're never descended into (so they cost no watch),
  29. * and on macOS/Windows the single recursive stream still covers them but their
  30. * events are dropped before any sync is scheduled. Either way the watcher's
  31. * scope matches the indexer's (#276 / #407).
  32. */
  33. import * as fs from 'fs';
  34. import * as path from 'path';
  35. import { isSourceFile, buildScopeIgnore, type ScopeIgnore } from '../extraction';
  36. import { loadExtensionOverrides } from '../project-config';
  37. import { logDebug, logWarn } from '../errors';
  38. import { normalizePath } from '../utils';
  39. import { isCodeGraphDataDir } from '../directory';
  40. import { watchDisabledReason } from './watch-policy';
  /**
   * Budget of back-to-back lock-contention retries. Short-lived contention
   * (another writer for a few cycles) fits inside it; a long-lived external
   * writer exhausts it, at which point the watcher gives up and degrades
   * auto-sync.
   */
  const MAX_LOCK_RETRIES = 5;

  /** Ceiling (ms) for the exponential lock-retry backoff delay. */
  const MAX_LOCK_RETRY_DELAY_MS = 30_000;

  /** Actionable degrade message, shared verbatim by both exhaustion paths. */
  const EXHAUSTION_REASON = [
    'OS watch/file limit exhausted; auto-sync disabled. Run `codegraph sync`',
    '(or install git sync hooks) to refresh the graph after changes.',
  ].join(' ');
  /**
   * Warning text for Linux inotify watch-count exhaustion. This condition is
   * NON-fatal — the watches already installed keep working and the watcher is
   * not disabled — so, unlike {@link EXHAUSTION_REASON}, the message names the
   * exact kernel setting to raise.
   */
  const INOTIFY_LIMIT_REASON = [
    'Linux inotify watch limit reached (fs.inotify.max_user_watches); live',
    'watching now covers only part of the project, so edits in unwatched',
    'directories will not auto-sync. Raise the limit (e.g. `sudo sysctl',
    'fs.inotify.max_user_watches=1048576`, persisted in /etc/sysctl.d) and',
    'restart, or run `codegraph sync` (or install git sync hooks) to refresh.',
  ].join(' ');
  /**
   * Reports whether `err` signals OS watch/file-descriptor exhaustion
   * (EMFILE / ENFILE).
   *
   * The structured `code` is authoritative when present. The message text is
   * consulted ONLY when there is no code at all, to cover a bare Error
   * surfaced by `fs.watch`.
   *
   * @param err - Anything thrown or emitted by a watcher; may be null/undefined.
   * @returns true for descriptor exhaustion, false for everything else.
   */
  function isWatchResourceExhaustion(err: unknown): boolean {
    const { code, message } = (err ?? {}) as NodeJS.ErrnoException;
    switch (code) {
      case 'EMFILE':
      case 'ENFILE':
        return true;
    }
    // A different (truthy) code wins over whatever the message happens to say.
    if (code || !message) return false;
    return /EMFILE|ENFILE|too many open files/i.test(message);
  }
  /**
   * Reports whether `err` is Linux inotify *watch-count* exhaustion, which
   * `fs.watch` surfaces as ENOSPC when `fs.inotify.max_user_watches` is hit
   * ("no space" here means no watch descriptors left, NOT a full disk).
   *
   * Only the Linux per-directory strategy can produce it. Callers treat it as
   * non-fatal: they warn and keep the partial watch set instead of degrading.
   *
   * @param err - Anything thrown or emitted by a watcher; may be undefined.
   * @returns true only when the error's `code` is exactly `ENOSPC`.
   */
  function isInotifyWatchExhaustion(err: unknown): boolean {
    const code = (err as NodeJS.ErrnoException | undefined)?.code;
    return code === 'ENOSPC';
  }
  /**
   * Chooses the watch strategy for the current platform: true selects the
   * single recursive `fs.watch`, false selects one watch per directory.
   *
   * Only macOS and Windows are treated as having a reliable native recursive
   * watch; on Linux (and AIX) the recursive form is reported to throw
   * `ERR_FEATURE_UNAVAILABLE_ON_PLATFORM`.
   * NOTE(review): the throwing behavior depends on the Node.js version —
   * confirm against the minimum supported runtime.
   */
  function supportsRecursiveWatch(): boolean {
    switch (process.platform) {
      case 'darwin':
      case 'win32':
        return true;
      default:
        return false;
    }
  }
  /**
   * Signature of the watch primitive every watcher in this module goes through.
   */
  type WatchFn = typeof fs.watch;

  /**
   * The watch implementation in effect. It is the real `fs.watch` unless a
   * test has swapped in a fake: real watch-resource exhaustion can't be
   * provoked reliably, and `fs.watch` is a non-configurable property so it
   * can't be spied, hence this indirection.
   */
  let watchImpl: WatchFn = fs.watch;

  /**
   * @internal Test-only seam: install a fake `fs.watch` (one that throws or
   * emits `EMFILE`/`ENFILE` deterministically), or pass `null` to restore the
   * real implementation.
   */
  export function __setFsWatchForTests(fn: WatchFn | null): void {
    watchImpl = fn == null ? fs.watch : fn;
  }
  /**
   * Default ceiling on simultaneously-watched directories for the Linux
   * per-directory strategy. Every watched directory costs one inotify watch,
   * and the kernel's `fs.inotify.max_user_watches` is the hard limit. Past
   * the ceiling no more watches are added and a single warning is logged:
   * partial live-watching (with `codegraph sync` as the backstop) beats
   * draining the user's inotify budget system-wide (#579).
   */
  const DEFAULT_MAX_DIR_WATCHES = 50_000;

  /**
   * Resolves the effective directory-watch ceiling.
   *
   * @returns The value of `CODEGRAPH_MAX_DIR_WATCHES` when it is a string of
   * decimal digits denoting a number greater than zero; otherwise
   * {@link DEFAULT_MAX_DIR_WATCHES}.
   */
  function maxDirWatches(): number {
    const override = process.env.CODEGRAPH_MAX_DIR_WATCHES ?? '';
    // Digits only: rejects signs, decimals, whitespace and the empty string.
    if (!/^\d+$/.test(override)) return DEFAULT_MAX_DIR_WATCHES;
    const parsed = Number(override);
    return parsed > 0 ? parsed : DEFAULT_MAX_DIR_WATCHES;
  }
  /**
   * Test seam (see {@link __emitWatchEventForTests}): project root → live
   * watcher instance, so a test can synthesize a change event
   * deterministically instead of racing on real fs.watch delivery latency
   * under parallel vitest. Entries are only added when
   * {@link IS_TEST_RUNTIME} is true, so production retains no references.
   */
  const liveWatchersForTests: Map<string, FileWatcher> = new Map();

  /** True when running under a test runner (VITEST set, or NODE_ENV === 'test'). */
  const IS_TEST_RUNTIME = Boolean(process.env.VITEST) || process.env.NODE_ENV === 'test';
  /**
   * Configuration accepted by the {@link FileWatcher} constructor. Every field
   * is optional.
   */
  export interface WatchOptions {
    /**
     * Quiet period, in milliseconds, that must elapse after the most recent
     * file change before a sync is triggered. Defaults to 2000.
     */
    debounceMs?: number;
    /**
     * Invoked after a sync finishes, with its statistics (logging/diagnostics).
     */
    onSyncComplete?: (result: { filesChanged: number; durationMs: number }) => void;
    /**
     * Invoked when a sync fails (logging/diagnostics).
     */
    onSyncError?: (error: Error) => void;
    /**
     * Invoked ONCE when live watching is permanently lost and auto-sync is
     * switched off: OS watch-resource exhaustion (EMFILE/ENFILE), or a write
     * lock held beyond the retry budget. The argument is an actionable,
     * human-readable reason, so a host (MCP server, daemon, CLI) can tell the
     * user the index no longer auto-updates rather than silently serving
     * stale results.
     */
    onDegraded?: (reason: string) => void;
    /**
     * Test-only. When true, `start()` installs NO OS-level fs.watch; the
     * watcher is "inert" and its pipeline is driven solely through the
     * {@link __emitWatchEventForTests} / {@link FileWatcher.ingestEventForTests}
     * seam. This gives unit tests deterministic, OS-free behavior (real
     * FSEvents/inotify delivery races under parallel vitest). Production
     * never sets it.
     */
    inertForTests?: boolean;
  }
  /**
   * Signal a `syncFn` throws when the underlying sync could not take the
   * cross-process write lock (#449).
   *
   * The watcher reads it as "no progress": `pendingFiles` is kept,
   * `onSyncComplete` is skipped, and the `finally` block reschedules. It is
   * logged quietly (debug-only) because a long-running external indexer can
   * trigger it on every debounce cycle.
   */
  export class LockUnavailableError extends Error {
    /**
     * @param message - Optional override for the default explanation.
     */
    constructor(message = 'CodeGraph file lock unavailable; another process is writing') {
      super(message);
      // Stable name so callers and log output can identify this error kind.
      this.name = 'LockUnavailableError';
    }
  }
  /**
   * Snapshot of one source file that has produced a watch event but has not
   * yet been synced into the index. Returned by
   * {@link FileWatcher.getPendingFiles} so MCP tool responses can flag stale
   * results without forcing a wait.
   */
  export interface PendingFile {
    /** Path relative to the project root, POSIX separators (e.g. "src/foo.ts"). */
    path: string;
    /** Wall-clock ms of the first event seen for this path since the last sync. */
    firstSeenMs: number;
    /** Wall-clock ms of the latest event seen for this path. */
    lastSeenMs: number;
    /**
     * True when a sync is in flight that started AFTER this file's latest
     * event, meaning the next successful sync will cover it. False while the
     * file is still inside the debounce window and no sync is running yet.
     */
    indexing: boolean;
  }
  202. /**
  203. * FileWatcher monitors a project directory for changes and triggers
  204. * debounced sync operations via a provided callback.
  205. *
  206. * Design goals:
  207. * - Bounded resource usage: O(1) descriptors on macOS/Windows (one recursive
  208. * watch), O(directories) inotify watches on Linux — never O(files), which
  209. * was the system-crashing fd leak on macOS (#644/#496/#555/#628).
  210. * - Debounced to avoid thrashing on rapid saves
  211. * - Filters to supported source files by extension
  212. * - Ignores .codegraph/ and .git/ regardless of .gitignore
  213. * - Tracks per-file pending state so MCP tools can flag stale results
  214. * without blocking on a sync (issue #403)
  215. */
  216. export class FileWatcher {
  /** Recursive strategy (macOS/Windows): the one watcher. Stays null on Linux. */
  private recursiveWatcher: fs.FSWatcher | null = null;

  /** Per-directory strategy (Linux): absolute directory path → its watcher. */
  private dirWatchers: Map<string, fs.FSWatcher> = new Map();

  /** Latched once the directory-watch cap warning is logged, so it logs once. */
  private dirCapWarned = false;

  /**
   * Latched once the Linux inotify watch limit (ENOSPC) is hit. Serves two
   * purposes: the warning is emitted only once, AND no further directory
   * watches are attempted this session, since with the kernel budget gone
   * every additional `inotify_add_watch` would fail as well. NON-fatal (no
   * degrade): watches already installed keep working.
   */
  private inotifyLimitWarned = false;

  /**
   * One-way latch holding why live watching was permanently disabled at
   * runtime (watch-resource exhaustion, or lock contention beyond the retry
   * budget); null while healthy. Written by {@link degrade} and reset only by
   * a fresh start().
   */
  private degradedReason: string | null = null;

  /** Count of consecutive lock-contention retries for watcher-triggered syncs. */
  private lockRetryCount = 0;

  /** Test-only inert mode: started, yet with no OS watcher installed. */
  private inert = false;

  /** Handle of the pending debounce timer, or null when none is armed. */
  private debounceTimer: ReturnType<typeof setTimeout> | null = null;

  /**
   * Files with events since the last successful sync. Filled on every change
   * event, emptied when a sync starts, and refilled by events that land
   * mid-sync (or restored if the sync fails). Keys are the project-relative
   * POSIX paths used throughout the codebase, so callers can cheaply
   * intersect tool-response file paths with this map.
   */
  private pendingFiles: Map<string, { firstSeenMs: number; lastSeenMs: number }> = new Map();

  /**
   * Wall-clock ms at which the in-flight sync started. Compared against a
   * {@link pendingFiles} entry's `lastSeenMs` to tell "still in the debounce
   * window" (lastSeen > syncStarted: no sync has started for this edit yet)
   * apart from "currently being indexed" (lastSeen <= syncStarted).
   */
  private syncStartedMs = 0;

  /** Sync-in-flight flag. */
  private syncing = false;

  /** Set by stop(); event handlers check it and drop events when it is set. */
  private stopped = false;

  /**
   * True once the initial watch set is in place. `fs.watch` reports changes
   * only from the moment it is installed — there is no asynchronous initial
   * crawl emitting an `add` per existing file, as the previous chokidar
   * implementation had — so this flips synchronously at the end of `start()`.
   * Reconciling against on-disk state at startup is the engine's catch-up
   * sync's job, not the watcher's.
   */
  private ready = false;

  /**
   * Callbacks to run when the watch set is established. They let tests (and
   * any production caller wanting a clean baseline) gate deterministically on
   * watcher readiness.
   */
  private readyWaiters: (() => void)[] = [];

  /**
   * Shared scope matcher (built-in defaults + the project's .gitignore, with
   * embedded child repos matched by their OWN rules — #514), built once in
   * start(). It is the same source of truth the indexer uses, so watcher
   * scope cannot drift from index scope. An embedded repo created after
   * start() joins the scope on the next watcher restart / re-index.
   */
  private ignoreMatcher: ScopeIgnore | null = null;

  /** Root directory being watched, exactly as passed to the constructor. */
  private readonly projectRoot: string;
  /** Debounce delay in milliseconds. */
  private readonly debounceMs: number;
  /** Sync operation supplied by the host; run when the debounce elapses. */
  private readonly syncFn: () => Promise<{ filesChanged: number; durationMs: number }>;
  /** Optional host callback; see {@link WatchOptions.onSyncComplete}. */
  private readonly onSyncComplete?: WatchOptions['onSyncComplete'];
  /** Optional host callback; see {@link WatchOptions.onSyncError}. */
  private readonly onSyncError?: WatchOptions['onSyncError'];
  /** Optional host callback; see {@link WatchOptions.onDegraded}. */
  private readonly onDegraded?: WatchOptions['onDegraded'];
  /** Whether this instance was constructed in test-only inert mode. */
  private readonly inertForTests: boolean;
  /**
   * Creates a watcher; nothing is watched until {@link start} is called.
   *
   * @param projectRoot - Directory whose tree will be watched.
   * @param syncFn - Sync operation to run after the debounce delay; resolves
   *   with the number of files changed and the duration in milliseconds.
   * @param options - Optional tuning and callbacks; see {@link WatchOptions}.
   */
  constructor(
    projectRoot: string,
    syncFn: () => Promise<{ filesChanged: number; durationMs: number }>,
    options: WatchOptions = {}
  ) {
    const { debounceMs, onSyncComplete, onSyncError, onDegraded, inertForTests } = options;

    this.projectRoot = projectRoot;
    this.syncFn = syncFn;
    this.debounceMs = debounceMs ?? 2000;
    this.onSyncComplete = onSyncComplete;
    this.onSyncError = onSyncError;
    this.onDegraded = onDegraded;
    this.inertForTests = inertForTests ?? false;
  }
  /**
   * Start watching for file changes.
   *
   * Picks the strategy for the platform (inert for tests, one recursive
   * watcher on macOS/Windows, one watch per directory elsewhere), installs
   * the watch set and marks the watcher ready. Calling it while already
   * watching is a no-op that returns true.
   *
   * @returns true when at least one watch (or the inert test seam) is in
   * place; false when watching is disabled for this environment, when setup
   * failed, when the watcher degraded during setup, or when the
   * per-directory walk could not install a single watch.
   */
  start(): boolean {
    if (this.recursiveWatcher || this.dirWatchers.size > 0 || this.inert) return true; // Already watching

    this.stopped = false;
    this.degradedReason = null;
    this.lockRetryCount = 0;

    // Some environments make filesystem watching unusable — most notably
    // WSL2 /mnt/ drives, where the underlying fs.watch calls block long
    // enough to break MCP startup handshakes (issue #199). Skip watching
    // there; callers fall back to manual `codegraph sync` or git sync hooks.
    const disabledReason = watchDisabledReason(this.projectRoot);
    if (disabledReason) {
      logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot });
      return false;
    }

    // Reuse the indexer's ignore set so the watcher and indexer agree on scope.
    this.ignoreMatcher = buildScopeIgnore(this.projectRoot);

    try {
      const mode = this.inertForTests
        ? 'inert'
        : supportsRecursiveWatch()
          ? 'recursive'
          : 'per-directory';

      if (mode === 'inert') {
        // Test-only: install no OS watcher; the seam drives events instead.
        this.inert = true;
      } else if (mode === 'recursive') {
        this.startRecursive();
      } else {
        this.startPerDirectory();
      }

      // The per-directory path catches watch-resource exhaustion inside
      // watchTree and degrades synchronously rather than throwing, so it never
      // reaches the catch below. Surface that as a failed start here so both
      // strategies report exhaustion identically (start() === false).
      if (this.degradedReason) return false;

      // watchTree also skips an unwatchable directory quietly (ENOENT/EACCES,
      // or ENOSPC on the very first watch). If that directory was the project
      // root, nothing at all is being watched: report a failed start, as the
      // recursive path does when its single watch cannot be installed, rather
      // than returning true from a watcher whose isActive() is false.
      if (mode === 'per-directory' && this.dirWatchers.size === 0) {
        logWarn('Could not start file watcher', {
          error: 'no directory watch could be installed',
          projectRoot: this.projectRoot,
        });
        this.stop();
        return false;
      }

      // No async crawl to wait on: as soon as the watch set is installed we
      // have a clean baseline (pendingFiles is only populated by post-start
      // events). Clear defensively and flip ready.
      this.pendingFiles.clear();
      this.ready = true;
      for (const cb of this.readyWaiters) cb();
      this.readyWaiters.length = 0;

      if (IS_TEST_RUNTIME) liveWatchersForTests.set(this.projectRoot, this);

      logDebug('File watcher started', {
        projectRoot: this.projectRoot,
        debounceMs: this.debounceMs,
        mode,
        watchedDirs: this.dirWatchers.size || undefined,
      });
      return true;
    } catch (err) {
      // Watcher setup failed. Watch-resource exhaustion (EMFILE/ENFILE on the
      // recursive path) is terminal — degrade cleanly with one actionable
      // warning instead of leaving a half-broken watcher. Everything else
      // (permission denied, missing directory) keeps the prior quiet-stop.
      if (isWatchResourceExhaustion(err)) {
        this.degrade(EXHAUSTION_REASON, { error: String(err) });
      } else {
        logWarn('Could not start file watcher', { error: String(err) });
        this.stop();
      }
      return false;
    }
  }
  /**
   * Recursive strategy (macOS/Windows): install ONE watcher on the project
   * root covering the whole tree, so descriptor cost is O(1). Event filenames
   * arrive relative to the project root (subdirectories included) and are
   * therefore used directly as project-relative paths.
   *
   * Errors emitted later by the watcher degrade it on descriptor exhaustion
   * and are merely logged otherwise. A synchronous failure of the watch call
   * itself propagates to the caller.
   */
  private startRecursive(): void {
    const watcher = watchImpl(
      this.projectRoot,
      { recursive: true, persistent: true },
      (_event, filename) => {
        // Drop events delivered after stop(), or with no filename attached.
        if (this.stopped || filename == null) return;
        const rel = normalizePath(String(filename));
        this.handleChange(rel);
      }
    );
    this.recursiveWatcher = watcher;

    watcher.on('error', (err: unknown) => {
      if (!isWatchResourceExhaustion(err)) {
        logWarn('File watcher error', { error: String(err) });
        return;
      }
      this.degrade(EXHAUSTION_REASON, { error: String(err) });
    });
  }
  /**
   * Per-directory strategy (Linux): walk the non-ignored tree from the
   * project root and watch every directory. A directory watch reports
   * create/modify/delete for that directory's direct children, so individual
   * files are never watched.
   */
  private startPerDirectory(): void {
    // Startup walk: files already on disk are NOT marked pending.
    const markExisting = false;
    this.watchTree(this.projectRoot, markExisting);
  }
  /**
   * Installs a watch on `dir`, then recurses into each non-ignored
   * subdirectory.
   *
   * @param dir - Absolute path of the directory to watch.
   * @param markExisting - Pass true for a directory that appeared AFTER
   *   startup: the files already inside it are routed through the change
   *   handler, closing the `mkdir + write` race in which files created before
   *   the new directory's watch existed would otherwise be missed until the
   *   next full sync. The startup walk passes false (the engine's catch-up
   *   sync owns the baseline).
   */
  private watchTree(dir: string, markExisting: boolean): void {
    // Stop recursing once the watcher is shutting down (a degrade() earlier in
    // the walk calls stop()) or once ENOSPC showed the inotify budget is gone:
    // further adds would fail, but the watches already set are kept.
    const halted = this.stopped || this.degradedReason || this.inotifyLimitWarned;
    if (halted || this.dirWatchers.has(dir)) return;

    const cap = maxDirWatches();
    if (this.dirWatchers.size >= cap) {
      if (this.dirCapWarned) return;
      this.dirCapWarned = true;
      logWarn('File watcher hit directory-watch cap; remaining subtrees rely on manual/periodic sync', {
        cap,
      });
      return;
    }

    // Built lazily so the error is only stringified on the paths that log it.
    const describe = (err: unknown): Record<string, unknown> => ({ error: String(err), dir });

    let watcher: fs.FSWatcher;
    try {
      watcher = watchImpl(dir, { persistent: true }, (_event, filename) =>
        this.handleDirEvent(dir, filename)
      );
    } catch (err) {
      if (isWatchResourceExhaustion(err)) {
        // EMFILE/ENFILE: the PROCESS has no descriptors left, so every other
        // directory would fail too. Degrade the whole watcher instead of
        // limping along with a partial watch set.
        this.degrade(EXHAUSTION_REASON, describe(err));
      } else if (isInotifyWatchExhaustion(err)) {
        // ENOSPC: inotify watch budget exhausted. NON-fatal — keep what is
        // installed and name the knob to raise (warned once).
        this.warnInotifyLimit(describe(err));
      }
      // Anything else (ENOENT / EACCES on one directory): skip it quietly.
      return;
    }

    watcher.on('error', (err: unknown) => {
      if (isWatchResourceExhaustion(err)) {
        this.degrade(EXHAUSTION_REASON, describe(err));
        return;
      }
      if (isInotifyWatchExhaustion(err)) {
        this.warnInotifyLimit(describe(err));
      }
      this.unwatchDir(dir);
    });
    this.dirWatchers.set(dir, watcher);

    let children: fs.Dirent[];
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: its own watch stays, but there is nothing to walk.
      return;
    }

    for (const child of children) {
      const childPath = path.join(dir, child.name);
      if (child.isDirectory()) {
        if (!this.shouldIgnoreDir(childPath)) this.watchTree(childPath, markExisting);
        continue;
      }
      if (markExisting && child.isFile()) {
        this.handleChange(normalizePath(path.relative(this.projectRoot, childPath)));
      }
    }
  }
  /**
   * Event handler for the per-directory (Linux) strategy.
   *
   * - A path that is currently a directory extends the watch tree (unless it
   *   is ignored); no change is recorded for the directory itself.
   * - A path that no longer exists, or is no longer a directory, has any
   *   watches still registered for it and its descendants released, and is
   *   then routed through the shared change handler.
   * - Every other path is routed through the shared change handler.
   *
   * @param dir - Absolute path of the watched directory that fired the event.
   * @param filename - Entry name relative to `dir`; events without one are
   *   dropped.
   */
  private handleDirEvent(dir: string, filename: string | Buffer | null): void {
    if (this.stopped || filename == null) return;

    const full = path.join(dir, String(filename));

    // A newly-created directory needs its own watch (recursive isn't available
    // on Linux). statSync is cheap and these events are rare relative to file
    // edits.
    let noLongerDirectory = false;
    try {
      if (fs.statSync(full).isDirectory()) {
        if (!this.shouldIgnoreDir(full)) this.watchTree(full, /* markExisting */ true);
        return;
      }
      // The path exists but is not a directory.
      noLongerDirectory = true;
    } catch (err) {
      // Only "it is gone" counts as removal. Other failures (e.g. EACCES) say
      // nothing about whether the directory still exists, so keep its watches.
      const code = (err as NodeJS.ErrnoException | undefined)?.code;
      noLongerDirectory = code === 'ENOENT' || code === 'ENOTDIR';
    }

    // A watched directory that was deleted, moved away or replaced by a file
    // still has entries in dirWatchers. They must be released: watchTree
    // refuses to re-watch a path that is already registered, so a directory
    // re-created at the same path would never be watched again, and the stale
    // entries would keep counting against the directory-watch cap. The
    // has() guard keeps the common file-event case free of a map scan.
    if (noLongerDirectory && this.dirWatchers.has(full)) {
      const subtreePrefix = full + path.sep;
      const stale = Array.from(this.dirWatchers.keys()).filter(
        (watched) => watched === full || watched.startsWith(subtreePrefix)
      );
      for (const watched of stale) this.unwatchDir(watched);
    }

    this.handleChange(normalizePath(path.relative(this.projectRoot, full)));
  }
  /**
   * Shared change handler for both watch strategies.
   *
   * Drops the event when the path is empty, is the root, lies outside the
   * project, is always-ignored, matches the scope ignore rules, or is not a
   * source file. Otherwise it records the path as pending (#403) — only once
   * the watcher is ready — and schedules a debounced sync.
   *
   * The recursive (macOS/Windows) watcher reports events for ignored trees
   * too (one stream covers the whole repo), so the ignore check here is
   * load-bearing: it drops node_modules/dist/.git churn before any sync is
   * scheduled.
   *
   * @param rel - Project-relative POSIX path of the changed entry.
   */
  private handleChange(rel: string): void {
    // Reject only real parent-directory escapes ('..' or '../…'). A bare
    // startsWith('..') would also discard legitimate entries whose first
    // segment merely begins with two dots (e.g. '..config/settings.ts').
    if (!rel || rel === '.' || rel === '..' || rel.startsWith('../')) return;
    if (this.isAlwaysIgnored(rel)) return;
    if (this.ignoreMatcher && this.ignoreMatcher.ignores(rel)) return;
    if (!isSourceFile(rel, loadExtensionOverrides(this.projectRoot))) return;

    logDebug('File change detected', { file: rel });

    if (this.ready) {
      const now = Date.now();
      const existing = this.pendingFiles.get(rel);
      this.pendingFiles.set(rel, {
        // Keep the timestamp of the first event since the last sync.
        firstSeenMs: existing?.firstSeenMs ?? now,
        lastSeenMs: now,
      });
    }

    this.scheduleSync();
  }
  /**
   * Closes the watcher registered for `dir` and removes its entry. A no-op
   * when the directory is not being watched.
   *
   * @param dir - Absolute directory path, as used for the `dirWatchers` key.
   */
  private unwatchDir(dir: string): void {
    const watcher = this.dirWatchers.get(dir);
    if (!watcher) return;

    try {
      watcher.close();
    } catch {
      /* already closed */
    }
    this.dirWatchers.delete(dir);
  }
  /**
   * Reports whether `rel` belongs to a tree that is ignored unconditionally,
   * whatever .gitignore says: a CodeGraph data directory or the top-level
   * `.git`.
   *
   * @param rel - Project-relative POSIX path.
   */
  private isAlwaysIgnored(rel: string): boolean {
    // Judge by the first path segment. Any CodeGraph data dir counts — the
    // active one AND a sibling such as `.codegraph-win` that a second
    // environment (Windows/WSL) created in the same tree — so neither side
    // watches the other's index (#636).
    const [top = rel] = rel.split('/');
    if (isCodeGraphDataDir(top)) return true;

    return rel === '.git' || rel.startsWith('.git/');
  }
  /**
   * Reports whether a directory must NOT be watched. Used while building the
   * per-directory (Linux) watch tree.
   *
   * @param dirPath - Path of the candidate directory; it is made relative to
   *   the project root before matching.
   * @returns false for the root itself and for anything outside the project;
   *   otherwise true when the directory is always-ignored or matches the
   *   scope ignore rules.
   */
  private shouldIgnoreDir(dirPath: string): boolean {
    const rel = normalizePath(path.relative(this.projectRoot, dirPath));

    // Root / outside the project. Only '..' and '../…' are real escapes; a
    // bare startsWith('..') would also exempt a directory whose name merely
    // begins with two dots (e.g. '..cache') from every ignore rule.
    if (!rel || rel === '.' || rel === '..' || rel.startsWith('../')) return false;

    if (this.isAlwaysIgnored(rel)) return true;
    if (!this.ignoreMatcher) return false;

    // Match the directory form so a dir-only rule such as `build/` applies.
    return this.ignoreMatcher.ignores(rel + '/');
  }
  /**
   * Permanently disables live watching after a terminal runtime failure
   * (watch-resource exhaustion, or lock contention past the retry budget).
   *
   * Idempotent: the first call latches the reason, logs one actionable
   * warning, fires {@link WatchOptions.onDegraded} once and stops the
   * watcher; later calls return immediately. A subsequent start() clears the
   * latch.
   *
   * @param reason - Human-readable, actionable explanation stored as the
   *   degraded reason and passed to the host callback.
   * @param context - Extra fields merged into the warning log entry.
   */
  private degrade(reason: string, context: Record<string, unknown> = {}): void {
    if (this.degradedReason) return;
    this.degradedReason = reason;
    logWarn('File watcher disabled', { projectRoot: this.projectRoot, reason, ...context });

    try {
      this.onDegraded?.(reason);
    } catch (err) {
      // A faulty host callback must not escape from a watcher error handler.
      logWarn('File watcher onDegraded callback threw', { error: String(err) });
    } finally {
      // Always release the watchers: degrade() runs precisely when descriptors
      // are exhausted, so skipping stop() would keep holding them.
      this.stop();
    }
  }
  /**
   * Records that the Linux inotify watch budget is exhausted (ENOSPC) and
   * logs the warning the first time only.
   *
   * Setting the latch also makes the tree walk stop adding watches for the
   * rest of the session, since every further `inotify_add_watch` would fail.
   * Unlike {@link degrade} this is NON-fatal: watches already installed keep
   * firing and `codegraph sync` covers the unwatched remainder. The message
   * names the kernel knob to raise (`fs.inotify.max_user_watches`).
   *
   * @param context - Extra fields merged into the warning log entry.
   */
  private warnInotifyLimit(context: Record<string, unknown> = {}): void {
    if (!this.inotifyLimitWarned) {
      this.inotifyLimitWarned = true;
      const watchedDirs = this.dirWatchers.size;
      logWarn(INOTIFY_LIMIT_REASON, { watchedDirs, ...context });
    }
  }
  /**
   * Reports whether live watching has degraded permanently (until the next
   * start()).
   *
   * Not the same as {@link isActive}: a degraded watcher is inactive, but an
   * inactive watcher may simply be stopped or never started. Hosts use this
   * to tell the user that auto-sync is off.
   */
  isDegraded(): boolean {
    const reason = this.degradedReason;
    return reason !== null;
  }
  /**
   * Returns why live watching degraded.
   *
   * @returns The latched reason string, or null while the watcher is healthy.
   */
  getDegradedReason(): string | null {
    const reason = this.degradedReason;
    return reason;
  }
  /**
   * Stop watching for file changes.
   *
   * Cancels the pending debounce timer, closes every watcher, and resets the
   * per-session state (warning latches, lock-retry count, inert flag,
   * pending files, readiness, ignore matcher). The degraded reason is kept.
   */
  stop(): void {
    // Closing an already-closed watcher may throw; that is not an error here.
    const closeQuietly = (watcher: fs.FSWatcher): void => {
      try {
        watcher.close();
      } catch {
        /* already closed */
      }
    };

    this.stopped = true;

    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }

    const recursive = this.recursiveWatcher;
    if (recursive) {
      closeQuietly(recursive);
      this.recursiveWatcher = null;
    }

    this.dirWatchers.forEach((watcher) => closeQuietly(watcher));
    this.dirWatchers.clear();

    this.dirCapWarned = false;
    this.inotifyLimitWarned = false;
    this.lockRetryCount = 0;
    // NB: degradedReason is deliberately left alone. It has to survive the
    // stop() that degrade() triggers so isDegraded() stays true; only start()
    // clears it.
    this.inert = false;
    this.pendingFiles.clear();
    this.ready = false;
    this.ignoreMatcher = null;

    if (IS_TEST_RUNTIME) liveWatchersForTests.delete(this.projectRoot);

    logDebug('File watcher stopped');
  }
  /**
   * @internal Test-only entry point that injects a synthetic change event.
   *
   * The path is normalized and handed to `handleChange`, the same
   * filter → pendingFiles → debounced-sync path a real fs.watch event takes.
   * This keeps the watcher / staleness-banner suites deterministic instead of
   * racing on OS watch-delivery latency. See {@link __emitWatchEventForTests}.
   *
   * @param relPath Project-relative path of the file that "changed".
   */
  ingestEventForTests(relPath: string): void {
    const normalized = normalizePath(relPath);
    this.handleChange(normalized);
  }
  /**
   * Whether the watcher is currently active.
   *
   * A stopped watcher is never active. Otherwise it counts as active when it
   * holds a recursive watcher, holds at least one per-directory watcher, or is
   * flagged `inert`.
   *
   * @returns `true` while the watcher is running, `false` otherwise.
   */
  isActive(): boolean {
    if (this.stopped) return false;
    const hasRecursiveWatcher = this.recursiveWatcher !== null;
    const hasDirWatchers = this.dirWatchers.size > 0;
    return hasRecursiveWatcher || hasDirWatchers || this.inert;
  }
  /**
   * Returns a promise that settles once the watch set has been installed.
   *
   * If the watcher is already ready the promise resolves immediately.
   * Otherwise a waiter is queued on `readyWaiters`; whoever drains that queue
   * resolves the promise. If that has not happened within `timeoutMs`, the
   * waiter is removed from the queue and the promise rejects.
   *
   * Intended for tests that need a deterministic boundary before asserting on
   * `pendingFiles`. Production callers don't need it: `pendingFiles` is read
   * continuously, the staleness banner is always correct (empty or populated),
   * and there is no asynchronous initial-scan window with `fs.watch`.
   *
   * @param timeoutMs Maximum time to wait, in milliseconds (default 10000).
   * @returns A promise that resolves when ready, or rejects with an `Error` on
   *   timeout.
   */
  waitUntilReady(timeoutMs = 10000): Promise<void> {
    if (this.ready) {
      return Promise.resolve();
    }

    return new Promise<void>((resolve, reject) => {
      // Invoked by whoever drains readyWaiters; cancels the pending timeout.
      const onReady = (): void => {
        clearTimeout(timer);
        resolve();
      };

      const timer = setTimeout(() => {
        // Remove the waiter so a late "ready" does not call into a settled promise.
        const position = this.readyWaiters.indexOf(onReady);
        if (position >= 0) {
          this.readyWaiters.splice(position, 1);
        }
        reject(new Error(`FileWatcher.waitUntilReady timed out after ${timeoutMs}ms`));
      }, timeoutMs);

      this.readyWaiters.push(onReady);
    });
  }
  /**
   * Schedule a normal debounced sync after a source edit.
   *
   * Any previously scheduled sync is cancelled, so a burst of edits collapses
   * into a single `flush()` that runs `debounceMs` after the last one.
   *
   * `flush()` is async and its promise was previously dropped. It handles sync
   * failures itself, but it can still reject when a host callback invoked from
   * its error path throws. The rejection is caught and logged here so it does
   * not surface as an unhandled promise rejection in the host process.
   */
  private scheduleSync(): void {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
    }
    this.debounceTimer = setTimeout(() => {
      // Clear the handle first: flush() may schedule a follow-up pass.
      this.debounceTimer = null;
      void this.flush().catch((err: unknown) => {
        const message = err instanceof Error ? err.message : String(err);
        logWarn('Unhandled error during watch sync', { error: message });
      });
    }, this.debounceMs);
  }
  /**
   * Schedule a retry after a recoverable sync failure (lock contention).
   *
   * Kept separate from {@link scheduleSync} so the caller can supply a
   * back-off delay: prolonged contention then backs off exponentially instead
   * of hammering the lock every debounce cycle. Any previously scheduled sync
   * is cancelled first.
   *
   * `flush()` is async and its promise was previously dropped. It handles sync
   * failures itself, but it can still reject when a host callback invoked from
   * its error path throws. The rejection is caught and logged here so it does
   * not surface as an unhandled promise rejection in the host process.
   *
   * @param delayMs Delay before the retry runs, in milliseconds.
   */
  private scheduleRetrySync(delayMs: number): void {
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
    }
    this.debounceTimer = setTimeout(() => {
      // Clear the handle first: flush() may schedule a follow-up pass.
      this.debounceTimer = null;
      void this.flush().catch((err: unknown) => {
        const message = err instanceof Error ? err.message : String(err);
        logWarn('Unhandled error during watch sync', { error: message });
      });
    }, delayMs);
  }
  /**
   * Run one sync pass over the pending changes.
   *
   * Bookkeeping contract:
   * - `pendingFiles` is NOT emptied when the sync starts. Entries are removed
   *   only after `syncFn` resolves, and only when their `lastSeenMs` is at or
   *   before `syncStartedMs`. A query arriving mid-sync therefore still sees
   *   the affected files as stale (the DB has not been updated yet), and an
   *   event that lands mid-sync carries over into the follow-up pass.
   * - When the sync fails, `pendingFiles` is left untouched: every edit is
   *   still unindexed and the rescheduled sync picks up the same set.
   *
   * Failure handling:
   * - `LockUnavailableError` (another writer holds the lock) is logged at debug
   *   level only and retried with exponential back-off; once the retry count
   *   exceeds `MAX_LOCK_RETRIES` the watcher degrades.
   * - Any other error resets the back-off, is logged as a warning and is
   *   forwarded to `onSyncError`.
   *
   * Does nothing when a sync is already in flight (that sync reschedules from
   * its own `finally`) or when the watcher is stopped.
   */
  private async flush(): Promise<void> {
    // An in-flight sync re-triggers itself afterwards; a stopped watcher never syncs.
    if (this.stopped || this.syncing) return;

    this.syncing = true;
    this.syncStartedMs = Date.now();

    try {
      const outcome = await this.syncFn();

      // A clean sync clears any contention back-off.
      this.lockRetryCount = 0;

      // Drop entries whose latest event predates this sync — those edits are
      // now in the DB. Anything seen after syncStartedMs arrived mid-sync;
      // whether this pass captured it depends on when sync read the file, so it
      // stays pending for the follow-up. A false positive ("shown stale,
      // actually fresh") costs one extra Read; a false negative ("shown fresh,
      // actually stale") misleads the agent.
      this.pendingFiles.forEach((entry, trackedPath) => {
        if (entry.lastSeenMs <= this.syncStartedMs) {
          this.pendingFiles.delete(trackedPath);
        }
      });

      this.onSyncComplete?.(outcome);
    } catch (err) {
      // Either way pendingFiles stays as it is: every edit it tracks is still
      // unindexed, and the rescheduled sync sees the same set.
      if (!(err instanceof LockUnavailableError)) {
        // Not contention, so the back-off does not apply.
        this.lockRetryCount = 0;
        const failure = err instanceof Error ? err : new Error(String(err));
        logWarn('Watch sync failed', { error: failure.message });
        this.onSyncError?.(failure);
      } else {
        // Another writer holds the lock. Brief contention stays quiet (a long
        // external index would otherwise spam stderr every cycle), but the
        // retries are bounded: past the budget, auto-sync degrades explicitly.
        this.lockRetryCount += 1;
        logDebug('Watch sync skipped: file lock unavailable', {
          pendingFiles: this.pendingFiles.size,
          retryCount: this.lockRetryCount,
        });

        if (this.lockRetryCount > MAX_LOCK_RETRIES) {
          const reason =
            'CodeGraph file lock held by another process past the retry budget; ' +
            'auto-sync disabled. Run `codegraph sync` once the other writer finishes ' +
            '(or install git sync hooks) to refresh the graph.';
          this.degrade(reason, {
            pendingFiles: this.pendingFiles.size,
            retryCount: this.lockRetryCount,
          });
        }
      }
    } finally {
      this.syncing = false;

      // Another pass is needed when files are still pending (mid-sync events,
      // or this sync failed). A degrade() above has already set `stopped`, so a
      // watcher that has given up is never rescheduled.
      const needsAnotherPass = !this.stopped && this.pendingFiles.size > 0;

      if (needsAnotherPass && this.lockRetryCount > 0) {
        // After lock contention: debounceMs · 2^(n-1), capped.
        const exponent = Math.max(0, this.lockRetryCount - 1);
        const backoffMs = Math.min(this.debounceMs * 2 ** exponent, MAX_LOCK_RETRY_DELAY_MS);
        this.scheduleRetrySync(backoffMs);
      } else if (needsAnotherPass) {
        // Normal edits keep the fast debounce cadence.
        this.scheduleSync();
      }
    }
  }
  /**
   * Snapshot of the files the watcher has seen since the last successful sync.
   *
   * MCP tool responses use this to flag stale results without blocking on a
   * sync: a hit in `src/foo.ts` while `src/foo.ts` is in this list tells the
   * agent "Read this file directly, the index lags."
   *
   * `indexing` is true when a sync is in flight whose start time is at or
   * after the file's most recent event, i.e. that sync is expected to absorb
   * the edit. It is false when no sync is running, or when the event arrived
   * after the running sync started. A call a few hundred ms later may report
   * `indexing: true`, or the file may have left the list entirely.
   *
   * Cheap: one pass over `pendingFiles`, no I/O, no locks.
   *
   * @returns A new array with one entry per pending file.
   */
  getPendingFiles(): PendingFile[] {
    return Array.from(
      this.pendingFiles,
      ([trackedPath, entry]): PendingFile => ({
        path: trackedPath,
        firstSeenMs: entry.firstSeenMs,
        lastSeenMs: entry.lastSeenMs,
        indexing: this.syncing && this.syncStartedMs >= entry.lastSeenMs,
      })
    );
  }
  796. }
  797. /**
  798. * Test-only: synthesize a source-file change for the live watcher running at
  799. * `projectRoot`, exercising the real filter → pendingFiles → debounced-sync
  800. * logic without depending on fs.watch delivery timing (which races under
  801. * parallel vitest). `relPath` is project-relative POSIX (e.g. "src/foo.ts").
  802. * Returns false if no live watcher is registered for that root (e.g. outside a
  803. * test runtime, where the registry is intentionally not populated).
  804. */
  805. export function __emitWatchEventForTests(projectRoot: string, relPath: string): boolean {
  806. const w = liveWatchersForTests.get(projectRoot);
  807. if (!w) return false;
  808. w.ingestEventForTests(relPath);
  809. return true;
  810. }