// watcher.ts
/**
 * File Watcher
 *
 * Watches the project directory for file changes and triggers debounced sync
 * operations to keep the code graph up-to-date.
 *
 * Uses chokidar, whose `ignored` callback filters directories BEFORE they are
 * watched, so we never register inotify watches on excluded trees such as
 * node_modules/, dist/, or .git/ (fixes #276: recursive fs.watch exhausted the
 * kernel watch budget on large repos). The ignore decision reuses the indexer's
 * `buildDefaultIgnore` (built-in default-ignore dirs + the project's .gitignore),
 * so the watcher watches exactly the set the indexer indexes. In particular,
 * node_modules/build/cache dirs are excluded even when the repo has no
 * .gitignore (#407), which a .gitignore-only filter would miss.
 */
  16. import * as path from 'path';
  17. import type { Stats } from 'fs';
  18. import chokidar, { FSWatcher } from 'chokidar';
  19. import type { Ignore } from 'ignore';
  20. import { isSourceFile, buildDefaultIgnore } from '../extraction';
  21. import { logDebug, logWarn } from '../errors';
  22. import { normalizePath } from '../utils';
  23. import { watchDisabledReason } from './watch-policy';
  24. /**
  25. * Options for the file watcher
  26. */
  27. export interface WatchOptions {
  28. /**
  29. * Debounce delay in milliseconds.
  30. * After the last file change, wait this long before triggering sync.
  31. * Default: 2000ms
  32. */
  33. debounceMs?: number;
  34. /**
  35. * Callback when a sync completes (for logging/diagnostics).
  36. */
  37. onSyncComplete?: (result: { filesChanged: number; durationMs: number }) => void;
  38. /**
  39. * Callback when a sync errors (for logging/diagnostics).
  40. */
  41. onSyncError?: (error: Error) => void;
  42. }
  43. /**
  44. * Per-file pending entry — tracks a source file the watcher saw an event for
  45. * but hasn't yet synced into the index. Exposed via {@link FileWatcher.getPendingFiles}
  46. * so MCP tool responses can mark stale results without forcing a wait.
  47. */
  48. export interface PendingFile {
  49. /** Project-relative POSIX path (e.g. "src/foo.ts"). */
  50. path: string;
  51. /** Wall-clock ms at the first event we saw for this path since the last sync. */
  52. firstSeenMs: number;
  53. /** Wall-clock ms at the most recent event we saw for this path. */
  54. lastSeenMs: number;
  55. /**
  56. * True when a sync is currently in flight that began AFTER this file's most
  57. * recent event — i.e. the next successful sync will pick it up. False when
  58. * the file is still in the debounce window (no sync running yet).
  59. */
  60. indexing: boolean;
  61. }
  62. /**
  63. * FileWatcher monitors a project directory for changes and triggers
  64. * debounced sync operations via a provided callback.
  65. *
  66. * Design goals:
  67. * - Minimal resource usage (chokidar filters excluded directories before
  68. * registering an inotify watch — see module docs / #276)
  69. * - Debounced to avoid thrashing on rapid saves
  70. * - Filters to supported source files by extension
  71. * - Ignores .codegraph/ and .git/ regardless of .gitignore
  72. * - Tracks per-file pending state so MCP tools can flag stale results
  73. * without blocking on a sync (issue #403)
  74. */
  75. export class FileWatcher {
  76. private watcher: FSWatcher | null = null;
  77. private debounceTimer: ReturnType<typeof setTimeout> | null = null;
  78. /**
  79. * Files seen by the watcher since the last successful sync — populated on
  80. * every chokidar event, cleared at the start of a sync, and re-populated by
  81. * events that arrive mid-sync (or restored on sync failure). Keyed by the
  82. * same project-relative POSIX path the rest of the codebase uses, so a
  83. * caller can intersect tool-response file paths against this map cheaply.
  84. */
  85. private pendingFiles = new Map<string, { firstSeenMs: number; lastSeenMs: number }>();
  86. /**
  87. * Wall-clock ms at which the in-flight sync began. Combined with
  88. * {@link pendingFiles}'s `lastSeenMs`, this distinguishes "still in the
  89. * debounce window" (lastSeen > syncStarted, sync hasn't started yet for
  90. * this edit) from "currently being indexed" (lastSeen <= syncStarted).
  91. */
  92. private syncStartedMs = 0;
  93. private syncing = false;
  94. private stopped = false;
  95. /**
  96. * False until chokidar fires its `ready` event. Gates `pendingFiles`
  97. * insertion so the initial crawl's `add` events (one per pre-existing
  98. * source file) don't pollute the per-file staleness signal. The events
  99. * still flow into `scheduleSync()` to preserve the previous "initial
  100. * scan triggers a reconciling sync" behavior.
  101. */
  102. private chokidarReady = false;
  103. /**
  104. * Callbacks that resolve when chokidar fires `ready`. Used by tests (and
  105. * any production caller that cares about a clean baseline) to deterministically
  106. * gate on the end of the initial scan instead of guessing at a sleep duration.
  107. */
  108. private readyWaiters: Array<() => void> = [];
  109. // The shared ignore matcher (built-in defaults + project .gitignore), built
  110. // once at start(). Same source of truth the indexer uses, so watcher scope
  111. // can never diverge from index scope.
  112. private ignoreMatcher: Ignore | null = null;
  113. private readonly projectRoot: string;
  114. private readonly debounceMs: number;
  115. private readonly syncFn: () => Promise<{ filesChanged: number; durationMs: number }>;
  116. private readonly onSyncComplete?: WatchOptions['onSyncComplete'];
  117. private readonly onSyncError?: WatchOptions['onSyncError'];
  118. constructor(
  119. projectRoot: string,
  120. syncFn: () => Promise<{ filesChanged: number; durationMs: number }>,
  121. options: WatchOptions = {}
  122. ) {
  123. this.projectRoot = projectRoot;
  124. this.syncFn = syncFn;
  125. this.debounceMs = options.debounceMs ?? 2000;
  126. this.onSyncComplete = options.onSyncComplete;
  127. this.onSyncError = options.onSyncError;
  128. }
  129. /**
  130. * Start watching for file changes.
  131. * Returns true if watching started successfully, false otherwise.
  132. */
  133. start(): boolean {
  134. if (this.watcher) return true; // Already watching
  135. this.stopped = false;
  136. // Some environments make filesystem watching unusable — most notably
  137. // WSL2 /mnt/ drives, where the underlying fs.watch calls block long
  138. // enough to break MCP startup handshakes (issue #199). Skip watching
  139. // there; callers fall back to manual `codegraph sync` or git sync hooks.
  140. const disabledReason = watchDisabledReason(this.projectRoot);
  141. if (disabledReason) {
  142. logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot });
  143. return false;
  144. }
  145. // Reuse the indexer's ignore set so the watcher and indexer agree on scope.
  146. // chokidar only registers an inotify watch on directories that pass this
  147. // filter — that's the #276 fix.
  148. this.ignoreMatcher = buildDefaultIgnore(this.projectRoot);
  149. try {
  150. this.watcher = chokidar.watch(this.projectRoot, {
  151. // chokidar calls this for every path it encounters and only watches
  152. // those that pass — so excluded trees (node_modules/, dist/, .git/, …)
  153. // never get an inotify watch in the first place.
  154. ignored: (testPath: string, stats?: Stats) => this.shouldIgnore(testPath, stats),
  155. });
  156. // Chokidar emits `add` for every pre-existing source file during its
  157. // initial scan. Those events should still trigger the post-startup
  158. // reconciling sync (preserving prior behavior), but they must NOT land
  159. // in pendingFiles — otherwise every file in the project shows up as
  160. // "edited but not indexed" on startup, which is the opposite of the
  161. // signal #403 is supposed to provide. Flip the flag on chokidar's
  162. // `ready` event; from then on, real edits populate pendingFiles.
  163. //
  164. // We also clear `pendingFiles` here as defense-in-depth: chokidar can
  165. // emit late initial-scan `add` events via setImmediate AFTER the
  166. // `ready` callback runs (observed under test-parallelism load).
  167. // Clearing once at ready guarantees a clean baseline; real subsequent
  168. // edits repopulate the set normally.
  169. this.watcher.on('ready', () => {
  170. this.chokidarReady = true;
  171. this.pendingFiles.clear();
  172. for (const cb of this.readyWaiters) cb();
  173. this.readyWaiters.length = 0;
  174. });
  175. // chokidar emits 'all' for every event type; we only sync source files.
  176. this.watcher.on('all', (_event: string, filePath: string) => {
  177. if (this.stopped) return;
  178. const normalized = normalizePath(path.relative(this.projectRoot, filePath));
  179. // Defense in depth: `ignored` should already keep these out, but events
  180. // can still arrive during setup or via symlink traversal.
  181. if (this.isAlwaysIgnored(normalized)) return;
  182. if (!isSourceFile(normalized)) return;
  183. logDebug('File change detected', { file: normalized });
  184. // Only track events from after chokidar's initial scan as pending
  185. // edits — pre-existing files on disk are already represented by
  186. // (or about to be reconciled by) the index, not a user edit.
  187. if (this.chokidarReady) {
  188. const now = Date.now();
  189. const existing = this.pendingFiles.get(normalized);
  190. this.pendingFiles.set(normalized, {
  191. firstSeenMs: existing?.firstSeenMs ?? now,
  192. lastSeenMs: now,
  193. });
  194. }
  195. this.scheduleSync();
  196. });
  197. // Handle watcher errors gracefully — don't crash, the user can restart.
  198. this.watcher.on('error', (err: unknown) => {
  199. logWarn('File watcher error', { error: String(err) });
  200. });
  201. logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs });
  202. return true;
  203. } catch (err) {
  204. // Watcher setup failed (e.g., permission denied, missing directory).
  205. logWarn('Could not start file watcher', { error: String(err) });
  206. return false;
  207. }
  208. }
  209. /** Our own dirs are always ignored, regardless of .gitignore. */
  210. private isAlwaysIgnored(rel: string): boolean {
  211. return (
  212. rel === '.codegraph' || rel.startsWith('.codegraph/') ||
  213. rel === '.git' || rel.startsWith('.git/')
  214. );
  215. }
  216. /**
  217. * chokidar `ignored` predicate — true for any path that should NOT be watched.
  218. * Uses chokidar's provided `stats` to decide directory-vs-file so a dir-only
  219. * rule like `build/` matches, without an extra `statSync` per path.
  220. */
  221. private shouldIgnore(testPath: string, stats?: Stats): boolean {
  222. const rel = normalizePath(path.relative(this.projectRoot, testPath));
  223. if (!rel || rel === '.' || rel.startsWith('..')) return false; // root / outside
  224. if (this.isAlwaysIgnored(rel)) return true;
  225. if (!this.ignoreMatcher) return false;
  226. if (stats) {
  227. return this.ignoreMatcher.ignores(stats.isDirectory() ? rel + '/' : rel);
  228. }
  229. // Stats unknown: test both forms so a directory match isn't missed.
  230. return this.ignoreMatcher.ignores(rel) || this.ignoreMatcher.ignores(rel + '/');
  231. }
  232. /**
  233. * Stop watching for file changes.
  234. */
  235. stop(): void {
  236. this.stopped = true;
  237. if (this.debounceTimer) {
  238. clearTimeout(this.debounceTimer);
  239. this.debounceTimer = null;
  240. }
  241. if (this.watcher) {
  242. this.watcher.close();
  243. this.watcher = null;
  244. }
  245. this.pendingFiles.clear();
  246. this.chokidarReady = false;
  247. this.ignoreMatcher = null;
  248. logDebug('File watcher stopped');
  249. }
  250. /**
  251. * Whether the watcher is currently active.
  252. */
  253. isActive(): boolean {
  254. return this.watcher !== null && !this.stopped;
  255. }
  256. /**
  257. * Resolves once chokidar has fired its `ready` event (or immediately if
  258. * it has already done so). Useful for tests that need a deterministic
  259. * boundary before asserting on `pendingFiles` — guessing a sleep duration
  260. * is flaky under load because chokidar can take longer than expected to
  261. * finish its initial crawl on slow filesystems / parallel test runs.
  262. *
  263. * Production callers don't need this: `pendingFiles` is read continuously,
  264. * the staleness banner is always correct (empty or populated), and the
  265. * initial-scan window is a small one-time startup cost.
  266. */
  267. waitUntilReady(timeoutMs = 10000): Promise<void> {
  268. if (this.chokidarReady) return Promise.resolve();
  269. return new Promise((resolve, reject) => {
  270. const t = setTimeout(() => {
  271. const idx = this.readyWaiters.indexOf(handler);
  272. if (idx >= 0) this.readyWaiters.splice(idx, 1);
  273. reject(new Error(`FileWatcher.waitUntilReady timed out after ${timeoutMs}ms`));
  274. }, timeoutMs);
  275. const handler = () => { clearTimeout(t); resolve(); };
  276. this.readyWaiters.push(handler);
  277. });
  278. }
  279. /**
  280. * Schedule a debounced sync.
  281. */
  282. private scheduleSync(): void {
  283. if (this.debounceTimer) {
  284. clearTimeout(this.debounceTimer);
  285. }
  286. this.debounceTimer = setTimeout(() => {
  287. this.debounceTimer = null;
  288. this.flush();
  289. }, this.debounceMs);
  290. }
  291. /**
  292. * Flush pending changes by running sync.
  293. *
  294. * pendingFiles is NOT cleared at the start of sync — entries are removed
  295. * only after sync commits successfully, and only for entries whose
  296. * lastSeenMs <= syncStartedMs. That way, a query that arrives mid-sync
  297. * still sees the affected files marked stale (the DB hasn't been updated
  298. * yet), and an event that lands mid-sync persists into the follow-up.
  299. *
  300. * On sync failure pendingFiles is left untouched — every edit is still
  301. * unindexed, and the rescheduled sync will absorb the same set next time.
  302. */
  303. private async flush(): Promise<void> {
  304. // If already syncing, the post-sync check will re-trigger
  305. if (this.syncing || this.stopped) return;
  306. this.syncStartedMs = Date.now();
  307. this.syncing = true;
  308. try {
  309. const result = await this.syncFn();
  310. // Remove entries whose most recent event predates this sync — those
  311. // edits are now in the DB. Entries with lastSeenMs > syncStartedMs
  312. // arrived mid-sync; whether the in-flight sync captured them depends
  313. // on when sync read that file, so we keep them as pending and let
  314. // the follow-up sync handle them. We prefer false positives ("shown
  315. // stale, actually fresh" → at worst one extra Read) over false
  316. // negatives ("shown fresh, actually stale" → misleads the agent).
  317. for (const [filePath, info] of this.pendingFiles) {
  318. if (info.lastSeenMs <= this.syncStartedMs) {
  319. this.pendingFiles.delete(filePath);
  320. }
  321. }
  322. this.onSyncComplete?.(result);
  323. } catch (err) {
  324. const error = err instanceof Error ? err : new Error(String(err));
  325. logWarn('Watch sync failed', { error: error.message });
  326. // Failure: leave pendingFiles untouched. Every edit it tracks is
  327. // still unindexed; the rescheduled sync sees the same set.
  328. this.onSyncError?.(error);
  329. } finally {
  330. this.syncing = false;
  331. // If pending files remain (mid-sync events, or this sync failed),
  332. // schedule another pass.
  333. if (this.pendingFiles.size > 0 && !this.stopped) {
  334. this.scheduleSync();
  335. }
  336. }
  337. }
  338. /**
  339. * Snapshot of files seen by the watcher since the last successful sync.
  340. *
  341. * Used by MCP tool responses to mark stale results without blocking on a
  342. * sync: a tool that returns a hit in `src/foo.ts` while `src/foo.ts` is in
  343. * this list tells the agent "Read this file directly, the index lags."
  344. *
  345. * `indexing` is true when a sync is currently in flight whose start time is
  346. * AFTER this file's most recent event — i.e. that sync will absorb the
  347. * edit. False means the file is still inside the debounce window and no
  348. * sync has started yet (a follow-up call a few hundred ms later may show
  349. * `indexing: true` or the file may have left the list entirely).
  350. *
  351. * Cheap: O(pendingFiles.size), no I/O, no locks.
  352. */
  353. getPendingFiles(): PendingFile[] {
  354. const result: PendingFile[] = [];
  355. for (const [filePath, info] of this.pendingFiles) {
  356. result.push({
  357. path: filePath,
  358. firstSeenMs: info.firstSeenMs,
  359. lastSeenMs: info.lastSeenMs,
  360. indexing: this.syncing && this.syncStartedMs >= info.lastSeenMs,
  361. });
  362. }
  363. return result;
  364. }
  365. }