index.ts 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157
  1. /**
  2. * CodeGraph
  3. *
  4. * A local-first code intelligence system that builds a semantic
  5. * knowledge graph from any codebase.
  6. */
  7. import * as path from 'path';
  8. import {
  9. Node,
  10. Edge,
  11. FileRecord,
  12. ExtractionResult,
  13. Subgraph,
  14. TraversalOptions,
  15. SearchOptions,
  16. SearchResult,
  17. Context,
  18. GraphStats,
  19. TaskInput,
  20. TaskContext,
  21. BuildContextOptions,
  22. FindRelevantContextOptions,
  23. } from './types';
  24. import { DatabaseConnection, getDatabasePath } from './db';
  25. import { QueryBuilder } from './db/queries';
  26. import {
  27. isInitialized,
  28. createDirectory,
  29. removeDirectory,
  30. validateDirectory,
  31. } from './directory';
  32. import {
  33. ExtractionOrchestrator,
  34. IndexProgress,
  35. IndexResult,
  36. SyncResult,
  37. extractFromSource,
  38. initGrammars,
  39. } from './extraction';
  40. import {
  41. ReferenceResolver,
  42. createResolver,
  43. ResolutionResult,
  44. } from './resolution';
  45. import { GraphTraverser, GraphQueryManager } from './graph';
  46. import { ContextBuilder, createContextBuilder } from './context';
  47. import { Mutex, FileLock } from './utils';
  48. import { FileWatcher, WatchOptions, PendingFile, LockUnavailableError } from './sync';
  49. import { EXTRACTION_VERSION } from './extraction/extraction-version';
  50. import { getCodeGraphDir } from './directory';
  51. import { deriveProjectNameTokens } from './search/query-utils';
  52. import { CodeGraphPackageVersion } from './mcp/version';
  53. // Re-export types for consumers
  54. export * from './types';
  55. // Storage building blocks for embedded/SDK consumers that drive the graph
  56. // directly (open a DB, run prepared queries) rather than through the CodeGraph
  57. // facade. Exposed from the package entry so they no longer require deep imports
  58. // into dist/ (issue #354).
  59. export { getDatabasePath, DatabaseConnection } from './db';
  60. export { QueryBuilder } from './db/queries';
  61. export {
  62. getCodeGraphDir,
  63. isInitialized,
  64. findNearestCodeGraphRoot,
  65. CODEGRAPH_DIR,
  66. } from './directory';
  67. export { IndexProgress, IndexResult, SyncResult } from './extraction';
  68. export { detectLanguage, isLanguageSupported, isGrammarLoaded, getSupportedLanguages, initGrammars, loadGrammarsForLanguages, loadAllGrammars } from './extraction';
  69. export { ResolutionResult } from './resolution';
  70. export {
  71. CodeGraphError,
  72. FileError,
  73. ParseError,
  74. DatabaseError,
  75. SearchError,
  76. VectorError,
  77. ConfigError,
  78. Logger,
  79. setLogger,
  80. getLogger,
  81. silentLogger,
  82. defaultLogger,
  83. } from './errors';
  84. export { Mutex, FileLock, processInBatches, debounce, throttle, MemoryMonitor } from './utils';
  85. export { FileWatcher, WatchOptions, PendingFile, LockUnavailableError } from './sync';
  86. export { MCPServer } from './mcp';
  /**
   * Settings accepted by `CodeGraph.init` when creating a new project.
   */
  export interface InitOptions {
    /** When true, a full index is run right after the project is created. */
    index?: boolean;
    /** Receives progress updates while that initial index runs. */
    onProgress?: (progress: IndexProgress) => void;
  }
  /**
   * Settings accepted by `CodeGraph.open` when attaching to an existing project.
   */
  export interface OpenOptions {
    /** When true, an incremental sync is run before the instance is returned. */
    sync?: boolean;
    /**
     * Whether to run in read-only mode.
     * NOTE(review): the `open()` implementation in this file does not read this
     * flag — confirm whether it is honored elsewhere.
     */
    readOnly?: boolean;
  }
  /**
   * Settings for indexing operations (`indexAll`, `sync`).
   */
  export interface IndexOptions {
    /** Invoked with progress updates as the operation advances. */
    onProgress?: (progress: IndexProgress) => void;
    /** Abort signal used to cancel the operation. */
    signal?: AbortSignal;
    /** Turns on verbose logging (worker lifecycle, memory, timeouts). */
    verbose?: boolean;
  }
  116. /**
  117. * Main CodeGraph class
  118. *
  119. * Provides the primary interface for interacting with the code knowledge graph.
  120. */
  121. export class CodeGraph {
  // Storage handles and the project this instance is bound to.
  private db: DatabaseConnection;
  private queries: QueryBuilder;
  private projectRoot: string;

  // Collaborators built in the constructor on top of `queries`.
  private orchestrator: ExtractionOrchestrator;
  private resolver: ReferenceResolver;
  private graphManager: GraphQueryManager;
  private traverser: GraphTraverser;
  private contextBuilder: ContextBuilder;

  // In-process guard: serializes indexing operations within this instance.
  private indexMutex = new Mutex();

  // Cross-process guard: prevents concurrent writes from other processes
  // (CLI, MCP, git hooks).
  private fileLock: FileLock;

  // Auto-sync file watcher; null until `watch()` is called and after `unwatch()`.
  private watcher: FileWatcher | null = null;
  /**
   * Wires up a CodeGraph instance around an already-open database.
   *
   * Private: instances are created through the static factories
   * (`init`, `initSync`, `open`, `openSync`).
   *
   * @param db - Open database connection
   * @param queries - Query builder bound to that connection
   * @param projectRoot - Project root directory
   */
  private constructor(
    db: DatabaseConnection,
    queries: QueryBuilder,
    projectRoot: string
  ) {
    this.db = db;
    this.queries = queries;
    this.projectRoot = projectRoot;

    // The project name labels the whole repo rather than any symbol, so it is
    // down-weighted as a search term (#720). Failure here is tolerated:
    // ranking still works without the tokens.
    try {
      const nameTokens = deriveProjectNameTokens(projectRoot);
      this.queries.setProjectNameTokens(nameTokens);
    } catch {
      // Best-effort only.
    }

    const lockPath = path.join(getCodeGraphDir(projectRoot), 'codegraph.lock');
    this.fileLock = new FileLock(lockPath);

    this.orchestrator = new ExtractionOrchestrator(projectRoot, queries);
    this.resolver = createResolver(projectRoot, queries);
    this.graphManager = new GraphQueryManager(queries);

    // The context builder shares the traverser created here.
    const traverser = new GraphTraverser(queries);
    this.traverser = traverser;
    this.contextBuilder = createContextBuilder(projectRoot, queries, traverser);
  }
  164. // ===========================================================================
  165. // Lifecycle Methods
  166. // ===========================================================================
  /**
   * Initialize a new CodeGraph project.
   *
   * Loads the grammars, then creates the CodeGraph directory and database
   * (the same steps as {@link CodeGraph.initSync}), and optionally runs a
   * first full index.
   *
   * If the optional initial index throws, the freshly opened instance is
   * closed before the error is re-thrown. The caller never receives the
   * instance in that case, so nothing else could release its database handle.
   *
   * @param projectRoot - Path to the project root directory
   * @param options - Initialization options
   * @returns A new CodeGraph instance
   * @throws Error if the project is already initialized, or whatever the
   *   initial index throws
   */
  static async init(projectRoot: string, options: InitOptions = {}): Promise<CodeGraph> {
    await initGrammars();

    // Shared setup: resolve the path, reject an already-initialized project,
    // create the directory structure and the database.
    const instance = CodeGraph.initSync(projectRoot);

    // Run initial indexing if requested.
    if (options.index) {
      try {
        await instance.indexAll({ onProgress: options.onProgress });
      } catch (error: unknown) {
        try {
          instance.close();
        } catch {
          // Ignore cleanup failures so the indexing error is the one reported.
        }
        throw error;
      }
    }

    return instance;
  }
  /**
   * Synchronous variant of project initialization: creates the directory
   * structure and database but performs no indexing.
   *
   * @param projectRoot - Path to the project root directory
   * @returns A new CodeGraph instance
   * @throws Error if the project is already initialized
   */
  static initSync(projectRoot: string): CodeGraph {
    const root = path.resolve(projectRoot);

    // Refuse to re-initialize an existing project.
    const alreadySetUp = isInitialized(root);
    if (alreadySetUp) {
      throw new Error(`CodeGraph already initialized in ${root}`);
    }

    // Lay down the directory structure, then create the database inside it.
    createDirectory(root);
    const connection = DatabaseConnection.initialize(getDatabasePath(root));
    const builder = new QueryBuilder(connection.getDb());

    return new CodeGraph(connection, builder, root);
  }
  /**
   * Open an existing CodeGraph project.
   *
   * Loads the grammars, then validates the project and opens its database
   * (the same steps as {@link CodeGraph.openSync}), and optionally runs an
   * incremental sync.
   *
   * If the optional sync throws, the opened instance is closed before the
   * error is re-thrown. The caller never receives the instance in that case,
   * so nothing else could release its database handle.
   *
   * @param projectRoot - Path to the project root directory
   * @param options - Open options
   * @returns A CodeGraph instance
   * @throws Error if the project is not initialized or its directory is
   *   invalid, or whatever the sync throws
   */
  static async open(projectRoot: string, options: OpenOptions = {}): Promise<CodeGraph> {
    await initGrammars();

    // Shared setup: resolve the path, check initialization, validate the
    // directory structure and open the database.
    const instance = CodeGraph.openSync(projectRoot);

    // Sync if requested.
    if (options.sync) {
      try {
        await instance.sync();
      } catch (error: unknown) {
        try {
          instance.close();
        } catch {
          // Ignore cleanup failures so the sync error is the one reported.
        }
        throw error;
      }
    }

    return instance;
  }
  /**
   * Synchronous variant of opening a project: validates and opens it but
   * performs no sync.
   *
   * @param projectRoot - Path to the project root directory
   * @returns A CodeGraph instance
   * @throws Error if the project is not initialized or its directory is invalid
   */
  static openSync(projectRoot: string): CodeGraph {
    const root = path.resolve(projectRoot);

    // The project must have been initialized beforehand.
    if (isInitialized(root) === false) {
      throw new Error(`CodeGraph not initialized in ${root}. Run init() first.`);
    }

    // The on-disk layout must pass validation.
    const check = validateDirectory(root);
    if (!check.valid) {
      const details = check.errors.join(', ');
      throw new Error(`Invalid CodeGraph directory: ${details}`);
    }

    // Attach to the existing database.
    const connection = DatabaseConnection.open(getDatabasePath(root));
    const builder = new QueryBuilder(connection.getDb());

    return new CodeGraph(connection, builder, root);
  }
  /**
   * Reports whether the given directory has been set up as a CodeGraph project.
   *
   * @param projectRoot - Path to check; resolved to an absolute path first
   */
  static isInitialized(projectRoot: string): boolean {
    const absoluteRoot = path.resolve(projectRoot);
    return isInitialized(absoluteRoot);
  }
  /**
   * Close the CodeGraph instance and release resources.
   *
   * Stops the file watcher, releases the file lock if held, and closes the
   * database, in that order. The steps are nested in `try`/`finally` so that
   * a failure in an earlier step cannot prevent the later ones from running.
   * In particular the database connection is always closed.
   */
  close(): void {
    try {
      this.unwatch();
    } finally {
      try {
        // Release file lock if held.
        this.fileLock.release();
      } finally {
        this.db.close();
      }
    }
  }
  /**
   * Returns the project root directory this instance was created with.
   */
  getProjectRoot(): string {
    const { projectRoot } = this;
    return projectRoot;
  }
  284. // ===========================================================================
  285. // Indexing
  286. // ===========================================================================
  /**
   * Index all files in the project.
   *
   * Runs under the in-process mutex and the cross-process file lock. If the
   * file lock cannot be acquired, a failed `IndexResult` is returned rather
   * than throwing.
   *
   * After a successful extraction that indexed at least one file, the method
   * re-detects frameworks, runs cross-file finalization, resolves references,
   * runs database maintenance, recomputes node/edge totals from the database,
   * and stamps the index with the engine version.
   *
   * @param options - Progress callback, abort signal, verbose flag
   * @returns The index result, with node/edge counts taken from the database
   */
  async indexAll(options: IndexOptions = {}): Promise<IndexResult> {
    // Forwards resolution progress to the caller's callback, if any.
    const reportResolving = (current: number, total: number): void => {
      options.onProgress?.({
        phase: 'resolving',
        current,
        total,
      });
    };

    return this.indexMutex.withLock(async () => {
      try {
        this.fileLock.acquire();
      } catch {
        return {
          success: false,
          filesIndexed: 0,
          filesSkipped: 0,
          filesErrored: 0,
          nodesCreated: 0,
          edgesCreated: 0,
          errors: [
            {
              message: 'Could not acquire file lock - another process may be indexing',
              severity: 'error' as const,
            },
          ],
          durationMs: 0,
        };
      }

      try {
        const before = this.queries.getNodeAndEdgeCount();
        const result = await this.orchestrator.indexAll(
          options.onProgress,
          options.signal,
          options.verbose
        );

        // Every post-extraction step applies only to a successful run that
        // actually indexed files.
        const indexedAnything = result.success && result.filesIndexed > 0;
        if (!indexedAnything) {
          return result;
        }

        // Re-detect frameworks now that the index is populated. The resolver
        // is built via createResolver() before any files exist, so framework
        // resolvers whose detect() consults the indexed file list (e.g.
        // UIKit/SwiftUI scanning for imports, swift-objc-bridge looking for
        // both Swift and ObjC files) return false on that first pass and
        // silently drop out. Re-initializing lets them see the real project
        // before resolution runs.
        this.resolver.initialize();
        // Cross-file finalization (e.g. NestJS RouterModule prefixes). Runs
        // before resolution so updated names show up in subsequent reads.
        this.resolver.runPostExtract();

        // Resolve references to create call/import/extends edges. Only the
        // count is fetched up front, so the refs are not all loaded into memory.
        const unresolvedCount = this.queries.getUnresolvedReferencesCount();
        reportResolving(0, unresolvedCount);
        await this.resolveReferencesBatched((current, total) => {
          reportResolving(current, total);
        });

        // Second pass: chained calls whose method lives on a supertype the
        // receiver conforms to (protocol-extension / inherited / default-
        // interface). It needs the implements/extends edges the main pass
        // just built, so it runs after resolution (#750).
        this.resolver.resolveChainedCallsViaConformance();
        // Same lifecycle for `this.<member>` callback registrations whose
        // member is inherited from a supertype (#808).
        this.resolver.resolveDeferredThisMemberRefs();

        // Refresh planner stats + checkpoint the WAL after bulk writes.
        // Cheap and non-blocking; never load-bearing for correctness.
        this.db.runMaintenance();

        // The orchestrator only sees extraction-phase counts; resolution and
        // synthesizer edges (often >50% of the graph on JVM repos) come
        // later. Recompute against the DB so the CLI summary reports the
        // true totals.
        const after = this.queries.getNodeAndEdgeCount();
        result.nodesCreated = after.nodes - before.nodes;
        result.edgesCreated = after.edges - before.edges;

        // Stamp the index with the engine that built it, so `codegraph status`
        // and `codegraph upgrade` can recommend a re-index when the running
        // engine produces richer extraction than the one on disk. Only a real
        // full index does this — a sync touches a subset, so it must NOT
        // advance the extraction stamp. See extraction-version.ts.
        try {
          this.queries.setMetadata('indexed_with_version', CodeGraphPackageVersion);
          this.queries.setMetadata(
            'indexed_with_extraction_version',
            String(EXTRACTION_VERSION)
          );
        } catch {
          /* metadata is advisory — never fail an index over it */
        }

        return result;
      } finally {
        this.fileLock.release();
      }
    });
  }
  /**
   * Index specific files.
   *
   * Runs under the in-process mutex and the cross-process file lock. If the
   * file lock cannot be acquired, a failed `IndexResult` is returned rather
   * than throwing.
   *
   * @param filePaths - Paths of the files to index
   * @returns The result reported by the extraction orchestrator
   */
  async indexFiles(filePaths: string[]): Promise<IndexResult> {
    return this.indexMutex.withLock(async () => {
      try {
        this.fileLock.acquire();
      } catch {
        return { success: false, filesIndexed: 0, filesSkipped: 0, filesErrored: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
      }
      try {
        // `await` is required here: returning the pending promise directly
        // would let the `finally` below release the file lock before the
        // indexing work has finished.
        return await this.orchestrator.indexFiles(filePaths);
      } finally {
        this.fileLock.release();
      }
    });
  }
  /**
   * Sync with the current file state (incremental update).
   *
   * Runs under the in-process mutex and the cross-process file lock. If the
   * file lock cannot be acquired, an all-zero `SyncResult` is returned rather
   * than throwing.
   *
   * @param options - Index options; only `onProgress` is read here
   * @returns The sync result reported by the extraction orchestrator
   */
  async sync(options: IndexOptions = {}): Promise<SyncResult> {
    // Forwards resolution progress to the caller's callback, if any.
    const reportResolving = (current: number, total: number): void => {
      options.onProgress?.({
        phase: 'resolving',
        current,
        total,
      });
    };

    return this.indexMutex.withLock(async () => {
      try {
        this.fileLock.acquire();
      } catch {
        return {
          filesChecked: 0,
          filesAdded: 0,
          filesModified: 0,
          filesRemoved: 0,
          nodesUpdated: 0,
          durationMs: 0,
        };
      }

      try {
        const result = await this.orchestrator.sync(options.onProgress);
        const contentChanged = result.filesAdded > 0 || result.filesModified > 0;

        if (contentChanged) {
          // Cross-file finalization (e.g. NestJS RouterModule prefixes). Run
          // on every sync that touched files so edits to `app.module.ts`
          // propagate to controllers in unchanged files. The pass is
          // idempotent and cheap (regex over *.module.ts only).
          this.resolver.runPostExtract();

          // Resolve references for the updated files.
          if (result.changedFilePaths) {
            // Scope resolution to changed files (git fast path — bounded set).
            const scopedRefs = this.queries.getUnresolvedReferencesByFiles(
              result.changedFilePaths
            );
            reportResolving(0, scopedRefs.length);
            this.resolver.resolveAndPersist(scopedRefs, (current, total) => {
              reportResolving(current, total);
            });
          } else {
            // No git info — use batched resolution to avoid OOM.
            const pendingCount = this.queries.getUnresolvedReferencesCount();
            reportResolving(0, pendingCount);
            await this.resolveReferencesBatched((current, total) => {
              reportResolving(current, total);
            });
          }

          // Second pass: chained calls whose method lives on a supertype the
          // receiver conforms to (protocol-extension / inherited). Needs the
          // implements/extends edges built above (#750).
          this.resolver.resolveChainedCallsViaConformance();
          // Same lifecycle for `this.<member>` callback registrations whose
          // member is inherited from a supertype (#808).
          this.resolver.resolveDeferredThisMemberRefs();
        }

        // Refresh planner stats + checkpoint the WAL after bulk writes.
        // Removals count here too.
        if (contentChanged || result.filesRemoved > 0) {
          this.db.runMaintenance();
        }

        return result;
      } finally {
        this.fileLock.release();
      }
    });
  }
  /**
   * Reports whether an indexing operation currently holds the index mutex.
   */
  isIndexing(): boolean {
    const locked = this.indexMutex.isLocked();
    return locked;
  }
  467. // ===========================================================================
  468. // File Watching
  469. // ===========================================================================
  /**
   * Start watching for file changes and auto-syncing.
   *
   * Uses native OS file events (FSEvents on macOS, inotify on Linux,
   * ReadDirectoryChangesW on Windows) with debouncing to avoid thrashing.
   * NOTE(review): the previous comment read "inotify on Linux 19+" —
   * presumably a Node.js version requirement; confirm.
   *
   * Returns true immediately if a watcher is already active.
   *
   * @param options - Watch options (debounce delay, callbacks)
   * @returns true if watching started successfully
   */
  watch(options: WatchOptions = {}): boolean {
    const current = this.watcher;
    if (current?.isActive()) {
      return true;
    }

    // Callback the watcher invokes to perform a sync.
    const runSync = async () => {
      const outcome = await this.sync();

      // sync() returns this exact zero-shape iff it failed to acquire the
      // file lock (a real empty sync always has filesChecked > 0 because
      // scanDirectory ran). Surface that to the watcher as a typed error so
      // it keeps pendingFiles + reschedules instead of clearing them (#449).
      const lockUnavailable = outcome.filesChecked === 0 && outcome.durationMs === 0;
      if (lockUnavailable) {
        throw new LockUnavailableError();
      }

      return {
        filesChanged: outcome.filesAdded + outcome.filesModified + outcome.filesRemoved,
        durationMs: outcome.durationMs,
      };
    };

    this.watcher = new FileWatcher(this.projectRoot, runSync, options);
    return this.watcher.start();
  }
  /**
   * Stop watching for file changes. Does nothing when no watcher exists.
   */
  unwatch(): void {
    if (!this.watcher) {
      return;
    }
    this.watcher.stop();
    this.watcher = null;
  }
  /**
   * Reports whether the file watcher is active; false when no watcher exists.
   */
  isWatching(): boolean {
    const active = this.watcher?.isActive();
    return active ?? false;
  }
  /**
   * True once live watching has permanently degraded (OS watch-resource
   * exhaustion, or a write lock held past the retry budget) and auto-sync is
   * disabled until the next {@link watch} call.
   *
   * This is not the same as `!isWatching()`: a watcher that was stopped or
   * never started is inactive but NOT degraded. MCP tools use this to surface
   * a whole-index "results may be stale" notice, since `getPendingFiles()`
   * goes empty once watching stops (#876).
   */
  isWatcherDegraded(): boolean {
    const degraded = this.watcher?.isDegraded();
    return degraded ?? false;
  }
  /**
   * Why live watching degraded, or null when it is healthy or no watcher
   * exists (#876).
   */
  getWatcherDegradedReason(): string | null {
    const reason = this.watcher?.getDegradedReason();
    return reason ?? null;
  }
  /**
   * Files the watcher has seen since the last successful sync. This is the
   * per-file "stale" signal MCP tools attach to responses, so an agent can
   * fall back to {@link Read} for just the affected file instead of waiting
   * for a debounced sync to finish (issue #403).
   *
   * The list is empty when the watcher isn't active or no events have
   * arrived. Each entry carries `firstSeenMs` and `lastSeenMs` (wall-clock
   * `Date.now()` values) so callers can render "edited Nms ago", plus an
   * `indexing` flag telling whether the in-flight sync (if any) will absorb
   * that file.
   */
  getPendingFiles(): PendingFile[] {
    const pending = this.watcher?.getPendingFiles();
    return pending ?? [];
  }
  /**
   * Resolves once the file watcher has installed its watch set. Handy for
   * tests that need a deterministic boundary before asserting on
   * `getPendingFiles()`. Resolves immediately when no watcher exists.
   *
   * @param timeoutMs - Optional timeout forwarded to the watcher
   */
  waitUntilWatcherReady(timeoutMs?: number): Promise<void> {
    if (this.watcher) {
      return this.watcher.waitUntilReady(timeoutMs);
    }
    return Promise.resolve();
  }
  /**
   * Lists files that have changed since the last index, grouped into added,
   * modified and removed, as reported by the extraction orchestrator.
   */
  getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } {
    const changes = this.orchestrator.getChangedFiles();
    return changes;
  }
  /**
   * Most recent index timestamp (ms since epoch) across all tracked files, or
   * null when nothing has been indexed yet. Lets library consumers check
   * index freshness without shelling out to `codegraph status --json`. (#329)
   */
  getLastIndexedAt(): number | null {
    const timestamp = this.queries.getLastIndexedAt();
    return timestamp;
  }
  /**
   * Which engine built the current index: the package version and extraction
   * version stamped at the last full `indexAll`. Either field is null for an
   * index built before stamping existed (treated as stale). See
   * `extraction-version.ts` and `isIndexStale()`.
   *
   * @returns The stored version string, and the stored extraction version
   *   parsed as a base-10 integer (null when absent or not numeric)
   */
  getIndexBuildInfo(): { version: string | null; extractionVersion: number | null } {
    const version = this.queries.getMetadata('indexed_with_version');
    const rawExtraction = this.queries.getMetadata('indexed_with_extraction_version');

    let extractionVersion: number | null = null;
    if (rawExtraction != null) {
      const parsed = parseInt(rawExtraction, 10);
      if (Number.isFinite(parsed)) {
        extractionVersion = parsed;
      }
    }

    return { version, extractionVersion };
  }
  /**
   * True when the on-disk index was built by an engine whose extraction is
   * older than the one now running, i.e. a re-index would add data that a
   * migration can't backfill. False when there is no index yet (nothing to
   * refresh) or the stamp is current. This is the signal behind
   * `codegraph status`'s re-index hint and `codegraph upgrade`'s reminder.
   */
  isIndexStale(): boolean {
    // No index at all: nothing to refresh.
    if (this.queries.getLastIndexedAt() == null) {
      return false;
    }

    const stamped = this.getIndexBuildInfo().extractionVersion;
    // A missing stamp means the index predates stamping and counts as stale.
    if (stamped == null) {
      return true;
    }
    return stamped < EXTRACTION_VERSION;
  }
  /**
   * Extracts nodes and edges from the given source text without storing them.
   *
   * @param filePath - Path associated with the source
   * @param source - Source code to analyze
   */
  extractFromSource(filePath: string, source: string): ExtractionResult {
    // Delegates to the module-level `extractFromSource` imported from ./extraction.
    const extraction = extractFromSource(filePath, source);
    return extraction;
  }
  597. // ===========================================================================
  598. // Reference Resolution
  599. // ===========================================================================
  /**
   * Resolve unresolved references and create edges.
   *
   * Loads every unresolved reference left by extraction from the database and
   * hands them to the resolver, which tries several strategies:
   * - Framework-specific patterns (React, Express, Laravel)
   * - Import-based resolution
   * - Name-based symbol matching
   *
   * @param onProgress - Optional progress callback (current, total)
   */
  resolveReferences(onProgress?: (current: number, total: number) => void): ResolutionResult {
    return this.resolver.resolveAndPersist(
      this.queries.getUnresolvedReferences(),
      onProgress
    );
  }
  /**
   * Resolve references in batches to keep memory bounded on large codebases.
   * Chunks of unresolved refs are processed and persisted one batch at a time.
   *
   * @param onProgress - Optional progress callback (current, total)
   */
  async resolveReferencesBatched(onProgress?: (current: number, total: number) => void): Promise<ResolutionResult> {
    const pendingResult = this.resolver.resolveAndPersistBatched(onProgress);
    return pendingResult;
  }
  /**
   * Lists the frameworks the resolver has detected in the project.
   */
  getDetectedFrameworks(): string[] {
    const frameworks = this.resolver.getDetectedFrameworks();
    return frameworks;
  }
  /**
   * Re-runs the resolver's initialization (useful after adding new files).
   */
  reinitializeResolver(): void {
    const { resolver } = this;
    resolver.initialize();
  }
  633. // ===========================================================================
  634. // Graph Statistics
  635. // ===========================================================================
  /**
   * Returns statistics about the knowledge graph, with `dbSizeBytes` filled
   * in from the database connection.
   */
  getStats(): GraphStats {
    const graphStats = this.queries.getStats();
    const sizeOnDisk = this.db.getSize();
    graphStats.dbSizeBytes = sizeOnDisk;
    return graphStats;
  }
  /**
   * Active SQLite backend for this project's connection (`node-sqlite` —
   * Node's built-in real-SQLite module). Surfaced via `codegraph status` and
   * the `codegraph_status` MCP tool alongside the effective journal mode.
   */
  getBackend(): import('./db').SqliteBackend {
    const backend = this.db.getBackend();
    return backend;
  }
  /**
   * The journal mode actually in effect ('wal', 'delete', …). With 'wal',
   * readers never block on a concurrent writer; with any other mode they can,
   * which is the precondition for the "database is locked" failures in issue
   * #238. Surfaced via `codegraph status` and the `codegraph_status` MCP tool.
   */
  getJournalMode(): string {
    const mode = this.db.getJournalMode();
    return mode;
  }
  661. // ===========================================================================
  662. // Node Operations
  663. // ===========================================================================
  /**
   * Looks up a single node by its ID.
   *
   * @param id - Node identifier
   * @returns The node, or null (see the return type) when the lookup yields none
   */
  getNode(id: string): Node | null {
    const node = this.queries.getNodeById(id);
    return node;
  }
  /**
   * Returns all nodes recorded for the given file.
   *
   * @param filePath - File whose nodes are requested
   */
  getNodesInFile(filePath: string): Node[] {
    const nodes = this.queries.getNodesByFile(filePath);
    return nodes;
  }
  /**
   * Returns all nodes of a specific kind.
   *
   * @param kind - Node kind to select
   */
  getNodesByKind(kind: Node['kind']): Node[] {
    const nodes = this.queries.getNodesByKind(kind);
    return nodes;
  }
  /**
   * Returns ALL nodes with an exact name, via a direct index lookup rather
   * than an FTS-ranked/capped search. Used to enumerate every overload of a
   * heavily-overloaded name, so the specific definition the caller wants is
   * never dropped below a search cut.
   *
   * @param name - Exact node name
   */
  getNodesByName(name: string): Node[] {
    const matches = this.queries.getNodesByName(name);
    return matches;
  }
  /**
   * Searches nodes by text.
   *
   * @param query - Search text
   * @param options - Optional search options, forwarded unchanged
   */
  searchNodes(query: string, options?: SearchOptions): SearchResult[] {
    const hits = this.queries.searchNodes(query, options);
    return hits;
  }
  696. /**
  697. * Normalized project-name tokens (go.mod / package.json / repo dir) used to
  698. * down-weight the non-discriminative project name in search ranking (#720).
  699. * Exposed so explore can exclude it from the PascalCase type-disambiguation
  700. * bias, which would otherwise pull overloaded tokens toward whichever stack
  701. * embeds the project name.
  702. */
  getProjectNameTokens(): Set<string> {
    // Returns the very Set the query layer hands back (no defensive copy is made here).
    const { queries } = this;
    return queries.getProjectNameTokens();
  }
  706. /**
  707. * Find the project's "primary route file" — the file with the densest
  708. * concentration of framework-emitted `route` nodes (≥3 routes, ≥30%
  709. * of all non-test routes). Used to inline the routing config in
  710. * `codegraph_explore` responses on small realworld template repos
  711. * (rails-realworld, laravel-realworld, drupal-admintoolbar, …) where
  712. * Glob+Read of `routes.rb`/`urls.py`/etc. otherwise beats codegraph.
  713. */
  getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null {
    // Selection of the route file happens entirely in the query layer;
    // this method only relays the answer (or null).
    const topRouteFile = this.queries.getTopRouteFile();
    return topRouteFile;
  }
  717. /**
  718. * Build a URL → handler routing manifest from the index. Each entry
  719. * pairs a route node (URL + method) with its handler function/method
  720. * via the `references` edge that framework resolvers emit. Returns
  721. * null when fewer than 3 valid (non-test) routes exist.
  722. */
  getRoutingManifest(limit?: number): {
    entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>;
    topHandlerFile: string | null;
    topHandlerFileCount: number;
    totalRoutes: number;
  } | null {
    // `limit` is passed through untouched (possibly undefined); the manifest
    // itself is assembled by the query layer.
    const { queries } = this;
    return queries.getRoutingManifest(limit);
  }
  731. // ===========================================================================
  732. // Edge Operations
  733. // ===========================================================================
  734. /**
  735. * Get outgoing edges from a node
  736. */
  getOutgoingEdges(nodeId: string): Edge[] {
    // Outgoing-edge lookup is delegated to the query layer.
    const outgoing = this.queries.getOutgoingEdges(nodeId);
    return outgoing;
  }
  740. /**
  741. * Get incoming edges to a node
  742. */
  getIncomingEdges(nodeId: string): Edge[] {
    // Incoming-edge lookup is delegated to the query layer.
    const incoming = this.queries.getIncomingEdges(nodeId);
    return incoming;
  }
  746. // ===========================================================================
  747. // File Operations
  748. // ===========================================================================
  749. /**
  750. * Get a file record by path
  751. */
  getFile(filePath: string): FileRecord | null {
    // The path is handed to the query layer verbatim; no normalisation is applied here.
    const { queries } = this;
    return queries.getFileByPath(filePath);
  }
  755. /**
  756. * Get all tracked files
  757. */
  getFiles(): FileRecord[] {
    // Full file listing as reported by the query layer.
    const trackedFiles = this.queries.getAllFiles();
    return trackedFiles;
  }
  761. // ===========================================================================
  762. // Graph Query Methods
  763. // ===========================================================================
  764. /**
  765. * Get the context for a node (ancestors, children, references)
  766. *
  767. * Returns comprehensive context about a node including its containment
  768. * hierarchy, children, incoming/outgoing references, type information,
  769. * and relevant imports.
  770. *
  771. * @param nodeId - ID of the focal node
  772. * @returns Context object with all related information
  773. */
  getContext(nodeId: string): Context {
    // Context assembly is owned by the graph manager; relay its result.
    const { graphManager } = this;
    return graphManager.getContext(nodeId);
  }
  777. /**
  778. * Traverse the graph from a starting node
  779. *
  780. * Uses breadth-first search by default. Supports filtering by edge types,
  781. * node types, and traversal direction.
  782. *
  783. * @param startId - Starting node ID
  784. * @param options - Traversal options
  785. * @returns Subgraph containing traversed nodes and edges
  786. */
  traverse(startId: string, options?: TraversalOptions): Subgraph {
    // This entry point always calls the traverser's BFS routine; `options`
    // is forwarded as given (possibly undefined).
    const visited = this.traverser.traverseBFS(startId, options);
    return visited;
  }
  790. /**
  791. * Get the call graph for a function
  792. *
  793. * Returns both callers (functions that call this function) and
  794. * callees (functions called by this function) up to the specified depth.
  795. *
  796. * @param nodeId - ID of the function/method node
  797. * @param depth - Maximum depth in each direction (default: 2)
  798. * @returns Subgraph containing the call graph
  799. */
  getCallGraph(nodeId: string, depth: number = 2): Subgraph {
    // `depth` falls back to 2 when the caller omits it; the traverser does the walk.
    const { traverser } = this;
    return traverser.getCallGraph(nodeId, depth);
  }
  803. /**
  804. * Get the type hierarchy for a class/interface
  805. *
  806. * Returns both ancestors (types this extends/implements) and
  807. * descendants (types that extend/implement this).
  808. *
  809. * @param nodeId - ID of the class/interface node
  810. * @returns Subgraph containing the type hierarchy
  811. */
  getTypeHierarchy(nodeId: string): Subgraph {
    // Hierarchy construction is delegated to the traverser.
    const hierarchy = this.traverser.getTypeHierarchy(nodeId);
    return hierarchy;
  }
  815. /**
  816. * Find all usages of a symbol
  817. *
  818. * Returns all nodes that reference the specified symbol through
  819. * any edge type (calls, references, type_of, etc.).
  820. *
  821. * @param nodeId - ID of the symbol node
  822. * @returns Array of nodes and edges that reference this symbol
  823. */
  findUsages(nodeId: string): Array<{ node: Node; edge: Edge }> {
    // Each entry pairs a node with an edge, exactly as the traverser reports them.
    const { traverser } = this;
    return traverser.findUsages(nodeId);
  }
  827. /**
  828. * Get callers of a function/method
  829. *
  830. * @param nodeId - ID of the function/method node
  831. * @param maxDepth - Maximum depth to traverse (default: 1)
  832. * @returns Array of nodes that call this function
  833. */
  getCallers(nodeId: string, maxDepth: number = 1): Array<{ node: Node; edge: Edge }> {
    // `maxDepth` falls back to 1 when omitted; the traverser performs the lookup.
    const callers = this.traverser.getCallers(nodeId, maxDepth);
    return callers;
  }
  837. /**
  838. * Get callees of a function/method
  839. *
  840. * @param nodeId - ID of the function/method node
  841. * @param maxDepth - Maximum depth to traverse (default: 1)
  842. * @returns Array of nodes called by this function
  843. */
  getCallees(nodeId: string, maxDepth: number = 1): Array<{ node: Node; edge: Edge }> {
    // `maxDepth` falls back to 1 when omitted; the traverser performs the lookup.
    const callees = this.traverser.getCallees(nodeId, maxDepth);
    return callees;
  }
  847. /**
  848. * Calculate the impact radius of a node
  849. *
  850. * Returns all nodes that could be affected by changes to this node.
  851. *
  852. * @param nodeId - ID of the node
  853. * @param maxDepth - Maximum depth to traverse (default: 3)
  854. * @returns Subgraph containing potentially impacted nodes
  855. */
  getImpactRadius(nodeId: string, maxDepth: number = 3): Subgraph {
    // `maxDepth` falls back to 3 when omitted; the traverser computes the subgraph.
    const { traverser } = this;
    return traverser.getImpactRadius(nodeId, maxDepth);
  }
  859. /**
  860. * Find the shortest path between two nodes
  861. *
  862. * @param fromId - Starting node ID
  863. * @param toId - Target node ID
  864. * @param edgeKinds - Edge types to consider (all if empty)
  865. * @returns Array of nodes and edges forming the path, or null if no path exists
  866. */
  findPath(
    fromId: string,
    toId: string,
    edgeKinds?: Edge['kind'][]
  ): Array<{ node: Node; edge: Edge | null }> | null {
    // Path search lives in the traverser; `edgeKinds` is passed along even when undefined.
    const route = this.traverser.findPath(fromId, toId, edgeKinds);
    return route;
  }
  874. /**
  875. * Get ancestors of a node in the containment hierarchy
  876. *
  877. * @param nodeId - ID of the node
  878. * @returns Array of ancestor nodes from immediate parent to root
  879. */
  getAncestors(nodeId: string): Node[] {
    // Ordering of the returned list is whatever the traverser produces.
    const { traverser } = this;
    return traverser.getAncestors(nodeId);
  }
  883. /**
  884. * Get immediate children of a node
  885. *
  886. * @param nodeId - ID of the node
  887. * @returns Array of child nodes
  888. */
  getChildren(nodeId: string): Node[] {
    // Child lookup is delegated to the traverser.
    const children = this.traverser.getChildren(nodeId);
    return children;
  }
  892. /**
  893. * Get dependencies of a file
  894. *
  895. * @param filePath - Path to the file
  896. * @returns Array of file paths this file depends on
  897. */
  getFileDependencies(filePath: string): string[] {
    // File-level dependency resolution is owned by the graph manager.
    const { graphManager } = this;
    return graphManager.getFileDependencies(filePath);
  }
  901. /**
  902. * Get dependents of a file
  903. *
  904. * @param filePath - Path to the file
  905. * @returns Array of file paths that depend on this file
  906. */
  getFileDependents(filePath: string): string[] {
    // Reverse direction of getFileDependencies; also resolved by the graph manager.
    const dependents = this.graphManager.getFileDependents(filePath);
    return dependents;
  }
  910. /**
  911. * Find circular dependencies in the codebase
  912. *
  913. * @returns Array of cycles, each cycle is an array of file paths
  914. */
  findCircularDependencies(): string[][] {
    // Cycle detection runs inside the graph manager; the cycles are returned unmodified.
    const { graphManager } = this;
    return graphManager.findCircularDependencies();
  }
  918. /**
  919. * Find dead code (unreferenced symbols)
  920. *
  921. * @param kinds - Node kinds to check (default: functions, methods, classes)
  922. * @returns Array of unreferenced nodes
  923. */
  findDeadCode(kinds?: Node['kind'][]): Node[] {
    // `kinds` may be undefined; no default is applied in this method, so any
    // fallback set of kinds is chosen by the graph manager.
    const unreferenced = this.graphManager.findDeadCode(kinds);
    return unreferenced;
  }
  927. /**
  928. * Get complexity metrics for a node
  929. *
  930. * @param nodeId - ID of the node
  931. * @returns Object containing various complexity metrics
  932. */
  getNodeMetrics(nodeId: string): {
    incomingEdgeCount: number;
    outgoingEdgeCount: number;
    callCount: number;
    callerCount: number;
    childCount: number;
    depth: number;
  } {
    // All six counters are computed by the graph manager; nothing is derived here.
    const { graphManager } = this;
    return graphManager.getNodeMetrics(nodeId);
  }
  943. // ===========================================================================
  944. // Context Building
  945. // ===========================================================================
  946. /**
  947. * Get the source code for a node
  948. *
  949. * Reads the file and extracts the code between startLine and endLine.
  950. *
  951. * @param nodeId - ID of the node
  952. * @returns Code string or null if not found
  953. */
  async getCode(nodeId: string): Promise<string | null> {
    // No await here: whatever the context builder returns is handed back
    // directly from this async method.
    const code = this.contextBuilder.getCode(nodeId);
    return code;
  }
  957. /**
  958. * Find relevant subgraph for a query
  959. *
  960. * Combines semantic search with graph traversal to find the most
  961. * relevant nodes and their relationships for a given query.
  962. *
  963. * @param query - Natural language query describing the task
  964. * @param options - Search and traversal options
  965. * @returns Subgraph of relevant nodes and edges
  966. */
  async findRelevantContext(
    query: string,
    options?: FindRelevantContextOptions
  ): Promise<Subgraph> {
    // Search and expansion both happen in the context builder; `options`
    // is forwarded as given (possibly undefined).
    const { contextBuilder } = this;
    return contextBuilder.findRelevantContext(query, options);
  }
  973. /**
  974. * Build context for a task
  975. *
  976. * Creates comprehensive context by:
  977. * 1. Running FTS search to find entry points
  978. * 2. Expanding the graph around entry points
  979. * 3. Extracting code blocks for key nodes
  980. * 4. Formatting output for Claude
  981. *
  982. * @param input - Task description (string or {title, description})
  983. * @param options - Build options (maxNodes, includeCode, format, etc.)
  984. * @returns TaskContext object or formatted string (markdown/JSON)
  985. */
  async buildContext(
    input: TaskInput,
    options?: BuildContextOptions
  ): Promise<TaskContext | string> {
    // Whether a TaskContext or a string comes back is decided by the context
    // builder; this method adds no processing of its own.
    const built = this.contextBuilder.buildContext(input, options);
    return built;
  }
  992. // ===========================================================================
  993. // Database Management
  994. // ===========================================================================
  995. /**
  996. * Optimize the database (vacuum and analyze)
  997. */
  optimize(): void {
    // Maintenance is performed by the connection wrapper; nothing is returned.
    const { db } = this;
    db.optimize();
  }
  1001. /**
  1002. * Clear all data from the graph
  1003. */
  clear(): void {
    // Clearing goes through the query layer, not the connection wrapper.
    const { queries } = this;
    queries.clear();
  }
  1007. /**
  1008. * Alias for close() for backwards compatibility.
  1009. * @deprecated Use close() instead
  1010. */
  destroy(): void {
    // Legacy entry point: all of the work happens in close().
    this.close();
  }
  1014. /**
  1015. * Completely remove CodeGraph from the project.
  1016. * This closes the database and deletes the .CodeGraph directory.
  1017. *
  1018. * WARNING: This permanently deletes all CodeGraph data for the project.
  1019. */
  uninitialize(): void {
    // Order matters: the instance is closed before anything is removed from disk.
    this.close();

    // NOTE(review): removeDirectory is handed the project root, not a
    // data-directory path — presumably it resolves the CodeGraph directory
    // itself; confirm against its definition before changing this call.
    const { projectRoot } = this;
    removeDirectory(projectRoot);
  }
  1024. }
  1025. // Default export
  1026. export default CodeGraph;