queries.ts 63 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839
  1. /**
  2. * Database Queries
  3. *
  4. * Prepared statements for CRUD operations on the knowledge graph.
  5. */
  6. import { SqliteDatabase, SqliteStatement } from './sqlite-adapter';
  7. import {
  8. Node,
  9. Edge,
  10. FileRecord,
  11. UnresolvedReference,
  12. NodeKind,
  13. EdgeKind,
  14. Language,
  15. GraphStats,
  16. SearchOptions,
  17. SearchResult,
  18. } from '../types';
  19. import { safeJsonParse } from '../utils';
  20. import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
  21. import { parseQuery, boundedEditDistance } from '../search/query-parser';
  22. import { isGeneratedFile } from '../extraction/generated-detection';
  23. /**
  24. * Path-only heuristic for files that should not be candidates for
  25. * "dominant file" detection: test/spec files and tool-generated files.
  26. * Generated files (`*.pb.go`, `*.pulsar.go`, mock outputs, …) often
  27. * have huge in-file edge counts that dwarf the real source — etcd's
  28. * `rpc.pb.go` has 4× the in-file edges of `server.go`.
  29. */
  30. function isLowValueFile(filePath: string): boolean {
  31. const lp = filePath.toLowerCase();
  32. return (
  33. /(?:^|\/)(tests?|__tests?__|spec)\//.test(lp) ||
  34. /_test\.go$/.test(lp) ||
  35. /(?:^|\/)test_[^/]+\.py$/.test(lp) ||
  36. /_test\.py$/.test(lp) ||
  37. /_spec\.rb$/.test(lp) ||
  38. /_test\.rb$/.test(lp) ||
  39. /\.(test|spec)\.[jt]sx?$/.test(lp) ||
  40. /(test|spec|tests)\.(java|kt|scala)$/.test(lp) ||
  41. /(tests?|spec)\.cs$/.test(lp) ||
  42. /tests?\.swift$/.test(lp) ||
  43. /_test\.dart$/.test(lp) ||
  44. isGeneratedFile(filePath)
  45. );
  46. }
  47. const SQLITE_PARAM_CHUNK_SIZE = 500;
  48. /**
  49. * Database row types (snake_case from SQLite)
  50. */
  51. interface NodeRow {
  52. id: string;
  53. kind: string;
  54. name: string;
  55. qualified_name: string;
  56. file_path: string;
  57. language: string;
  58. start_line: number;
  59. end_line: number;
  60. start_column: number;
  61. end_column: number;
  62. docstring: string | null;
  63. signature: string | null;
  64. visibility: string | null;
  65. is_exported: number;
  66. is_async: number;
  67. is_static: number;
  68. is_abstract: number;
  69. decorators: string | null;
  70. type_parameters: string | null;
  71. return_type: string | null;
  72. updated_at: number;
  73. }
  74. interface EdgeRow {
  75. id: number;
  76. source: string;
  77. target: string;
  78. kind: string;
  79. metadata: string | null;
  80. line: number | null;
  81. col: number | null;
  82. provenance: string | null;
  83. }
  84. interface FileRow {
  85. path: string;
  86. content_hash: string;
  87. language: string;
  88. size: number;
  89. modified_at: number;
  90. indexed_at: number;
  91. node_count: number;
  92. errors: string | null;
  93. }
  94. interface UnresolvedRefRow {
  95. id: number;
  96. from_node_id: string;
  97. reference_name: string;
  98. reference_kind: string;
  99. line: number;
  100. col: number;
  101. candidates: string | null;
  102. file_path: string;
  103. language: string;
  104. }
  105. /**
  106. * Convert database row to Node object
  107. */
  108. function rowToNode(row: NodeRow): Node {
  109. return {
  110. id: row.id,
  111. kind: row.kind as NodeKind,
  112. name: row.name,
  113. qualifiedName: row.qualified_name,
  114. filePath: row.file_path,
  115. language: row.language as Language,
  116. startLine: row.start_line,
  117. endLine: row.end_line,
  118. startColumn: row.start_column,
  119. endColumn: row.end_column,
  120. docstring: row.docstring ?? undefined,
  121. signature: row.signature ?? undefined,
  122. visibility: row.visibility as Node['visibility'],
  123. isExported: row.is_exported === 1,
  124. isAsync: row.is_async === 1,
  125. isStatic: row.is_static === 1,
  126. isAbstract: row.is_abstract === 1,
  127. decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined,
  128. typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined,
  129. returnType: row.return_type ?? undefined,
  130. updatedAt: row.updated_at,
  131. };
  132. }
  133. /**
  134. * Convert database row to Edge object
  135. */
  136. function rowToEdge(row: EdgeRow): Edge {
  137. return {
  138. source: row.source,
  139. target: row.target,
  140. kind: row.kind as EdgeKind,
  141. metadata: row.metadata ? safeJsonParse(row.metadata, undefined) : undefined,
  142. line: row.line ?? undefined,
  143. column: row.col ?? undefined,
  144. provenance: row.provenance as Edge['provenance'],
  145. };
  146. }
  147. /**
  148. * Convert database row to FileRecord object
  149. */
  150. function rowToFileRecord(row: FileRow): FileRecord {
  151. return {
  152. path: row.path,
  153. contentHash: row.content_hash,
  154. language: row.language as Language,
  155. size: row.size,
  156. modifiedAt: row.modified_at,
  157. indexedAt: row.indexed_at,
  158. nodeCount: row.node_count,
  159. errors: row.errors ? safeJsonParse(row.errors, undefined) : undefined,
  160. };
  161. }
  162. /**
  163. * Query builder for the knowledge graph database
  164. */
  165. export class QueryBuilder {
  166. private db: SqliteDatabase;
  167. // Project-name tokens (go.mod / package.json / repo dir), normalized. A query
  168. // word matching one is dropped from path-relevance scoring — it names the
  169. // whole project, not a symbol, so it carries no discriminative signal (#720).
  170. // Set once by the CodeGraph instance; empty by default (no down-weighting).
  171. private projectNameTokens: Set<string> = new Set();
  172. // Node cache for frequently accessed nodes (LRU-style, max 1000 entries)
  173. private nodeCache: Map<string, Node> = new Map();
  174. private readonly maxCacheSize = 1000;
  175. // Prepared statements (lazily initialized)
  176. private stmts: {
  177. insertNode?: SqliteStatement;
  178. updateNode?: SqliteStatement;
  179. deleteNode?: SqliteStatement;
  180. deleteNodesByFile?: SqliteStatement;
  181. getNodeById?: SqliteStatement;
  182. getNodesByFile?: SqliteStatement;
  183. getNodesByKind?: SqliteStatement;
  184. insertEdge?: SqliteStatement;
  185. upsertFile?: SqliteStatement;
  186. deleteEdgesBySource?: SqliteStatement;
  187. deleteEdgesByTarget?: SqliteStatement;
  188. getEdgesBySource?: SqliteStatement;
  189. getEdgesByTarget?: SqliteStatement;
  190. insertFile?: SqliteStatement;
  191. updateFile?: SqliteStatement;
  192. deleteFile?: SqliteStatement;
  193. getFileByPath?: SqliteStatement;
  194. getAllFiles?: SqliteStatement;
  195. insertUnresolved?: SqliteStatement;
  196. deleteUnresolvedByNode?: SqliteStatement;
  197. getUnresolvedByName?: SqliteStatement;
  198. getNodesByName?: SqliteStatement;
  199. getNodesByQualifiedNameExact?: SqliteStatement;
  200. getNodesByLowerName?: SqliteStatement;
  201. getUnresolvedCount?: SqliteStatement;
  202. getUnresolvedBatch?: SqliteStatement;
  203. getAllFilePaths?: SqliteStatement;
  204. getAllNodeNames?: SqliteStatement;
  205. getDominantFile?: SqliteStatement;
  206. getTopRouteFile?: SqliteStatement;
  207. getRoutingManifest?: SqliteStatement;
  208. } = {};
  209. constructor(db: SqliteDatabase) {
  210. this.db = db;
  211. }
  212. /** Set the normalized project-name tokens used to down-weight non-discriminative
  213. * query words in path scoring (#720). Called once when the project opens. */
  214. setProjectNameTokens(tokens: Set<string>): void {
  215. this.projectNameTokens = tokens;
  216. }
  217. /** The normalized project-name tokens (#720); empty if none were derived. */
  218. getProjectNameTokens(): Set<string> {
  219. return this.projectNameTokens;
  220. }
  221. // ===========================================================================
  222. // Node Operations
  223. // ===========================================================================
  224. /**
  225. * Insert a new node
  226. */
  227. insertNode(node: Node): void {
  228. if (!this.stmts.insertNode) {
  229. this.stmts.insertNode = this.db.prepare(`
  230. INSERT OR REPLACE INTO nodes (
  231. id, kind, name, qualified_name, file_path, language,
  232. start_line, end_line, start_column, end_column,
  233. docstring, signature, visibility,
  234. is_exported, is_async, is_static, is_abstract,
  235. decorators, type_parameters, return_type, updated_at
  236. ) VALUES (
  237. @id, @kind, @name, @qualifiedName, @filePath, @language,
  238. @startLine, @endLine, @startColumn, @endColumn,
  239. @docstring, @signature, @visibility,
  240. @isExported, @isAsync, @isStatic, @isAbstract,
  241. @decorators, @typeParameters, @returnType, @updatedAt
  242. )
  243. `);
  244. }
  245. // Validate required fields to prevent SQLite bind errors
  246. if (!node.id || !node.kind || !node.name || !node.filePath || !node.language) {
  247. console.error('[CodeGraph] Skipping node with missing required fields:', {
  248. id: node.id,
  249. kind: node.kind,
  250. name: node.name,
  251. filePath: node.filePath,
  252. language: node.language,
  253. });
  254. return;
  255. }
  256. // INSERT OR REPLACE may overwrite a node we have cached. Drop the
  257. // stale entry so the next getNodeById sees the new row, not the old
  258. // one (matches the cache-invalidation pattern used by updateNode and
  259. // deleteNode below).
  260. this.nodeCache.delete(node.id);
  261. this.stmts.insertNode.run({
  262. id: node.id,
  263. kind: node.kind,
  264. name: node.name,
  265. qualifiedName: node.qualifiedName ?? node.name,
  266. filePath: node.filePath,
  267. language: node.language,
  268. startLine: node.startLine ?? 0,
  269. endLine: node.endLine ?? 0,
  270. startColumn: node.startColumn ?? 0,
  271. endColumn: node.endColumn ?? 0,
  272. docstring: node.docstring ?? null,
  273. signature: node.signature ?? null,
  274. visibility: node.visibility ?? null,
  275. isExported: node.isExported ? 1 : 0,
  276. isAsync: node.isAsync ? 1 : 0,
  277. isStatic: node.isStatic ? 1 : 0,
  278. isAbstract: node.isAbstract ? 1 : 0,
  279. decorators: node.decorators ? JSON.stringify(node.decorators) : null,
  280. typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
  281. returnType: node.returnType ?? null,
  282. updatedAt: node.updatedAt ?? Date.now(),
  283. });
  284. }
  285. /**
  286. * Insert multiple nodes in a transaction
  287. */
  288. insertNodes(nodes: Node[]): void {
  289. this.db.transaction(() => {
  290. for (const node of nodes) {
  291. this.insertNode(node);
  292. }
  293. })();
  294. }
  295. /**
  296. * Update an existing node
  297. */
  298. updateNode(node: Node): void {
  299. if (!this.stmts.updateNode) {
  300. this.stmts.updateNode = this.db.prepare(`
  301. UPDATE nodes SET
  302. kind = @kind,
  303. name = @name,
  304. qualified_name = @qualifiedName,
  305. file_path = @filePath,
  306. language = @language,
  307. start_line = @startLine,
  308. end_line = @endLine,
  309. start_column = @startColumn,
  310. end_column = @endColumn,
  311. docstring = @docstring,
  312. signature = @signature,
  313. visibility = @visibility,
  314. is_exported = @isExported,
  315. is_async = @isAsync,
  316. is_static = @isStatic,
  317. is_abstract = @isAbstract,
  318. decorators = @decorators,
  319. type_parameters = @typeParameters,
  320. return_type = @returnType,
  321. updated_at = @updatedAt
  322. WHERE id = @id
  323. `);
  324. }
  325. // Invalidate cache before update
  326. this.nodeCache.delete(node.id);
  327. // Validate required fields
  328. if (!node.id || !node.kind || !node.name || !node.filePath || !node.language) {
  329. console.error('[CodeGraph] Skipping node update with missing required fields:', node.id);
  330. return;
  331. }
  332. this.stmts.updateNode.run({
  333. id: node.id,
  334. kind: node.kind,
  335. name: node.name,
  336. qualifiedName: node.qualifiedName ?? node.name,
  337. filePath: node.filePath,
  338. language: node.language,
  339. startLine: node.startLine ?? 0,
  340. endLine: node.endLine ?? 0,
  341. startColumn: node.startColumn ?? 0,
  342. endColumn: node.endColumn ?? 0,
  343. docstring: node.docstring ?? null,
  344. signature: node.signature ?? null,
  345. visibility: node.visibility ?? null,
  346. isExported: node.isExported ? 1 : 0,
  347. isAsync: node.isAsync ? 1 : 0,
  348. isStatic: node.isStatic ? 1 : 0,
  349. isAbstract: node.isAbstract ? 1 : 0,
  350. decorators: node.decorators ? JSON.stringify(node.decorators) : null,
  351. typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
  352. returnType: node.returnType ?? null,
  353. updatedAt: node.updatedAt ?? Date.now(),
  354. });
  355. }
  356. /**
  357. * Delete a node by ID
  358. */
  359. deleteNode(id: string): void {
  360. if (!this.stmts.deleteNode) {
  361. this.stmts.deleteNode = this.db.prepare('DELETE FROM nodes WHERE id = ?');
  362. }
  363. // Invalidate cache
  364. this.nodeCache.delete(id);
  365. this.stmts.deleteNode.run(id);
  366. }
  367. /**
  368. * Delete all nodes for a file
  369. */
  370. deleteNodesByFile(filePath: string): void {
  371. if (!this.stmts.deleteNodesByFile) {
  372. this.stmts.deleteNodesByFile = this.db.prepare('DELETE FROM nodes WHERE file_path = ?');
  373. }
  374. // Invalidate cache for nodes in this file
  375. for (const [id, node] of this.nodeCache) {
  376. if (node.filePath === filePath) {
  377. this.nodeCache.delete(id);
  378. }
  379. }
  380. this.stmts.deleteNodesByFile.run(filePath);
  381. }
  382. /**
  383. * Get a node by ID
  384. */
  385. getNodeById(id: string): Node | null {
  386. // Check cache first
  387. if (this.nodeCache.has(id)) {
  388. const cached = this.nodeCache.get(id)!;
  389. // Move to end to implement LRU (delete and re-add)
  390. this.nodeCache.delete(id);
  391. this.nodeCache.set(id, cached);
  392. return cached;
  393. }
  394. if (!this.stmts.getNodeById) {
  395. this.stmts.getNodeById = this.db.prepare('SELECT * FROM nodes WHERE id = ?');
  396. }
  397. const row = this.stmts.getNodeById.get(id) as NodeRow | undefined;
  398. if (!row) {
  399. return null;
  400. }
  401. const node = rowToNode(row);
  402. this.cacheNode(node);
  403. return node;
  404. }
  405. /**
  406. * Batch lookup: fetch many nodes by ID in a single SQL round-trip.
  407. *
  408. * Replaces the N+1 pattern in graph traversal where every edge would
  409. * trigger its own `getNodeById` call. For a function with 50 callers
  410. * this collapses 50 point reads into one IN-list query (~10-50x
  411. * faster end-to-end).
  412. *
  413. * Returns a Map keyed by id so callers can preserve their own ordering
  414. * (typically the order edges were returned from the graph). Missing IDs
  415. * are simply absent from the map.
  416. *
  417. * Cache-aware: ids already in the LRU cache are served from memory and
  418. * the SQL query only touches the misses.
  419. */
  420. getNodesByIds(ids: readonly string[]): Map<string, Node> {
  421. const out = new Map<string, Node>();
  422. if (ids.length === 0) return out;
  423. // Serve cache hits first; build the miss list for SQL.
  424. const misses: string[] = [];
  425. for (const id of ids) {
  426. const cached = this.nodeCache.get(id);
  427. if (cached !== undefined) {
  428. // LRU touch
  429. this.nodeCache.delete(id);
  430. this.nodeCache.set(id, cached);
  431. out.set(id, cached);
  432. } else {
  433. misses.push(id);
  434. }
  435. }
  436. if (misses.length === 0) return out;
  437. // Chunk under SQLite's parameter limit (default 999, raised to 32766
  438. // in better-sqlite3 builds — chunk at 500 for safety across both
  439. // backends and to keep the query plan simple).
  440. for (let i = 0; i < misses.length; i += SQLITE_PARAM_CHUNK_SIZE) {
  441. const chunk = misses.slice(i, i + SQLITE_PARAM_CHUNK_SIZE);
  442. const placeholders = chunk.map(() => '?').join(',');
  443. const rows = this.db
  444. .prepare(`SELECT * FROM nodes WHERE id IN (${placeholders})`)
  445. .all(...chunk) as NodeRow[];
  446. for (const row of rows) {
  447. const node = rowToNode(row);
  448. out.set(node.id, node);
  449. this.cacheNode(node);
  450. }
  451. }
  452. return out;
  453. }
  454. private getExistingNodeIds(ids: readonly string[]): Set<string> {
  455. const out = new Set<string>();
  456. if (ids.length === 0) return out;
  457. const uniqueIds = [...new Set(ids)];
  458. for (let i = 0; i < uniqueIds.length; i += SQLITE_PARAM_CHUNK_SIZE) {
  459. const chunk = uniqueIds.slice(i, i + SQLITE_PARAM_CHUNK_SIZE);
  460. const placeholders = chunk.map(() => '?').join(',');
  461. const rows = this.db
  462. .prepare(`SELECT id FROM nodes WHERE id IN (${placeholders})`)
  463. .all(...chunk) as { id: string }[];
  464. for (const row of rows) {
  465. out.add(row.id);
  466. }
  467. }
  468. return out;
  469. }
  470. /**
  471. * Add a node to the cache, evicting oldest if needed
  472. */
  473. private cacheNode(node: Node): void {
  474. if (this.nodeCache.size >= this.maxCacheSize) {
  475. // Evict oldest (first) entry
  476. const firstKey = this.nodeCache.keys().next().value;
  477. if (firstKey) {
  478. this.nodeCache.delete(firstKey);
  479. }
  480. }
  481. this.nodeCache.set(node.id, node);
  482. }
  483. /**
  484. * Clear the node cache
  485. */
  486. clearCache(): void {
  487. this.nodeCache.clear();
  488. }
  489. /**
  490. * Get all nodes in a file
  491. */
  492. getNodesByFile(filePath: string): Node[] {
  493. if (!this.stmts.getNodesByFile) {
  494. this.stmts.getNodesByFile = this.db.prepare(
  495. 'SELECT * FROM nodes WHERE file_path = ? ORDER BY start_line'
  496. );
  497. }
  498. const rows = this.stmts.getNodesByFile.all(filePath) as NodeRow[];
  499. return rows.map(rowToNode);
  500. }
  501. /**
  502. * Find the file that holds the densest concentration of the project's
  503. * internal call graph — the "core" file. Used by context-builder to
  504. * boost ranking of symbols in that file's directory (so e.g. sinatra
  505. * queries surface `lib/sinatra/base.rb`'s `route!` instead of
  506. * `sinatra-contrib/lib/sinatra/multi_route.rb`'s `route` extension).
  507. *
  508. * Returns null if no file has a meaningful concentration (e.g. spread
  509. * evenly across many files, or empty index).
  510. *
  511. * "Internal" = source and target are in the same file. Cross-file
  512. * edges aren't useful here — they don't tell us which file is the
  513. * functional center.
  514. *
  515. * Excludes test/spec files from candidacy via path-pattern. The agent's
  516. * typical question is "how does X work", not "how is X tested", so
  517. * boosting a test file's directory would be a misfire.
  518. */
  519. getDominantFile(): { filePath: string; edgeCount: number; nextEdgeCount: number } | null {
  520. if (!this.stmts.getDominantFile) {
  521. // Pull top 20 candidates; we then filter out test/generated files
  522. // in code (regex-grade matching that SQL LIKE can't express). The
  523. // generated-file filter is critical — without it, etcd's
  524. // `api/etcdserverpb/rpc.pb.go` (1916 in-file edges, generated
  525. // protobuf stub) outranks the real `server/etcdserver/server.go`
  526. // (470 edges) by 4×, and the boost would push the agent toward
  527. // generated code.
  528. this.stmts.getDominantFile = this.db.prepare(`
  529. SELECT n.file_path AS file_path, COUNT(*) AS edge_count
  530. FROM edges e
  531. JOIN nodes n ON e.source = n.id
  532. JOIN nodes m ON e.target = m.id
  533. WHERE n.file_path = m.file_path
  534. GROUP BY n.file_path
  535. ORDER BY edge_count DESC
  536. LIMIT 20
  537. `);
  538. }
  539. const rows = this.stmts.getDominantFile.all() as Array<{ file_path: string; edge_count: number }>;
  540. const filtered = rows.filter(r => !isLowValueFile(r.file_path));
  541. if (filtered.length === 0 || filtered[0]!.edge_count < 20) return null;
  542. return {
  543. filePath: filtered[0]!.file_path,
  544. edgeCount: filtered[0]!.edge_count,
  545. nextEdgeCount: filtered[1]?.edge_count ?? 0,
  546. };
  547. }
  548. /**
  549. * Find the file that holds the densest concentration of the project's
  550. * `route` nodes (framework-emitted: Express/Gin/Flask/Rails/Drupal/etc.).
  551. * Used by handleContext on small repos to inline the project's routing
  552. * config when the agent's query is about request flow — eliminating the
  553. * "Glob + Read routes.rb" pattern that beats codegraph on tiny realworld
  554. * template repos.
  555. *
  556. * Excludes test/generated files from candidacy. Returns null if there
  557. * are fewer than 3 non-test routes total, or if no file holds at least
  558. * 30% of them (diffuse routing → no single answer file).
  559. */
  560. getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null {
  561. if (!this.stmts.getTopRouteFile) {
  562. this.stmts.getTopRouteFile = this.db.prepare(`
  563. SELECT file_path, COUNT(*) AS cnt
  564. FROM nodes
  565. WHERE kind = 'route'
  566. GROUP BY file_path
  567. ORDER BY cnt DESC
  568. LIMIT 20
  569. `);
  570. }
  571. const rows = this.stmts.getTopRouteFile.all() as Array<{ file_path: string; cnt: number }>;
  572. const filtered = rows.filter(r => !isLowValueFile(r.file_path));
  573. if (filtered.length === 0) return null;
  574. const totalRoutes = filtered.reduce((sum, r) => sum + r.cnt, 0);
  575. const top = filtered[0]!;
  576. if (totalRoutes < 3 || top.cnt < 3) return null;
  577. if (top.cnt / totalRoutes < 0.30) return null;
  578. return { filePath: top.file_path, routeCount: top.cnt, totalRoutes };
  579. }
  580. /**
  581. * Build a URL → handler manifest from the index. Each route node's
  582. * `references` edge points at the function/method that handles the
  583. * request. We join them in one pass; the agent gets the canonical
  584. * routing answer ("POST /users/login → AuthController#login") without
  585. * having to parse the framework's route DSL itself.
  586. *
  587. * Also returns the file with the most handler endpoints — used as the
  588. * "top handler file" to inline source for, so the agent has both the
  589. * mapping AND the handler implementations.
  590. */
  591. getRoutingManifest(limit: number = 40): {
  592. entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>;
  593. topHandlerFile: string | null;
  594. topHandlerFileCount: number;
  595. totalRoutes: number;
  596. } | null {
  597. if (!this.stmts.getRoutingManifest) {
  598. // Edge kind varies across framework resolvers: Spring/Rails/
  599. // Laravel/Drupal emit `references`, Express emits `calls`. Accept
  600. // both — the semantic is the same (route → its handler).
  601. this.stmts.getRoutingManifest = this.db.prepare(`
  602. SELECT
  603. r.name AS url,
  604. h.name AS handler,
  605. h.file_path AS handler_file,
  606. h.start_line AS handler_line,
  607. h.kind AS handler_kind
  608. FROM nodes r
  609. JOIN edges e ON e.source = r.id
  610. JOIN nodes h ON e.target = h.id
  611. WHERE r.kind = 'route'
  612. AND e.kind IN ('references', 'calls')
  613. AND h.kind IN ('function', 'method', 'class')
  614. ORDER BY r.file_path, r.start_line
  615. LIMIT ?
  616. `);
  617. }
  618. const rows = this.stmts.getRoutingManifest.all(limit) as Array<{
  619. url: string; handler: string; handler_file: string; handler_line: number; handler_kind: string;
  620. }>;
  621. // Drop test/generated handlers — same hygiene as elsewhere.
  622. const filtered = rows.filter(r => !isLowValueFile(r.handler_file));
  623. if (filtered.length < 3) return null;
  624. // Identify the file holding the most handlers (the "primary handler file").
  625. const fileCounts = new Map<string, number>();
  626. for (const r of filtered) {
  627. fileCounts.set(r.handler_file, (fileCounts.get(r.handler_file) ?? 0) + 1);
  628. }
  629. let topHandlerFile: string | null = null;
  630. let topHandlerFileCount = 0;
  631. for (const [file, count] of fileCounts) {
  632. if (count > topHandlerFileCount) {
  633. topHandlerFile = file;
  634. topHandlerFileCount = count;
  635. }
  636. }
  637. return {
  638. entries: filtered.map(r => ({
  639. url: r.url,
  640. handler: r.handler,
  641. handlerFile: r.handler_file,
  642. handlerLine: r.handler_line,
  643. handlerKind: r.handler_kind,
  644. })),
  645. topHandlerFile,
  646. topHandlerFileCount,
  647. totalRoutes: filtered.length,
  648. };
  649. }
  650. /**
  651. * Get all nodes of a specific kind
  652. */
  653. getNodesByKind(kind: NodeKind): Node[] {
  654. if (!this.stmts.getNodesByKind) {
  655. this.stmts.getNodesByKind = this.db.prepare('SELECT * FROM nodes WHERE kind = ?');
  656. }
  657. const rows = this.stmts.getNodesByKind.all(kind) as NodeRow[];
  658. return rows.map(rowToNode);
  659. }
  660. /**
  661. * Stream every node of a kind one at a time (lazy) instead of materializing
  662. * them all like {@link getNodesByKind}. For unbounded kinds (`function`,
  663. * `method`) on a symbol-dense project the full array is gigabytes; the
  664. * dynamic-edge synthesizers only scan-and-filter, so they iterate to keep
  665. * memory O(1) in the node count rather than O(nodes) (#610).
  666. */
  667. *iterateNodesByKind(kind: NodeKind): IterableIterator<Node> {
  668. // Fresh statement per call (not a cached one): an iterator holds an open
  669. // cursor, so a shared statement would conflict across overlapping scans.
  670. const stmt = this.db.prepare('SELECT * FROM nodes WHERE kind = ?');
  671. for (const row of stmt.iterate(kind)) {
  672. yield rowToNode(row as NodeRow);
  673. }
  674. }
  675. /**
  676. * Get all nodes in the database
  677. */
  678. getAllNodes(): Node[] {
  679. const rows = this.db.prepare('SELECT * FROM nodes').all() as NodeRow[];
  680. return rows.map(rowToNode);
  681. }
  682. /**
  683. * Get nodes by exact name match (uses idx_nodes_name index)
  684. */
  685. getNodesByName(name: string): Node[] {
  686. if (!this.stmts.getNodesByName) {
  687. this.stmts.getNodesByName = this.db.prepare('SELECT * FROM nodes WHERE name = ?');
  688. }
  689. const rows = this.stmts.getNodesByName.all(name) as NodeRow[];
  690. return rows.map(rowToNode);
  691. }
  692. /**
  693. * Get nodes by exact qualified name match (uses idx_nodes_qualified_name index)
  694. */
  695. getNodesByQualifiedNameExact(qualifiedName: string): Node[] {
  696. if (!this.stmts.getNodesByQualifiedNameExact) {
  697. this.stmts.getNodesByQualifiedNameExact = this.db.prepare(
  698. 'SELECT * FROM nodes WHERE qualified_name = ?'
  699. );
  700. }
  701. const rows = this.stmts.getNodesByQualifiedNameExact.all(qualifiedName) as NodeRow[];
  702. return rows.map(rowToNode);
  703. }
  704. /**
  705. * Get nodes by lowercase name match (uses idx_nodes_lower_name expression index)
  706. */
  707. getNodesByLowerName(lowerName: string): Node[] {
  708. if (!this.stmts.getNodesByLowerName) {
  709. this.stmts.getNodesByLowerName = this.db.prepare(
  710. 'SELECT * FROM nodes WHERE lower(name) = ?'
  711. );
  712. }
  713. const rows = this.stmts.getNodesByLowerName.all(lowerName) as NodeRow[];
  714. return rows.map(rowToNode);
  715. }
  716. /**
  717. * Search nodes by name using FTS with fallback to LIKE for better matching
  718. *
  719. * Search strategy:
  720. * 1. Try FTS5 prefix match (query*) for word-start matching
  721. * 2. If no results, try LIKE for substring matching (e.g., "signIn" finds "signInWithGoogle")
  722. * 3. Score results based on match quality
  723. */
  724. searchNodes(query: string, options: SearchOptions = {}): SearchResult[] {
  725. const { limit = 100, offset = 0 } = options;
  726. // Parse field-qualified bits out of the raw query (kind:, lang:,
  727. // path:, name:). Anything not recognised stays in `text` and goes
  728. // to FTS unchanged. Filters compose with the SearchOptions arg —
  729. // both are applied (intersection-style).
  730. const parsed = parseQuery(query);
  731. const mergedKinds =
  732. parsed.kinds.length > 0
  733. ? Array.from(new Set([...(options.kinds ?? []), ...parsed.kinds]))
  734. : options.kinds;
  735. const mergedLanguages =
  736. parsed.languages.length > 0
  737. ? Array.from(new Set([...(options.languages ?? []), ...parsed.languages]))
  738. : options.languages;
  739. const pathFilters = parsed.pathFilters;
  740. const nameFilters = parsed.nameFilters;
  741. // The text portion drives FTS/LIKE; if all the user typed was
  742. // filters (`kind:function`), we still need *some* candidate set,
  743. // so synthesise an empty-text path that returns everything matching
  744. // the filters.
  745. const text = parsed.text;
  746. const kinds = mergedKinds;
  747. const languages = mergedLanguages;
  748. // First try FTS5 with prefix matching
  749. let results = text
  750. ? this.searchNodesFTS(text, { kinds, languages, limit, offset })
  751. // Over-fetch by 5× when running filter-only (no text). The
  752. // post-scoring path: + name: filters can be very selective, so
  753. // a smaller multiplier risks returning fewer than `limit`
  754. // results despite the DB having plenty of matches.
  755. : this.searchAllByFilters({ kinds, languages, limit: limit * 5 });
  756. // If no FTS results, try LIKE-based substring search
  757. if (results.length === 0 && text.length >= 2) {
  758. results = this.searchNodesLike(text, { kinds, languages, limit, offset });
  759. }
  760. // Final fuzzy fallback: scan all known names and keep those within
  761. // a tight Levenshtein distance. Only fires when both FTS and LIKE
  762. // returned nothing AND there's a text portion long enough to be
  763. // worth fuzzing (1-char queries would match too much).
  764. if (results.length === 0 && text.length >= 3) {
  765. results = this.searchNodesFuzzy(text, { kinds, languages, limit });
  766. }
  767. // Supplement: ensure exact name matches are always candidates.
  768. // BM25 can bury short exact-match names (e.g. "getBean") under hundreds of
  769. // compound names (e.g. "getBeanDescriptor") in large codebases,
  770. // pushing them past the FTS fetch limit before post-hoc scoring can help.
  771. // Use the max BM25 score as the base so the nameMatchBonus (exact=30 vs
  772. // prefix=20) actually differentiates them after rescoring.
  773. if (results.length > 0 && query) {
  774. const existingIds = new Set(results.map(r => r.node.id));
  775. const maxFtsScore = Math.max(...results.map(r => r.score));
  776. const terms = query.split(/\s+/).filter(t => t.length >= 2);
  777. for (const term of terms) {
  778. let sql = 'SELECT * FROM nodes WHERE name = ? COLLATE NOCASE';
  779. const params: (string | number)[] = [term];
  780. if (kinds && kinds.length > 0) {
  781. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  782. params.push(...kinds);
  783. }
  784. if (languages && languages.length > 0) {
  785. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  786. params.push(...languages);
  787. }
  788. sql += ' LIMIT 20';
  789. const rows = this.db.prepare(sql).all(...params) as NodeRow[];
  790. for (const row of rows) {
  791. if (!existingIds.has(row.id)) {
  792. results.push({ node: rowToNode(row), score: maxFtsScore });
  793. existingIds.add(row.id);
  794. }
  795. }
  796. }
  797. }
  798. // Apply multi-signal scoring
  799. if (results.length > 0 && (text || query)) {
  800. const scoringQuery = text || query;
  801. results = results.map(r => ({
  802. ...r,
  803. score: r.score
  804. + kindBonus(r.node.kind)
  805. + scorePathRelevance(r.node.filePath, scoringQuery, this.projectNameTokens)
  806. + nameMatchBonus(r.node.name, scoringQuery),
  807. }));
  808. results.sort((a, b) => b.score - a.score);
  809. // Trim to requested limit after rescoring
  810. if (results.length > limit) {
  811. results = results.slice(0, limit);
  812. }
  813. }
  814. // Apply path: + name: filters AFTER scoring. Scoring already uses
  815. // path/name as a soft signal; the explicit filters here are a hard
  816. // gate. Done last so the FTS limit fetched plenty of candidates to
  817. // narrow from.
  818. if (pathFilters.length > 0) {
  819. const lowered = pathFilters.map((p) => p.toLowerCase());
  820. results = results.filter((r) => {
  821. const fp = r.node.filePath.toLowerCase();
  822. return lowered.some((p) => fp.includes(p));
  823. });
  824. }
  825. if (nameFilters.length > 0) {
  826. const lowered = nameFilters.map((n) => n.toLowerCase());
  827. results = results.filter((r) => {
  828. const nm = r.node.name.toLowerCase();
  829. return lowered.some((n) => nm.includes(n));
  830. });
  831. }
  832. return results;
  833. }
  834. /**
  835. * Match-everything path used when the user supplied only field
  836. * filters (`kind:function lang:typescript`) with no text. Returns
  837. * candidates ordered by name; the caller's filter pass narrows to
  838. * what was asked for.
  839. */
  840. private searchAllByFilters(options: {
  841. kinds?: NodeKind[];
  842. languages?: Language[];
  843. limit: number;
  844. }): SearchResult[] {
  845. const { kinds, languages, limit } = options;
  846. let sql = 'SELECT * FROM nodes WHERE 1=1';
  847. const params: (string | number)[] = [];
  848. if (kinds && kinds.length > 0) {
  849. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  850. params.push(...kinds);
  851. }
  852. if (languages && languages.length > 0) {
  853. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  854. params.push(...languages);
  855. }
  856. sql += ' ORDER BY name LIMIT ?';
  857. params.push(limit);
  858. const rows = this.db.prepare(sql).all(...params) as NodeRow[];
  859. return rows.map((row) => ({ node: rowToNode(row), score: 1 }));
  860. }
  861. /**
  862. * Fuzzy fallback: when zero FTS/LIKE hits, try an edit-distance
  863. * sweep over the distinct symbol-name set. Caps `maxDist` at 2 so
  864. * `getUssr` finds `getUser` but `process` doesn't match `prosody`.
  865. * Bounded edit distance keeps each comparison cheap; the per-query
  866. * scan is O(distinct-name-count) which is far smaller than total
  867. * node count on any real codebase.
  868. */
  869. private searchNodesFuzzy(
  870. text: string,
  871. options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
  872. ): SearchResult[] {
  873. const { kinds, languages, limit } = options;
  874. const lowered = text.toLowerCase();
  875. const maxDist = lowered.length <= 4 ? 1 : 2;
  876. // Pull the distinct name list once. The set is cached on QueryBuilder
  877. // by getAllNodeNames(); even on a 200k-node project the distinct
  878. // name set is typically O(10k) because most names repeat. The
  879. // candidate-cap below bounds memory regardless.
  880. const allNames = this.getAllNodeNames();
  881. const candidates: Array<{ name: string; dist: number }> = [];
  882. for (const name of allNames) {
  883. const dist = boundedEditDistance(name.toLowerCase(), lowered, maxDist);
  884. if (dist <= maxDist) candidates.push({ name, dist });
  885. }
  886. candidates.sort((a, b) => a.dist - b.dist);
  887. // Cap the per-name follow-up queries. Each survivor triggers a
  888. // separate `SELECT * FROM nodes WHERE name = ?`; without this cap
  889. // a project with many similar names (`getUser1`, `getUser2`...)
  890. // could fan out far beyond `limit` queries before the inner-loop
  891. // limit kicks in.
  892. const FUZZY_FOLLOWUP_CAP = Math.max(limit * 2, 50);
  893. const cappedCandidates = candidates.slice(0, FUZZY_FOLLOWUP_CAP);
  894. const results: SearchResult[] = [];
  895. const seen = new Set<string>();
  896. for (const c of cappedCandidates) {
  897. if (results.length >= limit) break;
  898. let sql = 'SELECT * FROM nodes WHERE name = ?';
  899. const params: (string | number)[] = [c.name];
  900. if (kinds && kinds.length > 0) {
  901. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  902. params.push(...kinds);
  903. }
  904. if (languages && languages.length > 0) {
  905. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  906. params.push(...languages);
  907. }
  908. sql += ' LIMIT 5';
  909. const rows = this.db.prepare(sql).all(...params) as NodeRow[];
  910. for (const row of rows) {
  911. if (seen.has(row.id)) continue;
  912. seen.add(row.id);
  913. // Lower the score for each edit step away from the query so
  914. // exact-match fallbacks (dist 0) outrank dist-2 typos.
  915. results.push({ node: rowToNode(row), score: 1 / (1 + c.dist) });
  916. if (results.length >= limit) break;
  917. }
  918. }
  919. return results;
  920. }
  921. /**
  922. * FTS5 search with prefix matching
  923. */
  924. private searchNodesFTS(query: string, options: SearchOptions): SearchResult[] {
  925. const { kinds, languages, limit = 100, offset = 0 } = options;
  926. // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate")
  927. // Escape special FTS5 characters and add prefix wildcard.
  928. //
  929. // `::` is a qualifier separator in Rust/C++/Ruby, not a token char,
  930. // so treat it as whitespace before the strip step. Otherwise queries
  931. // like `stage_apply::run` collapse to `stage_applyrun` (the colons
  932. // are stripped without splitting) and find nothing. See #173.
  933. const ftsQuery = query
  934. .replace(/::/g, ' ') // Rust/C++/Ruby qualifier separator
  935. .replace(/['"*():^]/g, '') // Remove FTS5 special chars
  936. .split(/\s+/)
  937. .filter(term => term.length > 0)
  938. // Strip FTS5 boolean operators to prevent query manipulation
  939. .filter(term => !/^(AND|OR|NOT|NEAR)$/i.test(term))
  940. .map(term => `"${term}"*`) // Prefix match each term
  941. .join(' OR ');
  942. if (!ftsQuery) {
  943. return [];
  944. }
  945. // BM25 column weights: id=0, name=20, qualified_name=5, docstring=1, signature=2
  946. // Heavy name weight ensures exact/prefix name matches rank above incidental
  947. // mentions in long docstrings or qualified names of nested symbols.
  948. // Fetch 5x requested limit so post-hoc rescoring (kindBonus, pathRelevance,
  949. // nameMatchBonus) can promote results that BM25 alone undervalues.
  950. const ftsLimit = Math.max(limit * 5, 100);
  951. let sql = `
  952. SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2) as score
  953. FROM nodes_fts
  954. JOIN nodes ON nodes_fts.id = nodes.id
  955. WHERE nodes_fts MATCH ?
  956. `;
  957. const params: (string | number)[] = [ftsQuery];
  958. if (kinds && kinds.length > 0) {
  959. sql += ` AND nodes.kind IN (${kinds.map(() => '?').join(',')})`;
  960. params.push(...kinds);
  961. }
  962. if (languages && languages.length > 0) {
  963. sql += ` AND nodes.language IN (${languages.map(() => '?').join(',')})`;
  964. params.push(...languages);
  965. }
  966. sql += ' ORDER BY score LIMIT ? OFFSET ?';
  967. params.push(ftsLimit, offset);
  968. try {
  969. const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  970. return rows.map((row) => ({
  971. node: rowToNode(row),
  972. score: Math.abs(row.score), // bm25 returns negative scores
  973. }));
  974. } catch {
  975. // FTS query failed, return empty
  976. return [];
  977. }
  978. }
  979. /**
  980. * LIKE-based substring search for cases where FTS doesn't match
  981. * Useful for camelCase matching (e.g., "signIn" finds "signInWithGoogle")
  982. */
  983. private searchNodesLike(query: string, options: SearchOptions): SearchResult[] {
  984. const { kinds, languages, limit = 100, offset = 0 } = options;
  985. let sql = `
  986. SELECT nodes.*,
  987. CASE
  988. WHEN name = ? THEN 1.0
  989. WHEN name LIKE ? THEN 0.9
  990. WHEN name LIKE ? THEN 0.8
  991. WHEN qualified_name LIKE ? THEN 0.7
  992. ELSE 0.5
  993. END as score
  994. FROM nodes
  995. WHERE (
  996. name LIKE ? OR
  997. qualified_name LIKE ? OR
  998. name LIKE ?
  999. )
  1000. `;
  1001. // Pattern variants for better matching
  1002. const exactMatch = query;
  1003. const startsWith = `${query}%`;
  1004. const contains = `%${query}%`;
  1005. const params: (string | number)[] = [
  1006. exactMatch, // Exact match score
  1007. startsWith, // Starts with score
  1008. contains, // Contains score
  1009. contains, // Qualified name score
  1010. contains, // WHERE: name contains
  1011. contains, // WHERE: qualified_name contains
  1012. startsWith, // WHERE: name starts with
  1013. ];
  1014. if (kinds && kinds.length > 0) {
  1015. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1016. params.push(...kinds);
  1017. }
  1018. if (languages && languages.length > 0) {
  1019. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  1020. params.push(...languages);
  1021. }
  1022. sql += ' ORDER BY score DESC, length(name) ASC LIMIT ? OFFSET ?';
  1023. params.push(limit, offset);
  1024. const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  1025. return rows.map((row) => ({
  1026. node: rowToNode(row),
  1027. score: row.score,
  1028. }));
  1029. }
  1030. /**
  1031. * Find nodes by exact name match
  1032. *
  1033. * Used for hybrid search - looks up symbols by exact name or case-insensitive match.
  1034. * Returns high-confidence matches for known symbol names extracted from query.
  1035. *
  1036. * @param names - Array of symbol names to look up
  1037. * @param options - Search options (kinds, languages, limit)
  1038. * @returns SearchResult array with exact matches scored at 1.0
  1039. */
  1040. findNodesByExactName(names: string[], options: SearchOptions = {}): SearchResult[] {
  1041. if (names.length === 0) return [];
  1042. const { kinds, languages, limit = 50 } = options;
  1043. // Two-pass approach to handle common names (e.g., "run" has 40+ matches):
  1044. // Pass 1: Find which files contain distinctive (rare) symbols from the query.
  1045. // Pass 2: Query each name, boosting results that co-locate with distinctive symbols.
  1046. // Pass 1: Find files containing each queried name, identify distinctive names
  1047. const nameToFiles = new Map<string, Set<string>>();
  1048. for (const name of names) {
  1049. let sql = 'SELECT DISTINCT file_path FROM nodes WHERE name COLLATE NOCASE = ?';
  1050. const params: (string | number)[] = [name];
  1051. if (kinds && kinds.length > 0) {
  1052. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1053. params.push(...kinds);
  1054. }
  1055. sql += ' LIMIT 100';
  1056. const rows = this.db.prepare(sql).all(...params) as { file_path: string }[];
  1057. nameToFiles.set(name.toLowerCase(), new Set(rows.map(r => r.file_path)));
  1058. }
  1059. // Distinctive names are those with fewer than 10 file matches (e.g., "scrapeLoop" = 1 file)
  1060. const distinctiveFiles = new Set<string>();
  1061. for (const [, files] of nameToFiles) {
  1062. if (files.size > 0 && files.size < 10) {
  1063. for (const f of files) distinctiveFiles.add(f);
  1064. }
  1065. }
  1066. // Pass 2: Query each name with per-name limit, scoring by co-location
  1067. const perNameLimit = Math.max(8, Math.ceil(limit / names.length));
  1068. const allResults: SearchResult[] = [];
  1069. const seenIds = new Set<string>();
  1070. for (const name of names) {
  1071. let sql = `
  1072. SELECT nodes.*, 1.0 as score
  1073. FROM nodes
  1074. WHERE name COLLATE NOCASE = ?
  1075. `;
  1076. const params: (string | number)[] = [name];
  1077. if (kinds && kinds.length > 0) {
  1078. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1079. params.push(...kinds);
  1080. }
  1081. if (languages && languages.length > 0) {
  1082. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  1083. params.push(...languages);
  1084. }
  1085. // Fetch enough to find co-located results among common names
  1086. sql += ' LIMIT ?';
  1087. params.push(Math.max(perNameLimit * 3, 50));
  1088. const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  1089. const nameResults: SearchResult[] = [];
  1090. for (const row of rows) {
  1091. const node = rowToNode(row);
  1092. if (seenIds.has(node.id)) continue;
  1093. // Boost results in files that also contain distinctive symbols
  1094. const coLocationBoost = distinctiveFiles.has(node.filePath) ? 20 : 0;
  1095. nameResults.push({ node, score: row.score + coLocationBoost });
  1096. }
  1097. // Sort by score (co-located first), take per-name limit
  1098. nameResults.sort((a, b) => b.score - a.score);
  1099. for (const r of nameResults.slice(0, perNameLimit)) {
  1100. seenIds.add(r.node.id);
  1101. allResults.push(r);
  1102. }
  1103. }
  1104. // Sort all results by score so co-located results bubble up
  1105. allResults.sort((a, b) => b.score - a.score);
  1106. return allResults.slice(0, limit);
  1107. }
  1108. /**
  1109. * Find nodes whose name contains a substring (LIKE-based).
  1110. * Useful for CamelCase-part matching where FTS fails because
  1111. * e.g. "TransportSearchAction" is one FTS token, not matchable by "Search"*.
  1112. *
  1113. * Results are ordered by name length (shorter = more likely to be the core type).
  1114. */
  1115. findNodesByNameSubstring(
  1116. substring: string,
  1117. options: SearchOptions & { excludePrefix?: boolean } = {}
  1118. ): SearchResult[] {
  1119. const { kinds, languages, limit = 30, excludePrefix } = options;
  1120. let sql = `
  1121. SELECT nodes.*, 1.0 as score
  1122. FROM nodes
  1123. WHERE name LIKE ?
  1124. `;
  1125. const params: (string | number)[] = [`%${substring}%`];
  1126. // Exclude prefix matches (handled by FTS-based prefix search in Step 2b)
  1127. if (excludePrefix) {
  1128. sql += ` AND name NOT LIKE ?`;
  1129. params.push(`${substring}%`);
  1130. }
  1131. if (kinds && kinds.length > 0) {
  1132. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1133. params.push(...kinds);
  1134. }
  1135. if (languages && languages.length > 0) {
  1136. sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
  1137. params.push(...languages);
  1138. }
  1139. sql += ' ORDER BY length(name) ASC LIMIT ?';
  1140. params.push(limit);
  1141. const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  1142. return rows.map((row) => ({
  1143. node: rowToNode(row),
  1144. score: row.score,
  1145. }));
  1146. }
  1147. // ===========================================================================
  1148. // Edge Operations
  1149. // ===========================================================================
  1150. /**
  1151. * Insert a new edge
  1152. */
  1153. insertEdge(edge: Edge): void {
  1154. if (!this.stmts.insertEdge) {
  1155. this.stmts.insertEdge = this.db.prepare(`
  1156. INSERT OR IGNORE INTO edges (source, target, kind, metadata, line, col, provenance)
  1157. VALUES (@source, @target, @kind, @metadata, @line, @col, @provenance)
  1158. `);
  1159. }
  1160. this.stmts.insertEdge.run({
  1161. source: edge.source,
  1162. target: edge.target,
  1163. kind: edge.kind,
  1164. metadata: edge.metadata ? JSON.stringify(edge.metadata) : null,
  1165. line: edge.line ?? null,
  1166. col: edge.column ?? null,
  1167. provenance: edge.provenance ?? null,
  1168. });
  1169. }
  1170. /**
  1171. * Insert multiple edges in a transaction
  1172. */
  1173. insertEdges(edges: Edge[]): void {
  1174. if (edges.length === 0) return;
  1175. this.db.transaction(() => {
  1176. const endpointIds = new Set<string>();
  1177. for (const edge of edges) {
  1178. endpointIds.add(edge.source);
  1179. endpointIds.add(edge.target);
  1180. }
  1181. const existingNodeIds = this.getExistingNodeIds([...endpointIds]);
  1182. for (const edge of edges) {
  1183. if (!existingNodeIds.has(edge.source) || !existingNodeIds.has(edge.target)) {
  1184. continue;
  1185. }
  1186. this.insertEdge(edge);
  1187. }
  1188. })();
  1189. }
  1190. /**
  1191. * Delete all edges from a source node
  1192. */
  1193. deleteEdgesBySource(sourceId: string): void {
  1194. if (!this.stmts.deleteEdgesBySource) {
  1195. this.stmts.deleteEdgesBySource = this.db.prepare('DELETE FROM edges WHERE source = ?');
  1196. }
  1197. this.stmts.deleteEdgesBySource.run(sourceId);
  1198. }
  1199. /**
  1200. * Get outgoing edges from a node
  1201. */
  1202. getOutgoingEdges(sourceId: string, kinds?: EdgeKind[], provenance?: string): Edge[] {
  1203. if ((kinds && kinds.length > 0) || provenance) {
  1204. let sql = 'SELECT * FROM edges WHERE source = ?';
  1205. const params: (string | number)[] = [sourceId];
  1206. if (kinds && kinds.length > 0) {
  1207. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1208. params.push(...kinds);
  1209. }
  1210. if (provenance) {
  1211. sql += ' AND provenance = ?';
  1212. params.push(provenance);
  1213. }
  1214. const rows = this.db.prepare(sql).all(...params) as EdgeRow[];
  1215. return rows.map(rowToEdge);
  1216. }
  1217. if (!this.stmts.getEdgesBySource) {
  1218. this.stmts.getEdgesBySource = this.db.prepare('SELECT * FROM edges WHERE source = ?');
  1219. }
  1220. const rows = this.stmts.getEdgesBySource.all(sourceId) as EdgeRow[];
  1221. return rows.map(rowToEdge);
  1222. }
  1223. /**
  1224. * Get incoming edges to a node
  1225. */
  1226. getIncomingEdges(targetId: string, kinds?: EdgeKind[]): Edge[] {
  1227. if (kinds && kinds.length > 0) {
  1228. const sql = `SELECT * FROM edges WHERE target = ? AND kind IN (${kinds.map(() => '?').join(',')})`;
  1229. const rows = this.db.prepare(sql).all(targetId, ...kinds) as EdgeRow[];
  1230. return rows.map(rowToEdge);
  1231. }
  1232. if (!this.stmts.getEdgesByTarget) {
  1233. this.stmts.getEdgesByTarget = this.db.prepare('SELECT * FROM edges WHERE target = ?');
  1234. }
  1235. const rows = this.stmts.getEdgesByTarget.all(targetId) as EdgeRow[];
  1236. return rows.map(rowToEdge);
  1237. }
  1238. /**
  1239. * Find all edges where both source and target are in the given node set.
  1240. * Useful for recovering inter-node connectivity after BFS.
  1241. */
  1242. findEdgesBetweenNodes(nodeIds: string[], kinds?: EdgeKind[]): Edge[] {
  1243. if (nodeIds.length === 0) return [];
  1244. const idsJson = JSON.stringify(nodeIds);
  1245. let sql = `SELECT * FROM edges WHERE source IN (SELECT value FROM json_each(?)) AND target IN (SELECT value FROM json_each(?))`;
  1246. const params: string[] = [idsJson, idsJson];
  1247. if (kinds && kinds.length > 0) {
  1248. sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
  1249. params.push(...kinds);
  1250. }
  1251. const rows = this.db.prepare(sql).all(...params) as EdgeRow[];
  1252. return rows.map(rowToEdge);
  1253. }
  1254. /**
  1255. * Distinct file paths that DEPEND ON `filePath`: every file containing a
  1256. * symbol with a cross-file edge (any kind except `contains`) into a symbol
  1257. * of this file. This is the file-level projection of the symbol dependency
  1258. * graph and the basis for blast-radius / `affected` test selection.
  1259. *
  1260. * It deliberately does NOT restrict to `imports` edges. In this graph an
  1261. * `imports` edge connects a file to its own local import declarations
  1262. * (it is always same-file), so an imports-only lookup returns zero
  1263. * cross-file dependents for every file. The real cross-file dependency
  1264. * signal is the resolved call/reference graph — calls, references,
  1265. * instantiates, extends, implements, overrides, type_of, returns,
  1266. * decorates — exactly what {@link GraphTraverser.getImpactRadius} traverses.
  1267. * `contains` is excluded: a parent containing a symbol does not *depend* on
  1268. * it. One indexed query (idx_nodes_file_path + idx_edges_target_kind).
  1269. */
  1270. getDependentFilePaths(filePath: string): string[] {
  1271. const sql = `SELECT DISTINCT src.file_path AS fp
  1272. FROM edges e
  1273. JOIN nodes tgt ON tgt.id = e.target
  1274. JOIN nodes src ON src.id = e.source
  1275. WHERE tgt.file_path = ?
  1276. AND e.kind != 'contains'
  1277. AND src.file_path != ?`;
  1278. const rows = this.db.prepare(sql).all(filePath, filePath) as Array<{ fp: string }>;
  1279. return rows.map((r) => r.fp);
  1280. }
  1281. /**
  1282. * Distinct file paths that `filePath` DEPENDS ON — the inverse of
  1283. * {@link getDependentFilePaths}: every file containing a symbol that a
  1284. * symbol of this file has a cross-file edge into. Same edge-kind rules
  1285. * (all kinds except `contains`); same reason imports-only is insufficient.
  1286. */
  1287. getDependencyFilePaths(filePath: string): string[] {
  1288. const sql = `SELECT DISTINCT tgt.file_path AS fp
  1289. FROM edges e
  1290. JOIN nodes src ON src.id = e.source
  1291. JOIN nodes tgt ON tgt.id = e.target
  1292. WHERE src.file_path = ?
  1293. AND e.kind != 'contains'
  1294. AND tgt.file_path != ?`;
  1295. const rows = this.db.prepare(sql).all(filePath, filePath) as Array<{ fp: string }>;
  1296. return rows.map((r) => r.fp);
  1297. }
  1298. // ===========================================================================
  1299. // File Operations
  1300. // ===========================================================================
  1301. /**
  1302. * Insert or update a file record
  1303. */
  1304. upsertFile(file: FileRecord): void {
  1305. if (!this.stmts.upsertFile) {
  1306. this.stmts.upsertFile = this.db.prepare(`
  1307. INSERT INTO files (path, content_hash, language, size, modified_at, indexed_at, node_count, errors)
  1308. VALUES (@path, @contentHash, @language, @size, @modifiedAt, @indexedAt, @nodeCount, @errors)
  1309. ON CONFLICT(path) DO UPDATE SET
  1310. content_hash = @contentHash,
  1311. language = @language,
  1312. size = @size,
  1313. modified_at = @modifiedAt,
  1314. indexed_at = @indexedAt,
  1315. node_count = @nodeCount,
  1316. errors = @errors
  1317. `);
  1318. }
  1319. this.stmts.upsertFile.run({
  1320. path: file.path,
  1321. contentHash: file.contentHash,
  1322. language: file.language,
  1323. size: file.size,
  1324. modifiedAt: file.modifiedAt,
  1325. indexedAt: file.indexedAt,
  1326. nodeCount: file.nodeCount,
  1327. errors: file.errors ? JSON.stringify(file.errors) : null,
  1328. });
  1329. }
  1330. /**
  1331. * Delete a file record and its nodes
  1332. */
  1333. deleteFile(filePath: string): void {
  1334. this.db.transaction(() => {
  1335. this.deleteNodesByFile(filePath);
  1336. if (!this.stmts.deleteFile) {
  1337. this.stmts.deleteFile = this.db.prepare('DELETE FROM files WHERE path = ?');
  1338. }
  1339. this.stmts.deleteFile.run(filePath);
  1340. })();
  1341. }
  1342. /**
  1343. * Get a file record by path
  1344. */
  1345. getFileByPath(filePath: string): FileRecord | null {
  1346. if (!this.stmts.getFileByPath) {
  1347. this.stmts.getFileByPath = this.db.prepare('SELECT * FROM files WHERE path = ?');
  1348. }
  1349. const row = this.stmts.getFileByPath.get(filePath) as FileRow | undefined;
  1350. return row ? rowToFileRecord(row) : null;
  1351. }
  1352. /**
  1353. * Get all tracked files
  1354. */
  1355. getAllFiles(): FileRecord[] {
  1356. if (!this.stmts.getAllFiles) {
  1357. this.stmts.getAllFiles = this.db.prepare('SELECT * FROM files ORDER BY path');
  1358. }
  1359. const rows = this.stmts.getAllFiles.all() as FileRow[];
  1360. return rows.map(rowToFileRecord);
  1361. }
  1362. /**
  1363. * Most recent index timestamp (ms since epoch) across all tracked files, or
  1364. * null when nothing is indexed yet. One indexed aggregate, no per-row scan. (#329)
  1365. */
  1366. getLastIndexedAt(): number | null {
  1367. const row = this.db
  1368. .prepare('SELECT MAX(indexed_at) AS last FROM files')
  1369. .get() as { last: number | null } | undefined;
  1370. return row?.last ?? null;
  1371. }
  1372. /**
  1373. * Get files that need re-indexing (hash changed)
  1374. */
  1375. getStaleFiles(currentHashes: Map<string, string>): FileRecord[] {
  1376. const files = this.getAllFiles();
  1377. return files.filter((f) => {
  1378. const currentHash = currentHashes.get(f.path);
  1379. return currentHash && currentHash !== f.contentHash;
  1380. });
  1381. }
  1382. // ===========================================================================
  1383. // Unresolved References
  1384. // ===========================================================================
  1385. /**
  1386. * Insert an unresolved reference
  1387. */
  1388. insertUnresolvedRef(ref: UnresolvedReference): void {
  1389. if (!this.stmts.insertUnresolved) {
  1390. this.stmts.insertUnresolved = this.db.prepare(`
  1391. INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language)
  1392. VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates, @filePath, @language)
  1393. `);
  1394. }
  1395. this.stmts.insertUnresolved.run({
  1396. fromNodeId: ref.fromNodeId,
  1397. referenceName: ref.referenceName,
  1398. referenceKind: ref.referenceKind,
  1399. line: ref.line,
  1400. col: ref.column,
  1401. candidates: ref.candidates ? JSON.stringify(ref.candidates) : null,
  1402. filePath: ref.filePath ?? '',
  1403. language: ref.language ?? 'unknown',
  1404. });
  1405. }
  1406. /**
  1407. * Insert multiple unresolved references in a transaction
  1408. */
  1409. insertUnresolvedRefsBatch(refs: UnresolvedReference[]): void {
  1410. if (refs.length === 0) return;
  1411. const insert = this.db.transaction(() => {
  1412. for (const ref of refs) {
  1413. this.insertUnresolvedRef(ref);
  1414. }
  1415. });
  1416. insert();
  1417. }
  1418. /**
  1419. * Delete unresolved references from a node
  1420. */
  1421. deleteUnresolvedByNode(nodeId: string): void {
  1422. if (!this.stmts.deleteUnresolvedByNode) {
  1423. this.stmts.deleteUnresolvedByNode = this.db.prepare(
  1424. 'DELETE FROM unresolved_refs WHERE from_node_id = ?'
  1425. );
  1426. }
  1427. this.stmts.deleteUnresolvedByNode.run(nodeId);
  1428. }
  1429. /**
  1430. * Get unresolved references by name (for resolution)
  1431. */
  1432. getUnresolvedByName(name: string): UnresolvedReference[] {
  1433. if (!this.stmts.getUnresolvedByName) {
  1434. this.stmts.getUnresolvedByName = this.db.prepare(
  1435. 'SELECT * FROM unresolved_refs WHERE reference_name = ?'
  1436. );
  1437. }
  1438. const rows = this.stmts.getUnresolvedByName.all(name) as UnresolvedRefRow[];
  1439. return rows.map((row) => ({
  1440. fromNodeId: row.from_node_id,
  1441. referenceName: row.reference_name,
  1442. referenceKind: row.reference_kind as EdgeKind,
  1443. line: row.line,
  1444. column: row.col,
  1445. candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
  1446. filePath: row.file_path,
  1447. language: row.language as Language,
  1448. }));
  1449. }
  1450. /**
  1451. * Get all unresolved references
  1452. */
  1453. getUnresolvedReferences(): UnresolvedReference[] {
  1454. const rows = this.db.prepare('SELECT * FROM unresolved_refs').all() as UnresolvedRefRow[];
  1455. return rows.map((row) => ({
  1456. fromNodeId: row.from_node_id,
  1457. referenceName: row.reference_name,
  1458. referenceKind: row.reference_kind as EdgeKind,
  1459. line: row.line,
  1460. column: row.col,
  1461. candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
  1462. filePath: row.file_path,
  1463. language: row.language as Language,
  1464. }));
  1465. }
  1466. /**
  1467. * Get the count of unresolved references without loading them into memory
  1468. */
  1469. getUnresolvedReferencesCount(): number {
  1470. if (!this.stmts.getUnresolvedCount) {
  1471. this.stmts.getUnresolvedCount = this.db.prepare(
  1472. 'SELECT COUNT(*) as count FROM unresolved_refs'
  1473. );
  1474. }
  1475. const row = this.stmts.getUnresolvedCount.get() as { count: number };
  1476. return row.count;
  1477. }
  1478. /**
  1479. * Get a batch of unresolved references using LIMIT/OFFSET pagination.
  1480. * Used to process references in bounded memory chunks.
  1481. */
  1482. getUnresolvedReferencesBatch(offset: number, limit: number): UnresolvedReference[] {
  1483. if (!this.stmts.getUnresolvedBatch) {
  1484. this.stmts.getUnresolvedBatch = this.db.prepare(
  1485. 'SELECT * FROM unresolved_refs LIMIT ? OFFSET ?'
  1486. );
  1487. }
  1488. const rows = this.stmts.getUnresolvedBatch.all(limit, offset) as UnresolvedRefRow[];
  1489. return rows.map((row) => ({
  1490. fromNodeId: row.from_node_id,
  1491. referenceName: row.reference_name,
  1492. referenceKind: row.reference_kind as EdgeKind,
  1493. line: row.line,
  1494. column: row.col,
  1495. candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
  1496. filePath: row.file_path,
  1497. language: row.language as Language,
  1498. }));
  1499. }
  1500. /**
  1501. * Get all tracked file paths (lightweight — no full FileRecord objects)
  1502. */
  1503. getAllFilePaths(): string[] {
  1504. if (!this.stmts.getAllFilePaths) {
  1505. this.stmts.getAllFilePaths = this.db.prepare('SELECT path FROM files ORDER BY path');
  1506. }
  1507. const rows = this.stmts.getAllFilePaths.all() as Array<{ path: string }>;
  1508. return rows.map((r) => r.path);
  1509. }
  1510. /**
  1511. * Get all distinct node names (lightweight — just name strings for pre-filtering)
  1512. */
  1513. getAllNodeNames(): string[] {
  1514. if (!this.stmts.getAllNodeNames) {
  1515. this.stmts.getAllNodeNames = this.db.prepare('SELECT DISTINCT name FROM nodes');
  1516. }
  1517. const rows = this.stmts.getAllNodeNames.all() as Array<{ name: string }>;
  1518. return rows.map((r) => r.name);
  1519. }
  1520. /**
  1521. * Get unresolved references scoped to specific file paths.
  1522. * Uses the idx_unresolved_file_path index for efficient lookup.
  1523. */
  1524. getUnresolvedReferencesByFiles(filePaths: string[]): UnresolvedReference[] {
  1525. if (filePaths.length === 0) return [];
  1526. // Chunk under SQLite's parameter limit: the first sync of a very large repo
  1527. // passes every changed file here, which an unbounded `IN (...)` would bind
  1528. // as one parameter each — exceeding MAX_VARIABLE_NUMBER and aborting with
  1529. // "too many SQL variables". (#540)
  1530. const rows: UnresolvedRefRow[] = [];
  1531. for (let i = 0; i < filePaths.length; i += SQLITE_PARAM_CHUNK_SIZE) {
  1532. const chunk = filePaths.slice(i, i + SQLITE_PARAM_CHUNK_SIZE);
  1533. const placeholders = chunk.map(() => '?').join(',');
  1534. const chunkRows = this.db
  1535. .prepare(`SELECT * FROM unresolved_refs WHERE file_path IN (${placeholders})`)
  1536. .all(...chunk) as UnresolvedRefRow[];
  1537. rows.push(...chunkRows);
  1538. }
  1539. return rows.map((row) => ({
  1540. fromNodeId: row.from_node_id,
  1541. referenceName: row.reference_name,
  1542. referenceKind: row.reference_kind as EdgeKind,
  1543. line: row.line,
  1544. column: row.col,
  1545. candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
  1546. filePath: row.file_path,
  1547. language: row.language as Language,
  1548. }));
  1549. }
  1550. /**
  1551. * Delete all unresolved references (after resolution)
  1552. */
  1553. clearUnresolvedReferences(): void {
  1554. this.db.exec('DELETE FROM unresolved_refs');
  1555. }
  1556. /**
  1557. * Delete resolved references by their IDs
  1558. */
  1559. deleteResolvedReferences(fromNodeIds: string[]): void {
  1560. if (fromNodeIds.length === 0) return;
  1561. const placeholders = fromNodeIds.map(() => '?').join(',');
  1562. this.db.prepare(`DELETE FROM unresolved_refs WHERE from_node_id IN (${placeholders})`).run(...fromNodeIds);
  1563. }
  1564. /**
  1565. * Delete specific resolved references by (fromNodeId, referenceName, referenceKind) tuples.
  1566. * More precise than deleteResolvedReferences — only removes refs that were actually resolved.
  1567. */
  1568. deleteSpecificResolvedReferences(refs: Array<{ fromNodeId: string; referenceName: string; referenceKind: string }>): void {
  1569. if (refs.length === 0) return;
  1570. const stmt = this.db.prepare(
  1571. 'DELETE FROM unresolved_refs WHERE from_node_id = ? AND reference_name = ? AND reference_kind = ?'
  1572. );
  1573. const deleteMany = this.db.transaction((items: typeof refs) => {
  1574. for (const ref of items) {
  1575. stmt.run(ref.fromNodeId, ref.referenceName, ref.referenceKind);
  1576. }
  1577. });
  1578. deleteMany(refs);
  1579. }
  1580. // ===========================================================================
  1581. // Statistics
  1582. // ===========================================================================
  1583. /**
  1584. * Lightweight (nodes, edges) count snapshot. Used around an index/sync
  1585. * run to compute true additions across extraction + resolution +
  1586. * synthesis — the per-phase counter in the orchestrator only sees
  1587. * extraction's contribution, which is why the CLI summary under-reported
  1588. * the edge count (resolution + synthesizer edges were invisible).
  1589. */
  1590. getNodeAndEdgeCount(): { nodes: number; edges: number } {
  1591. return this.db
  1592. .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS nodes, (SELECT COUNT(*) FROM edges) AS edges')
  1593. .get() as { nodes: number; edges: number };
  1594. }
  1595. /**
  1596. * Get graph statistics
  1597. */
  1598. getStats(): GraphStats {
  1599. // Single query for all three aggregate counts
  1600. const counts = this.db.prepare(`
  1601. SELECT
  1602. (SELECT COUNT(*) FROM nodes) AS node_count,
  1603. (SELECT COUNT(*) FROM edges) AS edge_count,
  1604. (SELECT COUNT(*) FROM files) AS file_count
  1605. `).get() as { node_count: number; edge_count: number; file_count: number };
  1606. const nodesByKind = {} as Record<NodeKind, number>;
  1607. const nodeKindRows = this.db
  1608. .prepare('SELECT kind, COUNT(*) as count FROM nodes GROUP BY kind')
  1609. .all() as Array<{ kind: string; count: number }>;
  1610. for (const row of nodeKindRows) {
  1611. nodesByKind[row.kind as NodeKind] = row.count;
  1612. }
  1613. const edgesByKind = {} as Record<EdgeKind, number>;
  1614. const edgeKindRows = this.db
  1615. .prepare('SELECT kind, COUNT(*) as count FROM edges GROUP BY kind')
  1616. .all() as Array<{ kind: string; count: number }>;
  1617. for (const row of edgeKindRows) {
  1618. edgesByKind[row.kind as EdgeKind] = row.count;
  1619. }
  1620. const filesByLanguage = {} as Record<Language, number>;
  1621. const languageRows = this.db
  1622. .prepare('SELECT language, COUNT(*) as count FROM files GROUP BY language')
  1623. .all() as Array<{ language: string; count: number }>;
  1624. for (const row of languageRows) {
  1625. filesByLanguage[row.language as Language] = row.count;
  1626. }
  1627. return {
  1628. nodeCount: counts.node_count,
  1629. edgeCount: counts.edge_count,
  1630. fileCount: counts.file_count,
  1631. nodesByKind,
  1632. edgesByKind,
  1633. filesByLanguage,
  1634. dbSizeBytes: 0, // Set by caller using DatabaseConnection.getSize()
  1635. lastUpdated: Date.now(),
  1636. };
  1637. }
  1638. // ===========================================================================
  1639. // Project Metadata
  1640. // ===========================================================================
  1641. /**
  1642. * Get a metadata value by key
  1643. */
  1644. getMetadata(key: string): string | null {
  1645. const row = this.db.prepare('SELECT value FROM project_metadata WHERE key = ?').get(key) as { value: string } | undefined;
  1646. return row?.value ?? null;
  1647. }
  1648. /**
  1649. * Set a metadata key-value pair (upsert)
  1650. */
  1651. setMetadata(key: string, value: string): void {
  1652. this.db.prepare(
  1653. 'INSERT INTO project_metadata (key, value, updated_at) VALUES (?, ?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at'
  1654. ).run(key, value, Date.now());
  1655. }
  1656. /**
  1657. * Get all metadata as a key-value record
  1658. */
  1659. getAllMetadata(): Record<string, string> {
  1660. const rows = this.db.prepare('SELECT key, value FROM project_metadata').all() as { key: string; value: string }[];
  1661. const result: Record<string, string> = {};
  1662. for (const row of rows) {
  1663. result[row.key] = row.value;
  1664. }
  1665. return result;
  1666. }
  1667. /**
  1668. * Clear all data from the database
  1669. */
  1670. clear(): void {
  1671. this.nodeCache.clear();
  1672. this.db.transaction(() => {
  1673. this.db.exec('DELETE FROM unresolved_refs');
  1674. this.db.exec('DELETE FROM edges');
  1675. this.db.exec('DELETE FROM nodes');
  1676. this.db.exec('DELETE FROM files');
  1677. })();
  1678. }
  1679. }