| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751 |
- /**
- * Database Queries
- *
- * Prepared statements for CRUD operations on the knowledge graph.
- */
- import { SqliteDatabase, SqliteStatement } from './sqlite-adapter';
- import {
- Node,
- Edge,
- FileRecord,
- UnresolvedReference,
- NodeKind,
- EdgeKind,
- Language,
- GraphStats,
- SearchOptions,
- SearchResult,
- } from '../types';
- import { safeJsonParse } from '../utils';
- import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
- import { parseQuery, boundedEditDistance } from '../search/query-parser';
- import { isGeneratedFile } from '../extraction/generated-detection';
/**
 * Path-only heuristic: reports whether a file should be excluded from
 * "dominant file" style rankings because it is a test/spec file or a
 * tool-generated file.
 *
 * Generated outputs (`*.pb.go`, `*.pulsar.go`, mock outputs, …) often carry
 * far more in-file edges than the hand-written source — etcd's `rpc.pb.go`
 * has 4× the in-file edges of `server.go` — so they are screened out here.
 *
 * @param filePath - File path to classify. The test/spec patterns are matched
 *   against the lower-cased path; `isGeneratedFile` receives it unchanged.
 * @returns `true` when the path matches a test/spec pattern or
 *   `isGeneratedFile` reports it as generated.
 */
function isLowValueFile(filePath: string): boolean {
  const lowered = filePath.toLowerCase();

  // Test/spec naming conventions, checked in order before the generated-file
  // check.
  const testPathPatterns: RegExp[] = [
    /(?:^|\/)(tests?|__tests?__|spec)\//,
    /_test\.go$/,
    /(?:^|\/)test_[^/]+\.py$/,
    /_test\.py$/,
    /_spec\.rb$/,
    /_test\.rb$/,
    /\.(test|spec)\.[jt]sx?$/,
    /(test|spec|tests)\.(java|kt|scala)$/,
    /(tests?|spec)\.cs$/,
    /tests?\.swift$/,
    /_test\.dart$/,
  ];

  if (testPathPatterns.some((pattern) => pattern.test(lowered))) {
    return true;
  }
  return isGeneratedFile(filePath);
}
/**
 * Maximum number of ids bound into a single `IN (...)` list. Larger id lists
 * are split into chunks of this size so one statement never carries more
 * bound parameters than this.
 */
const SQLITE_PARAM_CHUNK_SIZE = 500;
/**
 * Database row types (snake_case, as returned by SQLite).
 *
 * `NodeRow` is the raw shape of a row selected from the `nodes` table;
 * `rowToNode` converts it into the camelCase `Node` object.
 */
interface NodeRow {
  /** Node identifier (the key used for lookups and for the node cache). */
  id: string;
  /** Node kind, stored as text; cast to `NodeKind` on read. */
  kind: string;
  name: string;
  qualified_name: string;
  file_path: string;
  /** Language, stored as text; cast to `Language` on read. */
  language: string;
  start_line: number;
  end_line: number;
  start_column: number;
  end_column: number;
  docstring: string | null;
  signature: string | null;
  visibility: string | null;
  /** Boolean flags are stored as integers: 1 = true, anything else = false. */
  is_exported: number;
  is_async: number;
  is_static: number;
  is_abstract: number;
  /** JSON-encoded value, or null when absent. */
  decorators: string | null;
  /** JSON-encoded value, or null when absent. */
  type_parameters: string | null;
  updated_at: number;
}
/**
 * Raw edge row in snake_case; `rowToEdge` converts it into an `Edge`.
 */
interface EdgeRow {
  /** Row id. Not copied onto the `Edge` object by `rowToEdge`. */
  id: number;
  /** Id of the node the edge starts from. */
  source: string;
  /** Id of the node the edge points to. */
  target: string;
  /** Edge kind, stored as text; cast to `EdgeKind` on read. */
  kind: string;
  /** JSON-encoded metadata, or null when absent. */
  metadata: string | null;
  line: number | null;
  /** Surfaced as `column` on the `Edge` object. */
  col: number | null;
  provenance: string | null;
}
/**
 * Raw file-record row in snake_case; `rowToFileRecord` converts it into a
 * `FileRecord`.
 */
interface FileRow {
  path: string;
  content_hash: string;
  /** Language, stored as text; cast to `Language` on read. */
  language: string;
  size: number;
  modified_at: number;
  indexed_at: number;
  node_count: number;
  /** JSON-encoded value, or null when absent. */
  errors: string | null;
}
/**
 * Raw unresolved-reference row in snake_case.
 *
 * NOTE(review): no consumer of this type is visible in this part of the
 * file; presumably it is mapped to `UnresolvedReference` further down —
 * confirm there before changing fields.
 */
interface UnresolvedRefRow {
  id: number;
  from_node_id: string;
  reference_name: string;
  reference_kind: string;
  line: number;
  col: number;
  /** Nullable text column; presumably JSON-encoded like the other rows — TODO confirm. */
  candidates: string | null;
  file_path: string;
  language: string;
}
/**
 * Convert a raw `nodes` row (snake_case) into a `Node` object (camelCase).
 *
 * - Nullable text columns (`docstring`, `signature`, `visibility`) become
 *   `undefined` when the column is NULL, so a missing value never surfaces
 *   as `null` on the returned object.
 * - Integer flag columns are true only when the stored value is exactly 1.
 * - `decorators` / `type_parameters` are JSON-decoded via `safeJsonParse`
 *   with `undefined` as the fallback; NULL or empty text yields `undefined`.
 *
 * @param row - Row as returned by a `SELECT * FROM nodes` query.
 * @returns The equivalent `Node`.
 */
function rowToNode(row: NodeRow): Node {
  return {
    id: row.id,
    kind: row.kind as NodeKind,
    name: row.name,
    qualifiedName: row.qualified_name,
    filePath: row.file_path,
    language: row.language as Language,
    startLine: row.start_line,
    endLine: row.end_line,
    startColumn: row.start_column,
    endColumn: row.end_column,
    docstring: row.docstring ?? undefined,
    signature: row.signature ?? undefined,
    // Normalise NULL to undefined, matching docstring/signature above.
    visibility: (row.visibility ?? undefined) as Node['visibility'],
    isExported: row.is_exported === 1,
    isAsync: row.is_async === 1,
    isStatic: row.is_static === 1,
    isAbstract: row.is_abstract === 1,
    decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined,
    typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined,
    updatedAt: row.updated_at,
  };
}
/**
 * Convert a raw edge row (snake_case) into an `Edge` object.
 *
 * The row's numeric `id` is not carried over. NULL `line`, `col` and
 * `provenance` become `undefined` so a missing value never surfaces as
 * `null`. `metadata` is JSON-decoded via `safeJsonParse` with `undefined`
 * as the fallback.
 *
 * @param row - Raw edge row.
 * @returns The equivalent `Edge`; the `col` column is exposed as `column`.
 */
function rowToEdge(row: EdgeRow): Edge {
  return {
    source: row.source,
    target: row.target,
    kind: row.kind as EdgeKind,
    metadata: row.metadata ? safeJsonParse(row.metadata, undefined) : undefined,
    line: row.line ?? undefined,
    column: row.col ?? undefined,
    // Normalise NULL to undefined, matching line/column above.
    provenance: (row.provenance ?? undefined) as Edge['provenance'],
  };
}
/**
 * Convert a raw file row (snake_case) into a `FileRecord` object.
 *
 * @param row - Raw file row.
 * @returns The equivalent `FileRecord`. `errors` is JSON-decoded via
 *   `safeJsonParse` (fallback `undefined`) and is `undefined` when the
 *   column is NULL or empty.
 */
function rowToFileRecord(row: FileRow): FileRecord {
  const {
    path,
    content_hash: contentHash,
    size,
    modified_at: modifiedAt,
    indexed_at: indexedAt,
    node_count: nodeCount,
  } = row;
  const language = row.language as Language;
  const errors = row.errors ? safeJsonParse(row.errors, undefined) : undefined;

  return { path, contentHash, language, size, modifiedAt, indexedAt, nodeCount, errors };
}
- /**
- * Query builder for the knowledge graph database
- */
- export class QueryBuilder {
/** Database handle that every statement in this class is prepared against. */
private db: SqliteDatabase;

/**
 * Cache of nodes keyed by id (LRU-style, bounded by `maxCacheSize`). A Map
 * iterates in insertion order, so the first key is the oldest entry.
 */
private nodeCache = new Map<string, Node>();

/** Maximum number of entries kept in `nodeCache`. */
private readonly maxCacheSize = 1000;

/**
 * Prepared statements, keyed by operation name. Every slot starts out
 * absent and is filled lazily by the method that first needs it.
 */
private stmts: Partial<
  Record<
    | 'insertNode'
    | 'updateNode'
    | 'deleteNode'
    | 'deleteNodesByFile'
    | 'getNodeById'
    | 'getNodesByFile'
    | 'getNodesByKind'
    | 'insertEdge'
    | 'upsertFile'
    | 'deleteEdgesBySource'
    | 'deleteEdgesByTarget'
    | 'getEdgesBySource'
    | 'getEdgesByTarget'
    | 'insertFile'
    | 'updateFile'
    | 'deleteFile'
    | 'getFileByPath'
    | 'getAllFiles'
    | 'insertUnresolved'
    | 'deleteUnresolvedByNode'
    | 'getUnresolvedByName'
    | 'getNodesByName'
    | 'getNodesByQualifiedNameExact'
    | 'getNodesByLowerName'
    | 'getUnresolvedCount'
    | 'getUnresolvedBatch'
    | 'getAllFilePaths'
    | 'getAllNodeNames'
    | 'getDominantFile'
    | 'getTopRouteFile'
    | 'getRoutingManifest',
    SqliteStatement
  >
> = {};

/**
 * @param db - Open database handle; stored as-is, nothing is prepared here.
 */
constructor(db: SqliteDatabase) {
  this.db = db;
}
- // ===========================================================================
- // Node Operations
- // ===========================================================================
/**
 * Insert a node, replacing any existing row with the same id
 * (`INSERT OR REPLACE`).
 *
 * Nodes missing any of `id`, `kind`, `name`, `filePath` or `language` are
 * logged and skipped rather than bound, which would fail in SQLite.
 *
 * @param node - Node to store. Optional fields fall back to: `qualifiedName`
 *   → `name`, positions → 0, text fields → NULL, `updatedAt` → now.
 */
insertNode(node: Node): void {
  const stmt = (this.stmts.insertNode ??= this.db.prepare(`
    INSERT OR REPLACE INTO nodes (
      id, kind, name, qualified_name, file_path, language,
      start_line, end_line, start_column, end_column,
      docstring, signature, visibility,
      is_exported, is_async, is_static, is_abstract,
      decorators, type_parameters, updated_at
    ) VALUES (
      @id, @kind, @name, @qualifiedName, @filePath, @language,
      @startLine, @endLine, @startColumn, @endColumn,
      @docstring, @signature, @visibility,
      @isExported, @isAsync, @isStatic, @isAbstract,
      @decorators, @typeParameters, @updatedAt
    )
  `));

  // Guard the required columns so a malformed node cannot reach the bind step.
  const { id, kind, name, filePath, language } = node;
  if (!(id && kind && name && filePath && language)) {
    console.error('[CodeGraph] Skipping node with missing required fields:', {
      id,
      kind,
      name,
      filePath,
      language,
    });
    return;
  }

  // The replace may overwrite a row we hold in the cache; drop the stale copy
  // so the next getNodeById reads the new row.
  this.nodeCache.delete(id);

  const toFlag = (value: unknown): number => (value ? 1 : 0);
  stmt.run({
    id,
    kind,
    name,
    qualifiedName: node.qualifiedName ?? name,
    filePath,
    language,
    startLine: node.startLine ?? 0,
    endLine: node.endLine ?? 0,
    startColumn: node.startColumn ?? 0,
    endColumn: node.endColumn ?? 0,
    docstring: node.docstring ?? null,
    signature: node.signature ?? null,
    visibility: node.visibility ?? null,
    isExported: toFlag(node.isExported),
    isAsync: toFlag(node.isAsync),
    isStatic: toFlag(node.isStatic),
    isAbstract: toFlag(node.isAbstract),
    decorators: node.decorators ? JSON.stringify(node.decorators) : null,
    typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
    updatedAt: node.updatedAt ?? Date.now(),
  });
}
/**
 * Insert several nodes inside a single database transaction.
 *
 * @param nodes - Nodes to insert, in order; each one goes through
 *   `insertNode`, so invalid nodes are skipped individually.
 */
insertNodes(nodes: Node[]): void {
  const insertAll = this.db.transaction(() => {
    for (const current of nodes) {
      this.insertNode(current);
    }
  });
  insertAll();
}
/**
 * Overwrite every column of the row whose id matches `node.id`.
 *
 * The cached copy of the node is dropped first, even when the update is then
 * skipped. Nodes missing any of `id`, `kind`, `name`, `filePath` or
 * `language` are logged and skipped.
 *
 * @param node - New state of the node. Optional fields use the same
 *   fallbacks as on insert (`qualifiedName` → `name`, positions → 0, text
 *   fields → NULL, `updatedAt` → now).
 */
updateNode(node: Node): void {
  const stmt = (this.stmts.updateNode ??= this.db.prepare(`
    UPDATE nodes SET
      kind = @kind,
      name = @name,
      qualified_name = @qualifiedName,
      file_path = @filePath,
      language = @language,
      start_line = @startLine,
      end_line = @endLine,
      start_column = @startColumn,
      end_column = @endColumn,
      docstring = @docstring,
      signature = @signature,
      visibility = @visibility,
      is_exported = @isExported,
      is_async = @isAsync,
      is_static = @isStatic,
      is_abstract = @isAbstract,
      decorators = @decorators,
      type_parameters = @typeParameters,
      updated_at = @updatedAt
    WHERE id = @id
  `));

  // Drop the cached copy before anything else.
  this.nodeCache.delete(node.id);

  const complete = Boolean(
    node.id && node.kind && node.name && node.filePath && node.language
  );
  if (!complete) {
    console.error('[CodeGraph] Skipping node update with missing required fields:', node.id);
    return;
  }

  const toFlag = (value: unknown): number => (value ? 1 : 0);
  stmt.run({
    id: node.id,
    kind: node.kind,
    name: node.name,
    qualifiedName: node.qualifiedName ?? node.name,
    filePath: node.filePath,
    language: node.language,
    startLine: node.startLine ?? 0,
    endLine: node.endLine ?? 0,
    startColumn: node.startColumn ?? 0,
    endColumn: node.endColumn ?? 0,
    docstring: node.docstring ?? null,
    signature: node.signature ?? null,
    visibility: node.visibility ?? null,
    isExported: toFlag(node.isExported),
    isAsync: toFlag(node.isAsync),
    isStatic: toFlag(node.isStatic),
    isAbstract: toFlag(node.isAbstract),
    decorators: node.decorators ? JSON.stringify(node.decorators) : null,
    typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
    updatedAt: node.updatedAt ?? Date.now(),
  });
}
/**
 * Delete the node with the given id and drop it from the node cache.
 *
 * @param id - Id of the node to remove.
 */
deleteNode(id: string): void {
  const stmt = (this.stmts.deleteNode ??= this.db.prepare('DELETE FROM nodes WHERE id = ?'));

  // Drop the cached copy before the row is removed.
  this.nodeCache.delete(id);
  stmt.run(id);
}
/**
 * Delete every node belonging to a file, and drop those nodes from the cache.
 *
 * @param filePath - Value matched exactly against the `file_path` column.
 */
deleteNodesByFile(filePath: string): void {
  const stmt = (this.stmts.deleteNodesByFile ??= this.db.prepare(
    'DELETE FROM nodes WHERE file_path = ?'
  ));

  // Drop cached nodes from this file; removing Map entries while iterating
  // is safe.
  this.nodeCache.forEach((cachedNode, cachedId) => {
    if (cachedNode.filePath === filePath) {
      this.nodeCache.delete(cachedId);
    }
  });

  stmt.run(filePath);
}
/**
 * Look up a single node by id, consulting the node cache first.
 *
 * @param id - Node id.
 * @returns The node, or `null` when no row has that id. Nodes read from the
 *   database are added to the cache.
 */
getNodeById(id: string): Node | null {
  const hit = this.nodeCache.get(id);
  if (hit !== undefined) {
    // Re-insert so the entry becomes the most recently used one.
    this.nodeCache.delete(id);
    this.nodeCache.set(id, hit);
    return hit;
  }

  const stmt = (this.stmts.getNodeById ??= this.db.prepare('SELECT * FROM nodes WHERE id = ?'));
  const row = stmt.get(id) as NodeRow | undefined;
  if (row === undefined || row === null) {
    return null;
  }

  const loaded = rowToNode(row);
  this.cacheNode(loaded);
  return loaded;
}
/**
 * Batch lookup: fetch many nodes by id with one IN-list query per chunk
 * instead of one point read per id.
 *
 * Replaces the N+1 pattern in graph traversal, where every edge would
 * otherwise trigger its own `getNodeById` call.
 *
 * Cache-aware: ids already in the node cache are served from memory (and
 * marked as recently used); only the misses reach SQL. Misses are
 * de-duplicated first, so a repeated id is bound once.
 *
 * @param ids - Ids to resolve; duplicates are allowed.
 * @returns A Map keyed by id, so callers can apply their own ordering. Ids
 *   with no matching row are simply absent from the map.
 */
getNodesByIds(ids: readonly string[]): Map<string, Node> {
  const found = new Map<string, Node>();
  if (ids.length === 0) return found;

  // Serve cache hits first; collect the distinct misses for SQL.
  const missing = new Set<string>();
  for (const id of ids) {
    const cached = this.nodeCache.get(id);
    if (cached === undefined) {
      missing.add(id);
      continue;
    }
    // LRU touch: re-insert so the entry becomes the most recent one.
    this.nodeCache.delete(id);
    this.nodeCache.set(id, cached);
    found.set(id, cached);
  }
  if (missing.size === 0) return found;

  // Chunk the IN list to stay under SQLite's bound-parameter limit (999 by
  // default in older builds, higher in others) and keep the query plan
  // simple.
  const toFetch = [...missing];
  for (let start = 0; start < toFetch.length; start += SQLITE_PARAM_CHUNK_SIZE) {
    const chunk = toFetch.slice(start, start + SQLITE_PARAM_CHUNK_SIZE);
    const placeholders = chunk.map(() => '?').join(',');
    const rows = this.db
      .prepare(`SELECT * FROM nodes WHERE id IN (${placeholders})`)
      .all(...chunk) as NodeRow[];
    for (const row of rows) {
      const node = rowToNode(row);
      found.set(node.id, node);
      this.cacheNode(node);
    }
  }
  return found;
}
/**
 * Report which of the given ids exist in the `nodes` table.
 *
 * Ids are de-duplicated and queried in chunks of `SQLITE_PARAM_CHUNK_SIZE`.
 * The node cache is neither consulted nor updated.
 *
 * @param ids - Candidate node ids; duplicates are allowed.
 * @returns The subset of `ids` that has a matching row.
 */
private getExistingNodeIds(ids: readonly string[]): Set<string> {
  const existing = new Set<string>();
  if (ids.length === 0) return existing;

  const distinct = Array.from(new Set(ids));
  let cursor = 0;
  while (cursor < distinct.length) {
    const batch = distinct.slice(cursor, cursor + SQLITE_PARAM_CHUNK_SIZE);
    const marks = batch.map(() => '?').join(',');
    const matched = this.db
      .prepare(`SELECT id FROM nodes WHERE id IN (${marks})`)
      .all(...batch) as { id: string }[];
    matched.forEach((entry) => existing.add(entry.id));
    cursor += SQLITE_PARAM_CHUNK_SIZE;
  }
  return existing;
}
/**
 * Add a node to the cache as the most recently used entry, evicting the
 * oldest entry when the cache is full.
 *
 * - If the id is already cached, its entry is replaced and moved to the
 *   most-recent position; nothing else is evicted, since the size does not
 *   grow.
 * - Otherwise, when the cache holds `maxCacheSize` entries, the first
 *   (oldest) key in insertion order is removed before inserting.
 *
 * @param node - Node to cache under `node.id`.
 */
private cacheNode(node: Node): void {
  if (this.nodeCache.has(node.id)) {
    // Delete first: re-setting an existing Map key keeps its old position.
    this.nodeCache.delete(node.id);
  } else if (this.nodeCache.size >= this.maxCacheSize) {
    const oldestKey = this.nodeCache.keys().next().value;
    // Compare against undefined, not truthiness, so an empty-string key is
    // still evictable.
    if (oldestKey !== undefined) {
      this.nodeCache.delete(oldestKey);
    }
  }
  this.nodeCache.set(node.id, node);
}
/**
 * Empty the in-memory node cache. The database itself is not touched.
 */
clearCache(): void {
  const cache = this.nodeCache;
  cache.clear();
}
/**
 * Get every node that belongs to a file, ordered by start line.
 *
 * @param filePath - Value matched exactly against the `file_path` column.
 * @returns The file's nodes (empty array when none). This path does not go
 *   through the node cache.
 */
getNodesByFile(filePath: string): Node[] {
  const stmt = (this.stmts.getNodesByFile ??= this.db.prepare(
    'SELECT * FROM nodes WHERE file_path = ? ORDER BY start_line'
  ));
  const fileRows = stmt.all(filePath) as NodeRow[];
  return fileRows.map((row) => rowToNode(row));
}
/**
 * Find the file holding the densest concentration of the project's internal
 * call graph — the "core" file. Used by context-builder to boost symbols in
 * that file's directory (so e.g. sinatra queries surface
 * `lib/sinatra/base.rb`'s `route!` rather than the `route` extension in
 * `sinatra-contrib/lib/sinatra/multi_route.rb`).
 *
 * "Internal" means an edge whose source and target nodes live in the same
 * file; cross-file edges say nothing about which file is the functional
 * centre, so they are not counted.
 *
 * Only the 20 files with the most internal edges are considered. Test/spec
 * and generated files are then removed via `isLowValueFile`: the typical
 * question is "how does X work", not "how is X tested", and generated stubs
 * such as etcd's `api/etcdserverpb/rpc.pb.go` (1916 in-file edges) would
 * otherwise outrank the real `server/etcdserver/server.go` (470 edges).
 *
 * @returns The top remaining file with its edge count and the runner-up's
 *   count (0 when there is no runner-up), or `null` when no candidate
 *   remains or the best one has fewer than 20 internal edges.
 */
getDominantFile(): { filePath: string; edgeCount: number; nextEdgeCount: number } | null {
  // The test/generated filter needs regex-grade matching that SQL LIKE
  // cannot express, so SQL only ranks and the filter runs in code.
  const stmt = (this.stmts.getDominantFile ??= this.db.prepare(`
    SELECT n.file_path AS file_path, COUNT(*) AS edge_count
    FROM edges e
    JOIN nodes n ON e.source = n.id
    JOIN nodes m ON e.target = m.id
    WHERE n.file_path = m.file_path
    GROUP BY n.file_path
    ORDER BY edge_count DESC
    LIMIT 20
  `));

  const ranked = stmt.all() as Array<{ file_path: string; edge_count: number }>;
  const [best, runnerUp] = ranked.filter((row) => !isLowValueFile(row.file_path));

  if (best === undefined || best.edge_count < 20) {
    return null;
  }
  return {
    filePath: best.file_path,
    edgeCount: best.edge_count,
    nextEdgeCount: runnerUp?.edge_count ?? 0,
  };
}
/**
 * Find the file holding the densest concentration of the project's `route`
 * nodes (framework-emitted: Express/Gin/Flask/Rails/Drupal/etc.). Used by
 * handleContext on small repos to inline the project's routing config when
 * the query is about request flow, instead of the agent having to glob for
 * and read a routes file itself.
 *
 * Only the 20 files with the most route nodes are considered, and
 * test/generated files are dropped via `isLowValueFile`. `totalRoutes` is
 * the route count summed over the files that remain.
 *
 * @returns The top file with its route count and the total, or `null` when
 *   no file remains, the top file has fewer than 3 routes, or it holds less
 *   than 30% of the total (diffuse routing, so no single answer file).
 */
getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null {
  const stmt = (this.stmts.getTopRouteFile ??= this.db.prepare(`
    SELECT file_path, COUNT(*) AS cnt
    FROM nodes
    WHERE kind = 'route'
    GROUP BY file_path
    ORDER BY cnt DESC
    LIMIT 20
  `));

  const perFile = stmt.all() as Array<{ file_path: string; cnt: number }>;
  const candidates = perFile.filter((row) => !isLowValueFile(row.file_path));
  const [leader] = candidates;
  if (leader === undefined) return null;

  let totalRoutes = 0;
  for (const candidate of candidates) {
    totalRoutes += candidate.cnt;
  }

  const tooFewRoutes = totalRoutes < 3 || leader.cnt < 3;
  if (tooFewRoutes) return null;
  // The top file must hold at least 30% of the counted routes.
  if (leader.cnt / totalRoutes < 0.30) return null;

  return { filePath: leader.file_path, routeCount: leader.cnt, totalRoutes };
}
/**
 * Build a URL → handler manifest from the index by joining each `route` node
 * to the function/method/class its edge points at. The agent gets the
 * canonical routing answer ("POST /users/login → AuthController#login")
 * without parsing the framework's route DSL itself.
 *
 * Also reports the file containing the most handlers, so the caller can
 * inline that file's source alongside the mapping.
 *
 * @param limit - Maximum number of route→handler rows read from SQL
 *   (default 40). Filtering happens afterwards, so fewer entries may be
 *   returned.
 * @returns The manifest, or `null` when fewer than 3 entries remain after
 *   handlers in test/generated files are dropped. `totalRoutes` is the
 *   number of returned entries. On a tie, `topHandlerFile` is the file whose
 *   first handler appears earliest in the result order.
 */
getRoutingManifest(limit: number = 40): {
  entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>;
  topHandlerFile: string | null;
  topHandlerFileCount: number;
  totalRoutes: number;
} | null {
  // Edge kind varies across framework resolvers: Spring/Rails/Laravel/Drupal
  // emit `references`, Express emits `calls`. Both mean route → handler, so
  // both are accepted.
  const stmt = (this.stmts.getRoutingManifest ??= this.db.prepare(`
    SELECT
      r.name AS url,
      h.name AS handler,
      h.file_path AS handler_file,
      h.start_line AS handler_line,
      h.kind AS handler_kind
    FROM nodes r
    JOIN edges e ON e.source = r.id
    JOIN nodes h ON e.target = h.id
    WHERE r.kind = 'route'
      AND e.kind IN ('references', 'calls')
      AND h.kind IN ('function', 'method', 'class')
    ORDER BY r.file_path, r.start_line
    LIMIT ?
  `));

  type ManifestRow = {
    url: string; handler: string; handler_file: string; handler_line: number; handler_kind: string;
  };
  const usable = (stmt.all(limit) as ManifestRow[]).filter(
    (row) => !isLowValueFile(row.handler_file)
  );
  if (usable.length < 3) return null;

  // Tally handlers per file, keeping first-seen order for tie-breaking.
  const handlersPerFile = new Map<string, number>();
  usable.forEach((row) => {
    const seen = handlersPerFile.get(row.handler_file) ?? 0;
    handlersPerFile.set(row.handler_file, seen + 1);
  });

  // Strictly-greater comparison: the earliest file wins a tie.
  let topHandlerFile: string | null = null;
  let topHandlerFileCount = 0;
  handlersPerFile.forEach((count, file) => {
    if (count > topHandlerFileCount) {
      topHandlerFile = file;
      topHandlerFileCount = count;
    }
  });

  const entries = usable.map((row) => ({
    url: row.url,
    handler: row.handler,
    handlerFile: row.handler_file,
    handlerLine: row.handler_line,
    handlerKind: row.handler_kind,
  }));

  return { entries, topHandlerFile, topHandlerFileCount, totalRoutes: usable.length };
}
/**
 * Get all nodes of a specific kind, materialised as one array.
 *
 * @param kind - Node kind to match exactly.
 * @returns Every matching node (empty array when none).
 */
getNodesByKind(kind: NodeKind): Node[] {
  const stmt = (this.stmts.getNodesByKind ??= this.db.prepare(
    'SELECT * FROM nodes WHERE kind = ?'
  ));
  const matching = stmt.all(kind) as NodeRow[];
  return matching.map((row) => rowToNode(row));
}
/**
 * Lazily stream every node of a kind, one at a time, instead of building
 * the full array like {@link getNodesByKind}. For unbounded kinds
 * (`function`, `method`) on a symbol-dense project the full array can be
 * very large; the dynamic-edge synthesizers only scan and filter, so
 * iterating keeps memory from growing with the node count (#610).
 *
 * @param kind - Node kind to match exactly.
 * @returns An iterator yielding one converted `Node` per matching row.
 */
*iterateNodesByKind(kind: NodeKind): IterableIterator<Node> {
  // Prepare a fresh statement per call rather than reusing a cached one: an
  // iterator holds an open cursor, so a shared statement would conflict
  // across overlapping scans.
  const cursor = this.db.prepare('SELECT * FROM nodes WHERE kind = ?').iterate(kind);
  for (const raw of cursor) {
    yield rowToNode(raw as NodeRow);
  }
}
/**
 * Get every node in the database, materialised as one array.
 *
 * @returns All rows of `nodes`, converted to `Node` objects.
 */
getAllNodes(): Node[] {
  const everyRow = this.db.prepare('SELECT * FROM nodes').all() as NodeRow[];
  const nodes: Node[] = [];
  for (const row of everyRow) {
    nodes.push(rowToNode(row));
  }
  return nodes;
}
/**
 * Get nodes whose `name` equals the given name exactly (intended to be
 * served by the idx_nodes_name index).
 *
 * @param name - Name to match exactly.
 * @returns Matching nodes (empty array when none).
 */
getNodesByName(name: string): Node[] {
  const stmt = (this.stmts.getNodesByName ??= this.db.prepare(
    'SELECT * FROM nodes WHERE name = ?'
  ));
  const matching = stmt.all(name) as NodeRow[];
  return matching.map((row) => rowToNode(row));
}
/**
 * Get nodes whose `qualified_name` equals the given value exactly (intended
 * to be served by the idx_nodes_qualified_name index).
 *
 * @param qualifiedName - Qualified name to match exactly.
 * @returns Matching nodes (empty array when none).
 */
getNodesByQualifiedNameExact(qualifiedName: string): Node[] {
  const stmt = (this.stmts.getNodesByQualifiedNameExact ??= this.db.prepare(
    'SELECT * FROM nodes WHERE qualified_name = ?'
  ));
  const matching = stmt.all(qualifiedName) as NodeRow[];
  return matching.map((row) => rowToNode(row));
}
/**
 * Get nodes whose lower-cased `name` equals the given value (intended to be
 * served by the idx_nodes_lower_name expression index).
 *
 * @param lowerName - Value compared against `lower(name)`; the argument is
 *   not lower-cased here, so the caller must pass it already lower-cased.
 * @returns Matching nodes (empty array when none).
 */
getNodesByLowerName(lowerName: string): Node[] {
  const stmt = (this.stmts.getNodesByLowerName ??= this.db.prepare(
    'SELECT * FROM nodes WHERE lower(name) = ?'
  ));
  const matching = stmt.all(lowerName) as NodeRow[];
  return matching.map((row) => rowToNode(row));
}
/**
 * Search nodes by name, with progressively looser fallbacks and multi-signal
 * rescoring.
 *
 * Pipeline:
 * 1. Parse field qualifiers (`kind:`, `lang:`, `path:`, `name:`) out of the
 *    query. Kinds/languages found in the query are unioned with the ones in
 *    `options`.
 * 2. Fetch candidates: the FTS path for the text portion, or — when the
 *    query holds only filters — a filter-only scan over-fetched by 5×.
 * 3. If nothing came back, fall back to LIKE (text ≥ 2 chars), then to a
 *    fuzzy edit-distance match (text ≥ 3 chars).
 * 4. Add exact (case-insensitive) name matches for each query term, so short
 *    exact names are not buried under longer compound names.
 * 5. Rescore with kind, path-relevance and name-match bonuses, then sort.
 * 6. Apply `path:` / `name:` as hard filters, then trim to `limit`.
 *
 * The trim runs last and unconditionally: trimming before the hard filters
 * would discard matches the over-fetch was meant to keep, and skipping it on
 * the unscored path could return more than `limit` rows.
 *
 * @param query - Raw search string, optionally containing field qualifiers.
 * @param options - `kinds` / `languages` filters plus `limit` (default 100)
 *   and `offset` (default 0; forwarded to the FTS and LIKE searches only).
 * @returns At most `limit` results. They are sorted by descending score
 *   whenever rescoring ran (non-empty text or query).
 */
searchNodes(query: string, options: SearchOptions = {}): SearchResult[] {
  const { limit = 100, offset = 0 } = options;

  // Anything that is not a recognised qualifier stays in `parsed.text`.
  const parsed = parseQuery(query);
  const kinds =
    parsed.kinds.length > 0
      ? Array.from(new Set([...(options.kinds ?? []), ...parsed.kinds]))
      : options.kinds;
  const languages =
    parsed.languages.length > 0
      ? Array.from(new Set([...(options.languages ?? []), ...parsed.languages]))
      : options.languages;
  const { pathFilters, nameFilters, text } = parsed;

  // FTS5 first. With no text (filters only) we still need a candidate set,
  // so scan by filters and over-fetch 5×: the path:/name: gates applied
  // below can be very selective.
  let results = text
    ? this.searchNodesFTS(text, { kinds, languages, limit, offset })
    : this.searchAllByFilters({ kinds, languages, limit: limit * 5 });

  // No FTS hits: try LIKE-based substring search.
  if (results.length === 0 && text.length >= 2) {
    results = this.searchNodesLike(text, { kinds, languages, limit, offset });
  }

  // Still nothing: fuzzy match on names. Requires at least 3 characters,
  // because shorter queries would match too much.
  if (results.length === 0 && text.length >= 3) {
    results = this.searchNodesFuzzy(text, { kinds, languages, limit });
  }

  // Supplement: make sure exact name matches are always candidates. BM25
  // can bury a short exact name (e.g. "getBean") under many compound names
  // (e.g. "getBeanDescriptor"), pushing it past the fetch limit. Seed them
  // with the max existing score so the name-match bonus separates exact
  // from prefix matches after rescoring.
  if (results.length > 0 && query) {
    const existingIds = new Set(results.map((r) => r.node.id));
    const maxFtsScore = Math.max(...results.map((r) => r.score));
    const terms = query.split(/\s+/).filter((t) => t.length >= 2);
    for (const term of terms) {
      let sql = 'SELECT * FROM nodes WHERE name = ? COLLATE NOCASE';
      const params: (string | number)[] = [term];
      if (kinds && kinds.length > 0) {
        sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
        params.push(...kinds);
      }
      if (languages && languages.length > 0) {
        sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
        params.push(...languages);
      }
      sql += ' LIMIT 20';
      const rows = this.db.prepare(sql).all(...params) as NodeRow[];
      for (const row of rows) {
        if (!existingIds.has(row.id)) {
          results.push({ node: rowToNode(row), score: maxFtsScore });
          existingIds.add(row.id);
        }
      }
    }
  }

  // Multi-signal rescoring, then order best-first.
  if (results.length > 0 && (text || query)) {
    const scoringQuery = text || query;
    results = results.map((r) => ({
      ...r,
      score: r.score
        + kindBonus(r.node.kind)
        + scorePathRelevance(r.node.filePath, scoringQuery)
        + nameMatchBonus(r.node.name, scoringQuery),
    }));
    results.sort((a, b) => b.score - a.score);
  }

  // path: and name: are hard gates (scoring only used them as a soft
  // signal). Each keeps a result when any of its filters matches as a
  // case-insensitive substring.
  if (pathFilters.length > 0) {
    const lowered = pathFilters.map((p) => p.toLowerCase());
    results = results.filter((r) => {
      const fp = r.node.filePath.toLowerCase();
      return lowered.some((p) => fp.includes(p));
    });
  }
  if (nameFilters.length > 0) {
    const lowered = nameFilters.map((n) => n.toLowerCase());
    results = results.filter((r) => {
      const nm = r.node.name.toLowerCase();
      return lowered.some((n) => nm.includes(n));
    });
  }

  // Trim last, so the hard filters narrow from the full candidate set and
  // every path honours `limit`.
  if (results.length > limit) {
    results = results.slice(0, limit);
  }
  return results;
}
/**
 * Filter-only candidate fetch, used when a query carries field filters
 * (e.g. `kind:function lang:typescript`) but no free text.
 *
 * Reads rows from `nodes`, restricted to the requested kinds/languages
 * when those lists are non-empty, ordered by name and capped at `limit`.
 * Every hit is given the same flat score of 1.
 *
 * @param options - `kinds` / `languages` narrow the result when non-empty;
 *   `limit` is the maximum number of rows fetched.
 * @returns One SearchResult per fetched row.
 */
private searchAllByFilters(options: {
  kinds?: NodeKind[];
  languages?: Language[];
  limit: number;
}): SearchResult[] {
  const kindList = options.kinds ?? [];
  const languageList = options.languages ?? [];

  const sqlParts: string[] = ['SELECT * FROM nodes WHERE 1=1'];
  const bindings: (string | number)[] = [];

  if (kindList.length > 0) {
    sqlParts.push(` AND kind IN (${kindList.map(() => '?').join(',')})`);
    bindings.push(...kindList);
  }
  if (languageList.length > 0) {
    sqlParts.push(` AND language IN (${languageList.map(() => '?').join(',')})`);
    bindings.push(...languageList);
  }

  sqlParts.push(' ORDER BY name LIMIT ?');
  bindings.push(options.limit);

  const fetched = this.db.prepare(sqlParts.join('')).all(...bindings) as NodeRow[];
  const hits: SearchResult[] = [];
  for (const row of fetched) {
    hits.push({ node: rowToNode(row), score: 1 });
  }
  return hits;
}
/**
 * Fuzzy fallback: an edit-distance sweep over the distinct symbol names.
 *
 * The allowed distance is 1 for queries of up to 4 characters and 2
 * otherwise, so `getUssr` finds `getUser` but `process` does not match
 * `prosody`. Names within that distance are looked up in `nodes`
 * (closest first) until `limit` results have been collected.
 *
 * @param text - Free-text portion of the query; compared case-insensitively.
 * @param options - `kinds` / `languages` narrow the follow-up lookups when
 *   non-empty; `limit` caps the number of results returned.
 * @returns Matches scored `1 / (1 + distance)`, so closer names rank higher.
 */
private searchNodesFuzzy(
  text: string,
  options: { kinds?: NodeKind[]; languages?: Language[]; limit: number }
): SearchResult[] {
  const { kinds, languages, limit } = options;
  const lowered = text.toLowerCase();
  const maxDist = lowered.length <= 4 ? 1 : 2;

  // getAllNodeNames() re-reads the DISTINCT name list on every call (only
  // its prepared statement is cached), so this is one full name scan per
  // fuzzy query. The candidate cap below bounds the follow-up work.
  const allNames = this.getAllNodeNames();
  const candidates: Array<{ name: string; dist: number }> = [];
  for (const name of allNames) {
    const dist = boundedEditDistance(name.toLowerCase(), lowered, maxDist);
    if (dist <= maxDist) candidates.push({ name, dist });
  }
  candidates.sort((a, b) => a.dist - b.dist);

  // Cap the per-name follow-up queries. Each survivor triggers its own
  // lookup; without this cap a project with many similar names
  // (`getUser1`, `getUser2`...) could fan out far beyond `limit` queries
  // before the inner-loop limit kicks in.
  const FUZZY_FOLLOWUP_CAP = Math.max(limit * 2, 50);
  const cappedCandidates = candidates.slice(0, FUZZY_FOLLOWUP_CAP);
  if (cappedCandidates.length === 0) return [];

  // The lookup SQL and its filter bindings do not depend on the candidate,
  // so build and prepare them once instead of once per candidate.
  let sql = 'SELECT * FROM nodes WHERE name = ?';
  const filterParams: (string | number)[] = [];
  if (kinds && kinds.length > 0) {
    sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
    filterParams.push(...kinds);
  }
  if (languages && languages.length > 0) {
    sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
    filterParams.push(...languages);
  }
  sql += ' LIMIT 5';
  const lookup = this.db.prepare(sql);

  const results: SearchResult[] = [];
  const seen = new Set<string>();
  for (const c of cappedCandidates) {
    if (results.length >= limit) break;
    const rows = lookup.all(c.name, ...filterParams) as NodeRow[];
    for (const row of rows) {
      if (seen.has(row.id)) continue;
      seen.add(row.id);
      // Lower the score for each edit step away from the query so
      // dist-0 names outrank dist-2 typos.
      results.push({ node: rowToNode(row), score: 1 / (1 + c.dist) });
      if (results.length >= limit) break;
    }
  }
  return results;
}
/**
 * Full-text lookup against `nodes_fts` with per-term prefix matching.
 *
 * The raw query is turned into an FTS5 MATCH expression: each surviving
 * term is double-quoted and suffixed with `*` (so "auth" matches
 * "AuthService" and "authenticate"), and the terms are OR-ed together.
 *
 * @param query - Raw user text.
 * @param options - `kinds` / `languages` filters, `limit` (default 100)
 *   and `offset` (default 0).
 * @returns Hits with the absolute BM25 value as score; an empty array when
 *   no usable term remains or the FTS query throws.
 */
private searchNodesFTS(query: string, options: SearchOptions): SearchResult[] {
  const { kinds, languages, limit = 100, offset = 0 } = options;

  // `::` is a qualifier separator in Rust/C++/Ruby, not a token char, so
  // it becomes whitespace *before* the special-character strip. Otherwise
  // `stage_apply::run` collapses to `stage_applyrun` and finds nothing.
  // See #173.
  const sanitized = query.replace(/::/g, ' ').replace(/['"*():^]/g, '');

  const prefixTerms: string[] = [];
  for (const token of sanitized.split(/\s+/)) {
    if (token.length === 0) continue;
    // Drop bare FTS5 boolean operators to prevent query manipulation.
    if (/^(AND|OR|NOT|NEAR)$/i.test(token)) continue;
    prefixTerms.push(`"${token}"*`);
  }
  if (prefixTerms.length === 0) {
    return [];
  }
  const matchExpression = prefixTerms.join(' OR ');

  // Over-fetch (5x the requested limit, at least 100) so post-hoc
  // rescoring (kindBonus, pathRelevance, nameMatchBonus) can promote
  // results that BM25 alone undervalues.
  const fetchCount = Math.max(limit * 5, 100);

  // BM25 column weights: id=0, name=20, qualified_name=5, docstring=1,
  // signature=2. The heavy name weight keeps name matches above incidental
  // mentions in long docstrings or nested qualified names.
  const sqlParts: string[] = [`
    SELECT nodes.*, bm25(nodes_fts, 0, 20, 5, 1, 2) as score
    FROM nodes_fts
    JOIN nodes ON nodes_fts.id = nodes.id
    WHERE nodes_fts MATCH ?
  `];
  const bindings: (string | number)[] = [matchExpression];

  if (kinds && kinds.length > 0) {
    sqlParts.push(` AND nodes.kind IN (${kinds.map(() => '?').join(',')})`);
    bindings.push(...kinds);
  }
  if (languages && languages.length > 0) {
    sqlParts.push(` AND nodes.language IN (${languages.map(() => '?').join(',')})`);
    bindings.push(...languages);
  }
  sqlParts.push(' ORDER BY score LIMIT ? OFFSET ?');
  bindings.push(fetchCount, offset);

  try {
    const fetched = this.db
      .prepare(sqlParts.join(''))
      .all(...bindings) as (NodeRow & { score: number })[];
    const hits: SearchResult[] = [];
    for (const row of fetched) {
      // bm25() yields negative numbers; expose the magnitude.
      hits.push({ node: rowToNode(row), score: Math.abs(row.score) });
    }
    return hits;
  } catch {
    // A failing FTS query is treated as "no matches".
    return [];
  }
}
/**
 * LIKE-based substring search for cases where FTS doesn't match.
 * Useful for camelCase matching (e.g. "signIn" finds "signInWithGoogle").
 *
 * A row qualifies when `name` or `qualified_name` contains the query.
 * Scores: 1.0 exact name, 0.9 name starts with the query, 0.8 name
 * contains it, 0.7 qualified name contains it, 0.5 otherwise. Ties are
 * broken by shorter name first.
 *
 * The query is matched literally: `%`, `_` and `\` are escaped before
 * being embedded in the LIKE patterns, so `get_user` no longer also
 * matches `getXuser`.
 *
 * @param query - Text to look for.
 * @param options - `kinds` / `languages` filters, `limit` (default 100)
 *   and `offset` (default 0).
 * @returns Matching nodes with the score computed by the CASE expression.
 */
private searchNodesLike(query: string, options: SearchOptions): SearchResult[] {
  const { kinds, languages, limit = 100, offset = 0 } = options;

  let sql = `
    SELECT nodes.*,
      CASE
        WHEN name = ? THEN 1.0
        WHEN name LIKE ? ESCAPE '\\' THEN 0.9
        WHEN name LIKE ? ESCAPE '\\' THEN 0.8
        WHEN qualified_name LIKE ? ESCAPE '\\' THEN 0.7
        ELSE 0.5
      END as score
    FROM nodes
    WHERE (
      name LIKE ? ESCAPE '\\' OR
      qualified_name LIKE ? ESCAPE '\\' OR
      name LIKE ? ESCAPE '\\'
    )
  `;

  // Neutralise LIKE metacharacters (and the escape character itself) so
  // user text is never interpreted as a wildcard.
  const escaped = query.replace(/[\\%_]/g, '\\$&');

  // Pattern variants for better matching
  const exactMatch = query; // compared with `=`, so left unescaped
  const startsWith = `${escaped}%`;
  const contains = `%${escaped}%`;

  const params: (string | number)[] = [
    exactMatch, // Exact match score
    startsWith, // Starts with score
    contains, // Contains score
    contains, // Qualified name score
    contains, // WHERE: name contains
    contains, // WHERE: qualified_name contains
    startsWith, // WHERE: name starts with
  ];

  if (kinds && kinds.length > 0) {
    sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
    params.push(...kinds);
  }
  if (languages && languages.length > 0) {
    sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
    params.push(...languages);
  }

  sql += ' ORDER BY score DESC, length(name) ASC LIMIT ? OFFSET ?';
  params.push(limit, offset);

  const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  return rows.map((row) => ({
    node: rowToNode(row),
    score: row.score,
  }));
}
/**
 * Look up nodes whose name equals one of `names`, ignoring case.
 *
 * Works in two passes so that common names (e.g. "run") do not drown out
 * the interesting hits:
 *   1. For each name, collect the files that contain it. Names found in
 *      1-9 files count as "distinctive" and their files are remembered.
 *   2. Fetch the nodes for each name, add a +20 boost to nodes living in
 *      a remembered file, and keep the best `perNameLimit` per name.
 *
 * @param names - Symbol names to look up.
 * @param options - `kinds` / `languages` filters and `limit` (default 50).
 * @returns Matches sorted by score descending, at most `limit` entries.
 *   Base score is 1.0; co-located matches score 21.0.
 */
findNodesByExactName(names: string[], options: SearchOptions = {}): SearchResult[] {
  if (names.length === 0) return [];
  const { kinds, languages, limit = 50 } = options;

  const kindBindings: (string | number)[] = kinds && kinds.length > 0 ? [...kinds] : [];
  const languageBindings: (string | number)[] =
    languages && languages.length > 0 ? [...languages] : [];
  const kindClause =
    kindBindings.length > 0 ? ` AND kind IN (${kindBindings.map(() => '?').join(',')})` : '';
  const languageClause =
    languageBindings.length > 0
      ? ` AND language IN (${languageBindings.map(() => '?').join(',')})`
      : '';

  // Pass 1: which files contain each queried name (kind filter only).
  const fileLookup = this.db.prepare(
    'SELECT DISTINCT file_path FROM nodes WHERE name COLLATE NOCASE = ?' + kindClause + ' LIMIT 100'
  );
  const filesByName = new Map<string, Set<string>>();
  for (const name of names) {
    const found = fileLookup.all(name, ...kindBindings) as { file_path: string }[];
    filesByName.set(name.toLowerCase(), new Set(found.map((entry) => entry.file_path)));
  }

  // A name is distinctive when it occurs in at least one but fewer than
  // 10 files (e.g. "scrapeLoop" = 1 file).
  const distinctiveFiles = new Set<string>();
  filesByName.forEach((files) => {
    if (files.size === 0 || files.size >= 10) return;
    files.forEach((file) => distinctiveFiles.add(file));
  });

  // Pass 2: per-name fetch, ranked by co-location with distinctive files.
  const perNameLimit = Math.max(8, Math.ceil(limit / names.length));
  // Fetch enough to find co-located results among common names.
  const fetchCap = Math.max(perNameLimit * 3, 50);
  const nodeLookup = this.db.prepare(
    `
    SELECT nodes.*, 1.0 as score
    FROM nodes
    WHERE name COLLATE NOCASE = ?
  ` +
      kindClause +
      languageClause +
      ' LIMIT ?'
  );

  const collected: SearchResult[] = [];
  const takenIds = new Set<string>();
  for (const name of names) {
    const fetched = nodeLookup.all(
      name,
      ...kindBindings,
      ...languageBindings,
      fetchCap
    ) as (NodeRow & { score: number })[];

    const ranked: SearchResult[] = [];
    for (const row of fetched) {
      const node = rowToNode(row);
      if (takenIds.has(node.id)) continue;
      const boost = distinctiveFiles.has(node.filePath) ? 20 : 0;
      ranked.push({ node, score: row.score + boost });
    }
    // Co-located hits first, then keep only this name's share.
    ranked.sort((a, b) => b.score - a.score);
    for (const hit of ranked.slice(0, perNameLimit)) {
      takenIds.add(hit.node.id);
      collected.push(hit);
    }
  }

  // Global ordering so co-located results bubble up across names.
  collected.sort((a, b) => b.score - a.score);
  return collected.slice(0, limit);
}
/**
 * Find nodes whose name contains a substring (LIKE-based).
 * Useful for CamelCase-part matching where FTS fails because e.g.
 * "TransportSearchAction" is one FTS token, not matchable by "Search"*.
 *
 * Results are ordered by name length (shorter = more likely to be the
 * core type). The substring is matched literally: `%`, `_` and `\` are
 * escaped before being embedded in the LIKE patterns, so a snake_case
 * fragment such as `get_user` does not also match `getXuser`.
 *
 * @param substring - Text that must occur inside the node name.
 * @param options - `kinds` / `languages` filters, `limit` (default 30),
 *   and `excludePrefix` to drop names that start with the substring.
 * @returns Matching nodes, each with score 1.0.
 */
findNodesByNameSubstring(
  substring: string,
  options: SearchOptions & { excludePrefix?: boolean } = {}
): SearchResult[] {
  const { kinds, languages, limit = 30, excludePrefix } = options;

  // Neutralise LIKE metacharacters (and the escape character itself).
  const escaped = substring.replace(/[\\%_]/g, '\\$&');

  let sql = `
    SELECT nodes.*, 1.0 as score
    FROM nodes
    WHERE name LIKE ? ESCAPE '\\'
  `;
  const params: (string | number)[] = [`%${escaped}%`];

  // Exclude prefix matches (handled by FTS-based prefix search in Step 2b)
  if (excludePrefix) {
    sql += ` AND name NOT LIKE ? ESCAPE '\\'`;
    params.push(`${escaped}%`);
  }
  if (kinds && kinds.length > 0) {
    sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
    params.push(...kinds);
  }
  if (languages && languages.length > 0) {
    sql += ` AND language IN (${languages.map(() => '?').join(',')})`;
    params.push(...languages);
  }

  sql += ' ORDER BY length(name) ASC LIMIT ?';
  params.push(limit);

  const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[];
  return rows.map((row) => ({
    node: rowToNode(row),
    score: row.score,
  }));
}
- // ===========================================================================
- // Edge Operations
- // ===========================================================================
/**
 * Persist a single edge.
 *
 * The statement is prepared on first use and kept on `this.stmts`. It is
 * an `INSERT OR IGNORE`, so a conflicting row is skipped instead of
 * raising. `metadata` is stored as JSON; absent optional fields are
 * stored as NULL.
 *
 * @param edge - Edge to store.
 */
insertEdge(edge: Edge): void {
  const statement =
    this.stmts.insertEdge ??
    (this.stmts.insertEdge = this.db.prepare(`
      INSERT OR IGNORE INTO edges (source, target, kind, metadata, line, col, provenance)
      VALUES (@source, @target, @kind, @metadata, @line, @col, @provenance)
    `));

  const { source, target, kind } = edge;
  const metadata = edge.metadata ? JSON.stringify(edge.metadata) : null;
  const line = edge.line ?? null;
  const col = edge.column ?? null;
  const provenance = edge.provenance ?? null;

  statement.run({ source, target, kind, metadata, line, col, provenance });
}
/**
 * Insert a batch of edges inside one transaction.
 *
 * Edges whose source or target id is not reported by
 * `getExistingNodeIds` are silently dropped; the rest go through
 * `insertEdge`.
 *
 * @param edges - Edges to store; an empty array is a no-op.
 */
insertEdges(edges: Edge[]): void {
  if (edges.length === 0) return;

  const insertAll = this.db.transaction(() => {
    // Resolve every endpoint in a single existence query.
    const endpoints = new Set<string>(edges.flatMap((edge) => [edge.source, edge.target]));
    const knownIds = this.getExistingNodeIds([...endpoints]);

    for (const edge of edges) {
      if (knownIds.has(edge.source) && knownIds.has(edge.target)) {
        this.insertEdge(edge);
      }
    }
  });
  insertAll();
}
/**
 * Remove every edge whose `source` column equals `sourceId`.
 * The DELETE statement is prepared once and reused.
 *
 * @param sourceId - Id of the node whose outgoing edges are removed.
 */
deleteEdgesBySource(sourceId: string): void {
  const statement =
    this.stmts.deleteEdgesBySource ??
    (this.stmts.deleteEdgesBySource = this.db.prepare('DELETE FROM edges WHERE source = ?'));
  statement.run(sourceId);
}
/**
 * Fetch the edges that start at `sourceId`.
 *
 * With no kind list and no provenance, a cached prepared statement is
 * used. Otherwise the query is assembled per call with the extra filters.
 *
 * @param sourceId - Id of the source node.
 * @param kinds - Optional edge kinds to keep; empty or omitted means all.
 * @param provenance - Optional provenance value to match exactly.
 * @returns The matching edges.
 */
getOutgoingEdges(sourceId: string, kinds?: EdgeKind[], provenance?: string): Edge[] {
  const kindList = kinds ?? [];

  // Fast path: unfiltered lookup through the cached statement.
  if (kindList.length === 0 && !provenance) {
    const cached =
      this.stmts.getEdgesBySource ??
      (this.stmts.getEdgesBySource = this.db.prepare('SELECT * FROM edges WHERE source = ?'));
    const unfiltered = cached.all(sourceId) as EdgeRow[];
    return unfiltered.map(rowToEdge);
  }

  const sqlParts: string[] = ['SELECT * FROM edges WHERE source = ?'];
  const bindings: (string | number)[] = [sourceId];
  if (kindList.length > 0) {
    sqlParts.push(` AND kind IN (${kindList.map(() => '?').join(',')})`);
    bindings.push(...kindList);
  }
  if (provenance) {
    sqlParts.push(' AND provenance = ?');
    bindings.push(provenance);
  }

  const filtered = this.db.prepare(sqlParts.join('')).all(...bindings) as EdgeRow[];
  return filtered.map(rowToEdge);
}
/**
 * Fetch the edges that end at `targetId`.
 *
 * @param targetId - Id of the target node.
 * @param kinds - Optional edge kinds to keep; empty or omitted means all.
 * @returns The matching edges.
 */
getIncomingEdges(targetId: string, kinds?: EdgeKind[]): Edge[] {
  const kindList = kinds ?? [];

  // Unfiltered lookups reuse a cached prepared statement.
  if (kindList.length === 0) {
    const cached =
      this.stmts.getEdgesByTarget ??
      (this.stmts.getEdgesByTarget = this.db.prepare('SELECT * FROM edges WHERE target = ?'));
    const unfiltered = cached.all(targetId) as EdgeRow[];
    return unfiltered.map(rowToEdge);
  }

  const placeholders = kindList.map(() => '?').join(',');
  const filtered = this.db
    .prepare(`SELECT * FROM edges WHERE target = ? AND kind IN (${placeholders})`)
    .all(targetId, ...kindList) as EdgeRow[];
  return filtered.map(rowToEdge);
}
/**
 * Return the edges whose source AND target both belong to `nodeIds`.
 * Useful for recovering inter-node connectivity after BFS.
 *
 * The id list is passed as one JSON array and expanded in SQL with
 * `json_each`, so the number of bound parameters does not grow with the
 * number of ids.
 *
 * @param nodeIds - Node ids forming the set; empty yields `[]`.
 * @param kinds - Optional edge kinds to keep; empty or omitted means all.
 * @returns Edges internal to the node set.
 */
findEdgesBetweenNodes(nodeIds: string[], kinds?: EdgeKind[]): Edge[] {
  if (nodeIds.length === 0) return [];

  const encodedIds = JSON.stringify(nodeIds);
  const kindList = kinds ?? [];

  const sqlParts: string[] = [
    `SELECT * FROM edges WHERE source IN (SELECT value FROM json_each(?)) AND target IN (SELECT value FROM json_each(?))`,
  ];
  // The same JSON payload feeds both IN sub-selects.
  const bindings: string[] = [encodedIds, encodedIds];
  if (kindList.length > 0) {
    sqlParts.push(` AND kind IN (${kindList.map(() => '?').join(',')})`);
    bindings.push(...kindList);
  }

  const found = this.db.prepare(sqlParts.join('')).all(...bindings) as EdgeRow[];
  return found.map(rowToEdge);
}
- // ===========================================================================
- // File Operations
- // ===========================================================================
/**
 * Create the `files` row for `file.path`, or overwrite every other
 * column when a row with that path already exists.
 *
 * `errors` is stored as JSON, or NULL when the record has none.
 *
 * @param file - File record to persist.
 */
upsertFile(file: FileRecord): void {
  const statement =
    this.stmts.upsertFile ??
    (this.stmts.upsertFile = this.db.prepare(`
      INSERT INTO files (path, content_hash, language, size, modified_at, indexed_at, node_count, errors)
      VALUES (@path, @contentHash, @language, @size, @modifiedAt, @indexedAt, @nodeCount, @errors)
      ON CONFLICT(path) DO UPDATE SET
        content_hash = @contentHash,
        language = @language,
        size = @size,
        modified_at = @modifiedAt,
        indexed_at = @indexedAt,
        node_count = @nodeCount,
        errors = @errors
    `));

  const { path, contentHash, language, size, modifiedAt, indexedAt, nodeCount } = file;
  const errors = file.errors ? JSON.stringify(file.errors) : null;

  statement.run({ path, contentHash, language, size, modifiedAt, indexedAt, nodeCount, errors });
}
/**
 * Remove a file and the nodes recorded for it, atomically.
 *
 * Inside one transaction: first `deleteNodesByFile`, then the `files`
 * row itself.
 *
 * @param filePath - Path key of the file record.
 */
deleteFile(filePath: string): void {
  const removeFileAndNodes = this.db.transaction(() => {
    this.deleteNodesByFile(filePath);
    const statement =
      this.stmts.deleteFile ??
      (this.stmts.deleteFile = this.db.prepare('DELETE FROM files WHERE path = ?'));
    statement.run(filePath);
  });
  removeFileAndNodes();
}
/**
 * Look up the file record stored under `filePath`.
 *
 * @param filePath - Path key of the file record.
 * @returns The record, or `null` when no row has that path.
 */
getFileByPath(filePath: string): FileRecord | null {
  const statement =
    this.stmts.getFileByPath ??
    (this.stmts.getFileByPath = this.db.prepare('SELECT * FROM files WHERE path = ?'));
  const found = statement.get(filePath) as FileRow | undefined;
  if (!found) {
    return null;
  }
  return rowToFileRecord(found);
}
/**
 * Load every tracked file record, ordered by path.
 *
 * @returns All rows of `files` converted to FileRecord objects.
 */
getAllFiles(): FileRecord[] {
  const statement =
    this.stmts.getAllFiles ??
    (this.stmts.getAllFiles = this.db.prepare('SELECT * FROM files ORDER BY path'));
  const stored = statement.all() as FileRow[];
  return stored.map(rowToFileRecord);
}
/**
 * Select the tracked files whose stored hash differs from the current one.
 *
 * A file is reported only when `currentHashes` has a truthy entry for
 * its path; files missing from the map are never considered stale here.
 *
 * @param currentHashes - Map from file path to freshly computed content hash.
 * @returns Tracked file records that need re-indexing.
 */
getStaleFiles(currentHashes: Map<string, string>): FileRecord[] {
  const stale: FileRecord[] = [];
  for (const record of this.getAllFiles()) {
    const latestHash = currentHashes.get(record.path);
    if (latestHash && latestHash !== record.contentHash) {
      stale.push(record);
    }
  }
  return stale;
}
- // ===========================================================================
- // Unresolved References
- // ===========================================================================
/**
 * Store one unresolved reference.
 *
 * `candidates` is stored as JSON (NULL when absent). A missing
 * `filePath` is stored as the empty string and a missing `language` as
 * `'unknown'`.
 *
 * @param ref - Reference to persist.
 */
insertUnresolvedRef(ref: UnresolvedReference): void {
  const statement =
    this.stmts.insertUnresolved ??
    (this.stmts.insertUnresolved = this.db.prepare(`
      INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language)
      VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates, @filePath, @language)
    `));

  const { fromNodeId, referenceName, referenceKind, line } = ref;
  const candidates = ref.candidates ? JSON.stringify(ref.candidates) : null;

  statement.run({
    fromNodeId,
    referenceName,
    referenceKind,
    line,
    col: ref.column,
    candidates,
    filePath: ref.filePath ?? '',
    language: ref.language ?? 'unknown',
  });
}
/**
 * Store many unresolved references inside a single transaction.
 *
 * @param refs - References to persist; an empty array is a no-op.
 */
insertUnresolvedRefsBatch(refs: UnresolvedReference[]): void {
  if (refs.length === 0) return;

  this.db.transaction(() => {
    refs.forEach((ref) => {
      this.insertUnresolvedRef(ref);
    });
  })();
}
/**
 * Remove every unresolved reference that originates from `nodeId`.
 *
 * @param nodeId - Value matched against `from_node_id`.
 */
deleteUnresolvedByNode(nodeId: string): void {
  const statement =
    this.stmts.deleteUnresolvedByNode ??
    (this.stmts.deleteUnresolvedByNode = this.db.prepare(
      'DELETE FROM unresolved_refs WHERE from_node_id = ?'
    ));
  statement.run(nodeId);
}
/**
 * Fetch the unresolved references whose `reference_name` equals `name`
 * (used during resolution).
 *
 * @param name - Referenced symbol name to match exactly.
 * @returns The matching references; `candidates` is parsed from its JSON
 *   column, or `undefined` when the column is empty.
 */
getUnresolvedByName(name: string): UnresolvedReference[] {
  const statement =
    this.stmts.getUnresolvedByName ??
    (this.stmts.getUnresolvedByName = this.db.prepare(
      'SELECT * FROM unresolved_refs WHERE reference_name = ?'
    ));
  const stored = statement.all(name) as UnresolvedRefRow[];

  const refs: UnresolvedReference[] = [];
  for (const record of stored) {
    refs.push({
      fromNodeId: record.from_node_id,
      referenceName: record.reference_name,
      referenceKind: record.reference_kind as EdgeKind,
      line: record.line,
      column: record.col,
      candidates: record.candidates ? safeJsonParse(record.candidates, undefined) : undefined,
      filePath: record.file_path,
      language: record.language as Language,
    });
  }
  return refs;
}
/**
 * Load every row of `unresolved_refs` into memory.
 *
 * @returns All unresolved references; `candidates` is parsed from its
 *   JSON column, or `undefined` when the column is empty.
 */
getUnresolvedReferences(): UnresolvedReference[] {
  const stored = this.db.prepare('SELECT * FROM unresolved_refs').all() as UnresolvedRefRow[];

  const toReference = (record: UnresolvedRefRow): UnresolvedReference => {
    const candidates = record.candidates
      ? safeJsonParse(record.candidates, undefined)
      : undefined;
    return {
      fromNodeId: record.from_node_id,
      referenceName: record.reference_name,
      referenceKind: record.reference_kind as EdgeKind,
      line: record.line,
      column: record.col,
      candidates,
      filePath: record.file_path,
      language: record.language as Language,
    };
  };

  return stored.map((record) => toReference(record));
}
/**
 * Count the rows of `unresolved_refs` without materialising them.
 *
 * @returns Number of stored unresolved references.
 */
getUnresolvedReferencesCount(): number {
  const statement =
    this.stmts.getUnresolvedCount ??
    (this.stmts.getUnresolvedCount = this.db.prepare(
      'SELECT COUNT(*) as count FROM unresolved_refs'
    ));
  const { count } = statement.get() as { count: number };
  return count;
}
/**
 * Fetch one page of unresolved references via LIMIT/OFFSET, so callers
 * can process the table in bounded-memory chunks.
 *
 * NOTE(review): the query has no ORDER BY, so page contents rely on the
 * engine returning rows in a stable order between calls — confirm that
 * callers do not modify the table while paging.
 *
 * @param offset - Number of rows to skip.
 * @param limit - Maximum number of rows to return.
 * @returns The references in the requested window.
 */
getUnresolvedReferencesBatch(offset: number, limit: number): UnresolvedReference[] {
  const statement =
    this.stmts.getUnresolvedBatch ??
    (this.stmts.getUnresolvedBatch = this.db.prepare(
      'SELECT * FROM unresolved_refs LIMIT ? OFFSET ?'
    ));
  // Bind order follows the SQL text: LIMIT first, then OFFSET.
  const page = statement.all(limit, offset) as UnresolvedRefRow[];

  const refs: UnresolvedReference[] = [];
  page.forEach((record) => {
    refs.push({
      fromNodeId: record.from_node_id,
      referenceName: record.reference_name,
      referenceKind: record.reference_kind as EdgeKind,
      line: record.line,
      column: record.col,
      candidates: record.candidates ? safeJsonParse(record.candidates, undefined) : undefined,
      filePath: record.file_path,
      language: record.language as Language,
    });
  });
  return refs;
}
/**
 * List the path of every tracked file, ordered by path. Lightweight:
 * only the `path` column is read, no full FileRecord objects are built.
 *
 * @returns All tracked file paths.
 */
getAllFilePaths(): string[] {
  const statement =
    this.stmts.getAllFilePaths ??
    (this.stmts.getAllFilePaths = this.db.prepare('SELECT path FROM files ORDER BY path'));
  const stored = statement.all() as Array<{ path: string }>;

  const paths: string[] = [];
  for (const { path } of stored) {
    paths.push(path);
  }
  return paths;
}
/**
 * List the distinct node names (plain strings, for pre-filtering).
 *
 * Only the prepared statement is cached; the query itself runs on every
 * call.
 *
 * @returns Each distinct value of `nodes.name`.
 */
getAllNodeNames(): string[] {
  const statement =
    this.stmts.getAllNodeNames ??
    (this.stmts.getAllNodeNames = this.db.prepare('SELECT DISTINCT name FROM nodes'));
  const stored = statement.all() as Array<{ name: string }>;

  const names: string[] = [];
  for (const { name } of stored) {
    names.push(name);
  }
  return names;
}
/**
 * Get unresolved references scoped to specific file paths.
 *
 * The path list is passed as a single JSON array and expanded in SQL
 * with `json_each` (the same technique `findEdgesBetweenNodes` uses), so
 * the number of bound parameters stays at one regardless of how many
 * paths are given. Binding one `?` per path could exceed SQLite's
 * bound-variable limit on large inputs.
 *
 * @param filePaths - File paths to match against `file_path`; empty yields `[]`.
 * @returns The references recorded for those files; `candidates` is parsed
 *   from its JSON column, or `undefined` when the column is empty.
 */
getUnresolvedReferencesByFiles(filePaths: string[]): UnresolvedReference[] {
  if (filePaths.length === 0) return [];

  const rows = this.db
    .prepare(
      'SELECT * FROM unresolved_refs WHERE file_path IN (SELECT value FROM json_each(?))'
    )
    .all(JSON.stringify(filePaths)) as UnresolvedRefRow[];

  return rows.map((row) => ({
    fromNodeId: row.from_node_id,
    referenceName: row.reference_name,
    referenceKind: row.reference_kind as EdgeKind,
    line: row.line,
    column: row.col,
    candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
    filePath: row.file_path,
    language: row.language as Language,
  }));
}
/**
 * Empty the `unresolved_refs` table (used once resolution has finished).
 */
clearUnresolvedReferences(): void {
  const wipeSql = 'DELETE FROM unresolved_refs';
  this.db.exec(wipeSql);
}
/**
 * Delete every unresolved reference whose `from_node_id` is in
 * `fromNodeIds`.
 *
 * The id list is passed as a single JSON array and expanded in SQL with
 * `json_each` (the same technique `findEdgesBetweenNodes` uses), so the
 * statement binds one parameter no matter how many ids are supplied.
 * Binding one `?` per id could exceed SQLite's bound-variable limit on
 * large inputs.
 *
 * @param fromNodeIds - Originating node ids; an empty array is a no-op.
 */
deleteResolvedReferences(fromNodeIds: string[]): void {
  if (fromNodeIds.length === 0) return;

  this.db
    .prepare(
      'DELETE FROM unresolved_refs WHERE from_node_id IN (SELECT value FROM json_each(?))'
    )
    .run(JSON.stringify(fromNodeIds));
}
/**
 * Delete unresolved references identified by exact
 * (fromNodeId, referenceName, referenceKind) tuples.
 *
 * More precise than deleting by node id alone: only the listed
 * references are removed. All deletes run in one transaction.
 *
 * @param refs - Tuples to delete; an empty array is a no-op.
 */
deleteSpecificResolvedReferences(refs: Array<{ fromNodeId: string; referenceName: string; referenceKind: string }>): void {
  if (refs.length === 0) return;

  const removeOne = this.db.prepare(
    'DELETE FROM unresolved_refs WHERE from_node_id = ? AND reference_name = ? AND reference_kind = ?'
  );
  const removeAll = this.db.transaction((items: typeof refs) => {
    items.forEach(({ fromNodeId, referenceName, referenceKind }) => {
      removeOne.run(fromNodeId, referenceName, referenceKind);
    });
  });
  removeAll(refs);
}
- // ===========================================================================
- // Statistics
- // ===========================================================================
/**
 * Lightweight snapshot of the total node and edge counts, read in a
 * single query.
 *
 * Meant to be taken before and after an index/sync run so the true
 * number of additions (extraction + resolution + synthesis) can be
 * computed from the difference.
 *
 * @returns Current row counts of `nodes` and `edges`.
 */
getNodeAndEdgeCount(): { nodes: number; edges: number } {
  const countSql =
    'SELECT (SELECT COUNT(*) FROM nodes) AS nodes, (SELECT COUNT(*) FROM edges) AS edges';
  const snapshot = this.db.prepare(countSql).get() as { nodes: number; edges: number };
  return snapshot;
}
/**
 * Collect graph-wide statistics.
 *
 * Runs one query for the three table totals, then one GROUP BY query
 * each for nodes per kind, edges per kind and files per language.
 * `dbSizeBytes` is always 0 here (the caller fills it in using
 * DatabaseConnection.getSize()) and `lastUpdated` is the time of this
 * call.
 *
 * @returns The assembled statistics object.
 */
getStats(): GraphStats {
  // All three aggregate counts in a single round trip.
  const totals = this.db.prepare(`
    SELECT
      (SELECT COUNT(*) FROM nodes) AS node_count,
      (SELECT COUNT(*) FROM edges) AS edge_count,
      (SELECT COUNT(*) FROM files) AS file_count
  `).get() as { node_count: number; edge_count: number; file_count: number };

  const nodesByKind = {} as Record<NodeKind, number>;
  (
    this.db
      .prepare('SELECT kind, COUNT(*) as count FROM nodes GROUP BY kind')
      .all() as Array<{ kind: string; count: number }>
  ).forEach(({ kind, count }) => {
    nodesByKind[kind as NodeKind] = count;
  });

  const edgesByKind = {} as Record<EdgeKind, number>;
  (
    this.db
      .prepare('SELECT kind, COUNT(*) as count FROM edges GROUP BY kind')
      .all() as Array<{ kind: string; count: number }>
  ).forEach(({ kind, count }) => {
    edgesByKind[kind as EdgeKind] = count;
  });

  const filesByLanguage = {} as Record<Language, number>;
  (
    this.db
      .prepare('SELECT language, COUNT(*) as count FROM files GROUP BY language')
      .all() as Array<{ language: string; count: number }>
  ).forEach(({ language, count }) => {
    filesByLanguage[language as Language] = count;
  });

  return {
    nodeCount: totals.node_count,
    edgeCount: totals.edge_count,
    fileCount: totals.file_count,
    nodesByKind,
    edgesByKind,
    filesByLanguage,
    dbSizeBytes: 0, // Set by caller using DatabaseConnection.getSize()
    lastUpdated: Date.now(),
  };
}
- // ===========================================================================
- // Project Metadata
- // ===========================================================================
/**
 * Read one value from `project_metadata`.
 *
 * @param key - Metadata key.
 * @returns The stored value, or `null` when the key is absent or its
 *   value is null.
 */
getMetadata(key: string): string | null {
  const found = this.db
    .prepare('SELECT value FROM project_metadata WHERE key = ?')
    .get(key) as { value: string } | undefined;
  if (!found) {
    return null;
  }
  return found.value ?? null;
}
/**
 * Insert a metadata entry, or replace the value of an existing key.
 * `updated_at` is set to the current time in either case.
 *
 * @param key - Metadata key.
 * @param value - Value to store.
 */
setMetadata(key: string, value: string): void {
  const upsert = this.db.prepare(
    'INSERT INTO project_metadata (key, value, updated_at) VALUES (?, ?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at'
  );
  upsert.run(key, value, Date.now());
}
/**
 * Load the whole `project_metadata` table as a plain object.
 *
 * @returns A record mapping each stored key to its value.
 */
getAllMetadata(): Record<string, string> {
  const stored = this.db
    .prepare('SELECT key, value FROM project_metadata')
    .all() as { key: string; value: string }[];

  const metadata: Record<string, string> = {};
  stored.forEach(({ key, value }) => {
    metadata[key] = value;
  });
  return metadata;
}
/**
 * Wipe the stored graph.
 *
 * Clears the in-memory node cache first, then, in one transaction,
 * deletes every row of `unresolved_refs`, `edges`, `nodes` and `files`
 * (in that order).
 */
clear(): void {
  this.nodeCache.clear();

  const wipeStatements = [
    'DELETE FROM unresolved_refs',
    'DELETE FROM edges',
    'DELETE FROM nodes',
    'DELETE FROM files',
  ];
  const wipeAll = this.db.transaction(() => {
    for (const statement of wipeStatements) {
      this.db.exec(statement);
    }
  });
  wipeAll();
}
- }
|