/** * Database Queries * * Prepared statements for CRUD operations on the knowledge graph. */ import Database from 'better-sqlite3'; import { Node, Edge, FileRecord, UnresolvedReference, NodeKind, EdgeKind, Language, GraphStats, SearchOptions, SearchResult, } from '../types'; import { safeJsonParse } from '../utils'; /** * Database row types (snake_case from SQLite) */ interface NodeRow { id: string; kind: string; name: string; qualified_name: string; file_path: string; language: string; start_line: number; end_line: number; start_column: number; end_column: number; docstring: string | null; signature: string | null; visibility: string | null; is_exported: number; is_async: number; is_static: number; is_abstract: number; decorators: string | null; type_parameters: string | null; updated_at: number; } interface EdgeRow { id: number; source: string; target: string; kind: string; metadata: string | null; line: number | null; col: number | null; } interface FileRow { path: string; content_hash: string; language: string; size: number; modified_at: number; indexed_at: number; node_count: number; errors: string | null; } interface UnresolvedRefRow { id: number; from_node_id: string; reference_name: string; reference_kind: string; line: number; col: number; candidates: string | null; } /** * Convert database row to Node object */ function rowToNode(row: NodeRow): Node { return { id: row.id, kind: row.kind as NodeKind, name: row.name, qualifiedName: row.qualified_name, filePath: row.file_path, language: row.language as Language, startLine: row.start_line, endLine: row.end_line, startColumn: row.start_column, endColumn: row.end_column, docstring: row.docstring ?? undefined, signature: row.signature ?? undefined, visibility: row.visibility as Node['visibility'], isExported: row.is_exported === 1, isAsync: row.is_async === 1, isStatic: row.is_static === 1, isAbstract: row.is_abstract === 1, decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined, typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined, updatedAt: row.updated_at, }; } /** * Convert database row to Edge object */ function rowToEdge(row: EdgeRow): Edge { return { source: row.source, target: row.target, kind: row.kind as EdgeKind, metadata: row.metadata ? safeJsonParse(row.metadata, undefined) : undefined, line: row.line ?? undefined, column: row.col ?? undefined, }; } /** * Convert database row to FileRecord object */ function rowToFileRecord(row: FileRow): FileRecord { return { path: row.path, contentHash: row.content_hash, language: row.language as Language, size: row.size, modifiedAt: row.modified_at, indexedAt: row.indexed_at, nodeCount: row.node_count, errors: row.errors ? safeJsonParse(row.errors, undefined) : undefined, }; } /** * Query builder for the knowledge graph database */ export class QueryBuilder { private db: Database.Database; // Node cache for frequently accessed nodes (LRU-style, max 1000 entries) private nodeCache: Map = new Map(); private readonly maxCacheSize = 1000; // Prepared statements (lazily initialized) private stmts: { insertNode?: Database.Statement; updateNode?: Database.Statement; deleteNode?: Database.Statement; deleteNodesByFile?: Database.Statement; getNodeById?: Database.Statement; getNodesByFile?: Database.Statement; getNodesByKind?: Database.Statement; insertEdge?: Database.Statement; upsertFile?: Database.Statement; deleteEdgesBySource?: Database.Statement; deleteEdgesByTarget?: Database.Statement; getEdgesBySource?: Database.Statement; getEdgesByTarget?: Database.Statement; insertFile?: Database.Statement; updateFile?: Database.Statement; deleteFile?: Database.Statement; getFileByPath?: Database.Statement; getAllFiles?: Database.Statement; insertUnresolved?: Database.Statement; deleteUnresolvedByNode?: Database.Statement; getUnresolvedByName?: Database.Statement; } = {}; constructor(db: Database.Database) { this.db = db; } // =========================================================================== // Node Operations // =========================================================================== /** * Insert a new node */ insertNode(node: Node): void { if (!this.stmts.insertNode) { this.stmts.insertNode = this.db.prepare(` INSERT INTO nodes ( id, kind, name, qualified_name, file_path, language, start_line, end_line, start_column, end_column, docstring, signature, visibility, is_exported, is_async, is_static, is_abstract, decorators, type_parameters, updated_at ) VALUES ( @id, @kind, @name, @qualifiedName, @filePath, @language, @startLine, @endLine, @startColumn, @endColumn, @docstring, @signature, @visibility, @isExported, @isAsync, @isStatic, @isAbstract, @decorators, @typeParameters, @updatedAt ) `); } // Validate required fields to prevent SQLite bind errors if (!node.id || !node.kind || !node.name || !node.filePath || !node.language) { console.error('[CodeGraph] Skipping node with missing required fields:', { id: node.id, kind: node.kind, name: node.name, filePath: node.filePath, language: node.language, }); return; } try { this.stmts.insertNode.run({ id: node.id, kind: node.kind, name: node.name, qualifiedName: node.qualifiedName ?? node.name, filePath: node.filePath, language: node.language, startLine: node.startLine ?? 0, endLine: node.endLine ?? 0, startColumn: node.startColumn ?? 0, endColumn: node.endColumn ?? 0, docstring: node.docstring ?? null, signature: node.signature ?? null, visibility: node.visibility ?? null, isExported: node.isExported ? 1 : 0, isAsync: node.isAsync ? 1 : 0, isStatic: node.isStatic ? 1 : 0, isAbstract: node.isAbstract ? 1 : 0, decorators: node.decorators ? JSON.stringify(node.decorators) : null, typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null, updatedAt: node.updatedAt ?? Date.now(), }); } catch (error) { const { captureException } = require('../sentry'); captureException(error, { operation: 'insertNode', nodeId: node.id, nodeKind: node.kind, nodeName: node.name, filePath: node.filePath, language: node.language, startLine: node.startLine, }); throw error; } } /** * Insert multiple nodes in a transaction */ insertNodes(nodes: Node[]): void { this.db.transaction(() => { for (const node of nodes) { this.insertNode(node); } })(); } /** * Update an existing node */ updateNode(node: Node): void { if (!this.stmts.updateNode) { this.stmts.updateNode = this.db.prepare(` UPDATE nodes SET kind = @kind, name = @name, qualified_name = @qualifiedName, file_path = @filePath, language = @language, start_line = @startLine, end_line = @endLine, start_column = @startColumn, end_column = @endColumn, docstring = @docstring, signature = @signature, visibility = @visibility, is_exported = @isExported, is_async = @isAsync, is_static = @isStatic, is_abstract = @isAbstract, decorators = @decorators, type_parameters = @typeParameters, updated_at = @updatedAt WHERE id = @id `); } // Invalidate cache before update this.nodeCache.delete(node.id); // Validate required fields if (!node.id || !node.kind || !node.name || !node.filePath || !node.language) { console.error('[CodeGraph] Skipping node update with missing required fields:', node.id); return; } this.stmts.updateNode.run({ id: node.id, kind: node.kind, name: node.name, qualifiedName: node.qualifiedName ?? node.name, filePath: node.filePath, language: node.language, startLine: node.startLine ?? 0, endLine: node.endLine ?? 0, startColumn: node.startColumn ?? 0, endColumn: node.endColumn ?? 0, docstring: node.docstring ?? null, signature: node.signature ?? null, visibility: node.visibility ?? null, isExported: node.isExported ? 1 : 0, isAsync: node.isAsync ? 1 : 0, isStatic: node.isStatic ? 1 : 0, isAbstract: node.isAbstract ? 1 : 0, decorators: node.decorators ? JSON.stringify(node.decorators) : null, typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null, updatedAt: node.updatedAt ?? Date.now(), }); } /** * Delete a node by ID */ deleteNode(id: string): void { if (!this.stmts.deleteNode) { this.stmts.deleteNode = this.db.prepare('DELETE FROM nodes WHERE id = ?'); } // Invalidate cache this.nodeCache.delete(id); this.stmts.deleteNode.run(id); } /** * Delete all nodes for a file */ deleteNodesByFile(filePath: string): void { if (!this.stmts.deleteNodesByFile) { this.stmts.deleteNodesByFile = this.db.prepare('DELETE FROM nodes WHERE file_path = ?'); } // Invalidate cache for nodes in this file for (const [id, node] of this.nodeCache) { if (node.filePath === filePath) { this.nodeCache.delete(id); } } this.stmts.deleteNodesByFile.run(filePath); } /** * Get a node by ID */ getNodeById(id: string): Node | null { // Check cache first if (this.nodeCache.has(id)) { const cached = this.nodeCache.get(id)!; // Move to end to implement LRU (delete and re-add) this.nodeCache.delete(id); this.nodeCache.set(id, cached); return cached; } if (!this.stmts.getNodeById) { this.stmts.getNodeById = this.db.prepare('SELECT * FROM nodes WHERE id = ?'); } const row = this.stmts.getNodeById.get(id) as NodeRow | undefined; if (!row) { return null; } const node = rowToNode(row); this.cacheNode(node); return node; } /** * Add a node to the cache, evicting oldest if needed */ private cacheNode(node: Node): void { if (this.nodeCache.size >= this.maxCacheSize) { // Evict oldest (first) entry const firstKey = this.nodeCache.keys().next().value; if (firstKey) { this.nodeCache.delete(firstKey); } } this.nodeCache.set(node.id, node); } /** * Clear the node cache */ clearCache(): void { this.nodeCache.clear(); } /** * Get all nodes in a file */ getNodesByFile(filePath: string): Node[] { if (!this.stmts.getNodesByFile) { this.stmts.getNodesByFile = this.db.prepare( 'SELECT * FROM nodes WHERE file_path = ? ORDER BY start_line' ); } const rows = this.stmts.getNodesByFile.all(filePath) as NodeRow[]; return rows.map(rowToNode); } /** * Get all nodes of a specific kind */ getNodesByKind(kind: NodeKind): Node[] { if (!this.stmts.getNodesByKind) { this.stmts.getNodesByKind = this.db.prepare('SELECT * FROM nodes WHERE kind = ?'); } const rows = this.stmts.getNodesByKind.all(kind) as NodeRow[]; return rows.map(rowToNode); } /** * Search nodes by name using FTS with fallback to LIKE for better matching * * Search strategy: * 1. Try FTS5 prefix match (query*) for word-start matching * 2. If no results, try LIKE for substring matching (e.g., "signIn" finds "signInWithGoogle") * 3. Score results based on match quality */ searchNodes(query: string, options: SearchOptions = {}): SearchResult[] { const { kinds, languages, limit = 100, offset = 0 } = options; // First try FTS5 with prefix matching let results = this.searchNodesFTS(query, { kinds, languages, limit, offset }); // If no FTS results, try LIKE-based substring search if (results.length === 0 && query.length >= 2) { results = this.searchNodesLike(query, { kinds, languages, limit, offset }); } return results; } /** * FTS5 search with prefix matching */ private searchNodesFTS(query: string, options: SearchOptions): SearchResult[] { const { kinds, languages, limit = 100, offset = 0 } = options; // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate") // Escape special FTS5 characters and add prefix wildcard const ftsQuery = query .replace(/['"*()]/g, '') // Remove special chars .split(/\s+/) .filter(term => term.length > 0) .map(term => `"${term}"*`) // Prefix match each term .join(' OR '); if (!ftsQuery) { return []; } let sql = ` SELECT nodes.*, bm25(nodes_fts) as score FROM nodes_fts JOIN nodes ON nodes_fts.id = nodes.id WHERE nodes_fts MATCH ? `; const params: (string | number)[] = [ftsQuery]; if (kinds && kinds.length > 0) { sql += ` AND nodes.kind IN (${kinds.map(() => '?').join(',')})`; params.push(...kinds); } if (languages && languages.length > 0) { sql += ` AND nodes.language IN (${languages.map(() => '?').join(',')})`; params.push(...languages); } sql += ' ORDER BY score LIMIT ? OFFSET ?'; params.push(limit, offset); try { const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[]; return rows.map((row) => ({ node: rowToNode(row), score: Math.abs(row.score), // bm25 returns negative scores })); } catch { // FTS query failed, return empty return []; } } /** * LIKE-based substring search for cases where FTS doesn't match * Useful for camelCase matching (e.g., "signIn" finds "signInWithGoogle") */ private searchNodesLike(query: string, options: SearchOptions): SearchResult[] { const { kinds, languages, limit = 100, offset = 0 } = options; let sql = ` SELECT nodes.*, CASE WHEN name = ? THEN 1.0 WHEN name LIKE ? THEN 0.9 WHEN name LIKE ? THEN 0.8 WHEN qualified_name LIKE ? THEN 0.7 ELSE 0.5 END as score FROM nodes WHERE ( name LIKE ? OR qualified_name LIKE ? OR name LIKE ? ) `; // Pattern variants for better matching const exactMatch = query; const startsWith = `${query}%`; const contains = `%${query}%`; const params: (string | number)[] = [ exactMatch, // Exact match score startsWith, // Starts with score contains, // Contains score contains, // Qualified name score contains, // WHERE: name contains contains, // WHERE: qualified_name contains startsWith, // WHERE: name starts with ]; if (kinds && kinds.length > 0) { sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`; params.push(...kinds); } if (languages && languages.length > 0) { sql += ` AND language IN (${languages.map(() => '?').join(',')})`; params.push(...languages); } sql += ' ORDER BY score DESC, length(name) ASC LIMIT ? OFFSET ?'; params.push(limit, offset); const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[]; return rows.map((row) => ({ node: rowToNode(row), score: row.score, })); } /** * Find nodes by exact name match * * Used for hybrid search - looks up symbols by exact name or case-insensitive match. * Returns high-confidence matches for known symbol names extracted from query. * * @param names - Array of symbol names to look up * @param options - Search options (kinds, languages, limit) * @returns SearchResult array with exact matches scored at 1.0 */ findNodesByExactName(names: string[], options: SearchOptions = {}): SearchResult[] { if (names.length === 0) return []; const { kinds, languages, limit = 50 } = options; // Build query with exact matches (case-insensitive) let sql = ` SELECT nodes.*, CASE WHEN name COLLATE NOCASE IN (${names.map(() => '?').join(',')}) THEN 1.0 ELSE 0.9 END as score FROM nodes WHERE name COLLATE NOCASE IN (${names.map(() => '?').join(',')}) `; // Duplicate names for both SELECT and WHERE clauses const params: (string | number)[] = [...names, ...names]; if (kinds && kinds.length > 0) { sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`; params.push(...kinds); } if (languages && languages.length > 0) { sql += ` AND language IN (${languages.map(() => '?').join(',')})`; params.push(...languages); } sql += ' ORDER BY score DESC, length(name) ASC LIMIT ?'; params.push(limit); const rows = this.db.prepare(sql).all(...params) as (NodeRow & { score: number })[]; return rows.map((row) => ({ node: rowToNode(row), score: row.score, })); } // =========================================================================== // Edge Operations // =========================================================================== /** * Insert a new edge */ insertEdge(edge: Edge): void { if (!this.stmts.insertEdge) { this.stmts.insertEdge = this.db.prepare(` INSERT INTO edges (source, target, kind, metadata, line, col) VALUES (@source, @target, @kind, @metadata, @line, @col) `); } this.stmts.insertEdge.run({ source: edge.source, target: edge.target, kind: edge.kind, metadata: edge.metadata ? JSON.stringify(edge.metadata) : null, line: edge.line ?? null, col: edge.column ?? null, }); } /** * Insert multiple edges in a transaction */ insertEdges(edges: Edge[]): void { this.db.transaction(() => { for (const edge of edges) { this.insertEdge(edge); } })(); } /** * Delete all edges from a source node */ deleteEdgesBySource(sourceId: string): void { if (!this.stmts.deleteEdgesBySource) { this.stmts.deleteEdgesBySource = this.db.prepare('DELETE FROM edges WHERE source = ?'); } this.stmts.deleteEdgesBySource.run(sourceId); } /** * Get outgoing edges from a node */ getOutgoingEdges(sourceId: string, kinds?: EdgeKind[]): Edge[] { if (kinds && kinds.length > 0) { const sql = `SELECT * FROM edges WHERE source = ? AND kind IN (${kinds.map(() => '?').join(',')})`; const rows = this.db.prepare(sql).all(sourceId, ...kinds) as EdgeRow[]; return rows.map(rowToEdge); } if (!this.stmts.getEdgesBySource) { this.stmts.getEdgesBySource = this.db.prepare('SELECT * FROM edges WHERE source = ?'); } const rows = this.stmts.getEdgesBySource.all(sourceId) as EdgeRow[]; return rows.map(rowToEdge); } /** * Get incoming edges to a node */ getIncomingEdges(targetId: string, kinds?: EdgeKind[]): Edge[] { if (kinds && kinds.length > 0) { const sql = `SELECT * FROM edges WHERE target = ? AND kind IN (${kinds.map(() => '?').join(',')})`; const rows = this.db.prepare(sql).all(targetId, ...kinds) as EdgeRow[]; return rows.map(rowToEdge); } if (!this.stmts.getEdgesByTarget) { this.stmts.getEdgesByTarget = this.db.prepare('SELECT * FROM edges WHERE target = ?'); } const rows = this.stmts.getEdgesByTarget.all(targetId) as EdgeRow[]; return rows.map(rowToEdge); } // =========================================================================== // File Operations // =========================================================================== /** * Insert or update a file record */ upsertFile(file: FileRecord): void { if (!this.stmts.upsertFile) { this.stmts.upsertFile = this.db.prepare(` INSERT INTO files (path, content_hash, language, size, modified_at, indexed_at, node_count, errors) VALUES (@path, @contentHash, @language, @size, @modifiedAt, @indexedAt, @nodeCount, @errors) ON CONFLICT(path) DO UPDATE SET content_hash = @contentHash, language = @language, size = @size, modified_at = @modifiedAt, indexed_at = @indexedAt, node_count = @nodeCount, errors = @errors `); } this.stmts.upsertFile.run({ path: file.path, contentHash: file.contentHash, language: file.language, size: file.size, modifiedAt: file.modifiedAt, indexedAt: file.indexedAt, nodeCount: file.nodeCount, errors: file.errors ? JSON.stringify(file.errors) : null, }); } /** * Delete a file record and its nodes */ deleteFile(filePath: string): void { this.db.transaction(() => { this.deleteNodesByFile(filePath); if (!this.stmts.deleteFile) { this.stmts.deleteFile = this.db.prepare('DELETE FROM files WHERE path = ?'); } this.stmts.deleteFile.run(filePath); })(); } /** * Get a file record by path */ getFileByPath(filePath: string): FileRecord | null { if (!this.stmts.getFileByPath) { this.stmts.getFileByPath = this.db.prepare('SELECT * FROM files WHERE path = ?'); } const row = this.stmts.getFileByPath.get(filePath) as FileRow | undefined; return row ? rowToFileRecord(row) : null; } /** * Get all tracked files */ getAllFiles(): FileRecord[] { if (!this.stmts.getAllFiles) { this.stmts.getAllFiles = this.db.prepare('SELECT * FROM files ORDER BY path'); } const rows = this.stmts.getAllFiles.all() as FileRow[]; return rows.map(rowToFileRecord); } /** * Get files that need re-indexing (hash changed) */ getStaleFiles(currentHashes: Map): FileRecord[] { const files = this.getAllFiles(); return files.filter((f) => { const currentHash = currentHashes.get(f.path); return currentHash && currentHash !== f.contentHash; }); } // =========================================================================== // Unresolved References // =========================================================================== /** * Insert an unresolved reference */ insertUnresolvedRef(ref: UnresolvedReference): void { if (!this.stmts.insertUnresolved) { this.stmts.insertUnresolved = this.db.prepare(` INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, candidates) VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates) `); } this.stmts.insertUnresolved.run({ fromNodeId: ref.fromNodeId, referenceName: ref.referenceName, referenceKind: ref.referenceKind, line: ref.line, col: ref.column, candidates: ref.candidates ? JSON.stringify(ref.candidates) : null, }); } /** * Delete unresolved references from a node */ deleteUnresolvedByNode(nodeId: string): void { if (!this.stmts.deleteUnresolvedByNode) { this.stmts.deleteUnresolvedByNode = this.db.prepare( 'DELETE FROM unresolved_refs WHERE from_node_id = ?' ); } this.stmts.deleteUnresolvedByNode.run(nodeId); } /** * Get unresolved references by name (for resolution) */ getUnresolvedByName(name: string): UnresolvedReference[] { if (!this.stmts.getUnresolvedByName) { this.stmts.getUnresolvedByName = this.db.prepare( 'SELECT * FROM unresolved_refs WHERE reference_name = ?' ); } const rows = this.stmts.getUnresolvedByName.all(name) as UnresolvedRefRow[]; return rows.map((row) => ({ fromNodeId: row.from_node_id, referenceName: row.reference_name, referenceKind: row.reference_kind as EdgeKind, line: row.line, column: row.col, candidates: row.candidates ? safeJsonParse(row.candidates, []) : undefined, })); } /** * Get all unresolved references */ getUnresolvedReferences(): UnresolvedReference[] { const rows = this.db.prepare('SELECT * FROM unresolved_refs').all() as UnresolvedRefRow[]; return rows.map((row) => ({ fromNodeId: row.from_node_id, referenceName: row.reference_name, referenceKind: row.reference_kind as EdgeKind, line: row.line, column: row.col, candidates: row.candidates ? safeJsonParse(row.candidates, []) : undefined, })); } /** * Delete all unresolved references (after resolution) */ clearUnresolvedReferences(): void { this.db.exec('DELETE FROM unresolved_refs'); } /** * Delete resolved references by their IDs */ deleteResolvedReferences(fromNodeIds: string[]): void { if (fromNodeIds.length === 0) return; const placeholders = fromNodeIds.map(() => '?').join(','); this.db.prepare(`DELETE FROM unresolved_refs WHERE from_node_id IN (${placeholders})`).run(...fromNodeIds); } // =========================================================================== // Statistics // =========================================================================== /** * Get graph statistics */ getStats(): GraphStats { const nodeCount = ( this.db.prepare('SELECT COUNT(*) as count FROM nodes').get() as { count: number } ).count; const edgeCount = ( this.db.prepare('SELECT COUNT(*) as count FROM edges').get() as { count: number } ).count; const fileCount = ( this.db.prepare('SELECT COUNT(*) as count FROM files').get() as { count: number } ).count; const nodesByKind = {} as Record; const nodeKindRows = this.db .prepare('SELECT kind, COUNT(*) as count FROM nodes GROUP BY kind') .all() as Array<{ kind: string; count: number }>; for (const row of nodeKindRows) { nodesByKind[row.kind as NodeKind] = row.count; } const edgesByKind = {} as Record; const edgeKindRows = this.db .prepare('SELECT kind, COUNT(*) as count FROM edges GROUP BY kind') .all() as Array<{ kind: string; count: number }>; for (const row of edgeKindRows) { edgesByKind[row.kind as EdgeKind] = row.count; } const filesByLanguage = {} as Record; const languageRows = this.db .prepare('SELECT language, COUNT(*) as count FROM files GROUP BY language') .all() as Array<{ language: string; count: number }>; for (const row of languageRows) { filesByLanguage[row.language as Language] = row.count; } return { nodeCount, edgeCount, fileCount, nodesByKind, edgesByKind, filesByLanguage, dbSizeBytes: 0, // Set by caller using DatabaseConnection.getSize() lastUpdated: Date.now(), }; } /** * Clear all data from the database */ clear(): void { this.nodeCache.clear(); this.db.transaction(() => { this.db.exec('DELETE FROM unresolved_refs'); this.db.exec('DELETE FROM vectors'); this.db.exec('DELETE FROM edges'); this.db.exec('DELETE FROM nodes'); this.db.exec('DELETE FROM files'); })(); } }