Răsfoiți Sursa

feat: Add database schema v3 with optimized node lookups and improved error handling

Adds expression index on lower(name) for memory-efficient case-insensitive searches, replacing in-memory caches that caused OOM on large codebases. Includes batched reference resolution, enhanced error reporting with detailed breakdown by error type, and improved CLI progress display for scanning phases.
Colby McHenry 2 luni în urmă
părinte
comite
e4908e1270

+ 1 - 1
__tests__/foundation.test.ts

@@ -317,7 +317,7 @@ describe('Database Connection', () => {
 
     const version = db.getSchemaVersion();
     expect(version).not.toBeNull();
-    expect(version?.version).toBe(2);
+    expect(version?.version).toBe(3);
 
     db.close();
   });

+ 1 - 1
__tests__/pr19-improvements.test.ts

@@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
 describe('Schema v2 Migration', () => {
   it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
     const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
-    expect(CURRENT_SCHEMA_VERSION).toBe(2);
+    expect(CURRENT_SCHEMA_VERSION).toBe(3);
   });
 
   it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {

+ 131 - 27
src/bin/codegraph.ts

@@ -192,11 +192,16 @@ function printProgress(progress: IndexProgress): void {
   };
 
   const phaseName = phaseNames[progress.phase] || progress.phase;
-  const bar = progressBar(progress.current, progress.total);
   const file = progress.currentFile ? chalk.dim(` ${progress.currentFile}`) : '';
 
-  // Clear line and print progress
-  process.stdout.write(`\r${chalk.cyan(phaseName)}: ${bar}${file}`.padEnd(100));
+  if (progress.total > 0) {
+    const bar = progressBar(progress.current, progress.total);
+    process.stdout.write(`\r${chalk.cyan(phaseName)}: ${bar}${file}`.padEnd(100));
+  } else {
+    // No known total (e.g. scanning) — show a running count
+    const count = progress.current > 0 ? ` ${chalk.green(formatNumber(progress.current))} found` : '';
+    process.stdout.write(`\r${chalk.cyan(phaseName)}:${count}${file}`.padEnd(100));
+  }
 }
 
 /**
@@ -227,6 +232,121 @@ function warn(message: string): void {
   console.log(chalk.yellow('⚠') + ' ' + message);
 }
 
+/**
+ * Print a summary of an indexing run, followed by a per-code error breakdown
+ * when individual files failed.
+ *
+ * Outcomes handled:
+ *  - files were indexed (optionally with some per-file failures),
+ *  - every file failed,
+ *  - the run itself failed without any file being counted as errored
+ *    (`success` is false and `filesErrored` is 0); the collected error
+ *    messages are then printed instead of "No files found to index",
+ *  - nothing was found to index.
+ *
+ * @param result - Aggregate counters and collected errors of the run.
+ * @param projectPath - Project root. When given, `.codegraph/errors.log` is
+ *   written if files failed and removed after a clean, successful run.
+ */
+function printIndexResult(result: { success: boolean; filesIndexed: number; filesSkipped: number; filesErrored: number; nodesCreated: number; edgesCreated: number; errors: Array<{ message: string; filePath?: string; severity: string; code?: string }>; durationMs: number }, projectPath?: string): void {
+  const hasErrors = result.filesErrored > 0;
+  // The run failed as a whole, not because specific files could not be processed.
+  const runFailed = !result.success && !hasErrors;
+
+  // Always show what was indexed
+  if (result.filesIndexed > 0) {
+    if (hasErrors) {
+      success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} could not be parsed)`);
+    } else {
+      success(`Indexed ${formatNumber(result.filesIndexed)} files`);
+    }
+    info(`Created ${formatNumber(result.nodesCreated)} nodes and ${formatNumber(result.edgesCreated)} edges`);
+    info(`Completed in ${formatDuration(result.durationMs)}`);
+  } else if (hasErrors) {
+    error(`Indexing failed — all ${formatNumber(result.filesErrored)} files had errors`);
+  } else if (!runFailed) {
+    warn('No files found to index');
+  }
+
+  // Surface run-level failures explicitly; otherwise the caller exits non-zero
+  // with no explanation on screen.
+  if (runFailed) {
+    const messages = result.errors
+      .filter((err) => err.severity === 'error')
+      .map((err) => err.message);
+    const headline = messages.length > 0 ? messages[0] : 'unknown error';
+    error(`Indexing did not complete: ${headline}`);
+    for (const message of messages.slice(1, 5)) {
+      console.log(chalk.dim(`  - ${message}`));
+    }
+    if (messages.length > 5) {
+      console.log(chalk.dim(`  ... and ${messages.length - 5} more`));
+    }
+    // Leave any existing errors.log untouched: this run says nothing about
+    // whether previously logged file failures are resolved.
+    return;
+  }
+
+  // Show error breakdown if there were errors
+  if (hasErrors) {
+    // Group errors by code for a concise summary
+    const errorsByCode = new Map<string, number>();
+    for (const err of result.errors) {
+      if (err.severity === 'error') {
+        const code = err.code || 'unknown';
+        errorsByCode.set(code, (errorsByCode.get(code) || 0) + 1);
+      }
+    }
+
+    const codeLabels: Record<string, string> = {
+      parse_error: 'files failed to parse',
+      read_error: 'files could not be read',
+      size_exceeded: 'files exceeded size limit',
+      path_traversal: 'blocked paths',
+      unsupported_language: 'unsupported language',
+      parser_error: 'parser initialization failures',
+    };
+
+    console.log('');
+    console.log(chalk.dim('  Error breakdown:'));
+    for (const [code, count] of errorsByCode) {
+      const label = codeLabels[code] || code;
+      console.log(chalk.dim(`    ${formatNumber(count)} ${label}`));
+    }
+
+    // Write detailed error log to .codegraph/errors.log
+    if (projectPath) {
+      writeErrorLog(projectPath, result.errors);
+    }
+
+    // Reassure the user the index is usable
+    if (result.filesIndexed > 0) {
+      console.log('');
+      info('The index is fully usable — only the failed files are missing from the graph.');
+      info('This is common in large repos with test fixtures or generated files that use non-standard syntax.');
+    }
+  } else if (projectPath) {
+    // No errors — clean up any stale error log. Cleanup is best-effort: a
+    // failure to delete the log must not turn a successful run into a crash.
+    const logPath = path.join(projectPath, '.codegraph', 'errors.log');
+    try {
+      if (fs.existsSync(logPath)) {
+        fs.unlinkSync(logPath);
+      }
+    } catch (cleanupError) {
+      warn(`Could not remove stale .codegraph/errors.log: ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}`);
+    }
+  }
+}
+
+/**
+ * Write a detailed error log to `.codegraph/errors.log`.
+ *
+ * Only entries with severity `'error'` are logged. Entries that carry a file
+ * path are grouped by that path (one `path: message` line per error); entries
+ * without a path are appended afterwards as bare messages. Nothing is written
+ * when the `.codegraph` directory does not exist.
+ *
+ * Writing the log is best-effort: a filesystem failure is reported as a
+ * warning instead of aborting the command.
+ *
+ * @param projectPath - Project root that contains the `.codegraph` directory.
+ * @param errors - Errors collected during indexing.
+ */
+function writeErrorLog(projectPath: string, errors: Array<{ message: string; filePath?: string; severity: string; code?: string }>): void {
+  const cgDir = path.join(projectPath, '.codegraph');
+  if (!fs.existsSync(cgDir)) return;
+
+  const logPath = path.join(cgDir, 'errors.log');
+
+  // Group error messages by file path (insertion order is preserved by Map)
+  const messagesByFile = new Map<string, string[]>();
+  const noFileMessages: string[] = [];
+
+  for (const err of errors) {
+    if (err.severity !== 'error') continue;
+    if (err.filePath) {
+      const existing = messagesByFile.get(err.filePath);
+      if (existing) {
+        existing.push(err.message);
+      } else {
+        messagesByFile.set(err.filePath, [err.message]);
+      }
+    } else {
+      noFileMessages.push(err.message);
+    }
+  }
+
+  const lines: string[] = [
+    `CodeGraph Error Log — ${new Date().toISOString()}`,
+    `${messagesByFile.size} files with errors`,
+    '',
+  ];
+
+  for (const [filePath, messages] of messagesByFile) {
+    for (const message of messages) {
+      lines.push(`${filePath}: ${message}`);
+    }
+  }
+
+  lines.push(...noFileMessages);
+
+  try {
+    fs.writeFileSync(logPath, lines.join('\n') + '\n');
+  } catch (writeError) {
+    // The log is a diagnostic aid only; do not fail the command over it.
+    warn(`Could not write .codegraph/errors.log: ${writeError instanceof Error ? writeError.message : String(writeError)}`);
+    return;
+  }
+  info(`See .codegraph/errors.log for the full list of failed files`);
+}
+
 // =============================================================================
 // Commands
 // =============================================================================
@@ -239,7 +359,8 @@ program
   .description('Initialize CodeGraph in a project directory')
   .option('-i, --index', 'Run initial indexing after initialization')
   .action(async (pathArg: string | undefined, options: { index?: boolean }) => {
-    const projectPath = resolveProjectPath(pathArg);
+    // init should always target the exact path given (or cwd), never walk up parents
+    const projectPath = path.resolve(pathArg || process.cwd());
 
     console.log(chalk.bold('\nInitializing CodeGraph...\n'));
 
@@ -271,13 +392,7 @@ program
         // Clear progress line
         process.stdout.write('\r' + ' '.repeat(100) + '\r');
 
-        if (result.success) {
-          success(`Indexed ${formatNumber(result.filesIndexed)} files`);
-          info(`Created ${formatNumber(result.nodesCreated)} nodes and ${formatNumber(result.edgesCreated)} edges`);
-          info(`Completed in ${formatDuration(result.durationMs)}`);
-        } else {
-          warn(`Indexing completed with ${result.errors.length} errors`);
-        }
+        printIndexResult(result, projectPath);
       } else {
         info('Run "codegraph index" to index the project');
       }
@@ -376,22 +491,11 @@ program
         process.stdout.write('\r' + ' '.repeat(100) + '\r');
       }
 
-      if (result.success) {
-        if (!options.quiet) {
-          success(`Indexed ${formatNumber(result.filesIndexed)} files`);
-          info(`Created ${formatNumber(result.nodesCreated)} nodes and ${formatNumber(result.edgesCreated)} edges`);
-          info(`Completed in ${formatDuration(result.durationMs)}`);
-        }
-      } else {
-        if (!options.quiet) {
-          warn(`Indexing completed with ${result.errors.length} errors`);
-          for (const err of result.errors.slice(0, 5)) {
-            console.log(chalk.dim(`  - ${err.message}`));
-          }
-          if (result.errors.length > 5) {
-            console.log(chalk.dim(`  ... and ${result.errors.length - 5} more`));
-          }
-        }
+      if (!options.quiet) {
+        printIndexResult(result, projectPath);
+      }
+
+      if (!result.success) {
         process.exit(1);
       }
 

+ 10 - 1
src/db/migrations.ts

@@ -9,7 +9,7 @@ import { SqliteDatabase } from './sqlite-adapter';
 /**
  * Current schema version
  */
-export const CURRENT_SCHEMA_VERSION = 2;
+export const CURRENT_SCHEMA_VERSION = 3;
 
 /**
  * Migration definition
@@ -45,6 +45,15 @@ const migrations: Migration[] = [
       `);
     },
   },
+  {
+    version: 3,
+    description: 'Add lower(name) expression index for memory-efficient case-insensitive lookups',
+    up: (db) => {
+      db.exec(`
+        CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
+      `);
+    },
+  },
 ];
 
 /**

+ 90 - 0
src/db/queries.ts

@@ -172,6 +172,12 @@ export class QueryBuilder {
     insertUnresolved?: SqliteStatement;
     deleteUnresolvedByNode?: SqliteStatement;
     getUnresolvedByName?: SqliteStatement;
+    getNodesByName?: SqliteStatement;
+    getNodesByQualifiedNameExact?: SqliteStatement;
+    getNodesByLowerName?: SqliteStatement;
+    getUnresolvedCount?: SqliteStatement;
+    getUnresolvedBatch?: SqliteStatement;
+    getAllFilePaths?: SqliteStatement;
   } = {};
 
   constructor(db: SqliteDatabase) {
@@ -425,6 +431,43 @@ export class QueryBuilder {
     return rows.map(rowToNode);
   }
 
+  /**
+   * Look up every node whose `name` column equals the given string exactly.
+   * The equality match is meant to be served by the idx_nodes_name index.
+   * The prepared statement is created on first use and reused afterwards.
+   */
+  getNodesByName(name: string): Node[] {
+    let stmt = this.stmts.getNodesByName;
+    if (!stmt) {
+      stmt = this.db.prepare('SELECT * FROM nodes WHERE name = ?');
+      this.stmts.getNodesByName = stmt;
+    }
+    const matches = stmt.all(name) as NodeRow[];
+    return matches.map(rowToNode);
+  }
+
+  /**
+   * Look up every node whose `qualified_name` column equals the given string
+   * exactly (an equality match that the idx_nodes_qualified_name index can serve).
+   * The prepared statement is created on first use and reused afterwards.
+   */
+  getNodesByQualifiedNameExact(qualifiedName: string): Node[] {
+    let stmt = this.stmts.getNodesByQualifiedNameExact;
+    if (!stmt) {
+      stmt = this.db.prepare('SELECT * FROM nodes WHERE qualified_name = ?');
+      this.stmts.getNodesByQualifiedNameExact = stmt;
+    }
+    const matches = stmt.all(qualifiedName) as NodeRow[];
+    return matches.map(rowToNode);
+  }
+
+  /**
+   * Get nodes by lowercase name match (uses idx_nodes_lower_name expression index)
+   *
+   * The comparison is `lower(name) = ?`, so the argument must already be
+   * lowercased by the caller; it is bound as-is. The prepared statement is
+   * created lazily and cached on `this.stmts`.
+   *
+   * NOTE(review): SQLite's built-in lower() folds only ASCII letters unless an
+   * ICU build/extension is in use, while JavaScript's toLowerCase() is
+   * Unicode-aware. Names containing non-ASCII uppercase letters may therefore
+   * not match a JS-lowercased argument — confirm whether that matters here.
+   *
+   * @param lowerName - Already-lowercased node name to match.
+   * @returns All matching rows converted with rowToNode (empty if none).
+   */
+  getNodesByLowerName(lowerName: string): Node[] {
+    if (!this.stmts.getNodesByLowerName) {
+      this.stmts.getNodesByLowerName = this.db.prepare(
+        'SELECT * FROM nodes WHERE lower(name) = ?'
+      );
+    }
+    const rows = this.stmts.getNodesByLowerName.all(lowerName) as NodeRow[];
+    return rows.map(rowToNode);
+  }
+
   /**
    * Search nodes by name using FTS with fallback to LIKE for better matching
    *
@@ -886,6 +929,53 @@ export class QueryBuilder {
     }));
   }
 
+  /**
+   * Count the rows in `unresolved_refs` with a single COUNT(*) query, so the
+   * references themselves never have to be materialized just to size them.
+   */
+  getUnresolvedReferencesCount(): number {
+    let stmt = this.stmts.getUnresolvedCount;
+    if (!stmt) {
+      stmt = this.db.prepare('SELECT COUNT(*) as count FROM unresolved_refs');
+      this.stmts.getUnresolvedCount = stmt;
+    }
+    const result = stmt.get() as { count: number };
+    return result.count;
+  }
+
+  /**
+   * Get a batch of unresolved references using LIMIT/OFFSET pagination.
+   * Used to process references in bounded memory chunks.
+   *
+   * Rows are ordered by rowid so that consecutive pages are stable and do not
+   * overlap or skip rows; without an ORDER BY the row order of a LIMIT/OFFSET
+   * query is unspecified.
+   * NOTE(review): assumes `unresolved_refs` is an ordinary rowid table (not
+   * declared WITHOUT ROWID) — confirm against the schema.
+   *
+   * @param offset - Number of rows to skip.
+   * @param limit - Maximum number of rows to return.
+   * @returns The requested slice mapped to UnresolvedReference objects.
+   */
+  getUnresolvedReferencesBatch(offset: number, limit: number): UnresolvedReference[] {
+    if (!this.stmts.getUnresolvedBatch) {
+      this.stmts.getUnresolvedBatch = this.db.prepare(
+        'SELECT * FROM unresolved_refs ORDER BY rowid LIMIT ? OFFSET ?'
+      );
+    }
+    // Bind order follows the SQL text: LIMIT first, then OFFSET.
+    const rows = this.stmts.getUnresolvedBatch.all(limit, offset) as UnresolvedRefRow[];
+    return rows.map((row) => ({
+      fromNodeId: row.from_node_id,
+      referenceName: row.reference_name,
+      referenceKind: row.reference_kind as EdgeKind,
+      line: row.line,
+      column: row.col,
+      candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
+      filePath: row.file_path,
+      language: row.language as Language,
+    }));
+  }
+
+  /**
+   * Return the `path` column of every row in `files`, sorted by path.
+   * Only the path strings are selected, not the rest of each file row.
+   */
+  getAllFilePaths(): string[] {
+    let stmt = this.stmts.getAllFilePaths;
+    if (!stmt) {
+      stmt = this.db.prepare('SELECT path FROM files ORDER BY path');
+      this.stmts.getAllFilePaths = stmt;
+    }
+    const paths: string[] = [];
+    for (const record of stmt.all() as Array<{ path: string }>) {
+      paths.push(record.path);
+    }
+    return paths;
+  }
+
   /**
    * Get unresolved references scoped to specific file paths.
    * Uses the idx_unresolved_file_path index for efficient lookup.

+ 1 - 0
src/db/schema.sql

@@ -91,6 +91,7 @@ CREATE INDEX IF NOT EXISTS idx_nodes_qualified_name ON nodes(qualified_name);
 CREATE INDEX IF NOT EXISTS idx_nodes_file_path ON nodes(file_path);
 CREATE INDEX IF NOT EXISTS idx_nodes_language ON nodes(language);
 CREATE INDEX IF NOT EXISTS idx_nodes_file_line ON nodes(file_path, start_line);
+CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
 
 -- Full-text search index on node names, docstrings, and signatures
 CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(

+ 1 - 0
src/extraction/dfm-extractor.ts

@@ -39,6 +39,7 @@ export class DfmExtractor {
       this.errors.push({
         message: `DFM extraction error: ${error instanceof Error ? error.message : String(error)}`,
         severity: 'error',
+        code: 'parse_error',
       });
     }
 

+ 28 - 5
src/extraction/index.ts

@@ -46,6 +46,7 @@ export interface IndexResult {
   success: boolean;
   filesIndexed: number;
   filesSkipped: number;
+  filesErrored: number;
   nodesCreated: number;
   edgesCreated: number;
   errors: ExtractionError[];
@@ -369,6 +370,7 @@ export class ExtractionOrchestrator {
     const errors: ExtractionError[] = [];
     let filesIndexed = 0;
     let filesSkipped = 0;
+    let filesErrored = 0;
     let totalNodes = 0;
     let totalEdges = 0;
 
@@ -393,6 +395,7 @@ export class ExtractionOrchestrator {
         success: false,
         filesIndexed: 0,
         filesSkipped: 0,
+        filesErrored: 0,
         nodesCreated: 0,
         edgesCreated: 0,
         errors: [{ message: 'Aborted', severity: 'error' }],
@@ -416,6 +419,7 @@ export class ExtractionOrchestrator {
           success: false,
           filesIndexed,
           filesSkipped,
+          filesErrored,
           nodesCreated: totalNodes,
           edgesCreated: totalEdges,
           errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
@@ -450,6 +454,7 @@ export class ExtractionOrchestrator {
             success: false,
             filesIndexed,
             filesSkipped,
+            filesErrored,
             nodesCreated: totalNodes,
             edgesCreated: totalEdges,
             errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
@@ -466,9 +471,12 @@ export class ExtractionOrchestrator {
         });
 
         if (error || content === null || stats === null) {
+          filesErrored++;
           errors.push({
             message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
+            filePath,
             severity: 'error',
+            code: 'read_error',
           });
           continue;
         }
@@ -476,6 +484,10 @@ export class ExtractionOrchestrator {
         const result = await this.indexFileWithContent(filePath, content, stats);
 
         if (result.errors.length > 0) {
+          // Annotate errors with file path if not already set
+          for (const err of result.errors) {
+            if (!err.filePath) err.filePath = filePath;
+          }
           errors.push(...result.errors);
         }
 
@@ -483,7 +495,9 @@ export class ExtractionOrchestrator {
           filesIndexed++;
           totalNodes += result.nodes.length;
           totalEdges += result.edges.length;
-        } else if (result.errors.length === 0) {
+        } else if (result.errors.some((e) => e.severity === 'error')) {
+          filesErrored++;
+        } else {
           filesSkipped++;
         }
       }
@@ -499,9 +513,10 @@ export class ExtractionOrchestrator {
     // TODO: Implement reference resolution in Phase 3
 
     return {
-      success: errors.filter((e) => e.severity === 'error').length === 0,
+      success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
       filesIndexed,
       filesSkipped,
+      filesErrored,
       nodesCreated: totalNodes,
       edgesCreated: totalEdges,
       errors,
@@ -517,6 +532,7 @@ export class ExtractionOrchestrator {
     const errors: ExtractionError[] = [];
     let filesIndexed = 0;
     let filesSkipped = 0;
+    let filesErrored = 0;
     let totalNodes = 0;
     let totalEdges = 0;
 
@@ -531,15 +547,18 @@ export class ExtractionOrchestrator {
         filesIndexed++;
         totalNodes += result.nodes.length;
         totalEdges += result.edges.length;
+      } else if (result.errors.some((e) => e.severity === 'error')) {
+        filesErrored++;
       } else {
         filesSkipped++;
       }
     }
 
     return {
-      success: errors.filter((e) => e.severity === 'error').length === 0,
+      success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
       filesIndexed,
       filesSkipped,
+      filesErrored,
       nodesCreated: totalNodes,
       edgesCreated: totalEdges,
       errors,
@@ -558,7 +577,7 @@ export class ExtractionOrchestrator {
         nodes: [],
         edges: [],
         unresolvedReferences: [],
-        errors: [{ message: `Path traversal blocked: ${relativePath}`, severity: 'error' }],
+        errors: [{ message: `Path traversal blocked: ${relativePath}`, filePath: relativePath, severity: 'error', code: 'path_traversal' }],
         durationMs: 0,
       };
     }
@@ -577,7 +596,9 @@ export class ExtractionOrchestrator {
         errors: [
           {
             message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
+            filePath: relativePath,
             severity: 'error',
+            code: 'read_error',
           },
         ],
         durationMs: 0,
@@ -604,7 +625,7 @@ export class ExtractionOrchestrator {
         nodes: [],
         edges: [],
         unresolvedReferences: [],
-        errors: [{ message: 'Path traversal blocked', severity: 'error' }],
+        errors: [{ message: 'Path traversal blocked', filePath: relativePath, severity: 'error', code: 'path_traversal' }],
         durationMs: 0,
       };
     }
@@ -618,7 +639,9 @@ export class ExtractionOrchestrator {
         errors: [
           {
             message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
+            filePath: relativePath,
             severity: 'warning',
+            code: 'size_exceeded',
           },
         ],
         durationMs: 0,

+ 1 - 0
src/extraction/liquid-extractor.ts

@@ -48,6 +48,7 @@ export class LiquidExtractor {
       this.errors.push({
         message: `Liquid extraction error: ${error instanceof Error ? error.message : String(error)}`,
         severity: 'error',
+        code: 'parse_error',
       });
     }
 

+ 1 - 0
src/extraction/svelte-extractor.ts

@@ -45,6 +45,7 @@ export class SvelteExtractor {
       this.errors.push({
         message: `Svelte extraction error: ${error instanceof Error ? error.message : String(error)}`,
         severity: 'error',
+        code: 'parse_error',
       });
     }
 

+ 15 - 0
src/extraction/tree-sitter.ts

@@ -117,7 +117,9 @@ export class TreeSitterExtractor {
         errors: [
           {
             message: `Unsupported language: ${this.language}`,
+            filePath: this.filePath,
             severity: 'error',
+            code: 'unsupported_language',
           },
         ],
         durationMs: Date.now() - startTime,
@@ -133,7 +135,9 @@ export class TreeSitterExtractor {
         errors: [
           {
             message: `Failed to get parser for language: ${this.language}`,
+            filePath: this.filePath,
             severity: 'error',
+            code: 'parser_error',
           },
         ],
         durationMs: Date.now() - startTime,
@@ -170,8 +174,19 @@ export class TreeSitterExtractor {
     } catch (error) {
       this.errors.push({
         message: `Parse error: ${error instanceof Error ? error.message : String(error)}`,
+        filePath: this.filePath,
         severity: 'error',
+        code: 'parse_error',
       });
+    } finally {
+      // Free tree-sitter WASM memory immediately — trees hold native heap memory
+      // invisible to V8's GC that accumulates across thousands of files.
+      if (this.tree) {
+        this.tree.delete();
+        this.tree = null;
+      }
+      // Release source string to reduce GC pressure
+      this.source = '';
     }
 
     return {

+ 43 - 18
src/index.ts

@@ -380,15 +380,15 @@ export class CodeGraph {
       try {
         this.fileLock.acquire();
       } catch {
-        return { success: false, filesIndexed: 0, filesSkipped: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
+        return { success: false, filesIndexed: 0, filesSkipped: 0, filesErrored: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
       }
       try {
         const result = await this.orchestrator.indexAll(options.onProgress, options.signal);
 
         // Resolve references to create call/import/extends edges
         if (result.success && result.filesIndexed > 0) {
-          // Get count of unresolved references for accurate progress
-          const unresolvedCount = this.queries.getUnresolvedReferences().length;
+          // Get count without loading all refs into memory
+          const unresolvedCount = this.queries.getUnresolvedReferencesCount();
 
           options.onProgress?.({
             phase: 'resolving',
@@ -396,7 +396,7 @@ export class CodeGraph {
             total: unresolvedCount,
           });
 
-          this.resolveReferences((current, total) => {
+          this.resolveReferencesBatched((current, total) => {
             options.onProgress?.({
               phase: 'resolving',
               current,
@@ -422,7 +422,7 @@ export class CodeGraph {
       try {
         this.fileLock.acquire();
       } catch {
-        return { success: false, filesIndexed: 0, filesSkipped: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
+        return { success: false, filesIndexed: 0, filesSkipped: 0, filesErrored: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 };
       }
       try {
         return this.orchestrator.indexFiles(filePaths);
@@ -449,24 +449,41 @@ export class CodeGraph {
 
         // Resolve references if files were updated
         if (result.filesAdded > 0 || result.filesModified > 0) {
-          // Scope resolution to changed files when available (git fast path)
-          const unresolvedRefs = result.changedFilePaths
-            ? this.queries.getUnresolvedReferencesByFiles(result.changedFilePaths)
-            : this.queries.getUnresolvedReferences();
+          if (result.changedFilePaths) {
+            // Scope resolution to changed files (git fast path — bounded set)
+            const unresolvedRefs = this.queries.getUnresolvedReferencesByFiles(result.changedFilePaths);
 
-          options.onProgress?.({
-            phase: 'resolving',
-            current: 0,
-            total: unresolvedRefs.length,
-          });
+            options.onProgress?.({
+              phase: 'resolving',
+              current: 0,
+              total: unresolvedRefs.length,
+            });
+
+            this.resolver.resolveAndPersist(unresolvedRefs, (current, total) => {
+              options.onProgress?.({
+                phase: 'resolving',
+                current,
+                total,
+              });
+            });
+          } else {
+            // No git info — use batched resolution to avoid OOM
+            const unresolvedCount = this.queries.getUnresolvedReferencesCount();
 
-          this.resolver.resolveAndPersist(unresolvedRefs, (current, total) => {
             options.onProgress?.({
               phase: 'resolving',
-              current,
-              total,
+              current: 0,
+              total: unresolvedCount,
             });
-          });
+
+            this.resolveReferencesBatched((current, total) => {
+              options.onProgress?.({
+                phase: 'resolving',
+                current,
+                total,
+              });
+            });
+          }
         }
 
         return result;
@@ -516,6 +533,14 @@ export class CodeGraph {
     return this.resolver.resolveAndPersist(unresolvedRefs, onProgress);
   }
 
+  /**
+   * Resolve references in batches to keep memory bounded on large codebases.
+   * Processes chunks of unresolved refs, persisting results after each batch.
+   *
+   * Thin wrapper: delegates to the resolver's resolveAndPersistBatched and
+   * forwards the progress callback unchanged.
+   *
+   * @param onProgress - Optional callback receiving (current, total) counts.
+   * @returns The ResolutionResult produced by the resolver.
+   */
+  resolveReferencesBatched(onProgress?: (current: number, total: number) => void): ResolutionResult {
+    return this.resolver.resolveAndPersistBatched(onProgress);
+  }
+
   /**
    * Get detected frameworks in the project
    */

+ 3 - 3
src/installer/index.ts

@@ -167,10 +167,10 @@ async function initializeLocalProject(): Promise<void> {
   // Clear progress line
   process.stdout.write('\r' + ' '.repeat(50) + '\r');
 
-  if (result.success) {
-    success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.nodesCreated)} symbols)`);
+  if (result.filesErrored > 0) {
+    success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} files failed, ${formatNumber(result.nodesCreated)} symbols)`);
   } else {
-    success(`Indexed ${formatNumber(result.filesIndexed)} files with ${result.errors.length} warnings`);
+    success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.nodesCreated)} symbols)`);
   }
 
   cg.close();

+ 92 - 85
src/resolution/index.ts

@@ -34,13 +34,8 @@ export class ReferenceResolver {
   private queries: QueryBuilder;
   private context: ResolutionContext;
   private frameworks: FrameworkResolver[] = [];
-  private nodeCache: Map<string, Node[]> = new Map();
-  private fileCache: Map<string, string | null> = new Map();
-  private nameCache: Map<string, Node[]> = new Map();
-  private qualifiedNameCache: Map<string, Node[]> = new Map();
-  private kindCache: Map<string, Node[]> = new Map();
-  private nodeByIdCache: Map<string, Node> = new Map();
-  private lowerNameCache: Map<string, Node[]> = new Map();
+  private nodeCache: Map<string, Node[]> = new Map(); // per-file node cache (bounded)
+  private fileCache: Map<string, string | null> = new Map(); // per-file content cache (bounded)
   private importMappingCache: Map<string, ImportMapping[]> = new Map();
   private knownFiles: Set<string> | null = null;
   private cachesWarmed = false;
@@ -60,53 +55,15 @@ export class ReferenceResolver {
   }
 
   /**
-   * Pre-load all nodes into memory maps for fast lookup during resolution.
-   * This eliminates repeated SQLite queries and provides the core speedup.
+   * Pre-build lightweight caches for resolution.
+   * Node lookups are now handled by indexed SQLite queries instead of
+   * loading all nodes into memory (which caused OOM on large codebases).
    */
   warmCaches(): void {
     if (this.cachesWarmed) return;
 
-    const allNodes = this.queries.getAllNodes();
-    for (const node of allNodes) {
-      // Index by name
-      const byName = this.nameCache.get(node.name);
-      if (byName) {
-        byName.push(node);
-      } else {
-        this.nameCache.set(node.name, [node]);
-      }
-
-      // Index by qualified name
-      const byQName = this.qualifiedNameCache.get(node.qualifiedName);
-      if (byQName) {
-        byQName.push(node);
-      } else {
-        this.qualifiedNameCache.set(node.qualifiedName, [node]);
-      }
-
-      // Index by kind
-      const byKind = this.kindCache.get(node.kind);
-      if (byKind) {
-        byKind.push(node);
-      } else {
-        this.kindCache.set(node.kind, [node]);
-      }
-
-      // Index by ID
-      this.nodeByIdCache.set(node.id, node);
-
-      // Index by lowercase name (for fuzzy matching)
-      const lowerName = node.name.toLowerCase();
-      const byLower = this.lowerNameCache.get(lowerName);
-      if (byLower) {
-        byLower.push(node);
-      } else {
-        this.lowerNameCache.set(lowerName, [node]);
-      }
-    }
-
-    // Pre-build known files set from index
-    this.knownFiles = new Set(this.queries.getAllFiles().map((f) => f.path));
+    // Only cache the set of known file paths (lightweight string set)
+    this.knownFiles = new Set(this.queries.getAllFilePaths());
 
     this.cachesWarmed = true;
   }
@@ -117,11 +74,6 @@ export class ReferenceResolver {
   clearCaches(): void {
     this.nodeCache.clear();
     this.fileCache.clear();
-    this.nameCache.clear();
-    this.qualifiedNameCache.clear();
-    this.kindCache.clear();
-    this.nodeByIdCache.clear();
-    this.lowerNameCache.clear();
     this.importMappingCache.clear();
     this.knownFiles = null;
     this.cachesWarmed = false;
@@ -140,28 +92,14 @@ export class ReferenceResolver {
       },
 
       getNodesByName: (name: string) => {
-        // Use warm cache if available, otherwise fall back to search
-        if (this.cachesWarmed) {
-          return this.nameCache.get(name) ?? [];
-        }
-        return this.queries.searchNodes(name, { limit: 100 }).map((r) => r.node);
+        return this.queries.getNodesByName(name);
       },
 
       getNodesByQualifiedName: (qualifiedName: string) => {
-        // Use warm cache if available, otherwise fall back to search + filter
-        if (this.cachesWarmed) {
-          return this.qualifiedNameCache.get(qualifiedName) ?? [];
-        }
-        return this.queries
-          .searchNodes(qualifiedName, { limit: 50 })
-          .filter((r) => r.node.qualifiedName === qualifiedName)
-          .map((r) => r.node);
+        return this.queries.getNodesByQualifiedNameExact(qualifiedName);
       },
 
       getNodesByKind: (kind: Node['kind']) => {
-        if (this.cachesWarmed) {
-          return this.kindCache.get(kind) ?? [];
-        }
         return this.queries.getNodesByKind(kind);
       },
 
@@ -203,17 +141,11 @@ export class ReferenceResolver {
       getProjectRoot: () => this.projectRoot,
 
       getAllFiles: () => {
-        return this.queries.getAllFiles().map((f) => f.path);
+        return this.queries.getAllFilePaths();
       },
 
       getNodesByLowerName: (lowerName: string) => {
-        if (this.cachesWarmed) {
-          return this.lowerNameCache.get(lowerName) ?? [];
-        }
-        // Fallback: scan all nodes (expensive, but only used if cache not warm)
-        return this.queries.getAllNodes().filter(
-          (n) => n.name.toLowerCase() === lowerName
-        );
+        return this.queries.getNodesByLowerName(lowerName);
       },
 
       getImportMappings: (filePath: string, language) => {
@@ -389,6 +321,87 @@ export class ReferenceResolver {
     return result;
   }
 
+  /**
+   * Resolve unresolved references and persist the resulting edges in batches,
+   * keeping memory bounded.
+   *
+   * Each iteration reads one batch, resolves it, inserts the edges, and then
+   * deletes every reference of that batch from the unresolved store (both the
+   * resolved ones and the ones that could not be resolved), so large result
+   * arrays never accumulate.
+   *
+   * @param onProgress Optional callback invoked after each batch with the
+   *   number of references processed so far and the count that was pending
+   *   when the run started.
+   * @param batchSize Maximum number of references read per batch.
+   * @returns Aggregated statistics across all batches. `resolved` and
+   *   `unresolved` are intentionally returned empty: per-reference results
+   *   are persisted and discarded batch by batch.
+   */
+  resolveAndPersistBatched(
+    onProgress?: (current: number, total: number) => void,
+    batchSize: number = 5000
+  ): ResolutionResult {
+    this.warmCaches();
+
+    const total = this.queries.getUnresolvedReferencesCount();
+    // Number of refs still stored before the current batch; used to detect
+    // a batch that failed to shrink the backlog.
+    let remaining = total;
+    let processed = 0;
+    const aggregateStats = {
+      total: 0,
+      resolved: 0,
+      unresolved: 0,
+      byMethod: {} as Record<string, number>,
+    };
+
+    // Process in batches. We always read from offset 0 because processed refs
+    // are deleted after each batch, shifting the remaining rows forward.
+    while (true) {
+      const batch = this.queries.getUnresolvedReferencesBatch(0, batchSize);
+      if (batch.length === 0) break;
+
+      const result = this.resolveAll(batch);
+
+      // Persist edges immediately
+      const edges = this.createEdges(result.resolved);
+      if (edges.length > 0) {
+        this.queries.insertEdges(edges);
+      }
+
+      // Clean up resolved refs so they don't appear in the next batch
+      if (result.resolved.length > 0) {
+        this.queries.deleteSpecificResolvedReferences(
+          result.resolved.map((r) => ({
+            fromNodeId: r.original.fromNodeId,
+            referenceName: r.original.referenceName,
+            referenceKind: r.original.referenceKind,
+          }))
+        );
+      }
+
+      // Delete unresolvable refs from this batch to avoid re-processing them
+      if (result.unresolved.length > 0) {
+        this.queries.deleteSpecificResolvedReferences(
+          result.unresolved.map((r) => ({
+            fromNodeId: r.fromNodeId,
+            referenceName: r.referenceName,
+            referenceKind: r.referenceKind,
+          }))
+        );
+      }
+
+      // Aggregate stats
+      aggregateStats.total += result.stats.total;
+      aggregateStats.resolved += result.stats.resolved;
+      aggregateStats.unresolved += result.stats.unresolved;
+      for (const [method, count] of Object.entries(result.stats.byMethod)) {
+        aggregateStats.byMethod[method] = (aggregateStats.byMethod[method] || 0) + count;
+      }
+
+      processed += batch.length;
+      onProgress?.(processed, total);
+
+      // Stall guard. A batch in which nothing resolved is NOT a stall: its
+      // refs were deleted above, so the next read returns new rows. Only stop
+      // when the stored count did not shrink, which means the deletes removed
+      // nothing and the same rows would be read again forever.
+      const remainingAfter = this.queries.getUnresolvedReferencesCount();
+      if (remainingAfter >= remaining) {
+        break;
+      }
+      remaining = remainingAfter;
+    }
+
+    return {
+      resolved: [],
+      unresolved: [],
+      stats: aggregateStats,
+    };
+  }
+
   /**
    * Get detected frameworks
    */
@@ -513,9 +526,6 @@ export class ReferenceResolver {
    * Get file path from node ID
    */
   private getFilePathFromNodeId(nodeId: string): string {
-    // Check warm cache first
-    const cached = this.nodeByIdCache.get(nodeId);
-    if (cached) return cached.filePath;
     const node = this.queries.getNodeById(nodeId);
     return node?.filePath || '';
   }
@@ -524,9 +534,6 @@ export class ReferenceResolver {
    * Get language from node ID
    */
   private getLanguageFromNodeId(nodeId: string): UnresolvedRef['language'] {
-    // Check warm cache first
-    const cached = this.nodeByIdCache.get(nodeId);
-    if (cached) return cached.language;
     const node = this.queries.getNodeById(nodeId);
     return node?.language || 'unknown';
   }

+ 3 - 0
src/types.ts

@@ -232,6 +232,9 @@ export interface ExtractionError {
   /** Error message */
   message: string;
 
+  /** File path where the error occurred */
+  filePath?: string;
+
   /** Line number if available */
   line?: number;