ソースを参照

Port performance improvements from PR #15

- SQLite performance pragmas: synchronous=NORMAL, 64MB cache,
  memory temp store, 256MB mmap (safe with WAL mode)
- Batch insert for unresolved refs: single transaction instead of
  N individual inserts per file
- Symbol caching (warmCaches): pre-load all nodes into memory maps
  before resolution, eliminating repeated SQLite queries per ref
- Async file I/O: fs.stat/readFile in indexFile() are now non-blocking
- Denormalize filePath/language onto UnresolvedReference: avoids N
  node lookups during resolution, with schema migration v2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Colby McHenry 4 ヶ月 前
コミット
d80900f653
7 ファイル変更、149 行追加、23 行削除
  1. 10 0
      src/db/index.ts
  2. 11 12
      src/db/migrations.ts
  3. 47 2
      src/db/queries.ts
  4. 4 0
      src/db/schema.sql
  5. 11 5
      src/extraction/index.ts
  6. 60 4
      src/resolution/index.ts
  7. 6 0
      src/types.ts

+ 10 - 0
src/db/index.ts

@@ -41,6 +41,11 @@ export class DatabaseConnection {
     // Wait up to 2 minutes if database is locked by another process
     // (indexing operations can hold locks for extended periods)
     db.pragma('busy_timeout = 120000');
+    // Performance tuning
+    db.pragma('synchronous = NORMAL');     // Safe with WAL mode
+    db.pragma('cache_size = -64000');      // 64 MB page cache
+    db.pragma('temp_store = MEMORY');      // Temp tables in memory
+    db.pragma('mmap_size = 268435456');    // 256 MB memory-mapped I/O
 
     // Run schema initialization
     const schemaPath = path.join(__dirname, 'schema.sql');
@@ -66,6 +71,11 @@ export class DatabaseConnection {
     // Wait up to 2 minutes if database is locked by another process
     // (indexing operations can hold locks for extended periods)
     db.pragma('busy_timeout = 120000');
+    // Performance tuning
+    db.pragma('synchronous = NORMAL');
+    db.pragma('cache_size = -64000');
+    db.pragma('temp_store = MEMORY');
+    db.pragma('mmap_size = 268435456');
 
     // Check and run migrations if needed
     const conn = new DatabaseConnection(db, dbPath);

+ 11 - 12
src/db/migrations.ts

@@ -9,7 +9,7 @@ import Database from 'better-sqlite3';
 /**
  * Current schema version
  */
-export const CURRENT_SCHEMA_VERSION = 1;
+export const CURRENT_SCHEMA_VERSION = 2;
 
 /**
  * Migration definition
@@ -27,17 +27,16 @@ interface Migration {
  * Future migrations go here.
  */
 const migrations: Migration[] = [
-  // Example migration for version 2 (when needed):
-  // {
-  //   version: 2,
-  //   description: 'Add support for module resolution',
-  //   up: (db) => {
-  //     db.exec(`
-  //       ALTER TABLE nodes ADD COLUMN module_path TEXT;
-  //       CREATE INDEX idx_nodes_module_path ON nodes(module_path);
-  //     `);
-  //   },
-  // },
+  {
+    version: 2,
+    description: 'Add filePath and language to unresolved_refs for performance',
+    // Adds the denormalized file_path / language columns to unresolved_refs.
+    // SQLite has no "ADD COLUMN IF NOT EXISTS", so the existing columns are
+    // read first and only the missing ones are added. This keeps the step
+    // safe to re-run if a previous attempt added one column and then failed.
+    up: (db) => {
+      const existingColumns = new Set(
+        (db.pragma('table_info(unresolved_refs)') as Array<{ name: string }>).map(
+          (column) => column.name
+        )
+      );
+      // Column names are fixed literals, so interpolating them into DDL is safe.
+      for (const columnName of ['file_path', 'language']) {
+        if (!existingColumns.has(columnName)) {
+          db.exec(`ALTER TABLE unresolved_refs ADD COLUMN ${columnName} TEXT;`);
+        }
+      }
+    },
+  },
 ];
 
 /**

+ 47 - 2
src/db/queries.ts

@@ -73,6 +73,8 @@ interface UnresolvedRefRow {
   reference_kind: string;
   line: number;
   col: number;
+  file_path: string | null;
+  language: string | null;
   candidates: string | null;
 }
 
@@ -422,6 +424,14 @@ export class QueryBuilder {
     return rows.map(rowToNode);
   }
 
+  /**
+   * Load every row of the `nodes` table and convert each one with `rowToNode`.
+   *
+   * @returns One `Node` per row returned by `SELECT * FROM nodes`.
+   */
+  getAllNodes(): Node[] {
+    const selectAllNodes = this.db.prepare('SELECT * FROM nodes');
+    const nodeRows = selectAllNodes.all() as NodeRow[];
+    return nodeRows.map(rowToNode);
+  }
+
   /**
    * Search nodes by name using FTS with fallback to LIKE for better matching
    *
@@ -778,8 +788,8 @@ export class QueryBuilder {
   insertUnresolvedRef(ref: UnresolvedReference): void {
     if (!this.stmts.insertUnresolved) {
       this.stmts.insertUnresolved = this.db.prepare(`
-        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, candidates)
-        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates)
+        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, file_path, language, candidates)
+        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @filePath, @language, @candidates)
       `);
     }
 
@@ -789,10 +799,41 @@ export class QueryBuilder {
       referenceKind: ref.referenceKind,
       line: ref.line,
       col: ref.column,
+      filePath: ref.filePath ?? null,
+      language: ref.language ?? null,
       candidates: ref.candidates ? JSON.stringify(ref.candidates) : null,
     });
   }
 
+  /**
+   * Insert multiple unresolved references inside one `db.transaction` call.
+   *
+   * Reuses the same cached prepared statement as `insertUnresolvedRef`,
+   * preparing it on first use. Missing `filePath` / `language` values are
+   * stored as NULL, and `candidates` is stored as a JSON string (or NULL).
+   *
+   * @param refs References to insert; an empty array is a no-op.
+   */
+  insertUnresolvedRefsBatch(refs: UnresolvedReference[]): void {
+    if (refs.length === 0) return;
+
+    if (!this.stmts.insertUnresolved) {
+      this.stmts.insertUnresolved = this.db.prepare(`
+        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, file_path, language, candidates)
+        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @filePath, @language, @candidates)
+      `);
+    }
+    // Bind the statement to a local so the loop needs no non-null assertion.
+    const insertStmt = this.stmts.insertUnresolved;
+
+    const insertAll = this.db.transaction((batch: UnresolvedReference[]) => {
+      for (const ref of batch) {
+        insertStmt.run({
+          fromNodeId: ref.fromNodeId,
+          referenceName: ref.referenceName,
+          referenceKind: ref.referenceKind,
+          line: ref.line,
+          col: ref.column,
+          filePath: ref.filePath ?? null,
+          language: ref.language ?? null,
+          candidates: ref.candidates ? JSON.stringify(ref.candidates) : null,
+        });
+      }
+    });
+    insertAll(refs);
+  }
+
+
   /**
    * Delete unresolved references from a node
    */
@@ -821,6 +862,8 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
+      filePath: row.file_path ?? undefined,
+      language: (row.language as Language) ?? undefined,
       candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
     }));
   }
@@ -836,6 +879,8 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
+      filePath: row.file_path ?? undefined,
+      language: (row.language as Language) ?? undefined,
       candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
     }));
   }

+ 4 - 0
src/db/schema.sql

@@ -11,6 +11,8 @@ CREATE TABLE IF NOT EXISTS schema_versions (
 -- Insert initial version
 INSERT INTO schema_versions (version, applied_at, description)
 VALUES (1, strftime('%s', 'now') * 1000, 'Initial schema');
+INSERT INTO schema_versions (version, applied_at, description)
+VALUES (2, strftime('%s', 'now') * 1000, 'Add filePath and language to unresolved_refs');
 
 -- =============================================================================
 -- Core Tables
@@ -73,6 +75,8 @@ CREATE TABLE IF NOT EXISTS unresolved_refs (
     reference_kind TEXT NOT NULL,
     line INTEGER NOT NULL,
     col INTEGER NOT NULL,
+    file_path TEXT,
+    language TEXT,
     candidates TEXT, -- JSON array
     FOREIGN KEY (from_node_id) REFERENCES nodes(id) ON DELETE CASCADE
 );

+ 11 - 5
src/extraction/index.ts

@@ -5,6 +5,7 @@
  */
 
 import * as fs from 'fs';
+import * as fsp from 'fs/promises';
 import * as path from 'path';
 import * as crypto from 'crypto';
 import {
@@ -399,8 +400,8 @@ export class ExtractionOrchestrator {
     let content: string;
     let stats: fs.Stats;
     try {
-      stats = fs.statSync(fullPath);
-      content = fs.readFileSync(fullPath, 'utf-8');
+      stats = await fsp.stat(fullPath);
+      content = await fsp.readFile(fullPath, 'utf-8');
     } catch (error) {
       captureException(error, { operation: 'extract-file', filePath: fullPath });
       return {
@@ -489,9 +490,14 @@ export class ExtractionOrchestrator {
       this.queries.insertEdges(result.edges);
     }
 
-    // Insert unresolved references
-    for (const ref of result.unresolvedReferences) {
-      this.queries.insertUnresolvedRef(ref);
+    // Insert unresolved references in batch with denormalized filePath/language
+    if (result.unresolvedReferences.length > 0) {
+      const refsWithContext = result.unresolvedReferences.map((ref) => ({
+        ...ref,
+        filePath: ref.filePath ?? filePath,
+        language: ref.language ?? language,
+      }));
+      this.queries.insertUnresolvedRefsBatch(refsWithContext);
     }
 
     // Insert file record

+ 60 - 4
src/resolution/index.ts

@@ -36,6 +36,10 @@ export class ReferenceResolver {
   private frameworks: FrameworkResolver[] = [];
   private nodeCache: Map<string, Node[]> = new Map();
   private fileCache: Map<string, string | null> = new Map();
+  private nameCache: Map<string, Node[]> = new Map();
+  private qualifiedNameCache: Map<string, Node[]> = new Map();
+  private nodeByIdCache: Map<string, Node> = new Map();
+  private cachesWarmed = false;
 
   constructor(projectRoot: string, queries: QueryBuilder) {
     this.projectRoot = projectRoot;
@@ -51,12 +55,48 @@ export class ReferenceResolver {
     this.clearCaches();
   }
 
+  /**
+   * Read every node once via `queries.getAllNodes()` and index it three ways:
+   * by name, by qualified name, and by id. Once the warmed flag is set, the
+   * name and qualified-name lookups are answered from these maps instead of
+   * querying the database.
+   *
+   * Does nothing when the caches are already warmed; `clearCaches()` resets that.
+   */
+  warmCaches(): void {
+    if (this.cachesWarmed) return;
+
+    // Append `entry` to the list stored under `key`, creating the list on first use.
+    const addToBucket = (index: Map<string, Node[]>, key: string, entry: Node): void => {
+      const bucket = index.get(key);
+      if (bucket === undefined) {
+        index.set(key, [entry]);
+      } else {
+        bucket.push(entry);
+      }
+    };
+
+    for (const node of this.queries.getAllNodes()) {
+      addToBucket(this.nameCache, node.name, node);
+      addToBucket(this.qualifiedNameCache, node.qualifiedName, node);
+      // Ids map to a single node, so a later node with the same id overwrites.
+      this.nodeByIdCache.set(node.id, node);
+    }
+
+    this.cachesWarmed = true;
+  }
+
+
   /**
    * Empty every in-memory lookup map held by this resolver and reset the
    * warmed flag, so the next `warmCaches()` call reloads nodes from the database.
    */
   clearCaches(): void {
     this.nodeCache.clear();
     this.fileCache.clear();
+    this.nameCache.clear();
+    this.qualifiedNameCache.clear();
+    this.nodeByIdCache.clear();
+    this.cachesWarmed = false;
   }
 
   /**
@@ -72,11 +112,18 @@ export class ReferenceResolver {
       },
 
       getNodesByName: (name: string) => {
+        // Use warm cache if available, otherwise fall back to search
+        if (this.cachesWarmed) {
+          return this.nameCache.get(name) ?? [];
+        }
         return this.queries.searchNodes(name, { limit: 100 }).map((r) => r.node);
       },
 
       getNodesByQualifiedName: (qualifiedName: string) => {
-        // Search for exact qualified name match
+        // Use warm cache if available, otherwise fall back to search + filter
+        if (this.cachesWarmed) {
+          return this.qualifiedNameCache.get(qualifiedName) ?? [];
+        }
         return this.queries
           .searchNodes(qualifiedName, { limit: 50 })
           .filter((r) => r.node.qualifiedName === qualifiedName)
@@ -131,19 +178,22 @@ export class ReferenceResolver {
     unresolvedRefs: UnresolvedReference[],
     onProgress?: (current: number, total: number) => void
   ): ResolutionResult {
+    // Pre-load all nodes into memory for fast lookups
+    this.warmCaches();
+
     const resolved: ResolvedRef[] = [];
     const unresolved: UnresolvedRef[] = [];
     const byMethod: Record<string, number> = {};
 
-    // Convert to our internal format
+    // Convert to our internal format, using denormalized fields when available
     const refs: UnresolvedRef[] = unresolvedRefs.map((ref) => ({
       fromNodeId: ref.fromNodeId,
       referenceName: ref.referenceName,
       referenceKind: ref.referenceKind,
       line: ref.line,
       column: ref.column,
-      filePath: this.getFilePathFromNodeId(ref.fromNodeId),
-      language: this.getLanguageFromNodeId(ref.fromNodeId),
+      filePath: ref.filePath || this.getFilePathFromNodeId(ref.fromNodeId),
+      language: ref.language || this.getLanguageFromNodeId(ref.fromNodeId),
     }));
 
     const total = refs.length;
@@ -311,6 +361,9 @@ export class ReferenceResolver {
    * Get file path from node ID
    */
   private getFilePathFromNodeId(nodeId: string): string {
+    // Answer from the id-indexed map when it holds this node; otherwise query the database
+    const warmNode = this.nodeByIdCache.get(nodeId);
+    if (warmNode !== undefined) return warmNode.filePath;
     const node = this.queries.getNodeById(nodeId);
     return node?.filePath || '';
   }
@@ -319,6 +372,9 @@ export class ReferenceResolver {
    * Get language from node ID
    */
   private getLanguageFromNodeId(nodeId: string): UnresolvedRef['language'] {
+    // Answer from the id-indexed map when it holds this node; otherwise query the database
+    const warmNode = this.nodeByIdCache.get(nodeId);
+    if (warmNode !== undefined) return warmNode.language;
     const node = this.queries.getNodeById(nodeId);
     return node?.language || 'unknown';
   }

+ 6 - 0
src/types.ts

@@ -257,6 +257,12 @@ export interface UnresolvedReference {
   line: number;
   column: number;
 
+  /** File path where reference occurs (denormalized for performance) */
+  filePath?: string;
+
+  /** Language of the source file (denormalized for performance) */
+  language?: Language;
+
   /** Possible qualified names it might resolve to */
   candidates?: string[];
 }