Jelajahi Sumber

Merge pull request #25 from MO2k4/feat/db-performance-schema-v2

feat: Schema v2 migration + database performance
Colby Mchenry 4 bulan lalu
induk
komit
adaff67219
5 file diubah dengan 116 penambahan dan 60 penghapusan
  1. 8 0
      src/db/index.ts
  2. 11 3
      src/db/migrations.ts
  3. 81 53
      src/db/queries.ts
  4. 13 4
      src/db/schema.sql
  5. 3 0
      src/types.ts

+ 8 - 0
src/db/index.ts

@@ -52,6 +52,14 @@ export class DatabaseConnection {
     const schema = fs.readFileSync(schemaPath, 'utf-8');
     db.exec(schema);
 
+    // Record current schema version so migrations aren't re-applied on open
+    const currentVersion = getCurrentVersion(db);
+    if (currentVersion < CURRENT_SCHEMA_VERSION) {
+      db.prepare(
+        'INSERT OR IGNORE INTO schema_versions (version, applied_at, description) VALUES (?, ?, ?)'
+      ).run(CURRENT_SCHEMA_VERSION, Date.now(), 'Initial schema includes all migrations');
+    }
+
     return new DatabaseConnection(db, dbPath);
   }
 

+ 11 - 3
src/db/migrations.ts

@@ -29,11 +29,19 @@ interface Migration {
 const migrations: Migration[] = [
   {
     version: 2,
-    description: 'Add filePath and language to unresolved_refs for performance',
+    description: 'Add project metadata, provenance tracking, and unresolved ref context',
+    // Creates project_metadata, adds unresolved_refs.file_path / language and
+    // edges.provenance, and creates the indexes that schema.sql declares for
+    // them, so migrated and freshly created databases end up with the same
+    // structure.
+    //
+    // NOTE(review): version 2 is redefined in place. A database that already
+    // recorded version 2 under the previous definition would presumably skip
+    // this migration and never gain project_metadata or edges.provenance —
+    // confirm no such databases exist, or ship these changes as version 3.
     up: (db) => {
       db.exec(`
-        ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT;
-        ALTER TABLE unresolved_refs ADD COLUMN language TEXT;
+        CREATE TABLE IF NOT EXISTS project_metadata (
+          key TEXT PRIMARY KEY,
+          value TEXT NOT NULL,
+          updated_at INTEGER NOT NULL
+        );
+        ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT NOT NULL DEFAULT '';
+        ALTER TABLE unresolved_refs ADD COLUMN language TEXT NOT NULL DEFAULT 'unknown';
+        ALTER TABLE edges ADD COLUMN provenance TEXT DEFAULT NULL;
+        CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
+        CREATE INDEX IF NOT EXISTS idx_unresolved_from_name ON unresolved_refs(from_node_id, reference_name);
+        CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
       `);
     },
   },

+ 81 - 53
src/db/queries.ts

@@ -54,6 +54,7 @@ interface EdgeRow {
   metadata: string | null;
   line: number | null;
   col: number | null;
+  provenance: string | null;
 }
 
 interface FileRow {
@@ -74,9 +75,9 @@ interface UnresolvedRefRow {
   reference_kind: string;
   line: number;
   col: number;
-  file_path: string | null;
-  language: string | null;
   candidates: string | null;
+  file_path: string;
+  language: string;
 }
 
 /**
@@ -118,6 +119,7 @@ function rowToEdge(row: EdgeRow): Edge {
     metadata: row.metadata ? safeJsonParse(row.metadata, undefined) : undefined,
     line: row.line ?? undefined,
     column: row.col ?? undefined,
+    provenance: row.provenance as Edge['provenance'],
   };
 }
 
@@ -639,8 +641,8 @@ export class QueryBuilder {
   insertEdge(edge: Edge): void {
     if (!this.stmts.insertEdge) {
       this.stmts.insertEdge = this.db.prepare(`
-        INSERT OR IGNORE INTO edges (source, target, kind, metadata, line, col)
-        VALUES (@source, @target, @kind, @metadata, @line, @col)
+        INSERT OR IGNORE INTO edges (source, target, kind, metadata, line, col, provenance)
+        VALUES (@source, @target, @kind, @metadata, @line, @col, @provenance)
       `);
     }
 
@@ -651,6 +653,7 @@ export class QueryBuilder {
       metadata: edge.metadata ? JSON.stringify(edge.metadata) : null,
       line: edge.line ?? null,
       col: edge.column ?? null,
+      provenance: edge.provenance ?? null,
     });
   }
 
@@ -678,10 +681,22 @@ export class QueryBuilder {
   /**
    * Get outgoing edges from a node
    */
-  getOutgoingEdges(sourceId: string, kinds?: EdgeKind[]): Edge[] {
-    if (kinds && kinds.length > 0) {
-      const sql = `SELECT * FROM edges WHERE source = ? AND kind IN (${kinds.map(() => '?').join(',')})`;
-      const rows = this.db.prepare(sql).all(sourceId, ...kinds) as EdgeRow[];
+  getOutgoingEdges(sourceId: string, kinds?: EdgeKind[], provenance?: string): Edge[] {
+    if ((kinds && kinds.length > 0) || provenance) {
+      let sql = 'SELECT * FROM edges WHERE source = ?';
+      const params: (string | number)[] = [sourceId];
+
+      if (kinds && kinds.length > 0) {
+        sql += ` AND kind IN (${kinds.map(() => '?').join(',')})`;
+        params.push(...kinds);
+      }
+
+      if (provenance) {
+        sql += ' AND provenance = ?';
+        params.push(provenance);
+      }
+
+      const rows = this.db.prepare(sql).all(...params) as EdgeRow[];
       return rows.map(rowToEdge);
     }
 
@@ -800,8 +815,8 @@ export class QueryBuilder {
   insertUnresolvedRef(ref: UnresolvedReference): void {
     if (!this.stmts.insertUnresolved) {
       this.stmts.insertUnresolved = this.db.prepare(`
-        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, file_path, language, candidates)
-        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @filePath, @language, @candidates)
+        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, candidates, file_path, language)
+        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @candidates, @filePath, @language)
       `);
     }
 
@@ -811,39 +826,23 @@ export class QueryBuilder {
       referenceKind: ref.referenceKind,
       line: ref.line,
       col: ref.column,
-      filePath: ref.filePath ?? null,
-      language: ref.language ?? null,
       candidates: ref.candidates ? JSON.stringify(ref.candidates) : null,
+      filePath: ref.filePath ?? '',
+      language: ref.language ?? 'unknown',
     });
   }
 
   /**
-   * Insert multiple unresolved references in a single transaction
+   * Insert multiple unresolved references in a transaction
+   *
+   * An empty array returns immediately. Otherwise every ref is handed to
+   * insertUnresolvedRef from inside one this.db.transaction callback, so the
+   * batch path shares the single-row method's statement and column defaults.
+   *
+   * @param refs - unresolved references to persist
    */
   insertUnresolvedRefsBatch(refs: UnresolvedReference[]): void {
     if (refs.length === 0) return; // nothing to insert
-
-    if (!this.stmts.insertUnresolved) {
-      this.stmts.insertUnresolved = this.db.prepare(`
-        INSERT INTO unresolved_refs (from_node_id, reference_name, reference_kind, line, col, file_path, language, candidates)
-        VALUES (@fromNodeId, @referenceName, @referenceKind, @line, @col, @filePath, @language, @candidates)
-      `);
-    }
-
-    this.db.transaction(() => {
+    const insert = this.db.transaction(() => {
       for (const ref of refs) {
-        this.stmts.insertUnresolved!.run({
-          fromNodeId: ref.fromNodeId,
-          referenceName: ref.referenceName,
-          referenceKind: ref.referenceKind,
-          line: ref.line,
-          col: ref.column,
-          filePath: ref.filePath ?? null,
-          language: ref.language ?? null,
-          candidates: ref.candidates ? JSON.stringify(ref.candidates) : null,
-        });
+        this.insertUnresolvedRef(ref); // delegates to the single-row insert
       }
-    })();
+    });
+    insert(); // invoke the function returned by this.db.transaction
   }
 
   /**
@@ -874,9 +873,9 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
-      filePath: row.file_path ?? undefined,
-      language: (row.language as Language) ?? undefined,
-      candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
+      candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
+      filePath: row.file_path,
+      language: row.language as Language,
     }));
   }
 
@@ -891,9 +890,9 @@ export class QueryBuilder {
       referenceKind: row.reference_kind as EdgeKind,
       line: row.line,
       column: row.col,
-      filePath: row.file_path ?? undefined,
-      language: (row.language as Language) ?? undefined,
-      candidates: row.candidates ? safeJsonParse<string[]>(row.candidates, []) : undefined,
+      candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
+      filePath: row.file_path,
+      language: row.language as Language,
     }));
   }
 
@@ -921,17 +920,13 @@ export class QueryBuilder {
    * Get graph statistics
    */
   getStats(): GraphStats {
-    const nodeCount = (
-      this.db.prepare('SELECT COUNT(*) as count FROM nodes').get() as { count: number }
-    ).count;
-
-    const edgeCount = (
-      this.db.prepare('SELECT COUNT(*) as count FROM edges').get() as { count: number }
-    ).count;
-
-    const fileCount = (
-      this.db.prepare('SELECT COUNT(*) as count FROM files').get() as { count: number }
-    ).count;
+    // Single query for all three aggregate counts
+    const counts = this.db.prepare(`
+      SELECT
+        (SELECT COUNT(*) FROM nodes) AS node_count,
+        (SELECT COUNT(*) FROM edges) AS edge_count,
+        (SELECT COUNT(*) FROM files) AS file_count
+    `).get() as { node_count: number; edge_count: number; file_count: number };
 
     const nodesByKind = {} as Record<NodeKind, number>;
     const nodeKindRows = this.db
@@ -958,9 +953,9 @@ export class QueryBuilder {
     }
 
     return {
-      nodeCount,
-      edgeCount,
-      fileCount,
+      nodeCount: counts.node_count,
+      edgeCount: counts.edge_count,
+      fileCount: counts.file_count,
       nodesByKind,
       edgesByKind,
       filesByLanguage,
@@ -969,6 +964,39 @@ export class QueryBuilder {
     };
   }
 
+  // ===========================================================================
+  // Project Metadata
+  // ===========================================================================
+
+  /**
+   * Get a metadata value by key
+   *
+   * @param key - project_metadata key to look up
+   * @returns the stored value, or null when no row matches the key
+   */
+  getMetadata(key: string): string | null {
+    const row = this.db.prepare('SELECT value FROM project_metadata WHERE key = ?').get(key) as { value: string } | undefined;
+    return row?.value ?? null; // missing row -> null
+  }
+
+  /**
+   * Set a metadata key-value pair (upsert)
+   *
+   * Inserts a new row; when the key already exists, value and updated_at are
+   * overwritten instead. updated_at is stamped with Date.now().
+   *
+   * @param key - metadata key
+   * @param value - value to store under the key
+   */
+  setMetadata(key: string, value: string): void {
+    this.db.prepare(
+      'INSERT INTO project_metadata (key, value, updated_at) VALUES (?, ?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at'
+    ).run(key, value, Date.now());
+  }
+
+  /**
+   * Get all metadata as a key-value record
+   *
+   * Reads every row of project_metadata. The record is built with
+   * Object.fromEntries so each key becomes an own property; plain
+   * `result[key] = value` assignment silently drops a '__proto__' key.
+   */
+  getAllMetadata(): Record<string, string> {
+    const rows = this.db.prepare('SELECT key, value FROM project_metadata').all() as { key: string; value: string }[];
+    return Object.fromEntries(rows.map((row): [string, string] => [row.key, row.value]));
+  }
+
   /**
    * Clear all data from the database
    */

+ 13 - 4
src/db/schema.sql

@@ -11,8 +11,6 @@ CREATE TABLE IF NOT EXISTS schema_versions (
 -- Insert initial version
 INSERT INTO schema_versions (version, applied_at, description)
 VALUES (1, strftime('%s', 'now') * 1000, 'Initial schema');
-INSERT INTO schema_versions (version, applied_at, description)
-VALUES (2, strftime('%s', 'now') * 1000, 'Add filePath and language to unresolved_refs');
 
 -- =============================================================================
 -- Core Tables
@@ -51,6 +49,7 @@ CREATE TABLE IF NOT EXISTS edges (
     metadata TEXT, -- JSON object
     line INTEGER,
     col INTEGER,
+    provenance TEXT DEFAULT NULL,
     FOREIGN KEY (source) REFERENCES nodes(id) ON DELETE CASCADE,
     FOREIGN KEY (target) REFERENCES nodes(id) ON DELETE CASCADE
 );
@@ -75,9 +74,9 @@ CREATE TABLE IF NOT EXISTS unresolved_refs (
     reference_kind TEXT NOT NULL,
     line INTEGER NOT NULL,
     col INTEGER NOT NULL,
-    file_path TEXT,
-    language TEXT,
     candidates TEXT, -- JSON array
+    file_path TEXT NOT NULL DEFAULT '',
+    language TEXT NOT NULL DEFAULT 'unknown',
     FOREIGN KEY (from_node_id) REFERENCES nodes(id) ON DELETE CASCADE
 );
 
@@ -136,6 +135,9 @@ CREATE INDEX IF NOT EXISTS idx_files_modified_at ON files(modified_at);
 -- Unresolved refs indexes
 CREATE INDEX IF NOT EXISTS idx_unresolved_from_node ON unresolved_refs(from_node_id);
 CREATE INDEX IF NOT EXISTS idx_unresolved_name ON unresolved_refs(reference_name);
+CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
+CREATE INDEX IF NOT EXISTS idx_unresolved_from_name ON unresolved_refs(from_node_id, reference_name);
+CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
 
 -- =============================================================================
 -- Vector Storage (for future semantic search)
@@ -152,3 +154,10 @@ CREATE TABLE IF NOT EXISTS vectors (
 );
 
 CREATE INDEX IF NOT EXISTS idx_vectors_model ON vectors(model);
+
+-- Project metadata for version/provenance tracking: one row per key
+CREATE TABLE IF NOT EXISTS project_metadata (
+    key TEXT PRIMARY KEY, -- looked up by QueryBuilder.getMetadata
+    value TEXT NOT NULL,
+    updated_at INTEGER NOT NULL -- written from Date.now() by QueryBuilder.setMetadata
+);

+ 3 - 0
src/types.ts

@@ -166,6 +166,9 @@ export interface Edge {
 
   /** Column number where relationship occurs */
   column?: number;
+
+  /** How this edge was created */
+  provenance?: 'tree-sitter' | 'scip' | 'heuristic';
 }
 
 /**