Browse Source

fix(db): skip orphaned edges during batch insert (#462)

Nandhis 3 weeks ago
parent
commit
572b1ede18
2 changed files with 81 additions and 3 deletions
  1. 46 0
      __tests__/db-perf.test.ts
  2. 35 3
      src/db/queries.ts

+ 46 - 0
__tests__/db-perf.test.ts

@@ -7,6 +7,7 @@
  *      doesn't serve a stale cached row on next `getNodeById`.
  *   3. `runMaintenance` runs `PRAGMA optimize` + `wal_checkpoint(PASSIVE)`
  *      after indexAll/sync without throwing.
+ *   4. `insertEdges` validates endpoints from the DB, not stale node cache.
  */
 
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
@@ -128,6 +129,51 @@ describe('insertNode cache invalidation', () => {
   });
 });
 
+describe('insertEdges endpoint validation', () => {
+  let dir: string;
+  let db: DatabaseConnection;
+  let q: QueryBuilder;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-edges-'));
+    db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
+    q = new QueryBuilder(db.getDb());
+  });
+
+  afterEach(() => {
+    db.close();
+    if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('skips edges with missing endpoints instead of failing the whole batch', () => {
+    q.insertNodes([makeNode('source'), makeNode('target'), makeNode('other')]);
+
+    expect(() =>
+      q.insertEdges([
+        { source: 'source', target: 'target', kind: 'calls' },
+        { source: 'source', target: 'missing-target', kind: 'calls' },
+        { source: 'missing-source', target: 'other', kind: 'references' },
+      ])
+    ).not.toThrow();
+
+    const edges = q.getOutgoingEdges('source');
+    expect(edges).toHaveLength(1);
+    expect(edges[0]).toMatchObject({ source: 'source', target: 'target', kind: 'calls' });
+  });
+
+  it('does not trust stale cached nodes when validating edge endpoints', () => {
+    q.insertNodes([makeNode('source'), makeNode('target')]);
+    expect(q.getNodeById('target')!.id).toBe('target');
+
+    db.getDb().prepare('DELETE FROM nodes WHERE id = ?').run('target');
+
+    expect(() =>
+      q.insertEdges([{ source: 'source', target: 'target', kind: 'calls' }])
+    ).not.toThrow();
+    expect(q.getOutgoingEdges('source')).toEqual([]);
+  });
+});
+
 describe('runMaintenance', () => {
   let dir: string;
   let db: DatabaseConnection;

+ 35 - 3
src/db/queries.ts

@@ -21,6 +21,8 @@ import { safeJsonParse } from '../utils';
 import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils';
 import { parseQuery, boundedEditDistance } from '../search/query-parser';
 
+const SQLITE_PARAM_CHUNK_SIZE = 500;
+
 /**
  * Database row types (snake_case from SQLite)
  */
@@ -419,9 +421,8 @@ export class QueryBuilder {
     // Chunk under SQLite's parameter limit (default 999, raised to 32766
     // in better-sqlite3 builds — chunk at 500 for safety across both
     // backends and to keep the query plan simple).
-    const CHUNK = 500;
-    for (let i = 0; i < misses.length; i += CHUNK) {
-      const chunk = misses.slice(i, i + CHUNK);
+    for (let i = 0; i < misses.length; i += SQLITE_PARAM_CHUNK_SIZE) {
+      const chunk = misses.slice(i, i + SQLITE_PARAM_CHUNK_SIZE);
       const placeholders = chunk.map(() => '?').join(',');
       const rows = this.db
         .prepare(`SELECT * FROM nodes WHERE id IN (${placeholders})`)
@@ -435,6 +436,25 @@ export class QueryBuilder {
     return out;
   }
 
+  private getExistingNodeIds(ids: readonly string[]): Set<string> {
+    const out = new Set<string>();
+    if (ids.length === 0) return out;
+
+    const uniqueIds = [...new Set(ids)];
+    for (let i = 0; i < uniqueIds.length; i += SQLITE_PARAM_CHUNK_SIZE) {
+      const chunk = uniqueIds.slice(i, i + SQLITE_PARAM_CHUNK_SIZE);
+      const placeholders = chunk.map(() => '?').join(',');
+      const rows = this.db
+        .prepare(`SELECT id FROM nodes WHERE id IN (${placeholders})`)
+        .all(...chunk) as { id: string }[];
+      for (const row of rows) {
+        out.add(row.id);
+      }
+    }
+
+    return out;
+  }
+
   /**
    * Add a node to the cache, evicting oldest if needed
    */
@@ -1039,8 +1059,20 @@ export class QueryBuilder {
    * Insert multiple edges in a transaction
    */
   insertEdges(edges: Edge[]): void {
+    if (edges.length === 0) return;
+
     this.db.transaction(() => {
+      const endpointIds = new Set<string>();
       for (const edge of edges) {
+        endpointIds.add(edge.source);
+        endpointIds.add(edge.target);
+      }
+      const existingNodeIds = this.getExistingNodeIds([...endpointIds]);
+
+      for (const edge of edges) {
+        if (!existingNodeIds.has(edge.source) || !existingNodeIds.has(edge.target)) {
+          continue;
+        }
         this.insertEdge(edge);
       }
     })();