|
|
@@ -16,7 +16,8 @@ import * as path from 'path';
|
|
|
import * as os from 'os';
|
|
|
import { DatabaseConnection } from '../src/db';
|
|
|
import { QueryBuilder } from '../src/db/queries';
|
|
|
-import { Node } from '../src/types';
|
|
|
+import { runMigrations, getCurrentVersion } from '../src/db/migrations';
|
|
|
+import { Node, Edge } from '../src/types';
|
|
|
|
|
|
function makeNode(id: string, name = id): Node {
|
|
|
return {
|
|
|
@@ -249,3 +250,111 @@ describe('runMaintenance', () => {
|
|
|
expect(() => db.runMaintenance()).not.toThrow();
|
|
|
});
|
|
|
});
|
|
|
+
|
|
|
+// The edges table carried no UNIQUE constraint, so `insertEdge`'s
|
|
|
+// `INSERT OR IGNORE` had nothing to conflict on and silently admitted
|
|
|
+// byte-identical duplicate rows when two passes emitted the same edge (#1034).
|
|
|
+// A UNIQUE identity index — `(source, target, kind, IFNULL(line,-1),
|
|
|
+// IFNULL(col,-1))` — makes OR IGNORE actually dedup.
|
|
|
+describe('edge identity uniqueness (#1034)', () => {
|
|
|
+ let dir: string;
|
|
|
+ let db: DatabaseConnection;
|
|
|
+ let q: QueryBuilder;
|
|
|
+
|
|
|
+ beforeEach(() => {
|
|
|
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-edge-uniq-'));
|
|
|
+ db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
|
|
|
+ q = new QueryBuilder(db.getDb());
|
|
|
+ q.insertNodes([makeNode('A'), makeNode('B')]);
|
|
|
+ });
|
|
|
+
|
|
|
+ afterEach(() => {
|
|
|
+ db.close();
|
|
|
+ if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
|
|
|
+ });
|
|
|
+
|
|
|
+ const edgeCount = () =>
|
|
|
+ (db.getDb().prepare('SELECT count(*) AS c FROM edges').get() as { c: number }).c;
|
|
|
+ const mk = (over: Partial<Edge> = {}): Edge => ({
|
|
|
+ source: 'A',
|
|
|
+ target: 'B',
|
|
|
+ kind: 'references',
|
|
|
+ line: 153,
|
|
|
+ column: 12,
|
|
|
+ metadata: { resolvedBy: 'exact-match' },
|
|
|
+ ...over,
|
|
|
+ });
|
|
|
+
|
|
|
+ it('a fresh database has the identity index', () => {
|
|
|
+ const idx = db
|
|
|
+ .getDb()
|
|
|
+ .prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_edges_identity'")
|
|
|
+ .get();
|
|
|
+ expect(idx).toBeTruthy();
|
|
|
+ });
|
|
|
+
|
|
|
+ it('collapses byte-identical edges to a single row', () => {
|
|
|
+ q.insertEdges([mk(), mk(), mk()]);
|
|
|
+ expect(edgeCount()).toBe(1);
|
|
|
+ });
|
|
|
+
|
|
|
+ it('dedups even when only the metadata differs (same structural identity)', () => {
|
|
|
+ q.insertEdges([mk({ metadata: { resolvedBy: 'exact-match' } }), mk({ metadata: { resolvedBy: 'import' } })]);
|
|
|
+ expect(edgeCount()).toBe(1);
|
|
|
+ });
|
|
|
+
|
|
|
+ it('keeps edges that differ in line/col — distinct call sites are not duplicates', () => {
|
|
|
+ q.insertEdges([mk({ column: 12 }), mk({ column: 99 }), mk({ line: 200, column: 1 })]);
|
|
|
+ expect(edgeCount()).toBe(3);
|
|
|
+ });
|
|
|
+
|
|
|
+ it('dedups coordinate-less edges, folding NULL line/col via IFNULL', () => {
|
|
|
+ q.insertEdges([mk({ line: undefined, column: undefined }), mk({ line: undefined, column: undefined })]);
|
|
|
+ expect(edgeCount()).toBe(1);
|
|
|
+ });
|
|
|
+
|
|
|
+ it('dedups across separate insert calls (storage constraint, not a per-batch dedup)', () => {
|
|
|
+ q.insertEdges([mk()]);
|
|
|
+ q.insertEdges([mk()]);
|
|
|
+ expect(edgeCount()).toBe(1);
|
|
|
+ });
|
|
|
+});
|
|
|
+
|
|
|
+describe('migration v6: dedup edges + add identity index on upgrade (#1034)', () => {
|
|
|
+ it('collapses pre-existing duplicate rows, keeps distinct ones, and restores the constraint', () => {
|
|
|
+ const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-mig6-'));
|
|
|
+ const db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
|
|
|
+ const raw = db.getDb();
|
|
|
+ const q = new QueryBuilder(raw);
|
|
|
+ q.insertNodes([makeNode('A'), makeNode('B')]);
|
|
|
+
|
|
|
+ // Recreate a pre-v6 database: without the identity index, `INSERT OR IGNORE`
|
|
|
+ // admits duplicates. Revert the recorded version so migration v6 will re-run.
|
|
|
+ raw.exec('DROP INDEX IF EXISTS idx_edges_identity');
|
|
|
+ raw.prepare('DELETE FROM schema_versions WHERE version >= 6').run();
|
|
|
+ q.insertEdges([
|
|
|
+ { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'exact-match' } },
|
|
|
+ { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'exact-match' } },
|
|
|
+ { source: 'A', target: 'B', kind: 'calls', line: 200, column: 4 },
|
|
|
+ ]);
|
|
|
+ const count = () => (raw.prepare('SELECT count(*) AS c FROM edges').get() as { c: number }).c;
|
|
|
+ expect(count()).toBe(3); // duplicate admitted while the index was absent
|
|
|
+
|
|
|
+ runMigrations(raw, 5);
|
|
|
+
|
|
|
+ expect(count()).toBe(2); // duplicate collapsed, the distinct `calls` edge kept
|
|
|
+ expect(getCurrentVersion(raw)).toBe(6);
|
|
|
+ const idx = raw
|
|
|
+ .prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_edges_identity'")
|
|
|
+ .get();
|
|
|
+ expect(idx).toBeTruthy();
|
|
|
+ // The constraint now holds — re-inserting the duplicate is a no-op.
|
|
|
+ q.insertEdges([
|
|
|
+ { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'x' } },
|
|
|
+ ]);
|
|
|
+ expect(count()).toBe(2);
|
|
|
+
|
|
|
+ db.close();
|
|
|
+ fs.rmSync(dir, { recursive: true, force: true });
|
|
|
+ });
|
|
|
+});
|