db-perf.test.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /**
  2. * DB Performance / Correctness Tests
  3. *
  4. * Regression tests for three changes:
  5. * 1. Batch `getNodesByIds` collapses graph-traversal N+1 reads.
  6. * 2. `insertNode` invalidates the LRU cache so INSERT OR REPLACE
  7. * doesn't serve a stale cached row on next `getNodeById`.
  8. * 3. `runMaintenance` runs `PRAGMA optimize` + `wal_checkpoint(PASSIVE)`
  9. * after indexAll/sync without throwing.
  10. * 4. `insertEdges` validates endpoints from the DB, not stale node cache.
  11. */
  12. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  13. import * as fs from 'fs';
  14. import * as path from 'path';
  15. import * as os from 'os';
  16. import { DatabaseConnection } from '../src/db';
  17. import { QueryBuilder } from '../src/db/queries';
  18. import { runMigrations, getCurrentVersion } from '../src/db/migrations';
  19. import { Node, Edge } from '../src/types';
  20. function makeNode(id: string, name = id): Node {
  21. return {
  22. id,
  23. kind: 'function',
  24. name,
  25. qualifiedName: name,
  26. filePath: 'a.ts',
  27. language: 'typescript',
  28. startLine: 1,
  29. endLine: 1,
  30. startColumn: 0,
  31. endColumn: 0,
  32. updatedAt: Date.now(),
  33. };
  34. }
  35. describe('getNodesByIds (batch lookup)', () => {
  36. let dir: string;
  37. let db: DatabaseConnection;
  38. let q: QueryBuilder;
  39. beforeEach(() => {
  40. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-batch-'));
  41. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  42. q = new QueryBuilder(db.getDb());
  43. });
  44. afterEach(() => {
  45. db.close();
  46. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  47. });
  48. it('returns a Map keyed by id, with one entry per existing node', () => {
  49. q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
  50. const out = q.getNodesByIds(['n1', 'n2', 'n3']);
  51. expect(out.size).toBe(3);
  52. expect(out.get('n1')!.name).toBe('n1');
  53. expect(out.get('n3')!.name).toBe('n3');
  54. });
  55. it('omits missing IDs from the result map (no nulls, no exceptions)', () => {
  56. q.insertNodes([makeNode('n1'), makeNode('n2')]);
  57. const out = q.getNodesByIds(['n1', 'missing', 'n2']);
  58. expect(out.size).toBe(2);
  59. expect(out.has('missing')).toBe(false);
  60. expect(out.has('n1')).toBe(true);
  61. expect(out.has('n2')).toBe(true);
  62. });
  63. it('handles an empty input array', () => {
  64. expect(q.getNodesByIds([]).size).toBe(0);
  65. });
  66. it('handles batches over the SQLite parameter limit (chunking)', () => {
  67. // Insert 1500 nodes; the helper chunks at 500 internally.
  68. const nodes = Array.from({ length: 1500 }, (_, i) => makeNode(`n${i}`));
  69. q.insertNodes(nodes);
  70. const ids = nodes.map((n) => n.id);
  71. const out = q.getNodesByIds(ids);
  72. expect(out.size).toBe(1500);
  73. // Spot-check a few from the first / middle / last chunk.
  74. expect(out.has('n0')).toBe(true);
  75. expect(out.has('n750')).toBe(true);
  76. expect(out.has('n1499')).toBe(true);
  77. });
  78. it('serves cache hits from memory and queries only the misses', () => {
  79. q.insertNodes([makeNode('n1'), makeNode('n2'), makeNode('n3')]);
  80. // Warm the cache for n1 only.
  81. q.getNodeById('n1');
  82. // Replace the underlying row to make a miss-vs-cache-hit detectable.
  83. db.getDb().prepare('UPDATE nodes SET name = ? WHERE id = ?').run('changed', 'n1');
  84. const out = q.getNodesByIds(['n1', 'n2']);
  85. // The cached n1 (still 'n1', not 'changed') must be returned.
  86. expect(out.get('n1')!.name).toBe('n1');
  87. expect(out.get('n2')!.name).toBe('n2');
  88. });
  89. });
  90. describe('deleteResolvedReferences (chunking)', () => {
  91. let dir: string;
  92. let db: DatabaseConnection;
  93. let q: QueryBuilder;
  94. beforeEach(() => {
  95. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-delref-'));
  96. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  97. q = new QueryBuilder(db.getDb());
  98. });
  99. afterEach(() => {
  100. db.close();
  101. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  102. });
  103. it('deletes unresolved refs for more ids than the SQLite parameter limit (#1001)', () => {
  104. // Regression: this method bound every id as one parameter in a single
  105. // IN (...), so passing more ids than SQLITE_MAX_VARIABLE_NUMBER (32766 on
  106. // the bundled node:sqlite) threw "too many SQL variables". Use 33000 to
  107. // clear that ceiling. from_node_id has a FK to nodes, so insert nodes first.
  108. const nodes = Array.from({ length: 33000 }, (_, i) => makeNode(`n${i}`));
  109. q.insertNodes(nodes);
  110. q.insertUnresolvedRefsBatch(
  111. nodes.map((n) => ({
  112. fromNodeId: n.id,
  113. referenceName: 'someName',
  114. referenceKind: 'calls',
  115. line: 1,
  116. column: 0,
  117. }))
  118. );
  119. expect(q.getUnresolvedReferencesCount()).toBe(33000);
  120. const ids = nodes.map((n) => n.id);
  121. expect(() => q.deleteResolvedReferences(ids)).not.toThrow();
  122. expect(q.getUnresolvedReferencesCount()).toBe(0);
  123. });
  124. it('handles an empty input array', () => {
  125. expect(() => q.deleteResolvedReferences([])).not.toThrow();
  126. });
  127. });
  128. describe('insertNode cache invalidation', () => {
  129. let dir: string;
  130. let db: DatabaseConnection;
  131. let q: QueryBuilder;
  132. beforeEach(() => {
  133. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-cache-'));
  134. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  135. q = new QueryBuilder(db.getDb());
  136. });
  137. afterEach(() => {
  138. db.close();
  139. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  140. });
  141. it('does not serve a stale cached node after INSERT OR REPLACE', () => {
  142. // Regression: insertNode (which uses INSERT OR REPLACE) used to skip
  143. // cache invalidation, so the next getNodeById returned the pre-replace
  144. // version until LRU eviction.
  145. const original = makeNode('n1', 'oldName');
  146. q.insertNode(original);
  147. const beforeReplace = q.getNodeById('n1');
  148. expect(beforeReplace!.name).toBe('oldName');
  149. // Replace via insertNode (the bug path).
  150. q.insertNode({ ...original, name: 'newName', updatedAt: Date.now() });
  151. const afterReplace = q.getNodeById('n1');
  152. expect(afterReplace!.name).toBe('newName');
  153. });
  154. });
  155. describe('insertEdges endpoint validation', () => {
  156. let dir: string;
  157. let db: DatabaseConnection;
  158. let q: QueryBuilder;
  159. beforeEach(() => {
  160. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-edges-'));
  161. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  162. q = new QueryBuilder(db.getDb());
  163. });
  164. afterEach(() => {
  165. db.close();
  166. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  167. });
  168. it('skips edges with missing endpoints instead of failing the whole batch', () => {
  169. q.insertNodes([makeNode('source'), makeNode('target'), makeNode('other')]);
  170. expect(() =>
  171. q.insertEdges([
  172. { source: 'source', target: 'target', kind: 'calls' },
  173. { source: 'source', target: 'missing-target', kind: 'calls' },
  174. { source: 'missing-source', target: 'other', kind: 'references' },
  175. ])
  176. ).not.toThrow();
  177. const edges = q.getOutgoingEdges('source');
  178. expect(edges).toHaveLength(1);
  179. expect(edges[0]).toMatchObject({ source: 'source', target: 'target', kind: 'calls' });
  180. });
  181. it('does not trust stale cached nodes when validating edge endpoints', () => {
  182. q.insertNodes([makeNode('source'), makeNode('target')]);
  183. expect(q.getNodeById('target')!.id).toBe('target');
  184. db.getDb().prepare('DELETE FROM nodes WHERE id = ?').run('target');
  185. expect(() =>
  186. q.insertEdges([{ source: 'source', target: 'target', kind: 'calls' }])
  187. ).not.toThrow();
  188. expect(q.getOutgoingEdges('source')).toEqual([]);
  189. });
  190. });
  191. describe('runMaintenance', () => {
  192. let dir: string;
  193. let db: DatabaseConnection;
  194. beforeEach(() => {
  195. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-perf-maint-'));
  196. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  197. });
  198. afterEach(() => {
  199. db.close();
  200. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  201. });
  202. it('runs without throwing on a fresh database', () => {
  203. expect(() => db.runMaintenance()).not.toThrow();
  204. });
  205. it('runs without throwing after writes', () => {
  206. const q = new QueryBuilder(db.getDb());
  207. q.insertNodes([makeNode('n1'), makeNode('n2')]);
  208. expect(() => db.runMaintenance()).not.toThrow();
  209. });
  210. it('swallows failures rather than propagating (best-effort)', () => {
  211. // Close the DB so the underlying handle would normally throw on any
  212. // exec(). runMaintenance must still not propagate.
  213. db.close();
  214. expect(() => db.runMaintenance()).not.toThrow();
  215. });
  216. });
  217. // The edges table carried no UNIQUE constraint, so `insertEdge`'s
  218. // `INSERT OR IGNORE` had nothing to conflict on and silently admitted
  219. // byte-identical duplicate rows when two passes emitted the same edge (#1034).
  220. // A UNIQUE identity index — `(source, target, kind, IFNULL(line,-1),
  221. // IFNULL(col,-1))` — makes OR IGNORE actually dedup.
  222. describe('edge identity uniqueness (#1034)', () => {
  223. let dir: string;
  224. let db: DatabaseConnection;
  225. let q: QueryBuilder;
  226. beforeEach(() => {
  227. dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-edge-uniq-'));
  228. db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  229. q = new QueryBuilder(db.getDb());
  230. q.insertNodes([makeNode('A'), makeNode('B')]);
  231. });
  232. afterEach(() => {
  233. db.close();
  234. if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
  235. });
  236. const edgeCount = () =>
  237. (db.getDb().prepare('SELECT count(*) AS c FROM edges').get() as { c: number }).c;
  238. const mk = (over: Partial<Edge> = {}): Edge => ({
  239. source: 'A',
  240. target: 'B',
  241. kind: 'references',
  242. line: 153,
  243. column: 12,
  244. metadata: { resolvedBy: 'exact-match' },
  245. ...over,
  246. });
  247. it('a fresh database has the identity index', () => {
  248. const idx = db
  249. .getDb()
  250. .prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_edges_identity'")
  251. .get();
  252. expect(idx).toBeTruthy();
  253. });
  254. it('collapses byte-identical edges to a single row', () => {
  255. q.insertEdges([mk(), mk(), mk()]);
  256. expect(edgeCount()).toBe(1);
  257. });
  258. it('dedups even when only the metadata differs (same structural identity)', () => {
  259. q.insertEdges([mk({ metadata: { resolvedBy: 'exact-match' } }), mk({ metadata: { resolvedBy: 'import' } })]);
  260. expect(edgeCount()).toBe(1);
  261. });
  262. it('keeps edges that differ in line/col — distinct call sites are not duplicates', () => {
  263. q.insertEdges([mk({ column: 12 }), mk({ column: 99 }), mk({ line: 200, column: 1 })]);
  264. expect(edgeCount()).toBe(3);
  265. });
  266. it('dedups coordinate-less edges, folding NULL line/col via IFNULL', () => {
  267. q.insertEdges([mk({ line: undefined, column: undefined }), mk({ line: undefined, column: undefined })]);
  268. expect(edgeCount()).toBe(1);
  269. });
  270. it('dedups across separate insert calls (storage constraint, not a per-batch dedup)', () => {
  271. q.insertEdges([mk()]);
  272. q.insertEdges([mk()]);
  273. expect(edgeCount()).toBe(1);
  274. });
  275. });
  276. describe('migration v6: dedup edges + add identity index on upgrade (#1034)', () => {
  277. it('collapses pre-existing duplicate rows, keeps distinct ones, and restores the constraint', () => {
  278. const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'db-mig6-'));
  279. const db = DatabaseConnection.initialize(path.join(dir, 'test.db'));
  280. const raw = db.getDb();
  281. const q = new QueryBuilder(raw);
  282. q.insertNodes([makeNode('A'), makeNode('B')]);
  283. // Recreate a pre-v6 database: without the identity index, `INSERT OR IGNORE`
  284. // admits duplicates. Revert the recorded version so migration v6 will re-run.
  285. raw.exec('DROP INDEX IF EXISTS idx_edges_identity');
  286. raw.prepare('DELETE FROM schema_versions WHERE version >= 6').run();
  287. q.insertEdges([
  288. { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'exact-match' } },
  289. { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'exact-match' } },
  290. { source: 'A', target: 'B', kind: 'calls', line: 200, column: 4 },
  291. ]);
  292. const count = () => (raw.prepare('SELECT count(*) AS c FROM edges').get() as { c: number }).c;
  293. expect(count()).toBe(3); // duplicate admitted while the index was absent
  294. runMigrations(raw, 5);
  295. expect(count()).toBe(2); // duplicate collapsed, the distinct `calls` edge kept
  296. expect(getCurrentVersion(raw)).toBe(6);
  297. const idx = raw
  298. .prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_edges_identity'")
  299. .get();
  300. expect(idx).toBeTruthy();
  301. // The constraint now holds — re-inserting the duplicate is a no-op.
  302. q.insertEdges([
  303. { source: 'A', target: 'B', kind: 'references', line: 153, column: 12, metadata: { resolvedBy: 'x' } },
  304. ]);
  305. expect(count()).toBe(2);
  306. db.close();
  307. fs.rmSync(dir, { recursive: true, force: true });
  308. });
  309. });