index-command.test.ts 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /**
  2. * Regression coverage for issue #874: `codegraph index` produced 0 nodes / 0
  3. * edges while `codegraph init` worked, and appeared to wipe the graph.
  4. *
  5. * Root cause: `index` ran a full extraction against the already-populated DB
  6. * without clearing it first. Every file's content hash still matched, so the
  7. * orchestrator skipped re-inserting all of them, and the run reported its delta
  8. * (after - before = 0) as "0 nodes, 0 edges". The fix makes `index` a true full
  9. * rebuild — clear, then re-index — so it produces the same complete result as a
  10. * fresh `init`.
  11. *
  12. * Exercised end-to-end against the built binary so the CLI wiring (not just the
  13. * library) is covered.
  14. */
  15. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  16. import { execFileSync } from 'child_process';
  17. import * as fs from 'fs';
  18. import * as path from 'path';
  19. import * as os from 'os';
  20. import { CodeGraph } from '../src';
  21. import { DatabaseConnection } from '../src/db';
  /** Path of the compiled CLI entry point, resolved relative to this test file's directory. */
  const BIN = path.resolve(__dirname, '..', 'dist', 'bin', 'codegraph.js');
  /**
   * Extract a single PRAGMA value regardless of the shape it was returned in.
   *
   * @param raw - The PRAGMA result: an array (only its first element is used),
   *   a row object, or a bare scalar.
   * @param key - Property to read when the (first) row is an object.
   * @returns `row[key]` for an object row; otherwise the row itself, which
   *   covers scalars, `null`, and `undefined` (e.g. an empty array).
   */
  function pragmaValue(raw: unknown, key: string): unknown {
    const first: unknown = Array.isArray(raw) ? raw[0] : raw;

    // Scalars and null/undefined have no properties to look up — pass them through.
    if (first === null || typeof first !== 'object') {
      return first;
    }

    return (first as Record<string, unknown>)[key];
  }
  /**
   * Run the built CLI (`BIN`) synchronously with the current Node executable.
   *
   * The child inherits this process's environment with `CODEGRAPH_NO_DAEMON`
   * set to `'1'`. stdin is ignored; stdout and stderr are piped.
   *
   * @param args - CLI arguments placed after the script path.
   * @param cwd - Working directory for the child process.
   * @returns The child's stdout decoded as UTF-8.
   */
  function runCodegraph(args: string[], cwd: string): string {
    const childEnv = { ...process.env, CODEGRAPH_NO_DAEMON: '1' };
    const argv = [BIN].concat(args);

    return execFileSync(process.execPath, argv, {
      cwd,
      encoding: 'utf-8',
      env: childEnv,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
  }
  /**
   * Open the graph stored for `dir`, read its statistics, and close it again.
   *
   * @param dir - Project directory passed to `CodeGraph.openSync`.
   * @returns The node and edge counts reported by `getStats()`.
   */
  function graphCounts(dir: string): { nodes: number; edges: number } {
    const graph = CodeGraph.openSync(dir);
    try {
      const { nodeCount, edgeCount } = graph.getStats();
      return { nodes: nodeCount, edges: edgeCount };
    } finally {
      // Always release the handle, even if reading the stats throws.
      graph.close();
    }
  }
  /**
   * Suite for #874: after `init`, running `index` — once, repeatedly, or with
   * `--quiet` — must leave the graph with the same node/edge counts.
   */
  describe('codegraph index — full re-index keeps the graph populated (#874)', () => {
    let tempDir: string;

    beforeEach(() => {
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-index-cmd-'));

      // A couple of files with a call edge so there is a non-trivial graph to
      // (fail to) reproduce.
      const fixtures: Array<[string, string]> = [
        [
          'a.ts',
          `export function greet(name: string) { return hello(name); }\n` +
            `export function hello(n: string) { return 'hi ' + n; }\n`,
        ],
        [
          'b.ts',
          `import { greet } from './a';\nexport function main() { return greet('world'); }\n`,
        ],
      ];
      for (const [fileName, contents] of fixtures) {
        fs.writeFileSync(path.join(tempDir, fileName), contents);
      }
    });

    afterEach(() => {
      fs.rmSync(tempDir, { recursive: true, force: true });
    });

    it("reproduces init's node/edge counts instead of emptying the index", () => {
      runCodegraph(['init'], tempDir);
      const baseline = graphCounts(tempDir);
      expect(baseline.nodes).toBeGreaterThan(0);
      expect(baseline.edges).toBeGreaterThan(0);

      const cliOutput = runCodegraph(['index'], tempDir);
      const rebuilt = graphCounts(tempDir);

      // `index` rebuilt the graph rather than wiping it: the counts are unchanged.
      expect(rebuilt.nodes).toBe(baseline.nodes);
      expect(rebuilt.edges).toBe(baseline.edges);

      // The CLI must print the real node count, never the misleading "0 nodes".
      expect(cliOutput).not.toMatch(/\b0 nodes, 0 edges\b/);
      const reportedNodeCount = new RegExp(`\\b${baseline.nodes} nodes\\b`);
      expect(cliOutput).toMatch(reportedNodeCount);
    });

    it('is idempotent: a second index does not grow the graph', () => {
      runCodegraph(['init'], tempDir);

      runCodegraph(['index'], tempDir);
      const afterFirstIndex = graphCounts(tempDir);

      runCodegraph(['index'], tempDir);
      const afterSecondIndex = graphCounts(tempDir);

      // Each run is a clean rebuild, so duplicate (re-resolved) edges must not
      // pile up across runs (the C# "+18 edges" symptom in the report).
      expect(afterSecondIndex.nodes).toBe(afterFirstIndex.nodes);
      expect(afterSecondIndex.edges).toBe(afterFirstIndex.edges);
    });

    it('--quiet path also rebuilds a populated graph', () => {
      runCodegraph(['init'], tempDir);
      const baseline = graphCounts(tempDir);

      runCodegraph(['index', '--quiet'], tempDir);
      const rebuilt = graphCounts(tempDir);

      expect(rebuilt.nodes).toBe(baseline.nodes);
      expect(rebuilt.edges).toBe(baseline.edges);
    });
  });
  /**
   * Regression coverage for issue #1067: a full re-index must RECOVER an existing
   * oversized/stale index from earlier versions, not wedge on it.
   *
   * Root cause: `index` opened the old database and DELETE-d every row to clear
   * it. With FTS triggers firing per deleted node, a pre-fix poisoned graph (an
   * ignored gitlink corpus scanned into ~1.6M nodes + a multi-GB WAL, #1065) took
   * well over the 60s liveness-watchdog window to clear, so the process was
   * SIGKILLed before scanning even began and the bad state could never be rebuilt
   * away. The fix discards (unlinks) the database files and re-initializes a fresh
   * one — O(1) regardless of size — so `index` recovers any prior state.
   */
  describe('codegraph index — recovers a stale/oversized prior index (#1067)', () => {
    let tempDir: string;

    /** Location of the graph database inside a project directory. */
    const dbPath = (dir: string) => path.join(dir, '.codegraph', 'codegraph.db');

    beforeEach(() => {
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-index-recover-'));
      fs.writeFileSync(
        path.join(tempDir, 'a.ts'),
        `export function greet(name: string) { return hello(name); }\n` +
          `export function hello(n: string) { return 'hi ' + n; }\n`,
      );
    });

    afterEach(() => {
      fs.rmSync(tempDir, { recursive: true, force: true });
    });

    it('rebuilds to the current disk state, discarding content for files that no longer exist', () => {
      // Stand in for the "old graph indexed an ignored corpus" shape: index a tree
      // that also has a junk/ directory, then delete junk/ from disk so the DB now
      // carries stale nodes for paths that should no longer be indexed.
      const junkDir = path.join(tempDir, 'junk');
      fs.mkdirSync(junkDir);
      for (let i = 0; i < 12; i++) {
        fs.writeFileSync(path.join(junkDir, `j${i}.ts`), `export function j${i}() { return ${i}; }\n`);
      }
      runCodegraph(['init'], tempDir);
      const withJunk = graphCounts(tempDir);

      // Remove the corpus from disk. The DB still holds its nodes — the stale,
      // oversized prior state #1067 is about.
      fs.rmSync(junkDir, { recursive: true, force: true });
      runCodegraph(['index'], tempDir);
      const recovered = graphCounts(tempDir);

      // The rebuild reflects only what's on disk now — the junk nodes are gone…
      expect(recovered.nodes).toBeLessThan(withJunk.nodes);

      // …and the result is identical to a fresh init of the same (now-smaller) tree.
      const fresh = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-index-fresh-'));
      try {
        fs.copyFileSync(path.join(tempDir, 'a.ts'), path.join(fresh, 'a.ts'));
        runCodegraph(['init'], fresh);
        const freshCounts = graphCounts(fresh);
        expect(recovered.nodes).toBe(freshCounts.nodes);
        expect(recovered.edges).toBe(freshCounts.edges);
      } finally {
        fs.rmSync(fresh, { recursive: true, force: true });
      }
    });

    // The fix rebuilds a fresh DB rather than DELETE-ing rows in place. Prove it
    // with a header sentinel: PRAGMA user_version survives an in-place clear but
    // not a from-scratch recreate. (An inode check is unreliable — ext4/overlayfs
    // recycle the inode number after unlink+recreate.)
    it('rebuilds a fresh database rather than clearing the old one in place', () => {
      runCodegraph(['init'], tempDir);

      // Stamp the sentinel. Close in `finally` so a throwing pragma call cannot
      // leave the handle open while `index` (and later cleanup) touch the files.
      const stamp = DatabaseConnection.open(dbPath(tempDir));
      try {
        stamp.getDb().pragma('user_version = 4242');
        // Read the stamp back: without this, the "sentinel gone" assertion below
        // would pass vacuously if the write never took effect or the read shape
        // were not understood by `pragmaValue`.
        const stamped = pragmaValue(stamp.getDb().pragma('user_version'), 'user_version');
        expect(Number(stamped)).toBe(4242);
      } finally {
        stamp.close();
      }

      runCodegraph(['index'], tempDir);

      const check = DatabaseConnection.open(dbPath(tempDir));
      let userVersion: unknown;
      try {
        userVersion = pragmaValue(check.getDb().pragma('user_version'), 'user_version');
      } finally {
        check.close();
      }

      // Sentinel gone → `index` discarded the old DB and rebuilt it, the path that
      // avoids the per-row FTS delete wedge on a poisoned graph (#1067).
      expect(Number(userVersion)).not.toBe(4242);

      // …and the graph is intact afterwards.
      const counts = graphCounts(tempDir);
      expect(counts.nodes).toBeGreaterThan(0);
      expect(counts.edges).toBeGreaterThan(0);
    });
  });