sync-fk-regression.test.ts 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /**
  2. * Sync FK regression test (issue #455).
  3. *
  4. * #62 plugged FK violations at the extraction layer (empty-named nodes whose
  5. * containment edges had no target). #455 reports the same `FOREIGN KEY constraint
  6. * failed` reappearing on v0.9.5, but during *watch sync* on a Python-only project —
  7. * a different trigger than the C/C++ header empty-name issue #62 covered.
  8. *
  9. * The reproducer below drives the same path the daemon takes: extract → resolve →
  10. * insert edges. The resolution pass's `insertEdges` was not guarded the way the
  11. * extraction-layer insert was after #62, so any edge with a stale source/target
  12. * (e.g. a synthesized framework target whose node was deleted by a concurrent
  13. * file rewrite) throws and aborts the sync, leaving the FK error the user sees.
  14. *
  15. * The test asserts: a sequence of file rewrites + sync()s never throws, and the
  16. * graph stays internally consistent (every edge's source + target are real nodes).
  17. */
  18. import { describe, it, expect, beforeAll, afterEach } from 'vitest';
  19. import * as fs from 'fs';
  20. import * as path from 'path';
  21. import * as os from 'os';
  22. import CodeGraph from '../src/index';
  23. import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
  24. beforeAll(async () => {
  25. await initGrammars();
  26. await loadAllGrammars();
  27. });
  28. describe('watch sync FK regression (#455)', () => {
  29. let tmpDir: string | undefined;
  30. let cg: CodeGraph | undefined;
  /**
   * Per-test teardown: close the graph opened by the test (if any), then
   * delete its temporary project directory (if any).
   *
   * Both suite-level handles are detached *before* any cleanup runs, and the
   * directory removal sits in a `finally`, so a throwing `close()` can neither
   * leak the temp directory nor leave stale handles behind for the next test.
   * An error from `close()` still propagates and fails the hook.
   */
  afterEach(() => {
    const graph = cg;
    const dir = tmpDir;
    cg = undefined;
    tmpDir = undefined;

    try {
      // Close first: the directory is only removed once the graph is closed.
      if (graph) {
        graph.close();
      }
    } finally {
      if (dir) {
        fs.rmSync(dir, { recursive: true, force: true });
      }
    }
  });
  /**
   * Fails the current test unless every row in `edges` has both its `source`
   * and its `target` present as an `id` in `nodes`.
   *
   * The check goes straight to the underlying database handle (reached through
   * the graph's `db.getDb()`), so dangling references show up here whether the
   * FK constraint was violated or was not being enforced at all.
   *
   * @param cg Graph instance whose backing database is inspected.
   */
  function assertGraphIntegrity(cg: CodeGraph): void {
    // Minimal structural view of the pieces this helper touches; the cast is a
    // test-only reach into the instance, not a public contract.
    type StatementLike = { get(): unknown };
    type HandleLike = { prepare(sql: string): StatementLike };
    type GraphWithDb = { db: { getDb(): HandleLike } };

    const handle = (cg as unknown as GraphWithDb).db.getDb();

    // Counts edges whose source OR target has no matching node row.
    const danglingEdgeCountSql = `SELECT count(*) as c FROM edges e
WHERE NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.source)
OR NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.target)`;

    const row = handle.prepare(danglingEdgeCountSql).get() as { c: number };
    expect(row.c).toBe(0);
  }
  /**
   * End-to-end reproducer for #455: index a small Django-style Python project,
   * then repeatedly rewrite files and call `sync()`, asserting after every
   * step that `sync()` resolves and that no edge references a missing node.
   */
  it('survives repeated sync() cycles on a Django-style Python project without FK errors', async () => {
    // Publish the directory to the suite-level handle right away so teardown
    // removes it even if fixture setup fails part-way through.
    const root = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-'));
    tmpDir = root;

    /** Writes one fixture file at a path relative to the temp project root. */
    const writeFixture = (relPath: string, content: string): void => {
      fs.writeFileSync(path.join(root, relPath), content);
    };

    // Mimic a small Django app: requirements + manage.py marker, models/views/urls
    // in two app packages that cross-reference each other. The Python sources
    // use regular 4-space indentation so each fixture is syntactically valid
    // (a method body must be indented deeper than its `def`).
    writeFixture('manage.py', '# django marker\n');
    writeFixture('requirements.txt', 'django==4.2\n');

    fs.mkdirSync(path.join(root, 'users'));
    writeFixture('users/__init__.py', '');
    writeFixture(
      'users/models.py',
      'class User:\n' +
        '    def __init__(self, name):\n' +
        '        self.name = name\n'
    );
    writeFixture(
      'users/views.py',
      'from users.models import User\n' +
        'class UserListView:\n' +
        '    def get(self, request):\n' +
        '        return User("a")\n'
    );
    writeFixture(
      'users/urls.py',
      'from django.urls import path\n' +
        'from users.views import UserListView\n' +
        'urlpatterns = [path("users/", UserListView.as_view(), name="user-list")]\n'
    );

    fs.mkdirSync(path.join(root, 'posts'));
    writeFixture('posts/__init__.py', '');
    writeFixture(
      'posts/models.py',
      'from users.models import User\n' +
        'class Post:\n' +
        '    def __init__(self, author):\n' +
        '        self.author = author\n'
    );
    writeFixture(
      'posts/views.py',
      'from posts.models import Post\n' +
        'class PostListView:\n' +
        '    def get(self, request):\n' +
        '        return Post(None)\n'
    );
    writeFixture(
      'posts/urls.py',
      'from django.urls import path\n' +
        'from posts.views import PostListView\n' +
        'urlpatterns = [path("posts/", PostListView.as_view(), name="post-list")]\n'
    );

    // Hand the graph to the suite-level handle immediately so teardown closes it.
    const graph = CodeGraph.initSync(root);
    cg = graph;
    await graph.indexAll();
    assertGraphIntegrity(graph);

    // Drive the same path the daemon's file watcher drives: a series of file
    // rewrites + sync()s. We shuffle line counts on each rewrite so node IDs
    // (file:kind:name:line) shift around, forcing real INSERT OR REPLACE +
    // CASCADE behavior across files that cross-reference each other.
    const targets = [
      'users/views.py',
      'posts/views.py',
      'users/urls.py',
      'posts/urls.py',
      'users/models.py',
    ];
    const rewriteCycles = 8;

    for (let iter = 0; iter < rewriteCycles; iter++) {
      // The modulo keeps the index in range; cycles beyond `targets.length`
      // wrap around and re-pad files that were already rewritten.
      const file = targets[iter % targets.length]!;
      const full = path.join(root, file);

      // Insert N blank lines at the top to shift every node's line number.
      const padded = '\n'.repeat(iter + 1) + fs.readFileSync(full, 'utf8');
      fs.writeFileSync(full, padded);

      // Use a future mtime so the size+mtime pre-filter in
      // ExtractionOrchestrator.sync can't skip the file. `utimesSync` takes
      // seconds, hence the division of the millisecond timestamp.
      const futureSeconds = (Date.now() + (iter + 1) * 1_000) / 1000;
      fs.utimesSync(full, futureSeconds, futureSeconds);

      // The fix should make this never throw; before the fix, FK errors fire
      // during the resolution-layer insertEdges call inside sync().
      await expect(graph.sync()).resolves.toBeDefined();
      assertGraphIntegrity(graph);
    }
  });
  /**
   * Narrow reproducer for #455: seed the resolver's name cache with a node
   * whose id is absent from `nodes`, resolve a reference through it, and
   * assert that persisting neither throws nor leaves a dangling edge behind.
   */
  it("drops resolution edges whose target node is no longer in the graph (the pathology #455 reports)", async () => {
    // This narrower test reproduces the exact failure mode the user sees in
    // their daemon log: the resolver hands `insertEdges` an edge whose target
    // doesn't exist in `nodes`, and the FK constraint aborts the whole sync.
    //
    // We force the bug by populating the resolver's per-name cache with a
    // stale node (whose id is *not* in the DB) and then asking it to resolve
    // a reference to that name. Without the fix this throws
    // `FOREIGN KEY constraint failed`; with it, the bad edge is filtered out
    // and resolution returns normally.
    const root = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-stale-'));
    tmpDir = root; // publish immediately so teardown always removes it

    // 4-space indentation puts the `Target()` call at line 2, column 4, which
    // is the position the forged reference below reports.
    fs.writeFileSync(path.join(root, 'a.py'), 'def caller():\n    Target()\n');
    fs.writeFileSync(path.join(root, 'b.py'), 'class Target:\n    pass\n');

    const graph = CodeGraph.initSync(root);
    cg = graph; // publish immediately so teardown always closes it
    await graph.indexAll();

    // Reach in to the internals — the simplest way to forge the "stale node
    // ID in the resolver's lookup path" condition the production bug arises
    // from. The fix is what the test is verifying; touching internals here
    // is a means to that end, not a contract we're asserting.
    type NamedNode = { id: string; name: string };
    type UnresolvedRef = {
      fromNodeId: string;
      referenceName: string;
      referenceKind: string;
      line: number;
      column: number;
      filePath: string;
      language: string;
    };
    type Internals = {
      queries: {
        getNodesByName(name: string): NamedNode[];
      };
      resolver: {
        warmCaches(): void;
        resolveAndPersist(refs: UnresolvedRef[]): { resolved: unknown[] };
        nameCache: { set(key: string, value: NamedNode[]): void };
      };
    };
    const { queries, resolver } = graph as unknown as Internals;

    const caller = queries.getNodesByName('caller')[0];
    const target = queries.getNodesByName('Target')[0];
    expect(caller).toBeDefined();
    expect(target).toBeDefined();
    // Narrow once with a real guard instead of repeating non-null assertions.
    if (!caller || !target) {
      throw new Error('expected both `caller` and `Target` nodes to be indexed');
    }

    // Warm caches so warmCaches no-ops on the resolveAndPersist call below
    // and our seeded nameCache entry isn't overwritten.
    resolver.warmCaches();

    // Forge a stale lookup result: a Node whose `id` doesn't exist in the
    // `nodes` table. This is structurally what happens when a framework
    // resolver's WeakMap cache hands back a Node that was deleted by a
    // concurrent file rewrite — the user's #455 scenario.
    const staleNode = { ...target, id: 'class:stale.py:Target:1' };
    resolver.nameCache.set('Target', [staleNode]);

    // Ask the resolver to persist an edge that will resolve via the seeded
    // (stale) cache entry. Without the FK filter this would throw
    // `FOREIGN KEY constraint failed` and abort the whole batch.
    const forgedRef: UnresolvedRef = {
      fromNodeId: caller.id,
      referenceName: 'Target',
      referenceKind: 'calls',
      line: 2,
      column: 4,
      filePath: 'a.py',
      language: 'python',
    };
    expect(() => resolver.resolveAndPersist([forgedRef])).not.toThrow();

    // The bad edge must not have been persisted either — FK enforcement is
    // still on, and post-fix the dangling-edge count remains zero.
    assertGraphIntegrity(graph);
  });
  212. });