| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- /**
- * Sync FK regression test (issue #455).
- *
- * #62 plugged FK violations at the extraction layer (empty-named nodes whose
- * containment edges had no target). #455 reports the same `FOREIGN KEY constraint
- * failed` reappearing on v0.9.5, but during *watch sync* on a Python-only project —
- * a different trigger than the C/C++ header empty-name issue #62 covered.
- *
- * The reproducer below drives the same path the daemon takes: extract → resolve →
- * insert edges. The resolution pass's `insertEdges` was not guarded the way the
- * extraction-layer insert was after #62, so any edge with a stale source/target
- * (e.g. a synthesized framework target whose node was deleted by a concurrent
- * file rewrite) throws and aborts the sync, leaving the FK error the user sees.
- *
- * The test asserts: a sequence of file rewrites + sync()s never throws, and the
- * graph stays internally consistent (every edge's source + target are real nodes).
- */
- import { describe, it, expect, beforeAll, afterEach } from 'vitest';
- import * as fs from 'fs';
- import * as path from 'path';
- import * as os from 'os';
- import CodeGraph from '../src/index';
- import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
- beforeAll(async () => {
- await initGrammars();
- await loadAllGrammars();
- });
- describe('watch sync FK regression (#455)', () => {
- let tmpDir: string | undefined;
- let cg: CodeGraph | undefined;
- afterEach(() => {
- if (cg) {
- cg.close();
- cg = undefined;
- }
- if (tmpDir) {
- fs.rmSync(tmpDir, { recursive: true, force: true });
- tmpDir = undefined;
- }
- });
- function assertGraphIntegrity(cg: CodeGraph): void {
- // Every edge must reference real nodes. If FK was disabled or violated,
- // dangling refs would show up here.
- const db = (cg as unknown as { db: { getDb(): { prepare(sql: string): { get(): unknown } } } }).db;
- const sqlite = db.getDb();
- const dangling = sqlite
- .prepare(
- `SELECT count(*) as c FROM edges e
- WHERE NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.source)
- OR NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.target)`
- )
- .get() as { c: number };
- expect(dangling.c).toBe(0);
- }
- it('survives repeated sync() cycles on a Django-style Python project without FK errors', async () => {
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-'));
- // Mimic a small Django app: requirements + manage.py marker, models/views/urls
- // in two app packages that cross-reference each other.
- fs.writeFileSync(path.join(tmpDir, 'manage.py'), '# django marker\n');
- fs.writeFileSync(path.join(tmpDir, 'requirements.txt'), 'django==4.2\n');
- fs.mkdirSync(path.join(tmpDir, 'users'));
- fs.writeFileSync(path.join(tmpDir, 'users/__init__.py'), '');
- fs.writeFileSync(
- path.join(tmpDir, 'users/models.py'),
- 'class User:\n' +
- ' def __init__(self, name):\n' +
- ' self.name = name\n'
- );
- fs.writeFileSync(
- path.join(tmpDir, 'users/views.py'),
- 'from users.models import User\n' +
- 'class UserListView:\n' +
- ' def get(self, request):\n' +
- ' return User("a")\n'
- );
- fs.writeFileSync(
- path.join(tmpDir, 'users/urls.py'),
- 'from django.urls import path\n' +
- 'from users.views import UserListView\n' +
- 'urlpatterns = [path("users/", UserListView.as_view(), name="user-list")]\n'
- );
- fs.mkdirSync(path.join(tmpDir, 'posts'));
- fs.writeFileSync(path.join(tmpDir, 'posts/__init__.py'), '');
- fs.writeFileSync(
- path.join(tmpDir, 'posts/models.py'),
- 'from users.models import User\n' +
- 'class Post:\n' +
- ' def __init__(self, author):\n' +
- ' self.author = author\n'
- );
- fs.writeFileSync(
- path.join(tmpDir, 'posts/views.py'),
- 'from posts.models import Post\n' +
- 'class PostListView:\n' +
- ' def get(self, request):\n' +
- ' return Post(None)\n'
- );
- fs.writeFileSync(
- path.join(tmpDir, 'posts/urls.py'),
- 'from django.urls import path\n' +
- 'from posts.views import PostListView\n' +
- 'urlpatterns = [path("posts/", PostListView.as_view(), name="post-list")]\n'
- );
- cg = CodeGraph.initSync(tmpDir);
- await cg.indexAll();
- assertGraphIntegrity(cg);
- // Drive the same path the daemon's file watcher drives: a series of file
- // rewrites + sync()s. We shuffle line counts on each rewrite so node IDs
- // (file:kind:name:line) shift around, forcing real INSERT OR REPLACE +
- // CASCADE behavior across files that cross-reference each other.
- const targets = [
- 'users/views.py',
- 'posts/views.py',
- 'users/urls.py',
- 'posts/urls.py',
- 'users/models.py',
- ];
- for (let iter = 0; iter < 8; iter++) {
- const file = targets[iter % targets.length]!;
- const full = path.join(tmpDir, file);
- const content = fs.readFileSync(full, 'utf8');
- // Insert N blank lines at the top to shift every node's line number.
- const padded = '\n'.repeat(iter + 1) + content;
- // Use a future mtime so the size+mtime pre-filter in
- // ExtractionOrchestrator.sync can't skip the file.
- fs.writeFileSync(full, padded);
- const now = Date.now() + (iter + 1) * 1_000;
- fs.utimesSync(full, now / 1000, now / 1000);
- // The fix should make this never throw; before the fix, FK errors fire
- // during the resolution-layer insertEdges call inside sync().
- await expect(cg.sync()).resolves.toBeDefined();
- assertGraphIntegrity(cg);
- }
- });
- it("drops resolution edges whose target node is no longer in the graph (the pathology #455 reports)", async () => {
- // This narrower test reproduces the exact failure mode the user sees in
- // their daemon log: the resolver hands `insertEdges` an edge whose target
- // doesn't exist in `nodes`, and the FK constraint aborts the whole sync.
- //
- // We force the bug by populating the resolver's per-name cache with a
- // stale node (whose id is *not* in the DB) and then asking it to resolve
- // a reference to that name. Without the fix this throws
- // `FOREIGN KEY constraint failed`; with it, the bad edge is filtered out
- // and resolution returns normally.
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-stale-'));
- fs.writeFileSync(path.join(tmpDir, 'a.py'), 'def caller():\n Target()\n');
- fs.writeFileSync(path.join(tmpDir, 'b.py'), 'class Target:\n pass\n');
- cg = CodeGraph.initSync(tmpDir);
- await cg.indexAll();
- // Reach in to the internals — the simplest way to forge the "stale node
- // ID in the resolver's lookup path" condition the production bug arises
- // from. The fix is what the test is verifying; touching internals here
- // is a means to that end, not a contract we're asserting.
- type Internals = {
- queries: {
- getNodesByName(name: string): Array<{ id: string; name: string }>;
- getAllNodeNames(): string[];
- };
- resolver: {
- warmCaches(): void;
- resolveAndPersist(
- refs: Array<{
- fromNodeId: string;
- referenceName: string;
- referenceKind: string;
- line: number;
- column: number;
- filePath: string;
- language: string;
- }>
- ): { resolved: unknown[] };
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- nameCache: { set(key: string, value: any): void };
- };
- };
- const internals = cg as unknown as Internals;
- const queries = internals.queries;
- const resolver = internals.resolver;
- const caller = queries.getNodesByName('caller')[0];
- const target = queries.getNodesByName('Target')[0];
- expect(caller).toBeDefined();
- expect(target).toBeDefined();
- // Warm caches so warmCaches no-ops on the resolveAndPersist call below
- // and our seeded nameCache entry isn't overwritten.
- resolver.warmCaches();
- // Forge a stale lookup result: a Node whose `id` doesn't exist in the
- // `nodes` table. This is structurally what happens when a framework
- // resolver's WeakMap cache hands back a Node that was deleted by a
- // concurrent file rewrite — the user's #455 scenario.
- const staleNode = { ...target!, id: 'class:stale.py:Target:1' };
- resolver.nameCache.set('Target', [staleNode]);
- // Ask the resolver to persist an edge that will resolve via the seeded
- // (stale) cache entry. Without the FK filter this would throw
- // `FOREIGN KEY constraint failed` and abort the whole batch.
- expect(() =>
- resolver.resolveAndPersist([
- {
- fromNodeId: caller!.id,
- referenceName: 'Target',
- referenceKind: 'calls',
- line: 2,
- column: 4,
- filePath: 'a.py',
- language: 'python',
- },
- ])
- ).not.toThrow();
- // The bad edge must not have been persisted either — FK enforcement is
- // still on, and post-fix the dangling-edge count remains zero.
- assertGraphIntegrity(cg);
- });
- });
|