hai 1 mes · 7340892290
--- a/__tests__/integration/full-pipeline.test.ts
+++ b/__tests__/integration/full-pipeline.test.ts
@@ -0,0 +1,244 @@
 
				+/**
			
 
				+ * End-to-end pipeline integration tests
			
 
				+ *
			
 
				+ * Exercises the full happy path that unit tests cover in isolation:
			
 
				+ *   init → indexAll → resolveReferences → searchNodes/getCallers/buildContext → sync
			
 
				+ *
			
 
				+ * Also covers two error paths that were previously uncovered:
			
 
				+ *   - Indexing a file that contains a syntactically invalid snippet
			
 
				+ *     (parse errors must not abort the batch).
			
 
				+ *   - Sync correctly applies adds + modifies + removes in a single pass.
			
 
				+ *
			
 
				+ * The main pipeline test generates a synthetic ~120-file project (5k
				+ * files would dwarf the test runner; 120 files of varied TS shape is
				+ * enough to stress the resolver and graph layers without slowing the
				+ * suite to a crawl). The other tests use much smaller fixtures.
			
 
				+ */
			
 
				+
			
 
				+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
			
 
				+import * as fs from 'fs';
			
 
				+import * as path from 'path';
			
 
				+import * as os from 'os';
			
 
				+import CodeGraph from '../../src/index';
			
 
				+
			
 
				/**
				 * Create a uniquely named scratch directory under the OS temp dir.
				 *
				 * @param prefix - Leading part of the directory name; `mkdtempSync`
				 *   appends random characters to make it unique.
				 * @returns Path of the newly created directory.
				 */
				function createTempDir(prefix = 'codegraph-int-'): string {
				  const template = path.join(os.tmpdir(), prefix);
				  return fs.mkdtempSync(template);
				}
			
 
				+
			
 
				/**
				 * Recursively delete a scratch directory created for a test.
				 * Does nothing when the path does not exist.
				 *
				 * @param dir - Directory to remove, including everything inside it.
				 */
				function cleanupTempDir(dir: string): void {
				  if (!fs.existsSync(dir)) {
				    return;
				  }
				  fs.rmSync(dir, { recursive: true, force: true });
				}
			
 
				+
			
 
				/**
				 * Generate a synthetic TypeScript project with `moduleCount` chained modules.
				 *
				 * Files written under `<root>/src`:
				 *   - `mod0.ts` — leaf module with no imports; exports `fn0` and class `Mod0`.
				 *   - `mod1.ts` … `mod<N-1>.ts` — each imports the previous module and exports
				 *     `fn<i>` (which calls `fn<i-1>`) and `Mod<i>` (which extends `Mod<i-1>`),
				 *     giving the resolver real import, call and inheritance edges.
				 *   - `index.ts` — entry point that imports from the last module.
				 *
				 * @param root - Project root; `src/` is created inside it if missing.
				 * @param moduleCount - Number of `mod*.ts` files to write. Must be an
				 *   integer >= 1.
				 * @throws RangeError when `moduleCount` is not a positive integer. Without
				 *   this guard a value such as 0 would emit `import { fn-1 } from './mod-1'`,
				 *   which is not valid TypeScript and would silently skew any test using it.
				 */
				function generateSyntheticProject(root: string, moduleCount: number): void {
				  if (!Number.isInteger(moduleCount) || moduleCount < 1) {
				    throw new RangeError(`moduleCount must be a positive integer, got ${moduleCount}`);
				  }

				  const srcDir = path.join(root, 'src');
				  fs.mkdirSync(srcDir, { recursive: true });

				  // Leaf module — no imports.
				  fs.writeFileSync(
				    path.join(srcDir, `mod0.ts`),
				    `export function fn0(x: number): number { return x + 1; }\n` +
				      `export class Mod0 { ping(): string { return 'mod0'; } }\n`
				  );

				  // Every further module depends on the one just before it.
				  for (let i = 1; i < moduleCount; i++) {
				    const prev = i - 1;
				    fs.writeFileSync(
				      path.join(srcDir, `mod${i}.ts`),
				      `import { fn${prev}, Mod${prev} } from './mod${prev}';\n` +
				        `export function fn${i}(x: number): number { return fn${prev}(x) + 1; }\n` +
				        `export class Mod${i} extends Mod${prev} {\n` +
				        `  call${i}(): number { return fn${i}(${i}); }\n` +
				        `}\n`
				    );
				  }

				  // Entry point file. It targets the last module in the chain.
				  // NOTE: with moduleCount === 1 the entry calls `call0()`, which `Mod0`
				  // does not declare; the file still parses, it just would not type-check.
				  const last = moduleCount - 1;
				  fs.writeFileSync(
				    path.join(srcDir, 'index.ts'),
				    `import { fn${last}, Mod${last} } from './mod${last}';\n` +
				      `export function entry(): number {\n` +
				      `  const m = new Mod${last}();\n` +
				      `  return fn${last}(0) + m.call${last}();\n` +
				      `}\n`
				  );
				}
			
 
				+
			
 
				describe('Integration: full pipeline', () => {
				  let tempDir: string;

				  // Every test indexes all `.ts` files of the scratch project, no excludes.
				  const openGraph = () =>
				    CodeGraph.init(tempDir, {
				      config: { include: ['**/*.ts'], exclude: [] },
				    });

				  // Path of a file inside the scratch project's `src/` directory.
				  const srcFile = (fileName: string): string => path.join(tempDir, 'src', fileName);

				  // Exact-name lookup in a list of search hits. The tests only care that
				  // the symbol is present, not where it ranks.
				  const hitNamed = <H extends { node: { name: string } }>(
				    hits: readonly H[],
				    symbolName: string
				  ): H | undefined => hits.find((hit) => hit.node.name === symbolName);

				  // Total number of file-level changes reported by one sync pass.
				  const changeTotal = (summary: {
				    filesAdded: number;
				    filesModified: number;
				    filesRemoved: number;
				  }): number => summary.filesAdded + summary.filesModified + summary.filesRemoved;

				  beforeEach(() => {
				    tempDir = createTempDir();
				  });

				  afterEach(() => {
				    cleanupTempDir(tempDir);
				  });

				  it('runs init → index → resolve → search → callers → context → sync', async () => {
				    const MODULE_COUNT = 120;
				    generateSyntheticProject(tempDir, MODULE_COUNT);

				    // Step 1: init.
				    const cg = await openGraph();

				    try {
				      // Step 2: indexAll. The fixture holds MODULE_COUNT mod files plus
				      // one index file, so at least MODULE_COUNT files must be indexed.
				      const indexed = await cg.indexAll();
				      expect(indexed.filesIndexed).toBeGreaterThanOrEqual(MODULE_COUNT);

				      const postIndexStats = cg.getStats();
				      expect(postIndexStats.fileCount).toBeGreaterThanOrEqual(MODULE_COUNT);
				      expect(postIndexStats.nodeCount).toBeGreaterThan(MODULE_COUNT * 2);

				      // Step 3: resolveReferences. Extraction may already have wired up
				      // many call edges, leaving little or nothing queued here, so only
				      // the shape of the result is checked. The later search/callers
				      // checks are what exercise the populated graph.
				      cg.reinitializeResolver();
				      const resolution = cg.resolveReferences();
				      expect(resolution).toBeDefined();
				      expect(resolution.stats).toBeDefined();
				      expect(typeof resolution.stats.total).toBe('number');
				      expect(typeof resolution.stats.resolved).toBe('number');

				      // Step 4: searchNodes — entry point and a mid-chain function.
				      const entryHits = cg.searchNodes('entry', { limit: 10 });
				      expect(entryHits.length).toBeGreaterThan(0);
				      expect(hitNamed(entryHits, 'entry')).toBeDefined();

				      const midChainHits = cg.searchNodes(`fn50`, { limit: 10 });
				      expect(hitNamed(midChainHits, 'fn50')).toBeDefined();

				      // Step 5: getCallers. fn0 is called by fn1 in the fixture; this
				      // only asserts that the lookup returns a list.
				      const fn0Hit = hitNamed(cg.searchNodes('fn0', { limit: 5 }), 'fn0');
				      expect(fn0Hit).toBeDefined();
				      const callers = cg.getCallers(fn0Hit!.node.id);
				      expect(Array.isArray(callers)).toBe(true);

				      // Step 6: buildContext.
				      const context = await cg.buildContext('entry function chain', {
				        maxNodes: 10,
				        format: 'markdown',
				      });
				      expect(typeof context).toBe('string');
				      expect((context as string).length).toBeGreaterThan(0);

				      // Step 7: sync with an add, a modify and a remove in one pass.
				      // Added: a new file that references entry().
				      fs.writeFileSync(
				        srcFile('consumer.ts'),
				        `import { entry } from './index';\nexport const result = entry();\n`
				      );
				      // Modified: mod0 gets a new body and a new exported helper.
				      fs.writeFileSync(
				        srcFile('mod0.ts'),
				        `export function fn0(x: number): number { return x + 2; }\n` +
				          `export function newHelper(): string { return 'new'; }\n` +
				          `export class Mod0 { ping(): string { return 'mod0v2'; } }\n`
				      );
				      // Removed: mod1. This leaves mod2 with dangling imports, which the
				      // resolver is expected to tolerate.
				      fs.unlinkSync(srcFile('mod1.ts'));

				      const syncSummary = await cg.sync();
				      expect(syncSummary.filesAdded).toBeGreaterThanOrEqual(1);
				      expect(syncSummary.filesModified).toBeGreaterThanOrEqual(1);
				      expect(syncSummary.filesRemoved).toBeGreaterThanOrEqual(1);

				      // The symbol introduced by the modification must be searchable.
				      expect(cg.searchNodes('newHelper').length).toBeGreaterThan(0);

				      // The removed file must no longer contribute nodes. A name search
				      // is unreliable here because FTS prefix matching lets "Mod1" also
				      // hit Mod10, Mod11, …, so the per-file node list is checked instead.
				      const nodesLeftInRemovedFile = cg.getNodesInFile('src/mod1.ts');
				      expect(nodesLeftInRemovedFile).toHaveLength(0);
				    } finally {
				      cg.destroy();
				    }
				  }, 60_000);

				  it('keeps indexing files when one file has a parse error', async () => {
				    fs.mkdirSync(path.join(tempDir, 'src'), { recursive: true });

				    // Two well-formed files…
				    fs.writeFileSync(srcFile('good1.ts'), `export function good1(): number { return 1; }\n`);
				    fs.writeFileSync(srcFile('good2.ts'), `export function good2(): number { return 2; }\n`);
				    // …and one deliberately malformed file (unclosed paren/brace, stray tokens).
				    fs.writeFileSync(
				      srcFile('broken.ts'),
				      `export function broken(\n  this is { not valid typescript at all\n`
				    );

				    const cg = await openGraph();

				    try {
				      const indexed = await cg.indexAll();
				      // Both good files must be indexed whatever happens to broken.ts.
				      // Tree-sitter is error-tolerant and may still yield a partial AST
				      // for it, so the only requirements are that the batch finishes
				      // and the good symbols are searchable.
				      expect(indexed.filesIndexed).toBeGreaterThanOrEqual(2);

				      const good1Hits = cg.searchNodes('good1');
				      const good2Hits = cg.searchNodes('good2');
				      expect(hitNamed(good1Hits, 'good1')).toBeDefined();
				      expect(hitNamed(good2Hits, 'good2')).toBeDefined();
				    } finally {
				      cg.destroy();
				    }
				  }, 30_000);

				  it('handles repeated sync calls when nothing has changed', async () => {
				    generateSyntheticProject(tempDir, 10);

				    const cg = await openGraph();

				    try {
				      await cg.indexAll();
				      const statsBefore = cg.getStats();

				      const firstSync = await cg.sync();
				      const secondSync = await cg.sync();

				      // With no changes on disk, each sync must report zero changes.
				      expect(changeTotal(firstSync)).toBe(0);
				      expect(changeTotal(secondSync)).toBe(0);

				      // The graph must be the same size as before the two syncs.
				      const statsAfter = cg.getStats();
				      expect(statsAfter.fileCount).toBe(statsBefore.fileCount);
				      expect(statsAfter.nodeCount).toBe(statsBefore.nodeCount);
				    } finally {
				      cg.destroy();
				    }
				  }, 30_000);
				});
			
--- a/__tests__/integration/lru-cache.test.ts
+++ b/__tests__/integration/lru-cache.test.ts
@@ -0,0 +1,96 @@
 
				+/**
			
 
				+ * LRUCache unit tests
			
 
				+ *
			
 
				+ * Covers the eviction guarantees that the resolver relies on:
			
 
				+ *   - capacity is enforced (never exceeds max)
			
 
				+ *   - LRU ordering: hot keys survive eviction passes
			
 
				+ *   - has()/get()/set()/clear() behave like the original Map shape
			
 
				+ *   - null values are storable (the fileCache uses null for "failed read")
			
 
				+ */
			
 
				+
			
 
				+import { describe, it, expect } from 'vitest';
			
 
				+import { LRUCache } from '../../src/resolution/lru-cache';
			
 
				+
			
 
				describe('LRUCache', () => {
				  // Build a cache of the given capacity and insert `entries` in order.
				  const cacheWith = (capacity: number, entries: ReadonlyArray<[string, number]>) => {
				    const cache = new LRUCache<string, number>(capacity);
				    for (const [key, value] of entries) {
				      cache.set(key, value);
				    }
				    return cache;
				  };

				  it('enforces capacity by evicting the oldest entry on overflow', () => {
				    // Four inserts into a three-slot cache: the first key ('a') must go.
				    const cache = cacheWith(3, [
				      ['a', 1],
				      ['b', 2],
				      ['c', 3],
				      ['d', 4],
				    ]);

				    expect(cache.size).toBe(3);
				    expect(cache.has('a')).toBe(false);
				    expect(cache.get('a')).toBeUndefined();
				    expect(cache.get('b')).toBe(2);
				    expect(cache.get('c')).toBe(3);
				    expect(cache.get('d')).toBe(4);
				  });

				  it('promotes touched keys to most-recent so they survive eviction', () => {
				    const cache = cacheWith(3, [
				      ['a', 1],
				      ['b', 2],
				      ['c', 3],
				    ]);

				    // Reading 'a' makes it the most recently used entry…
				    expect(cache.get('a')).toBe(1);

				    // …so the next overflow evicts 'b' instead.
				    cache.set('d', 4);

				    expect(cache.has('a')).toBe(true);
				    expect(cache.has('b')).toBe(false);
				    expect(cache.has('c')).toBe(true);
				    expect(cache.has('d')).toBe(true);
				  });

				  it('overwriting an existing key refreshes its recency but does not grow size', () => {
				    // The final write to 'a' replaces its value and makes it most recent.
				    const cache = cacheWith(2, [
				      ['a', 1],
				      ['b', 2],
				      ['a', 99],
				    ]);

				    expect(cache.size).toBe(2);
				    expect(cache.get('a')).toBe(99);

				    // The overflow must now evict 'b', not 'a'.
				    cache.set('c', 3);

				    expect(cache.has('a')).toBe(true);
				    expect(cache.has('b')).toBe(false);
				    expect(cache.has('c')).toBe(true);
				  });

				  it('stores null values (used by the file content cache)', () => {
				    const cache = new LRUCache<string, string | null>(2);
				    cache.set('missing.ts', null);

				    // A stored null is a real entry, distinct from "not cached".
				    expect(cache.has('missing.ts')).toBe(true);
				    expect(cache.get('missing.ts')).toBeNull();
				  });

				  it('clear() resets the cache', () => {
				    const cache = cacheWith(3, [
				      ['a', 1],
				      ['b', 2],
				    ]);

				    cache.clear();

				    expect(cache.size).toBe(0);
				    expect(cache.has('a')).toBe(false);
				  });

				  it('rejects non-positive capacity', () => {
				    expect(() => new LRUCache(0)).toThrow();
				    expect(() => new LRUCache(-1)).toThrow();
				    expect(() => new LRUCache(NaN)).toThrow();
				  });

				  it('stays bounded under heavy churn (regression for OOM scenario)', () => {
				    const cache = new LRUCache<string, number>(100);
				    let next = 0;
				    while (next < 10_000) {
				      cache.set(`key${next}`, next);
				      next += 1;
				    }

				    expect(cache.size).toBe(100);
				    // Only the 100 most recent keys (key9900 … key9999) may remain.
				    expect(cache.has('key9999')).toBe(true);
				    expect(cache.has('key9900')).toBe(true);
				    expect(cache.has('key0')).toBe(false);
				  });
				});
			
--- a/__tests__/integration/mcp-input-limits.test.ts
+++ b/__tests__/integration/mcp-input-limits.test.ts
@@ -0,0 +1,109 @@
 
				+/**
			
 
				+ * MCP tool input-size limits
			
 
				+ *
			
 
				+ * Regression coverage for the DoS vector: MCP clients can ship
			
 
				+ * unbounded payloads (`query`, `task`, `symbol`, `projectPath`,
			
 
				+ * `path`, `pattern`). Before the cap, a 100MB string would hit
			
 
				+ * the FTS5 layer and pin the server. These tests assert that the
			
 
				+ * tool layer rejects oversize inputs early.
			
 
				+ */
			
 
				+
			
 
				+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
			
 
				+import * as fs from 'fs';
			
 
				+import * as path from 'path';
			
 
				+import * as os from 'os';
			
 
				+import CodeGraph from '../../src/index';
			
 
				+import { ToolHandler } from '../../src/mcp/tools';
			
 
				+
			
 
				describe('MCP input size limits', () => {
				  let tempDir: string;
				  let cg: CodeGraph;
				  let handler: ToolHandler;

				  // Run a tool call that must be refused, then check that the first
				  // content item of the error result matches `messagePattern`.
				  const expectRejected = async (
				    toolName: string,
				    args: Record<string, unknown>,
				    messagePattern: RegExp
				  ): Promise<void> => {
				    const result = await handler.execute(toolName, args);
				    expect(result.isError).toBe(true);
				    expect(result.content[0]!.text).toMatch(messagePattern);
				  };

				  beforeEach(async () => {
				    // A one-file project is enough: these tests target argument
				    // validation, not search quality.
				    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-mcp-limits-'));
				    const srcDir = path.join(tempDir, 'src');
				    fs.mkdirSync(srcDir, { recursive: true });
				    fs.writeFileSync(
				      path.join(srcDir, 'a.ts'),
				      `export function alpha(): number { return 1; }\n`
				    );

				    cg = await CodeGraph.init(tempDir, {
				      config: { include: ['**/*.ts'], exclude: [] },
				    });
				    await cg.indexAll();
				    handler = new ToolHandler(cg);
				  });

				  afterEach(() => {
				    // `cg` is still unset if init failed inside beforeEach.
				    cg?.destroy();
				    if (!fs.existsSync(tempDir)) return;
				    fs.rmSync(tempDir, { recursive: true, force: true });
				  });

				  it('accepts a normal-sized query', async () => {
				    const result = await handler.execute('codegraph_search', { query: 'alpha' });
				    expect(result.isError).toBeFalsy();
				  });

				  it('rejects an oversize query on codegraph_search', async () => {
				    await expectRejected('codegraph_search', { query: 'a'.repeat(20_000) }, /maximum length/i);
				  });

				  it('rejects an oversize task on codegraph_context', async () => {
				    await expectRejected('codegraph_context', { task: 'b'.repeat(50_000) }, /maximum length/i);
				  });

				  it('rejects an oversize symbol on codegraph_callers', async () => {
				    await expectRejected('codegraph_callers', { symbol: 'c'.repeat(15_000) }, /maximum length/i);
				  });

				  it('rejects an oversize symbol on codegraph_impact', async () => {
				    await expectRejected('codegraph_impact', { symbol: 'd'.repeat(11_000) }, /maximum length/i);
				  });

				  it('rejects an oversize projectPath', async () => {
				    const hugePath = '/tmp/' + 'x'.repeat(5_000);
				    await expectRejected(
				      'codegraph_search',
				      { query: 'alpha', projectPath: hugePath },
				      /projectPath/
				    );
				  });

				  it('rejects an oversize path filter on codegraph_files', async () => {
				    const hugePath = 'src/' + 'y'.repeat(5_000);
				    await expectRejected('codegraph_files', { path: hugePath }, /path/);
				  });

				  it('rejects an oversize glob pattern on codegraph_files', async () => {
				    const hugePattern = '*'.repeat(5_000);
				    await expectRejected('codegraph_files', { pattern: hugePattern }, /pattern/);
				  });

				  it('rejects a non-string projectPath', async () => {
				    // The args bag is Record<string, unknown>, so a number needs no cast.
				    await expectRejected(
				      'codegraph_search',
				      { query: 'alpha', projectPath: 12345 },
				      /projectPath/
				    );
				  });
				});
			
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -22,6 +22,22 @@ import { join } from 'path';
 
				 /** Maximum output length to prevent context bloat (characters) */
			
 
				 const MAX_OUTPUT_LENGTH = 15000;
			
 
				 
			
 
				/**
				 * Upper bound, in characters, for free-form text arguments such as
				 * `query`, `task` and `symbol`.
				 *
				 * A buggy or hostile MCP client could otherwise send an enormous
				 * string (think 100MB) and tie up the FTS5 search or exhaust memory.
				 * No realistic query comes close to 10 000 characters.
				 */
				const MAX_INPUT_LENGTH = 10_000;

				/**
				 * Upper bound, in characters, for path-shaped arguments: `projectPath`,
				 * the `path` filter and glob `pattern`s (4 KiB).
				 *
				 * A path several thousand characters long is never legitimate; it
				 * indicates abuse or an upstream bug.
				 */
				const MAX_PATH_LENGTH = 4 * 1024;
			
 
				+
			
 
				 /**
			
 
				  * Rust path roots that have no file-system equivalent — `crate` is the
			
 
				  * current crate, `super` is the parent module, `self` is the current
			
@@ -609,12 +625,46 @@ export class ToolHandler {
 
				   }
			
 
				 
			
 
				   /**
				    * Check that `value` is a non-empty string of at most `maxLength`
				    * characters.
				    *
				    * The length cap guards against MCP clients that send huge payloads
				    * (multi-megabyte query strings, by accident or on purpose). Such an
				    * input could otherwise pin the FTS5 index or exhaust memory before
				    * any useful work starts.
				    *
				    * @param value - Raw argument taken from the tool call.
				    * @param name - Argument name, used as the prefix of the error message.
				    * @param maxLength - Largest accepted `length`; defaults to `MAX_INPUT_LENGTH`.
				    * @returns The validated string, or the error `ToolResult` built by
				    *   `this.errorResult` when the value is not a string, is empty, or
				    *   is too long.
				    */
				   private validateString(
				     value: unknown,
				     name: string,
				     maxLength: number = MAX_INPUT_LENGTH
				   ): string | ToolResult {
				     // Treat non-strings as empty so both cases share one rejection path.
				     const text = typeof value === 'string' ? value : '';
				     if (text.length === 0) {
				       return this.errorResult(`${name} must be a non-empty string`);
				     }

				     const tooLong = text.length > maxLength;
				     return tooLong
				       ? this.errorResult(
				           `${name} exceeds maximum length of ${maxLength} characters (got ${text.length})`
				         )
				       : text;
				   }
			
 
				+
			
 
				/**
				 * Validate an optional path-shaped argument.
				 *
				 * @param value - Raw argument taken from the tool call; may be absent.
				 * @param name - Argument name, used as the prefix of the error message.
				 * @returns `undefined` when the argument is `undefined` or `null`, the
				 *   string itself when it is at most `MAX_PATH_LENGTH` characters
				 *   long, otherwise the error `ToolResult` built by `this.errorResult`.
				 */
				private validateOptionalPath(
				  value: unknown,
				  name: string
				): string | undefined | ToolResult {
				  // `== null` matches both undefined and null: the argument was omitted.
				  if (value == null) {
				    return undefined;
				  }

				  if (typeof value === 'string') {
				    return value.length <= MAX_PATH_LENGTH
				      ? value
				      : this.errorResult(
				          `${name} exceeds maximum length of ${MAX_PATH_LENGTH} characters (got ${value.length})`
				        );
				  }

				  return this.errorResult(`${name} must be a string`);
				}
			
 
				 
			
@@ -623,6 +673,25 @@ export class ToolHandler {
 
				    */
			
 
				   async execute(toolName: string, args: Record<string, unknown>): Promise<ToolResult> {
			
 
				     try {
			
 
				+      // Cross-cutting input validation. All tools accept an optional
			
 
				+      // `projectPath` and most accept either `query`, `task`, or
			
 
				+      // `symbol` — bound their lengths centrally so individual handlers
			
 
				+      // can stay focused on tool-specific logic.
			
 
				+      const pathCheck = this.validateOptionalPath(args.projectPath, 'projectPath');
			
 
				+      if (typeof pathCheck === 'object' && pathCheck !== undefined) {
			
 
				+        return pathCheck;
			
 
				+      }
			
 
				+      // The `path` and `pattern` properties used by codegraph_files are
			
 
				+      // also path-shaped — apply the same cap.
			
 
				+      if (args.path !== undefined) {
			
 
				+        const check = this.validateOptionalPath(args.path, 'path');
			
 
				+        if (typeof check === 'object' && check !== undefined) return check;
			
 
				+      }
			
 
				+      if (args.pattern !== undefined) {
			
 
				+        const check = this.validateOptionalPath(args.pattern, 'pattern');
			
 
				+        if (typeof check === 'object' && check !== undefined) return check;
			
 
				+      }
			
 
				+
			
 
				       switch (toolName) {
			
 
				         case 'codegraph_search':
			
 
				           return await this.handleSearch(args);
			
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -22,6 +22,24 @@ import { detectFrameworks } from './frameworks';
 
				 import { loadProjectAliases, type AliasMap } from './path-aliases';
			
 
				 import { logDebug } from '../errors';
			
 
				 import type { ReExport } from './types';
			
 
				+import { LRUCache } from './lru-cache';
			
 
				+
			
 
				/**
				 * Cache size limits. Each per-resolver cache is bounded so memory
				 * stays flat on large codebases (20k+ files). Override the default via
				 * the env var `CODEGRAPH_RESOLVER_CACHE_SIZE` (a single integer applied
				 * to all caches) when tuning for very large or very small projects.
				 */
				const DEFAULT_CACHE_LIMIT = 5_000;

				/**
				 * Read the resolver cache limit from `CODEGRAPH_RESOLVER_CACHE_SIZE`.
				 *
				 * Only a plain positive decimal integer is accepted; surrounding
				 * whitespace is ignored. Anything else falls back to the default.
				 * `parseInt` alone is not strict enough here: it stops at the first
				 * non-digit, so `1e4` would give a cache of 1 entry and `5k` a cache
				 * of 5, and an absurdly large number would disable the bound.
				 *
				 * @returns The configured limit, or `DEFAULT_CACHE_LIMIT` when the
				 *   variable is unset, empty, malformed, zero, or not a safe integer.
				 */
				function resolveCacheLimit(): number {
				  const raw = process.env.CODEGRAPH_RESOLVER_CACHE_SIZE?.trim();
				  if (!raw || !/^\d+$/.test(raw)) return DEFAULT_CACHE_LIMIT;

				  const parsed = Number(raw);
				  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_CACHE_LIMIT;
				}
			
 
				 
			
 
				 // Re-export types
			
 
				 export * from './types';
			
@@ -121,13 +139,16 @@ export class ReferenceResolver {
 
				   private queries: QueryBuilder;
			
 
				   private context: ResolutionContext;
			
 
				   private frameworks: FrameworkResolver[] = [];
			
 
				-  private nodeCache: Map<string, Node[]> = new Map(); // per-file node cache (bounded)
			
 
				-  private fileCache: Map<string, string | null> = new Map(); // per-file content cache (bounded)
			
 
				-  private importMappingCache: Map<string, ImportMapping[]> = new Map();
			
 
				-  private reExportCache: Map<string, ReExport[]> = new Map();
			
 
				-  private nameCache: Map<string, Node[]> = new Map(); // name → nodes cache
			
 
				-  private lowerNameCache: Map<string, Node[]> = new Map(); // lower(name) → nodes cache
			
 
				-  private qualifiedNameCache: Map<string, Node[]> = new Map(); // qualified_name → nodes cache
			
 
				+  // All per-resolver caches are LRU-bounded. Previously these were
			
 
				+  // unbounded Maps that grew with every distinct lookup and OOM'd on
			
 
				+  // codebases with 20k+ files (see issue: unbounded cache growth).
			
 
				+  private nodeCache: LRUCache<string, Node[]>; // per-file node cache
			
 
				+  private fileCache: LRUCache<string, string | null>; // per-file content cache
			
 
				+  private importMappingCache: LRUCache<string, ImportMapping[]>;
			
 
				+  private reExportCache: LRUCache<string, ReExport[]>;
			
 
				+  private nameCache: LRUCache<string, Node[]>; // name → nodes cache
			
 
				+  private lowerNameCache: LRUCache<string, Node[]>; // lower(name) → nodes cache
			
 
				+  private qualifiedNameCache: LRUCache<string, Node[]>; // qualified_name → nodes cache
			
 
				   private knownNames: Set<string> | null = null; // all known symbol names for fast pre-filtering
			
 
				   private knownFiles: Set<string> | null = null;
			
 
				   private cachesWarmed = false;
			
@@ -139,6 +160,19 @@ export class ReferenceResolver {
 
				   /**
				    * Set up a resolver for one project.
				    *
				    * All lookup caches are created here with a fixed capacity. The
				    * resolution context is built last, after every cache exists.
				    *
				    * @param projectRoot - Root directory of the project being resolved.
				    * @param queries - Query builder used for database lookups.
				    */
				   constructor(projectRoot: string, queries: QueryBuilder) {
				     this.projectRoot = projectRoot;
				     this.queries = queries;

				     const metadataCapacity = resolveCacheLimit();
				     // File contents are far heavier than metadata entries, so that cache
				     // gets one fifth of the budget, with a floor of 64 entries.
				     const contentCapacity = Math.max(64, Math.floor(metadataCapacity / 5));

				     // Per-file caches.
				     this.nodeCache = new LRUCache(metadataCapacity);
				     this.fileCache = new LRUCache(contentCapacity);
				     this.importMappingCache = new LRUCache(metadataCapacity);
				     this.reExportCache = new LRUCache(metadataCapacity);

				     // Symbol-name caches.
				     this.nameCache = new LRUCache(metadataCapacity);
				     this.lowerNameCache = new LRUCache(metadataCapacity);
				     this.qualifiedNameCache = new LRUCache(metadataCapacity);

				     this.context = this.createContext();
				   }
			
 
				 
			
--- a/src/resolution/lru-cache.ts
+++ b/src/resolution/lru-cache.ts
@@ -0,0 +1,62 @@
 
				/**
				 * Simple LRU cache backed by JavaScript's insertion-ordered Map.
				 *
				 * Used by ReferenceResolver to bound the per-resolver caches that
				 * previously grew without limit and OOM'd on large codebases (20k+
				 * files).
				 *
				 * Eviction is plain LRU. The Map's iteration order doubles as the
				 * recency order: the first key is the least recently used, the last is
				 * the most recently used. `get` and `set` move the touched key to the
				 * end, and `set` on a full cache drops the first key.
				 *
				 * Any key or value the Map can hold is supported, including
				 * `undefined` and `null`.
				 */
				export class LRUCache<K, V> {
				  private readonly max: number;
				  private readonly store = new Map<K, V>();

				  /**
				   * @param max - Maximum number of entries. Fractions are rounded down.
				   * @throws Error if `max`, after rounding down, is not a finite number
				   *   of at least 1. A value such as 0.5 used to pass validation and
				   *   then floor to a capacity of 0.
				   */
				  constructor(max: number) {
				    const capacity = Math.floor(max);
				    if (!Number.isFinite(capacity) || capacity < 1) {
				      throw new Error(`LRUCache max must be a finite number of at least 1, got ${max}`);
				    }
				    this.max = capacity;
				  }

				  /** Number of entries currently stored; never exceeds the capacity. */
				  get size(): number {
				    return this.store.size;
				  }

				  /**
				   * Look up `key` and mark it as most recently used.
				   *
				   * @returns The stored value, or `undefined` when the key is absent.
				   *   Use `has` to tell an absent key from a stored `undefined`.
				   */
				  get(key: K): V | undefined {
				    if (!this.store.has(key)) {
				      return undefined;
				    }
				    // Safe: has() just confirmed the entry exists, so this is a real V
				    // even when V itself includes undefined.
				    const value = this.store.get(key) as V;
				    // Refresh recency by re-inserting at the end of the Map.
				    this.store.delete(key);
				    this.store.set(key, value);
				    return value;
				  }

				  /** Whether `key` is cached. Does not affect recency. */
				  has(key: K): boolean {
				    return this.store.has(key);
				  }

				  /**
				   * Insert or overwrite `key`, making it the most recently used entry.
				   * Inserting a new key into a full cache evicts the least recently
				   * used entry first.
				   */
				  set(key: K, value: V): void {
				    // delete() returns true for an existing key; removing it first
				    // moves the key to the end when it is re-added below.
				    const replaced = this.store.delete(key);
				    if (!replaced && this.store.size >= this.max) {
				      // Check the iterator's `done` flag rather than the key's value,
				      // so a key that is itself `undefined` can still be evicted.
				      const oldest = this.store.keys().next();
				      if (!oldest.done) {
				        this.store.delete(oldest.value);
				      }
				    }
				    this.store.set(key, value);
				  }

				  /** Remove every entry. */
				  clear(): void {
				    this.store.clear();
				  }
				}