il y a 14 heures · dfe13b03c8
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
				 
			
 
				 ## [Unreleased]
			
 
				 
			
 
				+### New Features
			
 
				+
			
 
				+- CodeGraph no longer times out when many agents query it at once. The shared background server that serves all your editor and agent sessions used to run every query on a single thread, so a burst of concurrent requests — for example a swarm of subagents exploring a large monorepo together — queued up behind one another and, while the heavy ones ran, froze the connection so finished answers couldn't even be sent back until the whole batch drained. Past a handful of simultaneous callers that routinely surfaced as MCP request timeouts. The shared server now answers queries across a pool of worker threads, so concurrent requests run in parallel and the connection stays responsive the whole time; when it's genuinely saturated a call returns a brief "busy, retry shortly" note (not an error) instead of hanging past your client's timeout. The pool sizes itself to your machine — roughly one worker per core, leaving one for coordination — and a single editor session is unaffected (no pool, no overhead). Set `CODEGRAPH_QUERY_POOL_SIZE` to choose a specific number of workers, or `0` to revert to single-threaded in-process queries.
			
 
				+
			
 
				 
			
 
				 ## [1.1.1] - 2026-06-24
			
 
				 
			
--- a/__tests__/query-pool.test.ts
+++ b/__tests__/query-pool.test.ts
@@ -0,0 +1,174 @@
 
				+/**
			
 
				+ * QueryPool — the off-loop worker pool that keeps the shared daemon's main
			
 
				+ * event loop free for the MCP transport under concurrent read load (the
			
 
				+ * "10 subagents time out" report). These tests drive the pool's queue / growth /
			
 
				+ * crash-recovery / backstop logic with INJECTED fake workers, so they exercise
			
 
				+ * the real scheduling code without spawning threads or needing a built dist.
			
 
				+ *
			
 
				+ * End-to-end behavior with real worker threads (a worker opens its own WAL read
			
 
				+ * connection and runs codegraph_explore) is validated separately against a real
			
 
				+ * index; here we pin the orchestration that makes that safe and fair.
			
 
				+ */
			
 
				+import { describe, it, expect } from 'vitest';
			
 
				+import { QueryPool, resolvePoolSize, type PoolWorker } from '../src/mcp/query-pool';
			
 
				+import type { ToolResult } from '../src/mcp/tools';
			
 
				+
			
 
				+const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));
			
 
				+
			
 
				+interface CallMsg { type: 'call'; id: number; toolName: string; args: Record<string, unknown> }
			
 
				+type Action = { result: ToolResult } | { crash: true } | { hang: true } | { wait: Promise<ToolResult> };
			
 
				+
			
 
				+/**
			
 
				+ * Fake worker speaking the same {type:'ready'|'result'} protocol as the real
			
 
				+ * one. `behavior` decides per call whether to return a result, crash (exit≠0),
			
 
				+ * hang (never reply — exercises the backstop), or wait on a promise (lets a test
			
 
				+ * hold a call in-flight to observe concurrency). Emits 'ready' on a macrotask so
			
 
				+ * the pool has wired its listeners first.
			
 
				+ */
			
 
				+class FakeWorker implements PoolWorker {
			
 
				+  private msgCb?: (m: unknown) => void;
			
 
				+  private exitCb?: (code: number) => void;
			
 
				+  alive = true;
			
 
				+  constructor(private behavior: (m: CallMsg) => Action, readyOk = true) {
			
 
				+    setTimeout(() => { if (this.alive) this.msgCb?.({ type: 'ready', ok: readyOk }); }, 0);
			
 
				+  }
			
 
				+  on(event: string, cb: (...args: any[]) => void): void {
			
 
				+    if (event === 'message') this.msgCb = cb;
			
 
				+    else if (event === 'exit') this.exitCb = cb;
			
 
				+    // 'error' unused by the fakes
			
 
				+  }
			
 
				+  private reply(id: number, result: ToolResult): void {
			
 
				+    if (this.alive) this.msgCb?.({ type: 'result', id, result });
			
 
				+  }
			
 
				+  postMessage(msg: unknown): void {
			
 
				+    const m = msg as CallMsg;
			
 
				+    if (!m || m.type !== 'call') return;
			
 
				+    const action = this.behavior(m);
			
 
				+    if ('crash' in action) {
			
 
				+      this.alive = false;
			
 
				+      setTimeout(() => this.exitCb?.(13), 0); // simulate a crash exit
			
 
				+      return;
			
 
				+    }
			
 
				+    if ('hang' in action) return; // never reply
			
 
				+    if ('wait' in action) { void action.wait.then((r) => this.reply(m.id, r)); return; }
			
 
				+    setTimeout(() => this.reply(m.id, action.result), 0);
			
 
				+  }
			
 
				+  terminate(): Promise<number> { this.alive = false; return Promise.resolve(0); }
			
 
				+}
			
 
				+
			
 
				+const ok = (text: string): ToolResult => ({ content: [{ type: 'text', text }] });
			
 
				+
			
 
				+describe('resolvePoolSize', () => {
			
 
				+  it('honors a numeric override and disables on 0', () => {
			
 
				+    expect(resolvePoolSize('0', 8)).toBe(0);
			
 
				+    expect(resolvePoolSize('3', 8)).toBe(3);
			
 
				+  });
			
 
				+  it('caps the override at the hard ceiling', () => {
			
 
				+    expect(resolvePoolSize('999', 8)).toBe(16);
			
 
				+  });
			
 
				+  it('defaults to clamp(cores-1, 1, 16) when unset/blank/non-numeric', () => {
			
 
				+    expect(resolvePoolSize(undefined, 8)).toBe(7);
			
 
				+    expect(resolvePoolSize('', 8)).toBe(7);
			
 
				+    expect(resolvePoolSize('abc', 8)).toBe(7);
			
 
				+    expect(resolvePoolSize(undefined, 1)).toBe(1);   // never zero
			
 
				+    expect(resolvePoolSize(undefined, 64)).toBe(16); // never above the ceiling
			
 
				+  });
			
 
				+});
			
 
				+
			
 
				+describe('QueryPool', () => {
			
 
				+  it('dispatches a call and returns the worker result', async () => {
			
 
				+    const pool = new QueryPool({ root: '/x', size: 1, createWorker: () => new FakeWorker((m) => ({ result: ok(`r:${m.toolName}`) })) });
			
 
				+    const res = await pool.run('codegraph_explore', { query: 'q' });
			
 
				+    expect(res.content[0].text).toBe('r:codegraph_explore');
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('runs N concurrent calls in parallel (not serialized)', async () => {
			
 
				+    let active = 0, maxActive = 0;
			
 
				+    let release!: () => void;
			
 
				+    const gate = new Promise<void>((r) => { release = r; });
			
 
				+    // Each call holds in-flight until the gate opens, so max concurrency across
			
 
				+    // the pool is observable: with size=5 and 5 calls, all 5 should run at once.
			
 
				+    const behavior = (m: CallMsg): Action => ({
			
 
				+      wait: (async () => {
			
 
				+        active++; maxActive = Math.max(maxActive, active);
			
 
				+        await gate;
			
 
				+        active--;
			
 
				+        return ok(`r${m.id}`);
			
 
				+      })(),
			
 
				+    });
			
 
				+    const pool = new QueryPool({ root: '/x', size: 5, createWorker: () => new FakeWorker(behavior) });
			
 
				+    const calls = Promise.all(Array.from({ length: 5 }, (_, i) => pool.run('codegraph_search', { i })));
			
 
				+    await sleep(40); // let all workers spawn (cold-start cap → a few generations) + dispatch
			
 
				+    expect(maxActive).toBe(5);
			
 
				+    release();
			
 
				+    const results = await calls;
			
 
				+    expect(results.every((r) => /^r\d+$/.test(r.content[0].text))).toBe(true);
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('does not spawn the whole pool for a single call (pending-aware growth)', async () => {
			
 
				+    let created = 0;
			
 
				+    const pool = new QueryPool({ root: '/x', size: 8, createWorker: () => { created++; return new FakeWorker((m) => ({ result: ok(`r${m.id}`) })); } });
			
 
				+    await pool.run('codegraph_node', { symbol: 's' });
			
 
				+    // One eager worker + at most the cold-start cap — never all 8.
			
 
				+    expect(created).toBeLessThanOrEqual(2);
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('recovers from a worker crash: retries the in-flight call and respawns', async () => {
			
 
				+    let calls = 0;
			
 
				+    const pool = new QueryPool({
			
 
				+      root: '/x', size: 2, maxRetries: 1,
			
 
				+      // First dispatch crashes its worker; the retry (on a respawn/other worker) succeeds.
			
 
				+      createWorker: () => new FakeWorker((m) => (++calls === 1 ? { crash: true } : { result: ok(`recovered:${m.id}`) })),
			
 
				+    });
			
 
				+    const res = await pool.run('codegraph_explore', { query: 'q' });
			
 
				+    expect(res.isError).toBeFalsy();
			
 
				+    expect(res.content[0].text).toBe('recovered:1');
			
 
				+    await sleep(10);
			
 
				+    // The pool grows lazily, so one call keeps one worker — but the crash must
			
 
				+    // have been replaced (not dropped to zero) and the pool stays healthy and
			
 
				+    // keeps serving.
			
 
				+    expect(pool.liveWorkers).toBeGreaterThanOrEqual(1);
			
 
				+    expect(pool.healthy).toBe(true);
			
 
				+    const again = await pool.run('codegraph_node', { symbol: 's' });
			
 
				+    expect(again.isError).toBeFalsy();
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('fails a poison call gracefully without wedging the pool', async () => {
			
 
				+    // This specific call always crashes its worker; a normal call still works.
			
 
				+    const poison = (m: CallMsg) => m.toolName === 'codegraph_explore';
			
 
				+    const pool = new QueryPool({
			
 
				+      root: '/x', size: 3, maxRetries: 1,
			
 
				+      createWorker: () => new FakeWorker((m) => (poison(m) ? { crash: true } : { result: ok(`ok:${m.id}`) })),
			
 
				+    });
			
 
				+    const bad = await pool.run('codegraph_explore', { query: 'boom' });
			
 
				+    expect(bad.isError).toBe(true); // graceful, after retries
			
 
				+    const good = await pool.run('codegraph_search', { query: 'fine' });
			
 
				+    expect(good.isError).toBeFalsy();
			
 
				+    expect(good.content[0].text).toMatch(/^ok:/);
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('graceful backstop: a call that can\'t be served in time gets success-shaped busy guidance', async () => {
			
 
				+    // 1 worker, every call hangs; soft-timeout small → the caller gets guidance,
			
 
				+    // never a hard error, never a hang.
			
 
				+    const pool = new QueryPool({ root: '/x', size: 1, softTimeoutMs: 60, createWorker: () => new FakeWorker(() => ({ hang: true })) });
			
 
				+    const res = await pool.run('codegraph_explore', { query: 'q' });
			
 
				+    expect(res.isError).toBeFalsy();            // NOT an error (abandonment rule)
			
 
				+    expect(res.content[0].text).toMatch(/busy|retry/i);
			
 
				+    await pool.destroy();
			
 
				+  });
			
 
				+
			
 
				+  it('destroy settles outstanding calls instead of hanging', async () => {
			
 
				+    const pool = new QueryPool({ root: '/x', size: 1, softTimeoutMs: 10_000, createWorker: () => new FakeWorker(() => ({ hang: true })) });
			
 
				+    const pending = pool.run('codegraph_explore', { query: 'q' });
			
 
				+    await sleep(5);
			
 
				+    await pool.destroy();
			
 
				+    const res = await pending; // must resolve, not hang
			
 
				+    expect(res.isError).toBe(true);
			
 
				+    expect(pool.healthy).toBe(false);
			
 
				+  });
			
 
				+});
			
--- a/scripts/agent-eval/repro-concurrent-explore.mjs
+++ b/scripts/agent-eval/repro-concurrent-explore.mjs
@@ -0,0 +1,119 @@
 
				+#!/usr/bin/env node
			
 
				+// Reproduction harness A — does the shared daemon serialize concurrent explore?
			
 
				+//
			
 
				+// Mirrors the daemon's reality: ONE CodeGraph + ONE ToolHandler (as MCPEngine
			
 
				+// shares across all sessions), then fires N concurrent codegraph_explore calls
			
 
				+// and measures:
			
 
				+//   - each call's wall-clock latency + completion order
			
 
				+//   - an event-loop HEARTBEAT (setInterval 50ms): the max gap between ticks is a
			
 
				+//     direct measure of how long synchronous compute blocked the loop. In the
			
 
				+//     real daemon a blocked loop can't flush a finished response or read the
			
 
				+//     next request, so this gap is what starves the MCP transport.
			
 
				+//
			
 
				+// Usage: node repro-concurrent-explore.mjs <repo-with-.codegraph> <N> [timeoutMs]
			
 
				+import { pathToFileURL } from 'node:url';
			
 
				+import { resolve } from 'node:path';
			
 
				+import { performance } from 'node:perf_hooks';
			
 
				+
			
 
				+const [, , repo, nRaw, timeoutRaw] = process.argv;
			
 
				+if (!repo) {
			
 
				+  console.error('usage: repro-concurrent-explore.mjs <repo> <N=10> [timeoutMs=60000]');
			
 
				+  process.exit(1);
			
 
				+}
			
 
				+const N = Number(nRaw) || 10;
			
 
				+const TIMEOUT_MS = Number(timeoutRaw) || 60000; // ~ MCP SDK default request timeout
			
 
				+
			
 
				+const load = async (rel) => import(pathToFileURL(resolve(rel)).href);
			
 
				+const idx = await load('dist/index.js');
			
 
				+const tools = await load('dist/mcp/tools.js');
			
 
				+const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
			
 
				+const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
			
 
				+
			
 
				+// Distinct queries so no two calls are trivially identical. Mix of NL questions
			
 
				+// (exercise FTS + RWR over the whole graph) — the expensive explore path.
			
 
				+const QUERIES = [
			
 
				+  'how does the text model handle edits and undo',
			
 
				+  'how does the file service watch for changes on disk',
			
 
				+  'how does the keybinding service resolve a chord to a command',
			
 
				+  'how does the extension host activate an extension',
			
 
				+  'how does the editor render decorations in the viewport',
			
 
				+  'how does the search service stream results to the UI',
			
 
				+  'how does the terminal process manager spawn a shell',
			
 
				+  'how does the configuration service merge user and workspace settings',
			
 
				+  'how does the debug adapter forward breakpoints to the runtime',
			
 
				+  'how does the quick input widget filter its items',
			
 
				+  'how does the notification service queue and show toasts',
			
 
				+  'how does the git extension compute the diff for a file',
			
 
				+  'how does the language features registry dispatch a hover request',
			
 
				+  'how does the workbench layout restore editor groups on reload',
			
 
				+  'how does the storage service persist state between sessions',
			
 
				+  'how does the menu service build a context menu from contributions',
			
 
				+];
			
 
				+
			
 
				+const cg = CodeGraph.openSync(repo);
			
 
				+let fileCount = 0;
			
 
				+try { fileCount = cg.getStats().fileCount; } catch {}
			
 
				+const handler = new ToolHandler(cg);
			
 
				+
			
 
				+// --- event-loop heartbeat ---
			
 
				+let lastTick = performance.now();
			
 
				+let maxGap = 0;
			
 
				+const gaps = [];
			
 
				+const hb = setInterval(() => {
			
 
				+  const now = performance.now();
			
 
				+  const gap = now - lastTick;
			
 
				+  lastTick = now;
			
 
				+  if (gap > 60) gaps.push(Math.round(gap)); // expected ~50ms; record stalls
			
 
				+  if (gap > maxGap) maxGap = gap;
			
 
				+}, 50);
			
 
				+
			
 
				+function runOne(i) {
			
 
				+  const q = QUERIES[i % QUERIES.length];
			
 
				+  const startedAt = performance.now();
			
 
				+  let timer;
			
 
				+  const timeout = new Promise((res) => {
			
 
				+    timer = setTimeout(() => res({ timedOut: true }), TIMEOUT_MS);
			
 
				+  });
			
 
				+  const work = handler
			
 
				+    .execute('codegraph_explore', { query: q })
			
 
				+    .then((r) => ({ ok: !r.isError, chars: r.content?.[0]?.text?.length ?? 0 }))
			
 
				+    .catch((e) => ({ ok: false, err: String(e?.message ?? e) }));
			
 
				+  return Promise.race([work, timeout]).then((r) => {
			
 
				+    clearTimeout(timer);
			
 
				+    return { i, q, ms: Math.round(performance.now() - startedAt), ...r };
			
 
				+  });
			
 
				+}
			
 
				+
			
 
				+// Baseline: one warm single call (so the first-call cold paths don't skew N).
			
 
				+const warmStart = performance.now();
			
 
				+await runOne(0);
			
 
				+const warmMs = Math.round(performance.now() - warmStart);
			
 
				+
			
 
				+// Reset heartbeat stats for the concurrent run.
			
 
				+gaps.length = 0; maxGap = 0; lastTick = performance.now();
			
 
				+
			
 
				+const batchStart = performance.now();
			
 
				+const results = await Promise.all(Array.from({ length: N }, (_, i) => runOne(i)));
			
 
				+const batchMs = Math.round(performance.now() - batchStart);
			
 
				+clearInterval(hb);
			
 
				+
			
 
				+const lat = results.map((r) => r.ms).sort((a, b) => a - b);
			
 
				+const timeouts = results.filter((r) => r.timedOut).length;
			
 
				+const p = (q) => lat[Math.min(lat.length - 1, Math.floor(q * lat.length))];
			
 
				+
			
 
				+console.log('='.repeat(64));
			
 
				+console.log(`repo=${repo}`);
			
 
				+console.log(`fileCount=${fileCount}  N=${N}  perCallTimeout=${TIMEOUT_MS}ms`);
			
 
				+console.log(`single warm explore: ${warmMs}ms`);
			
 
				+console.log('-'.repeat(64));
			
 
				+console.log(`concurrent batch wall-clock: ${batchMs}ms`);
			
 
				+console.log(`per-call latency  min=${lat[0]}  p50=${p(0.5)}  p90=${p(0.9)}  max=${lat[lat.length - 1]}  (ms)`);
			
 
				+console.log(`TIMEOUTS (>${TIMEOUT_MS}ms): ${timeouts} / ${N}`);
			
 
				+console.log(`event-loop max stall: ${Math.round(maxGap)}ms   stalls>60ms: ${gaps.length}`);
			
 
				+console.log(`  sum of stalls: ${gaps.reduce((a, b) => a + b, 0)}ms   biggest 5: ${gaps.sort((a,b)=>b-a).slice(0,5).join(', ')}`);
			
 
				+console.log('-'.repeat(64));
			
 
				+console.log('SERIALIZATION CHECK:');
			
 
				+console.log(`  if serialized, batch ≈ N×single = ~${N * warmMs}ms;  actual=${batchMs}ms  (ratio ${(batchMs / (N * warmMs)).toFixed(2)})`);
			
 
				+console.log(`  max latency / single = ${(lat[lat.length - 1] / warmMs).toFixed(1)}× (≈N means last call waited for all others)`);
			
 
				+console.log('='.repeat(64));
			
 
				+try { cg.close?.(); } catch {}
			
--- a/scripts/agent-eval/repro-daemon-clients.mjs
+++ b/scripts/agent-eval/repro-daemon-clients.mjs
@@ -0,0 +1,125 @@
 
				+#!/usr/bin/env node
			
 
				+// Reproduction harness B — the FAITHFUL opencode scenario.
			
 
				+//
			
 
				+// Spawns N real `codegraph serve --mcp --path <repo>` processes (each becomes a
			
 
				+// proxy that attaches to ONE shared daemon — exactly what opencode does with N
			
 
				+// subagents), drives clean MCP JSON-RPC over each child's stdio, then fires ONE
			
 
				+// concurrent wave of codegraph_explore tools/call across all N and measures
			
 
				+// end-to-end latency + timeouts. This captures transport-flush starvation: a
			
 
				+// daemon event-loop blocked in synchronous explore compute can neither read the
			
 
				+// next request nor flush a finished response.
			
 
				+//
			
 
				+// Usage: node repro-daemon-clients.mjs <repo> <N=10> [perCallTimeoutMs=60000] [warm=1]
			
 
				+import { spawn } from 'node:child_process';
			
 
				+import { performance } from 'node:perf_hooks';
			
 
				+import { resolve } from 'node:path';
			
 
				+
			
 
				+const [, , repoRaw, nRaw, timeoutRaw, warmRaw] = process.argv;
			
 
				+const repo = resolve(repoRaw || '.');
			
 
				+const N = Number(nRaw) || 10;
			
 
				+const TIMEOUT_MS = Number(timeoutRaw) || 60000;
			
 
				+const WARM = warmRaw === undefined ? true : warmRaw !== '0';
			
 
				+const CLI = resolve('dist/bin/codegraph.js');
			
 
				+
			
 
				+const QUERIES = [
			
 
				+  'how does the text model handle edits and undo',
			
 
				+  'how does the file service watch for changes on disk',
			
 
				+  'how does the keybinding service resolve a chord to a command',
			
 
				+  'how does the extension host activate an extension',
			
 
				+  'how does the editor render decorations in the viewport',
			
 
				+  'how does the search service stream results to the UI',
			
 
				+  'how does the terminal process manager spawn a shell',
			
 
				+  'how does the configuration service merge user and workspace settings',
			
 
				+  'how does the debug adapter forward breakpoints to the runtime',
			
 
				+  'how does the quick input widget filter its items',
			
 
				+  'how does the notification service queue and show toasts',
			
 
				+  'how does the git extension compute the diff for a file',
			
 
				+];
			
 
				+
			
 
				+function makeClient(id) {
			
 
				+  const child = spawn('node', [CLI, 'serve', '--mcp', '--path', repo], {
			
 
				+    env: { ...process.env, CODEGRAPH_TELEMETRY: '0', DO_NOT_TRACK: '1', CODEGRAPH_MCP_LOG_ATTACH: '0' },
			
 
				+    stdio: ['pipe', 'pipe', 'inherit'],
			
 
				+  });
			
 
				+  let buf = '';
			
 
				+  const waiters = new Map(); // id -> resolve
			
 
				+  child.stdout.setEncoding('utf8');
			
 
				+  child.stdout.on('data', (chunk) => {
			
 
				+    buf += chunk;
			
 
				+    let idx;
			
 
				+    while ((idx = buf.indexOf('\n')) !== -1) {
			
 
				+      const line = buf.slice(0, idx).trim();
			
 
				+      buf = buf.slice(idx + 1);
			
 
				+      if (!line) continue;
			
 
				+      let msg; try { msg = JSON.parse(line); } catch { continue; }
			
 
				+      if (msg.id !== undefined && waiters.has(msg.id)) {
			
 
				+        waiters.get(msg.id)(msg);
			
 
				+        waiters.delete(msg.id);
			
 
				+      }
			
 
				+    }
			
 
				+  });
			
 
				+  const send = (obj) => child.stdin.write(JSON.stringify(obj) + '\n');
			
 
				+  const request = (method, params, rpcId, timeoutMs) =>
			
 
				+    new Promise((res) => {
			
 
				+      let timer;
			
 
				+      if (timeoutMs) timer = setTimeout(() => { waiters.delete(rpcId); res({ __timeout: true }); }, timeoutMs);
			
 
				+      waiters.set(rpcId, (m) => { if (timer) clearTimeout(timer); res(m); });
			
 
				+      send({ jsonrpc: '2.0', id: rpcId, method, params });
			
 
				+    });
			
 
				+  return { id, child, send, request };
			
 
				+}
			
 
				+
			
 
				+const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
			
 
				+
			
 
				+const clients = Array.from({ length: N }, (_, i) => makeClient(i));
			
 
				+
			
 
				+// Initialize every client (handshake is answered locally by each proxy, instant).
			
 
				+await Promise.all(clients.map((c) =>
			
 
				+  c.request('initialize', { protocolVersion: '2024-11-05', capabilities: {}, clientInfo: { name: 'repro', version: '1' } }, `init-${c.id}`, 10000)
			
 
				+    .then(() => c.send({ jsonrpc: '2.0', method: 'initialized' }))
			
 
				+));
			
 
				+
			
 
				+// Warm the daemon: one explore through client 0 forces daemon spawn + project
			
 
				+// open + catch-up gate to complete, so the concurrent wave measures the STEADY
			
 
				+// state (the user's real scenario after the first call), not cold start.
			
 
				+if (WARM) {
			
 
				+  process.stderr.write('[repro] warming daemon (first explore triggers spawn+open+catchup)...\n');
			
 
				+  const t0 = performance.now();
			
 
				+  const r = await clients[0].request('tools/call', { name: 'codegraph_explore', arguments: { query: QUERIES[0] } }, 'warm-0', 120000);
			
 
				+  process.stderr.write(`[repro] warm explore took ${Math.round(performance.now() - t0)}ms (timeout=${!!r.__timeout})\n`);
			
 
				+  await sleep(500);
			
 
				+}
			
 
				+
			
 
				+// THE WAVE: fire one explore on every client as simultaneously as possible.
			
 
				+process.stderr.write(`[repro] firing ${N} concurrent explores...\n`);
			
 
				+const waveStart = performance.now();
			
 
				+const results = await Promise.all(clients.map((c, i) => {
			
 
				+  const started = performance.now();
			
 
				+  return c.request('tools/call', { name: 'codegraph_explore', arguments: { query: QUERIES[i % QUERIES.length] } }, `call-${c.id}`, TIMEOUT_MS)
			
 
				+    .then((m) => ({
			
 
				+      id: c.id,
			
 
				+      ms: Math.round(performance.now() - started),
			
 
				+      timedOut: !!m.__timeout,
			
 
				+      ok: !!m.result && !m.result.isError,
			
 
				+      chars: m.result?.content?.[0]?.text?.length ?? 0,
			
 
				+    }));
			
 
				+}));
			
 
				+const waveMs = Math.round(performance.now() - waveStart);
			
 
				+
			
 
				+const lat = results.map((r) => r.ms).sort((a, b) => a - b);
			
 
				+const timeouts = results.filter((r) => r.timedOut).length;
			
 
				+const p = (q) => lat[Math.min(lat.length - 1, Math.floor(q * lat.length))];
			
 
				+
			
 
				+console.log('='.repeat(64));
			
 
				+console.log(`HARNESS B (real daemon + ${N} proxies)   repo=${repo}`);
			
 
				+console.log(`warm=${WARM}  perCallTimeout=${TIMEOUT_MS}ms`);
			
 
				+console.log('-'.repeat(64));
			
 
				+console.log(`wave wall-clock: ${waveMs}ms`);
			
 
				+console.log(`per-call latency  min=${lat[0]}  p50=${p(0.5)}  p90=${p(0.9)}  max=${lat[lat.length - 1]}  (ms)`);
			
 
				+console.log(`TIMEOUTS (>${TIMEOUT_MS}ms): ${timeouts} / ${N}`);
			
 
				+console.log(`completion order (id:ms): ${results.slice().sort((a,b)=>a.ms-b.ms).map(r=>`${r.id}:${r.ms}`).join('  ')}`);
			
 
				+console.log('='.repeat(64));
			
 
				+
			
 
				+for (const c of clients) { try { c.child.stdin.end(); c.child.kill('SIGTERM'); } catch {} }
			
 
				+await sleep(300);
			
 
				+process.exit(0);
			
--- a/src/mcp/daemon.ts
+++ b/src/mcp/daemon.ts
@@ -148,7 +148,12 @@ export class Daemon {
 
				     this.pidPath = getDaemonPidPath(projectRoot);
			
 
				     this.idleTimeoutMs = opts.idleTimeoutMs ?? resolveIdleTimeoutMs();
			
 
				     this.maxIdleMs = opts.maxIdleMs ?? resolveMaxIdleMs();
			
 
				-    this.engine = new MCPEngine();
			
 
				+    // Daemon mode serves many concurrent clients on one event loop, so off-load
			
 
				+    // read-tool dispatch to a worker pool — otherwise concurrent explores
			
 
				+    // serialize and starve the MCP transport (clients time out). Direct mode
			
 
				+    // (one stdio client) leaves the pool off; `CODEGRAPH_QUERY_POOL_SIZE=0`
			
 
				+    // disables it here too.
			
 
				+    this.engine = new MCPEngine({ queryPool: true });
			
 
				     this.engine.setProjectPathHint(projectRoot);
			
 
				   }
			
 
				 
			
--- a/src/mcp/engine.ts
+++ b/src/mcp/engine.ts
@@ -10,10 +10,12 @@
 
				  *   inotify watch set — that's the entire point of issue #411.
			
 
				  */
			
 
				 
			
 
				+import * as os from 'os';
			
 
				 import type CodeGraph from '../index';
			
 
				 import { findNearestCodeGraphRoot } from '../directory';
			
 
				 import { watchDisabledReason } from '../sync';
			
 
				 import { ToolHandler } from './tools';
			
 
				+import { QueryPool, resolvePoolSize } from './query-pool';
			
 
				 
			
 
				 // Lazy-load the heavy CodeGraph chain (sqlite + query/graph/context layers) OFF
			
 
				 // the MCP startup path. It's only needed once a tool actually opens a project —
			
@@ -31,6 +33,15 @@ export interface MCPEngineOptions {
 
				    * cheap. Honors {@link watchDisabledReason} regardless.
			
 
				    */
			
 
				   watch?: boolean;
			
 
				+  /**
			
 
				+   * Whether to off-load read-tool dispatch to a worker-thread pool. Only the
			
 
				+   * SHARED daemon wants this — it serves many concurrent clients on one event
			
 
				+   * loop, so without a pool concurrent explores serialize and starve the MCP
			
 
				+   * transport. Direct mode (one stdio client, no concurrency) leaves it off so a
			
 
				+   * single call never pays a worker round-trip. `CODEGRAPH_QUERY_POOL_SIZE=0`
			
 
				+   * disables it even in daemon mode.
			
 
				+   */
			
 
				+  queryPool?: boolean;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -51,12 +62,39 @@ export class MCPEngine {
 
				   private watcherStarted = false;
			
 
				   private opts: Required<MCPEngineOptions>;
			
 
				   private closed = false;
			
 
				+  // Off-loop read-tool pool (daemon mode only). Created lazily once the default
			
 
				+  // project is open — workers each hold their own WAL read connection.
			
 
				+  private queryPool: QueryPool | null = null;
			
 
				 
			
 
				   constructor(opts: MCPEngineOptions = {}) {
			
 
				-    this.opts = { watch: opts.watch ?? true };
			
 
				+    this.opts = { watch: opts.watch ?? true, queryPool: opts.queryPool ?? false };
			
 
				     this.toolHandler = new ToolHandler(null);
			
 
				   }
			
 
				 
			
 
				+  /**
			
 
				+   * Start the worker-thread query pool once a default project is open (daemon
			
 
				+   * mode only; honors `CODEGRAPH_QUERY_POOL_SIZE`). Idempotent and best-effort:
			
 
				+   * if workers can't spawn on this platform the ToolHandler keeps serving reads
			
 
				+   * in-process, so the pool can only help, never break, tool calls.
			
 
				+   */
			
 
				+  private maybeStartPool(root: string): void {
			
 
				+    if (!this.opts.queryPool || this.queryPool || this.closed) return;
			
 
				+    const size = resolvePoolSize(process.env.CODEGRAPH_QUERY_POOL_SIZE, os.cpus().length);
			
 
				+    if (size <= 0) {
			
 
				+      process.stderr.write('[CodeGraph MCP] Query pool disabled (CODEGRAPH_QUERY_POOL_SIZE=0); serving reads in-process.\n');
			
 
				+      return;
			
 
				+    }
			
 
				+    try {
			
 
				+      this.queryPool = new QueryPool({ root, size });
			
 
				+      this.toolHandler.setQueryPool(this.queryPool);
			
 
				+      process.stderr.write(`[CodeGraph MCP] Query pool: up to ${size} worker thread(s) for concurrent reads.\n`);
			
 
				+    } catch (err) {
			
 
				+      const msg = err instanceof Error ? err.message : String(err);
			
 
				+      process.stderr.write(`[CodeGraph MCP] Query pool unavailable (${msg}); serving reads in-process.\n`);
			
 
				+      this.queryPool = null;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   /**
			
 
				    * Convenience for {@link MCPServer} compatibility: pre-seed an explicit
			
 
				    * project path (from the `--path` CLI flag) without yet opening it. This
			
@@ -133,6 +171,7 @@ export class MCPEngine {
 
				       this.toolHandler.setDefaultCodeGraph(this.cg);
			
 
				       this.startWatching();
			
 
				       this.catchUpSync();
			
 
				+      this.maybeStartPool(resolvedRoot);
			
 
				     } catch {
			
 
				       // Still failing — caller will try again on the next tool call.
			
 
				     }
			
@@ -145,6 +184,13 @@ export class MCPEngine {
 
				   stop(): void {
			
 
				     if (this.closed) return;
			
 
				     this.closed = true;
			
 
				+    // Detach + terminate the worker pool first so no tool call routes to a
			
 
				+    // worker mid-teardown; outstanding pool calls resolve with graceful guidance.
			
 
				+    this.toolHandler.setQueryPool(null);
			
 
				+    if (this.queryPool) {
			
 
				+      void this.queryPool.destroy();
			
 
				+      this.queryPool = null;
			
 
				+    }
			
 
				     this.toolHandler.closeAll();
			
 
				     if (this.cg) {
			
 
				       try { this.cg.close(); } catch { /* ignore */ }
			
@@ -168,6 +214,7 @@ export class MCPEngine {
 
				       this.toolHandler.setDefaultCodeGraph(this.cg);
			
 
				       this.startWatching();
			
 
				       this.catchUpSync();
			
 
				+      this.maybeStartPool(resolvedRoot);
			
 
				     } catch (err) {
			
 
				       const msg = err instanceof Error ? err.message : String(err);
			
 
				       process.stderr.write(`[CodeGraph MCP] Failed to open project at ${resolvedRoot}: ${msg}\n`);
			
--- a/src/mcp/query-pool.ts
+++ b/src/mcp/query-pool.ts
@@ -0,0 +1,308 @@
 
				+/**
			
 
				+ * Query pool — runs CPU-heavy read-tool calls on a pool of worker threads so
			
 
				+ * the shared daemon's main event loop stays free for the MCP transport.
			
 
				+ *
			
 
				+ * Why this exists: see {@link ./query-worker}. One daemon, one event loop, one
			
 
				+ * synchronous SQLite connection serializes every concurrent `codegraph_explore`
			
 
				+ * AND starves the transport (a 10-way wave delivered 0 transport heartbeats in
			
 
				+ * 25s — responses can't flush until the whole batch drains, so clients time
			
 
				+ * out). Spreading the dispatch across worker threads (each its own WAL read
			
 
				+ * connection) restores true multi-core parallelism and an idle main loop.
			
 
				+ *
			
 
				+ * Properties:
			
 
				+ *   - lazy growth: one warm worker on construct, grows to `size` on demand, so a
			
 
				+ *     single-agent session pays for one connection and a 10-subagent burst grows
			
 
				+ *     to the core budget.
			
 
				+ *   - crash recovery: a dead worker is respawned and its in-flight call retried
			
 
				+ *     once; a poison call that keeps crashing fails gracefully (never wedges the
			
 
				+ *     pool). A crash budget trips a circuit breaker (`healthy` → false) so the
			
 
				+ *     caller falls back to in-process dispatch instead of thrashing respawns.
			
 
				+ *   - graceful backstop: a call that can't be served within `softTimeoutMs`
			
 
				+ *     resolves with SUCCESS-shaped "busy, retry" guidance — never `isError`, so
			
 
				+ *     a momentary overload can't teach the agent to abandon codegraph — instead
			
 
				+ *     of hanging past the client's hard timeout.
			
 
				+ */
			
 
				+
			
 
				+import { Worker } from 'worker_threads';
			
 
				+import * as path from 'path';
			
 
				+import * as os from 'os';
			
 
				+import type { ToolResult } from './tools';
			
 
				+
			
 
				+/**
				+ * Absolute path of the worker entry script handed to `new Worker(...)`.
				+ * Compiled sibling — `query-worker.js` lives next to this file in `dist/mcp/`,
				+ * so it is resolved relative to `__dirname` rather than the process cwd.
				+ */
				+const WORKER_FILE = path.join(__dirname, 'query-worker.js');
			
 
				+
			
 
				+/**
				+ * Minimal worker surface the pool drives — satisfied by a real `worker_threads`
				+ * Worker. Abstracted so tests can inject a fake worker and exercise the pool's
				+ * queue / growth / crash-recovery / backstop logic without spawning threads or
				+ * needing a built `dist/`.
				+ */
				+export interface PoolWorker {
				+  /** Send a message to the worker; the pool posts `{ type: 'call', id, toolName, args }`. */
				+  postMessage(msg: unknown): void;
				+  /** Stop the worker. May return a promise or nothing, so callers must tolerate both. */
				+  terminate(): Promise<number> | void;
				+  /** Worker → pool messages (the `ready` handshake and `result` replies). */
				+  on(event: 'message', cb: (m: unknown) => void): void;
				+  /** The worker failed; the pool treats this as the worker being gone. */
				+  on(event: 'error', cb: (e: Error) => void): void;
				+  /** The worker exited with the given code. */
				+  on(event: 'exit', cb: (code: number) => void): void;
				+}
			
 
				+
			
 
				+/**
				+ * Default linger (in milliseconds) before a call that has not been answered is
				+ * resolved with busy-guidance. Used when `CODEGRAPH_QUERY_BUSY_TIMEOUT_MS` is
				+ * unset, blank, non-numeric, or below 1000.
				+ */
				+const DEFAULT_BUSY_TIMEOUT_MS = 45_000; // kept below the ~60s MCP client request timeout
			
 
				+
			
 
				+/**
				+ * Hard ceiling on pool size regardless of core count / env. Applied both to
				+ * the env override and the core-derived default in `resolvePoolSize`, and
				+ * again to the `size` option in the `QueryPool` constructor.
				+ */
				+const MAX_POOL_SIZE = 16;
			
 
				+
			
 
				+/**
				+ * Total worker deaths before the pool declares itself unhealthy and the caller
				+ * reverts to in-process dispatch. High enough to ride out a few transient
				+ * crashes, low enough that a systematically-broken worker (e.g. a platform that
				+ * can't spawn threads) degrades quickly instead of respawning forever.
				+ *
				+ * The count is cumulative for the lifetime of the pool (it is never reset) and
				+ * includes failed spawns and failed `ready` handshakes as well as crashes.
				+ */
				+const CRASH_BUDGET = 12;
			
 
				+
			
 
				+/**
				+ * Max workers cold-starting at once. A worker's cold start is heavy — full
				+ * module load (tree-sitter etc.) + opening a large WAL DB — and starting the
				+ * whole pool simultaneously thrashes CPU/I-O so badly it can stall the daemon's
				+ * main loop for tens of seconds. Warming a couple at a time keeps each start
				+ * fast; as one reports ready the next begins, so the pool still reaches full
				+ * size within a few calls of a burst, just without the thundering herd.
				+ *
				+ * This gates demand-driven growth only; the one-for-one replacement of a dead
				+ * worker is not subject to it.
				+ */
				+const MAX_CONCURRENT_SPAWN = 2;
			
 
				+
			
 
				+/**
				+ * Shape of a message a worker posts back (ready handshake or a tool result).
				+ * Every field is optional because the payload arrives untyped from the worker;
				+ * the pool ignores messages whose `type` it does not recognise.
				+ */
				+interface WorkerMessage {
				+  /** `'ready'` for the startup handshake, `'result'` for a finished call. */
				+  type?: string;
				+  /** On `'ready'`: `false` when the worker could not open its project. */
				+  ok?: boolean;
				+  /** On `'result'`: the id of the call being answered, echoed by the worker. */
				+  id?: number;
				+  /** On `'result'`: the tool's result, passed through to the caller. */
				+  result?: ToolResult;
				+}
			
 
				+
			
 
				+/** One queued or in-flight tool call, plus the bookkeeping needed to answer it exactly once. */
				+interface Job {
				+  /** Pool-assigned sequence number, sent to the worker with the call. */
				+  id: number;
				+  /** Name of the read tool to run. */
				+  toolName: string;
				+  /** Tool arguments, posted to the worker as-is. */
				+  args: Record<string, unknown>;
				+  /** Resolves the promise returned by `QueryPool.run`. */
				+  resolve: (r: ToolResult) => void;
				+  /** How many times this call has been re-queued after its worker died. */
				+  retries: number;
				+  /** True once `resolve` has been called; guards against answering twice. */
				+  settled: boolean;
				+  /** `Date.now()` when the call was submitted; used to report the wait in busy-guidance. */
				+  enqueuedAt: number;
				+  /** Backstop timer that answers the call with busy-guidance; cleared when the job settles. */
				+  softTimer?: NodeJS.Timeout;
				+}
			
 
				+
			
 
				+/** Construction options for {@link QueryPool}; everything except `root` is optional. */
				+export interface QueryPoolOptions {
				+  /** Default project root each worker opens at spawn (passed to the worker as `workerData.root`). */
				+  root: string;
				+  /** Max worker threads. Defaults to `max(1, cores - 1)`; always clamped to 1..16 by the constructor. */
				+  size?: number;
				+  /**
				+   * Linger (ms) before an unanswered call gets busy-guidance. When omitted, taken
				+   * from `CODEGRAPH_QUERY_BUSY_TIMEOUT_MS`, falling back to 45s.
				+   */
				+  softTimeoutMs?: number;
				+  /** Retries for an in-flight call whose worker crashed. Default 1. */
				+  maxRetries?: number;
				+  /** Worker factory (tests inject a fake). Defaults to a real `worker_threads` Worker. */
				+  createWorker?: () => PoolWorker;
				+}
			
 
				+
			
 
				+/**
				+ * Resolve the pool size from the `CODEGRAPH_QUERY_POOL_SIZE` override and the
				+ * machine's core count.
				+ *
				+ *   - Override is a finite number >= 0 → `min(floor(n), MAX_POOL_SIZE)`. A
				+ *     result of `0` explicitly disables the pool (the caller serves
				+ *     in-process — the pre-pool behavior).
				+ *   - Override unset, blank, non-numeric, or negative → the default,
				+ *     `clamp(cores - 1, 1, MAX_POOL_SIZE)`: leave a core for the main loop +
				+ *     OS, but never zero, since even one worker frees the transport and lets
				+ *     responses flush incrementally.
				+ *
				+ * @param envVal   Raw value of the override, or `undefined` when not set.
				+ * @param cpuCount Logical core count of the machine; NaN is treated as 1.
				+ * @returns The worker cap; `0` only when the override asked for it.
				+ */
				+export function resolvePoolSize(envVal: string | undefined, cpuCount: number): number {
				+  // Trim before parsing: `Number('  ')` is 0, so a whitespace-only value would
				+  // otherwise silently disable the pool instead of meaning "unset".
				+  const raw = envVal?.trim() ?? '';
				+  if (raw !== '') {
				+    const n = Number(raw);
				+    if (Number.isFinite(n) && n >= 0) return Math.min(Math.floor(n), MAX_POOL_SIZE);
				+    // non-numeric / negative → fall through to the default
				+  }
				+  // Math.max/Math.min propagate NaN, so guard it; floor keeps the size integral.
				+  const cores = Number.isNaN(cpuCount) ? 1 : Math.floor(cpuCount);
				+  return Math.max(1, Math.min(cores - 1, MAX_POOL_SIZE));
				+}
			
 
				+
			
 
				+/**
				+ * Read the soft (busy-guidance) timeout from `CODEGRAPH_QUERY_BUSY_TIMEOUT_MS`.
				+ * Only a finite value of at least 1000 ms is honored (rounded down to a whole
				+ * millisecond); anything else — unset, blank, non-numeric, too small — yields
				+ * the built-in default.
				+ */
				+function resolveBusyTimeoutMs(): number {
				+  const override = process.env.CODEGRAPH_QUERY_BUSY_TIMEOUT_MS;
				+  const parsed = override === undefined || override === '' ? NaN : Number(override);
				+  return Number.isFinite(parsed) && parsed >= 1000
				+    ? Math.floor(parsed)
				+    : DEFAULT_BUSY_TIMEOUT_MS;
				+}
			
 
				+
			
 
				+/**
				+ * Build the overload reply for a call the pool could not answer in time. The
				+ * result is success-shaped on purpose (NEVER `isError` — see the abandonment
				+ * rule): it tells the agent to retry rather than reporting a failure.
				+ *
				+ * @param waitedMs How long the call waited, in milliseconds.
				+ * @returns A single-text-block result; the wait is reported in whole seconds,
				+ *          rounded, and never less than 1.
				+ */
				+function busyGuidance(waitedMs: number): ToolResult {
				+  const waitedSecs = Math.max(1, Math.round(waitedMs / 1000));
				+  const text = [
				+    `CodeGraph is busy serving other concurrent requests right now (this call waited ${waitedSecs}s in the queue). `,
				+    `This is NOT an error and the index is fine — wait a few seconds and retry this exact call; it will return normally. `,
				+    `If you can't wait, use your built-in tools for just this one step.`,
				+  ].join('');
				+  return { content: [{ type: 'text', text }] };
				+}
			
 
				+
			
 
				+/**
				+ * Pool of worker threads that runs read-tool calls off the calling thread.
				+ *
				+ * Bookkeeping:
				+ *   - `workers` holds every live worker; `pendingWorkers` are the ones that
				+ *     have not yet completed the `ready` handshake.
				+ *   - a ready worker is either in `idle` or has exactly one job in `inflight`.
				+ *   - a job is resolved at most once (see `settle`) by whichever happens
				+ *     first: the worker's result, the soft-timeout backstop, a worker death
				+ *     with no retries left, the circuit breaker tripping, or `destroy()`.
				+ *
				+ * `run()` always resolves and never rejects.
				+ */
				+export class QueryPool {
				+  /** Ready workers with no call assigned. */
				+  private idle: PoolWorker[] = [];
				+  /** Calls waiting for a worker, head first. */
				+  private queue: Job[] = [];
				+  /** The single call each busy worker is running. */
				+  private inflight = new Map<PoolWorker, Job>();
				+  /** Every live worker: pending, idle, or busy. */
				+  private workers = new Set<PoolWorker>();
				+  // Workers spawned but not yet 'ready'. Growth must count these so a single
				+  // first call (with the eager worker still starting) doesn't spawn the WHOLE
				+  // pool at once — N simultaneous cold worker starts (each a full module load +
				+  // a large DB open) saturate the box and starve the main loop. Grow only when
				+  // the queue outstrips idle + pending.
				+  private pendingWorkers = new Set<PoolWorker>();
				+  /** Next job id to hand out. */
				+  private nextId = 1;
				+  /** Cumulative worker deaths / failed starts; compared against CRASH_BUDGET. */
				+  private totalCrashes = 0;
				+  private destroyed = false;
				+  private readonly root: string;
				+  private readonly maxSize: number;
				+  private readonly softTimeoutMs: number;
				+  private readonly maxRetries: number;
				+  private readonly createWorker: () => PoolWorker;
				+
				+  /**
				+   * Build the pool and start one worker eagerly so the first call finds it
				+   * warm (or warming). The remaining workers are spawned on demand.
				+   */
				+  constructor(opts: QueryPoolOptions) {
				+    this.root = opts.root;
				+    this.maxSize = Math.max(1, Math.min(opts.size ?? Math.max(1, os.cpus().length - 1), MAX_POOL_SIZE));
				+    this.softTimeoutMs = opts.softTimeoutMs ?? resolveBusyTimeoutMs();
				+    this.maxRetries = opts.maxRetries ?? 1;
				+    this.createWorker = opts.createWorker ?? (() => new Worker(WORKER_FILE, { workerData: { root: this.root } }));
				+    this.spawnOne(); // one eager warm worker, ready for the first call
				+  }
				+
				+  /** Pool size cap (for logging/status). */
				+  get size(): number { return this.maxSize; }
				+
				+  /** Live worker count (for tests/status). */
				+  get liveWorkers(): number { return this.workers.size; }
				+
				+  /**
				+   * False once the crash budget is exhausted (or after destroy). The ToolHandler
				+   * checks this and falls back to in-process dispatch — a broken worker platform
				+   * degrades to the pre-pool behavior instead of failing tool calls.
				+   */
				+  get healthy(): boolean {
				+    return !this.destroyed && this.totalCrashes < CRASH_BUDGET;
				+  }
				+
				+  /** Result handed to calls that are outstanding at, or arrive after, `destroy()`. */
				+  private static shutdownResult(): ToolResult {
				+    return { isError: true, content: [{ type: 'text', text: 'codegraph is shutting down; retry shortly.' }] };
				+  }
				+
				+  /** Start one worker (if below the cap) and wire its events to the pool. */
				+  private spawnOne(): void {
				+    if (this.destroyed || this.workers.size >= this.maxSize) return;
				+    let w: PoolWorker;
				+    try {
				+      w = this.createWorker();
				+    } catch {
				+      this.totalCrashes++; // counts toward the circuit breaker
				+      return;
				+    }
				+    this.workers.add(w);
				+    this.pendingWorkers.add(w);
				+    w.on('message', (m) => this.onMessage(w, (m ?? {}) as WorkerMessage));
				+    w.on('error', () => this.onWorkerGone(w));
				+    // ANY exit of a worker the pool still tracks is a death, whatever the code:
				+    // a worker that exits 0 can no longer answer, and leaving it in idle/inflight
				+    // would strand calls on it. Exits the pool caused itself are filtered by
				+    // onWorkerGone, because the worker is untracked before terminate() is called.
				+    w.on('exit', () => this.onWorkerGone(w));
				+  }
				+
				+  /** Handle a worker → pool message: the `ready` handshake or a call `result`. */
				+  private onMessage(w: PoolWorker, m: WorkerMessage): void {
				+    // Ignore late messages from workers already retired or destroyed — putting
				+    // them back in `idle` would route calls to a dead worker.
				+    if (!m || !this.workers.has(w)) return;
				+    if (m.type === 'ready') {
				+      if (m.ok === false) {
				+        // Hard open failure: this worker can only answer with errors, so retire
				+        // it (counted against the crash budget) rather than hand it calls. If
				+        // every start fails, the breaker trips and the caller serves in-process.
				+        this.onWorkerGone(w);
				+        return;
				+      }
				+      // A repeated 'ready' from an already-ready worker must not re-enter idle.
				+      if (!this.pendingWorkers.delete(w)) return;
				+      this.idle.push(w);
				+      this.drain();
				+      return;
				+    }
				+    if (m.type === 'result') {
				+      const job = this.inflight.get(w);
				+      // A result with nothing in flight is spurious; re-adding the worker to
				+      // `idle` would list it twice and let two calls be posted to it at once.
				+      if (!job) return;
				+      this.inflight.delete(w);
				+      this.idle.push(w);
				+      this.settle(job, m.result ?? busyGuidance(0));
				+      this.drain();
				+    }
				+  }
				+
				+  // A worker died (crash hook, OOM, segfault, unexpected exit, failed open).
				+  // Respawn a replacement and retry its in-flight job up to `maxRetries`; a job
				+  // that keeps crashing workers fails gracefully so it can't loop the pool forever.
				+  private onWorkerGone(w: PoolWorker): void {
				+    if (!this.workers.has(w)) return; // already handled (error+exit both fire)
				+    this.workers.delete(w);
				+    this.pendingWorkers.delete(w);
				+    this.idle = this.idle.filter((x) => x !== w);
				+    this.totalCrashes++;
				+    const job = this.inflight.get(w);
				+    this.inflight.delete(w);
				+    try { void w.terminate(); } catch { /* already gone */ }
				+    if (this.healthy) this.spawnOne(); // keep capacity
				+    if (job) {
				+      if (job.retries < this.maxRetries && this.healthy) {
				+        job.retries++;
				+        this.queue.unshift(job); // head of line — retry promptly
				+      } else {
				+        this.settle(job, { isError: true, content: [{ type: 'text', text: 'codegraph worker crashed; please retry the call.' }] });
				+      }
				+    }
				+    this.drain();
				+  }
				+
				+  /** Grow the pool if demand warrants it, then hand queued calls to idle workers. */
				+  private drain(): void {
				+    // Grow toward maxSize while queued work outstrips workers that are idle OR
				+    // already on their way up (pending) — so we never spawn the whole pool for a
				+    // single call whose eager worker just hasn't reported ready yet.
				+    while (
				+      this.queue.length > this.idle.length + this.pendingWorkers.size &&
				+      this.workers.size < this.maxSize &&
				+      this.pendingWorkers.size < MAX_CONCURRENT_SPAWN &&
				+      this.healthy
				+    ) {
				+      this.spawnOne();
				+    }
				+    // Breaker tripped and nothing left alive: no worker will ever pick these up,
				+    // so answer them now instead of letting each linger until its soft timeout.
				+    if (!this.healthy && this.workers.size === 0) {
				+      this.flushUnservable();
				+      return;
				+    }
				+    while (this.idle.length && this.queue.length) {
				+      const job = this.queue.shift()!;
				+      if (job.settled) continue; // skip jobs the backstop already answered
				+      const w = this.idle.pop()!;
				+      this.inflight.set(w, job);
				+      try {
				+        w.postMessage({ type: 'call', id: job.id, toolName: job.toolName, args: job.args });
				+      } catch {
				+        // A throwing postMessage must not escape into run()'s executor (run never
				+        // rejects). Treat it as the worker being gone: that retries or fails the
				+        // job and re-drains, so stop this pass here.
				+        this.onWorkerGone(w);
				+        return;
				+      }
				+    }
				+  }
				+
				+  /**
				+   * Answer every queued call with success-shaped retry guidance. Used only when
				+   * the pool is unhealthy with no live workers; the caller is expected to check
				+   * `healthy` and serve the retried call without the pool.
				+   */
				+  private flushUnservable(): void {
				+    const stranded = this.queue;
				+    this.queue = [];
				+    for (const job of stranded) {
				+      this.settle(job, {
				+        content: [{
				+          type: 'text',
				+          text:
				+            'CodeGraph\'s query workers are unavailable right now. This is NOT an error and the index is fine — ' +
				+            'retry this exact call; it will be served without the worker pool.',
				+        }],
				+      });
				+    }
				+  }
				+
				+  /** Resolve a job exactly once and cancel its backstop timer. */
				+  private settle(job: Job, result: ToolResult): void {
				+    if (job.settled) return; // already answered (by backstop or worker)
				+    job.settled = true;
				+    if (job.softTimer) clearTimeout(job.softTimer);
				+    job.resolve(result);
				+  }
				+
				+  /**
				+   * Run a read tool on the pool. Always resolves (never rejects).
				+   *
				+   * @param toolName Tool to run on a worker.
				+   * @param args     Tool arguments; posted to the worker as-is.
				+   * @returns The worker's result, or busy-guidance if no answer arrives within
				+   *          `softTimeoutMs`, or an `isError` result if the worker keeps
				+   *          crashing or the pool is shut down.
				+   */
				+  run(toolName: string, args: Record<string, unknown>): Promise<ToolResult> {
				+    return new Promise<ToolResult>((resolve) => {
				+      // A destroyed pool has no workers and never spawns more; answer at once
				+      // rather than parking the call until the soft timeout.
				+      if (this.destroyed) {
				+        resolve(QueryPool.shutdownResult());
				+        return;
				+      }
				+      const job: Job = {
				+        id: this.nextId++, toolName, args, resolve,
				+        retries: 0, settled: false, enqueuedAt: Date.now(),
				+      };
				+      // Don't let the caller wait past softTimeoutMs. The worker may still be
				+      // busy (we can't cancel synchronous CPU), but the CLIENT gets a prompt,
				+      // success-shaped "retry" instead of a hard timeout.
				+      job.softTimer = setTimeout(() => {
				+        if (!job.settled) this.settle(job, busyGuidance(Date.now() - job.enqueuedAt));
				+      }, this.softTimeoutMs);
				+      job.softTimer.unref?.(); // never keep the process alive just for the backstop
				+      this.queue.push(job);
				+      this.drain();
				+    });
				+  }
				+
				+  /**
				+   * Terminate all workers and resolve every outstanding call with an `isError`
				+   * "shutting down; retry shortly" result. Idempotent, and never rejects: a
				+   * worker whose `terminate()` throws or rejects is treated as already gone.
				+   */
				+  async destroy(): Promise<void> {
				+    if (this.destroyed) return;
				+    this.destroyed = true;
				+    // Untrack the workers BEFORE terminating them so the resulting 'exit'
				+    // events are ignored by onWorkerGone instead of being counted as crashes.
				+    const ws = [...this.workers];
				+    this.workers.clear();
				+    this.pendingWorkers.clear();
				+    this.idle = [];
				+    for (const job of [...this.inflight.values(), ...this.queue]) {
				+      this.settle(job, QueryPool.shutdownResult());
				+    }
				+    this.inflight.clear();
				+    this.queue = [];
				+    await Promise.all(ws.map(async (w) => {
				+      try {
				+        await w.terminate();
				+      } catch { /* already gone */ }
				+    }));
				+  }
				+}
			
--- a/src/mcp/query-worker.ts
+++ b/src/mcp/query-worker.ts
@@ -0,0 +1,103 @@
 
				+/**
			
 
				+ * Query worker thread — issue: concurrent MCP tool calls starve the daemon.
			
 
				+ *
			
 
				+ * The shared daemon serves every session on ONE event loop with synchronous
			
 
				+ * `node:sqlite`. `codegraph_explore` is CPU-heavy (FTS + RWR/personalized-
			
 
				+ * PageRank + impact + output building) stitched together by microtask `await`s,
			
 
				+ * so N concurrent explores keep the microtask queue continuously full and
			
 
				+ * starve the macrotask phases — timers AND socket I/O. The transport freezes:
			
 
				+ * no response flushes, no request is read, until the whole batch drains. With
			
 
				+ * ~10 subagents that routinely exceeds the MCP client's request timeout.
			
 
				+ *
			
 
				+ * This worker moves the heavy read-tool dispatch OFF the daemon's main loop.
			
 
				+ * Each worker owns its OWN read connection (node:sqlite WAL allows N concurrent
			
 
				+ * readers across connections — verified: a worker reader sees the main writer's
			
 
				+ * committed catch-up/watcher writes), so {@link QueryPool} runs N tool calls in
			
 
				+ * true parallel up to core count while the main loop stays free for the MCP
			
 
				+ * transport. The worker runs {@link ToolHandler.executeReadTool} — validation +
			
 
				+ * dispatch + error classification — and returns the raw {@link ToolResult}; the
			
 
				+ * MAIN thread keeps the catch-up gate, the watcher-state notices (staleness /
			
 
				+ * worktree), `codegraph_status`, and telemetry, none of which a watcher-less
			
 
				+ * read connection can answer.
			
 
				+ */
			
 
				+
			
 
				+import { parentPort, workerData } from 'worker_threads';
			
 
				+import type { ToolResult } from './tools';
			
 
				+
			
 
				+/** Shape of `workerData` supplied when the worker is spawned. */
				+interface WorkerInit {
				+  /** Project root whose read connection this worker opens at startup. */
				+  root: string;
				+}
			
 
				+
			
 
				+/** Request posted to the worker: run one tool and reply with its result. */
				+interface CallMessage {
				+  /** Discriminator; the worker ignores any message whose type is not `'call'`. */
				+  type: 'call';
				+  /** Caller-assigned id, echoed back unchanged in the reply. */
				+  id: number;
				+  /** Name of the read tool to execute. */
				+  toolName: string;
				+  /** Arguments forwarded to the tool. */
				+  args: Record<string, unknown>;
				+}
			
 
				+
			
 
				+// Mirror the engine's lazy-require of the heavy CodeGraph + tools chain. This
				+// module is only ever loaded as a Worker, so the require runs once on spawn.
				+// `require` (rather than a static import) keeps the load inside the startup
				+// try/catch below, so a failure to load is reported through the ready handshake.
				+const loadCodeGraph = (): typeof import('../index').default =>
				+  (require('../index') as typeof import('../index')).default;
				+// Same pattern for the ToolHandler class that performs the actual dispatch.
				+const loadToolHandler = (): typeof import('./tools').ToolHandler =>
				+  (require('./tools') as typeof import('./tools')).ToolHandler;
			
 
				+
			
 
				+// Worker entry point. Everything is gated on `parentPort`, so loading this
				+// module outside a worker thread does nothing.
				+if (parentPort) {
				+  const port = parentPort;
				+  const { root } = workerData as WorkerInit;
				+
				+  // Open the default project's READ connection once, at spawn. Other repos are
				+  // opened lazily on first cross-project (projectPath) call by the ToolHandler's
				+  // own per-handler cache. openSync does not start a watcher — workers are pure
				+  // readers; the single watcher/writer stays on the daemon's main thread.
				+  let handler: InstanceType<typeof import('./tools').ToolHandler> | null = null;
				+  let initError: string | null = null;
				+  try {
				+    const ToolHandlerCtor = loadToolHandler;
				+    const cg = loadCodeGraph().openSync(root);
				+    handler = new (ToolHandlerCtor())(cg);
				+  } catch (err) {
				+    initError = err instanceof Error ? err.message : String(err);
				+  }
				+
				+  /** Post one call's outcome back to the pool, tagged with the call's id. */
				+  const reply = (id: number, result: ToolResult): void => {
				+    port.postMessage({ type: 'result', id, result });
				+  };
				+
				+  /** Execute a single call and always answer it — with a result or an error result. */
				+  const serve = async (msg: CallMessage): Promise<void> => {
				+    // Test-only crash hook so the pool's worker-recovery path is exercisable
				+    // deterministically. Gated behind an env flag only the suite sets — inert in
				+    // normal operation (and `__test_crash__` isn't a real tool name anyway).
				+    const crashRequested =
				+      msg.toolName === '__test_crash__' &&
				+      process.env.CODEGRAPH_QUERY_WORKER_ALLOW_TEST_CRASH === '1';
				+    if (crashRequested) {
				+      process.exit(13);
				+    }
				+    if (!handler) {
				+      // Startup failed: there is nothing to dispatch to, so report why.
				+      reply(msg.id, errorResult(`codegraph worker could not open the project: ${initError}`));
				+      return;
				+    }
				+    try {
				+      // executeReadTool already classifies NotIndexed/PathRefusal/internal errors
				+      // into a ToolResult and never throws — the catch is belt-and-suspenders.
				+      reply(msg.id, await handler.executeReadTool(msg.toolName, msg.args));
				+    } catch (err) {
				+      reply(msg.id, errorResult(err instanceof Error ? err.message : String(err)));
				+    }
				+  };
				+
				+  // Tell the pool we're up. `ok:false` lets the pool count a hard open failure
				+  // against its crash budget (→ fall back to in-process) without hanging.
				+  port.postMessage({ type: 'ready', ok: initError === null, error: initError });
				+
				+  // Only 'call' messages are served; anything else is dropped silently.
				+  port.on('message', (msg: CallMessage) => {
				+    if (!msg || msg.type !== 'call') return;
				+    void serve(msg);
				+  });
				+}
			
 
				+
			
 
				+/** Wrap a message as an error-flagged tool result with a single text block. */
				+function errorResult(text: string): ToolResult {
				+  const failure: ToolResult = {
				+    isError: true,
				+    content: [{ type: 'text', text }],
				+  };
				+  return failure;
				+}
			
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -5,6 +5,7 @@
 
				  */
			
 
				 
			
 
				 import type CodeGraph from '../index';
			
 
				+import type { QueryPool } from './query-pool';
			
 
				 import { findNearestCodeGraphRoot } from '../directory';
			
 
				 // Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
			
 
				 // helper in engine.ts. ToolHandler must load to answer tools/list (static
			
@@ -710,9 +711,24 @@ export class ToolHandler {
 
				   // huge repo can't hang the first call (#905); cleared on first await so
			
 
				   // subsequent calls don't pay any cost.
			
 
				   private catchUpGate: Promise<void> | null = null;
			
 
				+  // Optional worker-thread pool for off-loop read-tool dispatch (daemon mode).
			
 
				+  // When set + healthy, the heavy read tools run on a worker so the daemon's
			
 
				+  // main loop stays free for the MCP transport under concurrent load. Null in
			
 
				+  // direct/in-process mode (one client, no concurrency to parallelize).
			
 
				+  private queryPool: QueryPool | null = null;
			
 
				 
			
 
				   constructor(private cg: CodeGraph | null) {}
			
 
				 
			
 
				+  /**
				+   * Engine-only hook that attaches the worker-thread query pool, or detaches it
				+   * when given `null`. While a healthy pool is attached, read tools are
				+   * dispatched to it; otherwise they run in-process. The workers each run
				+   * {@link executeReadTool} on a ToolHandler of their own, which never has a
				+   * pool attached, so there is no nested off-loading.
				+   *
				+   * @param pool The pool to route read tools through, or `null` for none.
				+   */
				+  setQueryPool(pool: QueryPool | null): void {
				+    this.queryPool = pool;
				+  }
			
 
				+
			
 
				   /**
			
 
				    * Update the default CodeGraph instance (e.g. after lazy initialization)
			
 
				    */
			
@@ -1244,36 +1260,27 @@ export class ToolHandler {
 
				         if (typeof check === 'object' && check !== undefined) return check;
			
 
				       }
			
 
				 
			
 
				-      // Read tools resolve through a single result variable so cross-cutting
			
 
				-      // notices — worktree-index mismatch (issue #155) and per-file
			
 
				-      // staleness (issue #403) — can be applied in one place. status embeds
			
 
				-      // its own verbose worktree warning but still flows through the
			
 
				-      // staleness wrapper so its pending-files section stays consistent
			
 
				-      // with what the read tools surface.
			
 
				-      let result: ToolResult;
			
 
				-      switch (toolName) {
			
 
				-        case 'codegraph_search':
			
 
				-          result = await this.handleSearch(args); break;
			
 
				-        case 'codegraph_callers':
			
 
				-          result = await this.handleCallers(args); break;
			
 
				-        case 'codegraph_callees':
			
 
				-          result = await this.handleCallees(args); break;
			
 
				-        case 'codegraph_impact':
			
 
				-          result = await this.handleImpact(args); break;
			
 
				-        case 'codegraph_explore':
			
 
				-          result = await this.handleExplore(args); break;
			
 
				-        case 'codegraph_node':
			
 
				-          result = await this.handleNode(args); break;
			
 
				-        case 'codegraph_status':
			
 
				-          // status embeds the pending-files list as a first-class section
			
 
				-          // (see handleStatus), so we skip the auto-banner wrapper here to
			
 
				-          // avoid duplicating the same info at the top of the response.
			
 
				-          return await this.handleStatus(args);
			
 
				-        case 'codegraph_files':
			
 
				-          result = await this.handleFiles(args); break;
			
 
				-        default:
			
 
				-          return this.errorResult(`Unknown tool: ${toolName}`);
			
 
				+      // codegraph_status reports watcher state (pending files, degraded mode,
			
 
				+      // worktree warning) and embeds its own sections — it must run on the MAIN
			
 
				+      // thread against the watched default instance, so it is NEVER off-loaded to
			
 
				+      // a worker (whose read connection has no watcher). It also skips the
			
 
				+      // auto-banner wrapper to avoid duplicating its own pending-files section.
			
 
				+      if (toolName === 'codegraph_status') {
			
 
				+        return await this.handleStatus(args);
			
 
				       }
			
 
				+
			
 
				+      // Read tools: off-load the CPU-heavy dispatch to the worker pool when one
			
 
				+      // is attached and healthy (daemon mode), so the daemon's single event loop
			
 
				+      // stays free for the MCP transport under concurrent load — otherwise N
			
 
				+      // concurrent explores serialize AND starve the transport until the whole
			
 
				+      // batch drains (clients then time out). With no pool (direct mode) or a
			
 
				+      // degraded one, dispatch runs in-process exactly as before. Either way the
			
 
				+      // result flows through the cross-cutting notices — worktree-index mismatch
			
 
				+      // (#155) and per-file staleness (#403) — which need the watched MAIN
			
 
				+      // instance and so are always applied here, never in the worker.
			
 
				+      const result = (this.queryPool && this.queryPool.healthy)
			
 
				+        ? await this.queryPool.run(toolName, args)
			
 
				+        : await this.executeReadTool(toolName, args);
			
 
				       const withWorktree = this.withWorktreeNotice(result, args.projectPath as string | undefined);
			
 
				       return this.withStalenessNotice(withWorktree, args.projectPath as string | undefined);
			
 
				     } catch (err) {
			
@@ -1295,6 +1302,56 @@ export class ToolHandler {
 
				     }
			
 
				   }
			
 
				 
			
 
				+  /**
				+   * Execute one read tool and return its raw {@link ToolResult}. Never throws:
				+   * failures are folded into the result so the SHAPE is the same whether the
				+   * call runs in-process or on a worker thread.
				+   *
				+   *   - NotIndexedError  → success-shaped text carrying the error's message
				+   *   - PathRefusalError → error result carrying the error's message
				+   *   - anything else    → internal-error result that advises one retry
				+   *
				+   * This is the worker thread's entry point (see {@link ./query-worker}) and the
				+   * in-process fallback for {@link execute}. It deliberately does NOT run the
				+   * catch-up gate or the staleness/worktree notices — those need the daemon's
				+   * watched main instance and stay on the main thread. Cross-cutting allowlist +
				+   * path validation already ran in {@link execute} before routing here.
				+   */
				+  async executeReadTool(toolName: string, args: Record<string, unknown>): Promise<ToolResult> {
				+    try {
				+      return await this.dispatchTool(toolName, args);
				+    } catch (err) {
				+      if (err instanceof NotIndexedError) return this.textResult(err.message);
				+      if (err instanceof PathRefusalError) return this.errorResult(err.message);
				+      const reason = err instanceof Error ? err.message : String(err);
				+      const advice =
				+        'This is an internal codegraph error — retry the call once; if it persists, ' +
				+        'continue without codegraph for this task.';
				+      return this.errorResult(`Tool execution failed: ${reason}. ` + advice);
				+    }
				+  }
			
 
				+
			
 
				+  /**
				+   * Route a read-tool name to its handler — nothing more: no gate, no notices,
				+   * no allowlist/validation (the caller owns those). `codegraph_status` is
				+   * handled on the main thread in {@link execute} and is deliberately absent
				+   * from the table. An unrecognised name yields an "Unknown tool" error result.
				+   * Handlers may throw NotIndexed/PathRefusal, which {@link executeReadTool}
				+   * classifies.
				+   */
				+  private async dispatchTool(toolName: string, args: Record<string, unknown>): Promise<ToolResult> {
				+    // Thunks, so only the selected handler is ever invoked.
				+    const routes = new Map<string, () => Promise<ToolResult> | ToolResult>([
				+      ['codegraph_search', () => this.handleSearch(args)],
				+      ['codegraph_callers', () => this.handleCallers(args)],
				+      ['codegraph_callees', () => this.handleCallees(args)],
				+      ['codegraph_impact', () => this.handleImpact(args)],
				+      ['codegraph_explore', () => this.handleExplore(args)],
				+      ['codegraph_node', () => this.handleNode(args)],
				+      ['codegraph_files', () => this.handleFiles(args)],
				+    ]);
				+    const route = routes.get(toolName);
				+    if (route === undefined) {
				+      return this.errorResult(`Unknown tool: ${toolName}`);
				+    }
				+    return await route();
				+  }
			
 
				+
			
 
				   /**
			
 
				    * Handle codegraph_search
			
 
				    */