mcp-daemon.test.ts 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. /**
  2. * Shared MCP daemon — issue #411.
  3. *
  4. * Validates the daemon architecture in `src/mcp/{daemon,proxy,session,index}.ts`
  5. * AFTER the review fixes:
  6. *
  7. * - The daemon is a *detached* background process; every `serve --mcp`
  8. * invocation is a thin proxy to it. Two invocations against one project
  9. * share ONE daemon.
  10. * - Concurrent launchers converge on a single daemon (the must-fix-1
  11. * lockfile-race: an empty-pidfile window used to let a racing candidate
  12. * delete the winner's lock → two daemons).
  13. * - Killing the launcher that spawned the daemon does NOT take the daemon
  14. * down — other attached clients keep working (the must-fix-2 detach: the
  15. * in-process daemon used to die with its launcher's process group and
  16. * orphan on host SIGKILL, regressing #277).
  17. * - A stale lockfile (dead pid) is cleared; `CODEGRAPH_NO_DAEMON=1` opts out;
  18. * the proxy refuses to attach across a version mismatch; the daemon
  19. * idle-times-out after the last client leaves (so a single session can't
  20. * leak a daemon forever).
  21. *
  22. * These tests intentionally spawn real `node dist/bin/codegraph.js` processes
  23. * over real sockets/pipes — the same surface a Claude Code / Cursor / Codex
  24. * install exercises. The daemon logs to `.codegraph/daemon.log` (it has no
  25. * client stderr of its own), so daemon-side assertions read that file.
  26. *
  27. * `realRoot` vs `tempDir`: processes are spawned with the (possibly symlinked)
  28. * `tempDir` as cwd/rootUri — on macOS `os.tmpdir()` lives under `/var`, a
  29. * symlink to `/private/var`, and a spawned child's `process.cwd()` is already
  30. * realpath'd. The daemon canonicalizes the root with `realpathSync`, so all
  31. * path assertions use `realRoot` (the canonical form). That this matches end to
  32. * end is itself the proof the canonicalization works.
  33. */
  34. import { afterEach, beforeEach, describe, expect, it } from 'vitest';
  35. import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
  36. import * as fs from 'fs';
  37. import * as os from 'os';
  38. import * as path from 'path';
  39. import { CodeGraph } from '../src';
  40. import { getDaemonSocketPath } from '../src/mcp/daemon-paths';
/** Absolute path of the built CLI entry point, resolved relative to this test file. */
const BIN = path.resolve(__dirname, '../dist/bin/codegraph.js');

/** A child process started by `spawnServer`, bundled with the output captured from it. */
interface SpawnedServer {
  /** Handle to the spawned process; stdin, stdout and stderr are all pipes. */
  child: ChildProcessWithoutNullStreams;
  /** Complete stdout lines received so far, in arrival order, newline stripped. */
  stdout: string[];
  /** Complete stderr lines received so far, in arrival order, newline stripped. */
  stderr: string[];
}
  47. function spawnServer(cwd: string, env: NodeJS.ProcessEnv = {}): SpawnedServer {
  48. const child = spawn(process.execPath, [BIN, 'serve', '--mcp'], {
  49. cwd,
  50. stdio: ['pipe', 'pipe', 'pipe'],
  51. env: { ...process.env, ...env },
  52. }) as ChildProcessWithoutNullStreams;
  53. // Swallow spawn/EPIPE errors so killing a child mid-write can't surface as an
  54. // unhandled error that crashes the vitest worker.
  55. child.on('error', () => { /* ignore */ });
  56. child.stdin.on('error', () => { /* ignore */ });
  57. const stdout: string[] = [];
  58. const stderr: string[] = [];
  59. let stdoutBuf = '';
  60. let stderrBuf = '';
  61. child.stdout.on('data', (chunk: Buffer) => {
  62. stdoutBuf += chunk.toString('utf8');
  63. let idx: number;
  64. while ((idx = stdoutBuf.indexOf('\n')) !== -1) {
  65. stdout.push(stdoutBuf.slice(0, idx));
  66. stdoutBuf = stdoutBuf.slice(idx + 1);
  67. }
  68. });
  69. child.stderr.on('data', (chunk: Buffer) => {
  70. stderrBuf += chunk.toString('utf8');
  71. let idx: number;
  72. while ((idx = stderrBuf.indexOf('\n')) !== -1) {
  73. stderr.push(stderrBuf.slice(0, idx));
  74. stderrBuf = stderrBuf.slice(idx + 1);
  75. }
  76. });
  77. return { child, stdout, stderr };
  78. }
  79. function sendMessage(child: ChildProcessWithoutNullStreams, msg: unknown): void {
  80. try { child.stdin.write(JSON.stringify(msg) + '\n'); } catch { /* child may be gone */ }
  81. }
  82. function sendInitialize(child: ChildProcessWithoutNullStreams, rootUri: string, id: number): void {
  83. sendMessage(child, {
  84. jsonrpc: '2.0',
  85. id,
  86. method: 'initialize',
  87. params: {
  88. protocolVersion: '2024-11-05',
  89. capabilities: {},
  90. clientInfo: { name: 'test', version: '0.0.0' },
  91. rootUri,
  92. },
  93. });
  94. }
  95. /** Find a JSON-RPC response with the given id (result OR error) on stdout. */
  96. function findResponse(stdout: string[], id: number): any | null {
  97. for (const line of stdout) {
  98. if (!line.trim()) continue;
  99. try {
  100. const parsed = JSON.parse(line);
  101. if (parsed && parsed.id === id && (parsed.result !== undefined || parsed.error !== undefined)) {
  102. return parsed;
  103. }
  104. } catch { /* not JSON */ }
  105. }
  106. return null;
  107. }
  108. function waitFor<T>(
  109. predicate: () => T | undefined | null | false,
  110. timeoutMs: number,
  111. pollMs = 25,
  112. ): Promise<T> {
  113. return new Promise((resolve, reject) => {
  114. const started = Date.now();
  115. const tick = () => {
  116. let v: T | undefined | null | false;
  117. try { v = predicate(); } catch (e) { return reject(e); }
  118. if (v) return resolve(v as T);
  119. if (Date.now() - started > timeoutMs) return reject(new Error(`Timed out after ${timeoutMs}ms`));
  120. setTimeout(tick, pollMs);
  121. };
  122. tick();
  123. });
  124. }
  125. function isAlive(pid: number): boolean {
  126. try { process.kill(pid, 0); return true; } catch { return false; }
  127. }
  128. function readLockPid(root: string): number | null {
  129. try {
  130. const raw = fs.readFileSync(path.join(root, '.codegraph', 'daemon.pid'), 'utf8');
  131. const info = JSON.parse(raw);
  132. return typeof info.pid === 'number' ? info.pid : null;
  133. } catch { return null; }
  134. }
  135. function readDaemonLog(root: string): string {
  136. try { return fs.readFileSync(path.join(root, '.codegraph', 'daemon.log'), 'utf8'); }
  137. catch { return ''; }
  138. }
  139. function countListeningLines(root: string): number {
  140. return readDaemonLog(root).split('\n').filter((l) => l.includes('[CodeGraph daemon] Listening on')).length;
  141. }
  142. function killTree(...procs: ChildProcessWithoutNullStreams[]): void {
  143. for (const p of procs) {
  144. if (!p.killed) { try { p.kill('SIGKILL'); } catch { /* gone */ } }
  145. }
  146. }
  147. async function waitProcessExit(pid: number, timeoutMs: number): Promise<boolean> {
  148. return waitFor(() => !isAlive(pid), timeoutMs).then(() => true).catch(() => false);
  149. }
  150. describe('Shared MCP daemon (issue #411)', () => {
  151. let tempDir: string; // the (possibly symlinked) path processes are spawned with
  152. let realRoot: string; // its canonical form — what the daemon keys paths on
  153. const servers: SpawnedServer[] = [];
  // Every test gets its own freshly initialised project directory.
  beforeEach(async () => {
    const prefix = path.join(os.tmpdir(), 'codegraph-mcp-daemon-');
    tempDir = fs.mkdtempSync(prefix);

    // Initialise the project, then release the handle straight away. The tests
    // drive it only through spawned processes. (Presumably this also creates
    // the `.codegraph` directory that later tests write into. Confirm against
    // `CodeGraph.init`.)
    const graph = await CodeGraph.init(tempDir);
    graph.close();

    // Canonical form of the root, used for every path assertion.
    realRoot = fs.realpathSync(tempDir);
  });
  // Tear down everything a test started: tracked launchers, the detached
  // daemon, and the temporary project directory.
  afterEach(async () => {
    killTree(...servers.map((s) => s.child));

    // The daemon is detached (not a tracked child), so reap it explicitly via
    // the pid it recorded; otherwise a test could leak a background daemon.
    // Guard against our own pid: the version-mismatch test plants
    // `pid: process.pid` in the lockfile, and we must never SIGKILL the vitest
    // worker.
    const daemonPid = readLockPid(realRoot);
    if (daemonPid && daemonPid !== process.pid && isAlive(daemonPid)) {
      try { process.kill(daemonPid, 'SIGKILL'); } catch { /* race */ }
    }

    await new Promise((r) => setTimeout(r, 50));
    servers.length = 0;

    // Processes that were just SIGKILLed can still be releasing files when the
    // removal starts. Retry the transient ENOTEMPTY/EBUSY/EPERM failures
    // instead of failing a test that otherwise passed.
    fs.rmSync(tempDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
  });
  it('two invocations share ONE detached daemon; both attach as proxies', async () => {
    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
    const rootUri = `file://${tempDir}`;
    const pidFile = path.join(realRoot, '.codegraph', 'daemon.pid');
    const attached = (s: SpawnedServer): boolean =>
      s.stderr.some((line) => line.includes('Attached to shared daemon'));

    // --- first launcher ---------------------------------------------------
    const launcherA = spawnServer(tempDir, env);
    servers.push(launcherA);
    sendInitialize(launcherA.child, rootUri, 1);
    const replyA = await waitFor(() => findResponse(launcherA.stdout, 1), 10000);
    expect(replyA.result.serverInfo.name).toBe('codegraph');

    // It must be a proxy rather than the daemon itself (the detach fix).
    await waitFor(() => attached(launcherA), 8000);

    // A detached daemon is up and has recorded itself.
    await waitFor(() => fs.existsSync(pidFile), 8000);
    await waitFor(() => countListeningLines(realRoot) >= 1, 8000);
    const daemonPid = readLockPid(realRoot);
    expect(daemonPid).toBeTruthy();
    expect(isAlive(daemonPid!)).toBe(true);

    // The socket lives where the code computes it from the canonical root.
    // Windows uses a named pipe (\\.\pipe\...), which is not a filesystem
    // entry, so existsSync is meaningless there. The "Attached to shared
    // daemon" line already proves the proxy reached it.
    if (process.platform !== 'win32') {
      const socketPath = getDaemonSocketPath(realRoot);
      expect(fs.existsSync(socketPath)).toBe(true);
    }

    // --- second launcher: must reuse the SAME daemon ------------------------
    const launcherB = spawnServer(tempDir, env);
    servers.push(launcherB);
    sendInitialize(launcherB.child, rootUri, 2);
    const replyB = await waitFor(() => findResponse(launcherB.stdout, 2), 10000);
    expect(replyB.result.serverInfo.name).toBe('codegraph');
    await waitFor(() => attached(launcherB), 8000);

    // Only one daemon ever bound, and the lockfile still names the same pid.
    expect(countListeningLines(realRoot)).toBe(1);
    expect(readLockPid(realRoot)).toBe(daemonPid);
  }, 40000);
  it('concurrent launchers converge on a single daemon (lockfile race — must-fix 1)', async () => {
    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
    const rootUri = `file://${tempDir}`;

    // Start three launchers back to back, as close to simultaneously as
    // possible. This is the window in which the old code could produce two
    // daemons.
    const launchers: SpawnedServer[] = Array.from({ length: 3 }, () => spawnServer(tempDir, env));
    for (const [index, launcher] of launchers.entries()) {
      servers.push(launcher);
      sendInitialize(launcher.child, rootUri, index + 1);
    }

    // Each launcher answers its own initialize request...
    for (const [index, launcher] of launchers.entries()) {
      const reply = await waitFor(() => findResponse(launcher.stdout, index + 1), 12000);
      expect(reply.result.serverInfo.name).toBe('codegraph');
    }

    // ...and each one attached as a proxy (none fell back or wedged).
    for (const launcher of launchers) {
      await waitFor(
        () => launcher.stderr.some((line) => line.includes('Attached to shared daemon')),
        10000,
      );
    }

    // The decisive check: the socket was bound exactly once. Losing candidates
    // log "already holds the lock; exiting" and never listen.
    expect(countListeningLines(realRoot)).toBe(1);
    const daemonPid = readLockPid(realRoot);
    expect(daemonPid).toBeTruthy();
    expect(isAlive(daemonPid!)).toBe(true);
  }, 45000);
  it('daemon survives the first client dying; a second client keeps working (must-fix 2 / #277)', async () => {
    // Long idle timeout so the daemon is not reaped mid-test; short parent-poll
    // interval so the first proxy notices its dead parent quickly.
    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '30000', CODEGRAPH_PPID_POLL_MS: '200' };
    const rootUri = `file://${tempDir}`;
    const lockHasPid = (): boolean => {
      const pid = readLockPid(realRoot);
      return pid !== null && pid > 0;
    };

    // The launcher whose start brings the daemon up.
    const spawner = spawnServer(tempDir, env);
    servers.push(spawner);
    sendInitialize(spawner.child, rootUri, 1);
    await waitFor(() => findResponse(spawner.stdout, 1), 10000);
    await waitFor(lockHasPid, 8000);
    const daemonPid = readLockPid(realRoot)!;
    expect(isAlive(daemonPid)).toBe(true);

    // A second client attached to the same daemon.
    const survivor = spawnServer(tempDir, env);
    servers.push(survivor);
    sendInitialize(survivor.child, rootUri, 1);
    await waitFor(() => findResponse(survivor.stdout, 1), 10000);
    await waitFor(
      () => survivor.stderr.some((line) => line.includes('Attached to shared daemon')),
      8000,
    );

    // Kill the launcher that spawned the daemon. Under the old in-process
    // design this took the daemon, and with it the second client, down.
    killTree(spawner.child);

    // The daemon is detached, so it must still be running a beat later.
    await new Promise<void>((done) => { setTimeout(done, 1500); });
    expect(isAlive(daemonPid)).toBe(true);

    // The surviving client can still push a real request through it.
    sendMessage(survivor.child, { jsonrpc: '2.0', id: 2, method: 'tools/list' });
    const listing = await waitFor(() => findResponse(survivor.stdout, 2), 10000);
    expect(Array.isArray(listing.result.tools)).toBe(true);
    expect(listing.result.tools.length).toBeGreaterThan(0);
  }, 45000);
  it('CODEGRAPH_NO_DAEMON=1 keeps each process independent (no socket/pidfile)', async () => {
    const env = { CODEGRAPH_NO_DAEMON: '1' };
    const first = spawnServer(tempDir, env);
    servers.push(first);
    sendInitialize(first.child, `file://${tempDir}`, 1);
    await waitFor(() => findResponse(first.stdout, 1), 10000);

    // Direct mode: no daemon machinery touched.
    expect(first.stderr.some((l) => l.includes('Attached to shared daemon'))).toBe(false);
    expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.pid'))).toBe(false);
    expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.log'))).toBe(false);

    // The title also promises "no socket", so assert it. As in the shared-daemon
    // test, this only makes sense off Windows, where the endpoint is a named
    // pipe rather than a filesystem entry.
    if (process.platform !== 'win32') {
      expect(fs.existsSync(getDaemonSocketPath(realRoot))).toBe(false);
    }
  }, 20000);
  it('clears a stale (dead-pid) lockfile and a fresh daemon takes over', async () => {
    // Pick a pid that is dead right now. A hard-coded 999999 is only "dead" by
    // luck: hosts with a raised pid_max can legitimately have that pid running,
    // which would make the lock look live. Probe upward from the same starting
    // value until a pid is not alive.
    let deadPid = 999_999;
    while (isAlive(deadPid)) deadPid += 1;

    // Plant a lockfile pointing at that dead pid + the real socket path.
    fs.writeFileSync(
      path.join(realRoot, '.codegraph', 'daemon.pid'),
      JSON.stringify({
        pid: deadPid,
        version: '0.0.0-fake',
        socketPath: getDaemonSocketPath(realRoot),
        startedAt: Date.now() - 1000,
      }),
    );

    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
    const server = spawnServer(tempDir, env);
    servers.push(server);
    sendInitialize(server.child, `file://${tempDir}`, 1);

    // On timeout, attach the proxy's stderr and the daemon log to the error so
    // a failure is diagnosable from the test output alone.
    const resp = await waitFor(() => findResponse(server.stdout, 1), 10000).catch((e) => {
      throw new Error(`${(e as Error).message}\nstderr:\n${server.stderr.join('\n')}\ndaemon.log:\n${readDaemonLog(realRoot)}`);
    });
    expect(resp.result.serverInfo.name).toBe('codegraph');
    await waitFor(() => countListeningLines(realRoot) >= 1, 10000);

    // The pidfile now names a live daemon, not the planted dead pid.
    const livePid = readLockPid(realRoot);
    expect(livePid).not.toBe(deadPid);
    expect(isAlive(livePid!)).toBe(true);
  }, 40000);
  it('proxy falls back to direct mode on a daemon version mismatch', async () => {
    const net = await import('net');
    const sockPath = getDaemonSocketPath(realRoot);

    // Plant a live-pid lockfile so the launcher treats the lock as held, and a
    // mini-server that answers with a mismatched-version hello.
    fs.writeFileSync(
      path.join(realRoot, '.codegraph', 'daemon.pid'),
      JSON.stringify({ pid: process.pid, version: '0.0.0-mismatch', socketPath: sockPath, startedAt: Date.now() }),
    );

    // Track accepted connections so teardown can force them shut.
    // `server.close()` only completes once every connection has ended, so a
    // peer that keeps its socket open would otherwise hang the `finally` block.
    const accepted = new Set<import('net').Socket>();
    const miniServer = net.createServer((sock) => {
      accepted.add(sock);
      sock.on('close', () => { accepted.delete(sock); });
      // The proxy may drop the connection abruptly after seeing the mismatch.
      // Without a listener, that reset would be an unhandled 'error' event and
      // crash the vitest worker.
      sock.on('error', () => { /* ignore */ });
      sock.write(JSON.stringify({ codegraph: '0.0.0-mismatch', pid: 1, socketPath: sockPath, protocol: 1 }) + '\n');
    });

    // Reject on a failed bind so the test fails fast instead of waiting out its
    // timeout.
    await new Promise<void>((resolve, reject) => {
      miniServer.once('error', reject);
      miniServer.listen(sockPath, () => resolve());
    });

    try {
      const server = spawnServer(tempDir);
      servers.push(server);
      sendInitialize(server.child, `file://${tempDir}`, 1);

      // Despite the mismatched daemon, the client still gets an initialize
      // response: the proxy answers the handshake locally and, refusing to
      // attach across the version mismatch, serves the session in-process.
      const resp = await waitFor(() => findResponse(server.stdout, 1), 10000);
      expect(resp.result.serverInfo.name).toBe('codegraph');
      await waitFor(
        () => server.stderr.some((l) => l.includes('serving this session in-process')),
        6000,
      );
    } finally {
      for (const sock of accepted) sock.destroy();
      await new Promise<void>((resolve) => miniServer.close(() => resolve()));
    }
  }, 30000);
  323. // The over-the-wire client-hello → record → sweep path is covered by the
  324. // deterministic `Daemon.reapDeadClients` unit test in daemon-client-liveness
  325. // (a raw-socket variant here was flaky under heavy parallel load), plus the
  326. // client-hello round-trip exercised by every test above (the real proxy now
  327. // sends it). What stays here is the lifecycle behavior that needs real procs.
  it('exits on the inactivity backstop even while a client stays connected (#692)', async () => {
    // Short backstop, long idle timeout. With a client connected the idle timer
    // never arms, so the inactivity backstop is the only thing that can stop
    // the daemon.
    const env = { CODEGRAPH_DAEMON_MAX_IDLE_MS: '1500', CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '60000' };
    const pidFile = path.join(realRoot, '.codegraph', 'daemon.pid');

    const client = spawnServer(tempDir, env);
    servers.push(client);
    sendInitialize(client.child, `file://${tempDir}`, 1);
    await waitFor(() => findResponse(client.stdout, 1), 10000);
    await waitFor(() => {
      const pid = readLockPid(realRoot);
      return pid !== null && pid > 0;
    }, 8000);
    const daemonPid = readLockPid(realRoot)!;
    expect(isAlive(daemonPid)).toBe(true);

    // Nothing more is sent: the client stays connected but idle. The backstop
    // should fire, the daemon should exit, and its lockfile should be gone.
    const exited = await waitProcessExit(daemonPid, 12000);
    expect(exited).toBe(true);
    expect(readDaemonLog(realRoot)).toContain('inactivity backstop');
    expect(fs.existsSync(pidFile)).toBe(false);
  }, 30000);
  it('daemon idle-times-out after the last client disconnects', async () => {
    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '800', CODEGRAPH_PPID_POLL_MS: '200' };
    const pidFile = path.join(realRoot, '.codegraph', 'daemon.pid');

    const onlyClient = spawnServer(tempDir, env);
    servers.push(onlyClient);
    sendInitialize(onlyClient.child, `file://${tempDir}`, 1);
    await waitFor(() => findResponse(onlyClient.stdout, 1), 10000);
    await waitFor(() => {
      const pid = readLockPid(realRoot);
      return pid !== null && pid > 0;
    }, 8000);
    const daemonPid = readLockPid(realRoot)!;

    // Closing the sole client's stdin starts the chain: the proxy exits, the
    // daemon's refcount drops to 0, the idle timer fires, and the daemon exits
    // and removes its lockfile.
    onlyClient.child.stdin.end();
    const exited = await waitProcessExit(daemonPid, 10000);
    expect(exited).toBe(true);
    expect(fs.existsSync(pidFile)).toBe(false);
  }, 30000);
  it('proxy survives the daemon dying mid-session and keeps serving (#662)', async () => {
    // Scenario from #662: an MCP host SIGTERMs the shared daemon during a live
    // session. The proxy must not exit (that would lose CodeGraph for the
    // session); it falls back to an in-process engine and keeps answering.
    const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '30000', CODEGRAPH_PPID_POLL_MS: '5000' };
    const statusCall = (id: number) => ({
      jsonrpc: '2.0',
      id,
      method: 'tools/call',
      params: { name: 'codegraph_status', arguments: {} },
    });
    const sawLine = (s: SpawnedServer, needle: string): boolean =>
      s.stderr.some((line) => line.includes(needle));

    const proxy = spawnServer(tempDir, env);
    servers.push(proxy);
    sendInitialize(proxy.child, `file://${tempDir}`, 1);
    await waitFor(() => findResponse(proxy.stdout, 1), 10000);
    await waitFor(() => sawLine(proxy, 'Attached to shared daemon'), 8000);
    await waitFor(() => {
      const pid = readLockPid(realRoot);
      return pid !== null && pid > 0;
    }, 8000);
    const daemonPid = readLockPid(realRoot)!;

    // One warm call while the daemon is still up.
    sendMessage(proxy.child, statusCall(2));
    await waitFor(() => findResponse(proxy.stdout, 2), 10000);

    // Take the daemon away from the live proxy.
    process.kill(daemonPid, 'SIGTERM');
    const daemonGone = await waitProcessExit(daemonPid, 8000);
    expect(daemonGone).toBe(true);

    // The proxy is still running and announces the in-process fallback...
    expect(isAlive(proxy.child.pid!)).toBe(true);
    await waitFor(() => sawLine(proxy, 'serving this session in-process'), 8000);

    // ...and a fresh call still gets a response (result or error).
    sendMessage(proxy.child, statusCall(3));
    const reply = await waitFor(() => findResponse(proxy.stdout, 3), 15000);
    expect(reply.result === undefined && reply.error === undefined).toBe(false);
    expect(isAlive(proxy.child.pid!)).toBe(true);
  }, 45000);
  385. });