mcp-daemon.test.ts 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. /**
  2. * Shared MCP daemon — issue #411.
  3. *
  4. * Validates the daemon architecture in `src/mcp/{daemon,proxy,session,index}.ts`
  5. * AFTER the review fixes:
  6. *
  7. * - The daemon is a *detached* background process; every `serve --mcp`
  8. * invocation is a thin proxy to it. Two invocations against one project
  9. * share ONE daemon.
  10. * - Concurrent launchers converge on a single daemon (the must-fix-1
  11. * lockfile-race: an empty-pidfile window used to let a racing candidate
  12. * delete the winner's lock → two daemons).
  13. * - Killing the launcher that spawned the daemon does NOT take the daemon
  14. * down — other attached clients keep working (the must-fix-2 detach: the
  15. * in-process daemon used to die with its launcher's process group and
  16. * orphan on host SIGKILL, regressing #277).
  17. * - A stale lockfile (dead pid) is cleared; `CODEGRAPH_NO_DAEMON=1` opts out;
  18. * the proxy refuses to attach across a version mismatch; the daemon
  19. * idle-times-out after the last client leaves (so a single session can't
  20. * leak a daemon forever).
  21. *
  22. * These tests intentionally spawn real `node dist/bin/codegraph.js` processes
  23. * over real sockets/pipes — the same surface a Claude Code / Cursor / Codex
  24. * install exercises. The daemon logs to `.codegraph/daemon.log` (it has no
  25. * client stderr of its own), so daemon-side assertions read that file.
  26. *
  27. * `realRoot` vs `tempDir`: processes are spawned with the (possibly symlinked)
  28. * `tempDir` as cwd/rootUri — on macOS `os.tmpdir()` lives under `/var`, a
  29. * symlink to `/private/var`, and a spawned child's `process.cwd()` is already
  30. * realpath'd. The daemon canonicalizes the root with `realpathSync`, so all
  31. * path assertions use `realRoot` (the canonical form). That this matches end to
  32. * end is itself the proof the canonicalization works.
  33. */
  34. import { afterEach, beforeEach, describe, expect, it } from 'vitest';
  35. import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
  36. import * as fs from 'fs';
  37. import * as os from 'os';
  38. import * as path from 'path';
  39. import { CodeGraph } from '../src';
  40. import { getDaemonSocketPath } from '../src/mcp/daemon-paths';
  41. const BIN = path.resolve(__dirname, '../dist/bin/codegraph.js');
  42. interface SpawnedServer {
  43. child: ChildProcessWithoutNullStreams;
  44. stdout: string[];
  45. stderr: string[];
  46. }
  47. function spawnServer(cwd: string, env: NodeJS.ProcessEnv = {}): SpawnedServer {
  48. const child = spawn(process.execPath, [BIN, 'serve', '--mcp'], {
  49. cwd,
  50. stdio: ['pipe', 'pipe', 'pipe'],
  51. // #618: the daemon-attach log line is now off by default; opt the test
  52. // harness into it (CODEGRAPH_MCP_LOG_ATTACH=1) so the attach assertions
  53. // below can still observe a successful attach. A per-test env still wins.
  54. env: { CODEGRAPH_MCP_LOG_ATTACH: '1', ...process.env, ...env },
  55. }) as ChildProcessWithoutNullStreams;
  56. // Swallow spawn/EPIPE errors so killing a child mid-write can't surface as an
  57. // unhandled error that crashes the vitest worker.
  58. child.on('error', () => { /* ignore */ });
  59. child.stdin.on('error', () => { /* ignore */ });
  60. const stdout: string[] = [];
  61. const stderr: string[] = [];
  62. let stdoutBuf = '';
  63. let stderrBuf = '';
  64. child.stdout.on('data', (chunk: Buffer) => {
  65. stdoutBuf += chunk.toString('utf8');
  66. let idx: number;
  67. while ((idx = stdoutBuf.indexOf('\n')) !== -1) {
  68. stdout.push(stdoutBuf.slice(0, idx));
  69. stdoutBuf = stdoutBuf.slice(idx + 1);
  70. }
  71. });
  72. child.stderr.on('data', (chunk: Buffer) => {
  73. stderrBuf += chunk.toString('utf8');
  74. let idx: number;
  75. while ((idx = stderrBuf.indexOf('\n')) !== -1) {
  76. stderr.push(stderrBuf.slice(0, idx));
  77. stderrBuf = stderrBuf.slice(idx + 1);
  78. }
  79. });
  80. return { child, stdout, stderr };
  81. }
  82. function sendMessage(child: ChildProcessWithoutNullStreams, msg: unknown): void {
  83. try { child.stdin.write(JSON.stringify(msg) + '\n'); } catch { /* child may be gone */ }
  84. }
  85. function sendInitialize(child: ChildProcessWithoutNullStreams, rootUri: string, id: number): void {
  86. sendMessage(child, {
  87. jsonrpc: '2.0',
  88. id,
  89. method: 'initialize',
  90. params: {
  91. protocolVersion: '2024-11-05',
  92. capabilities: {},
  93. clientInfo: { name: 'test', version: '0.0.0' },
  94. rootUri,
  95. },
  96. });
  97. }
  98. /** Find a JSON-RPC response with the given id (result OR error) on stdout. */
  99. function findResponse(stdout: string[], id: number): any | null {
  100. for (const line of stdout) {
  101. if (!line.trim()) continue;
  102. try {
  103. const parsed = JSON.parse(line);
  104. if (parsed && parsed.id === id && (parsed.result !== undefined || parsed.error !== undefined)) {
  105. return parsed;
  106. }
  107. } catch { /* not JSON */ }
  108. }
  109. return null;
  110. }
  111. function waitFor<T>(
  112. predicate: () => T | undefined | null | false,
  113. timeoutMs: number,
  114. pollMs = 25,
  115. ): Promise<T> {
  116. return new Promise((resolve, reject) => {
  117. const started = Date.now();
  118. const tick = () => {
  119. let v: T | undefined | null | false;
  120. try { v = predicate(); } catch (e) { return reject(e); }
  121. if (v) return resolve(v as T);
  122. if (Date.now() - started > timeoutMs) return reject(new Error(`Timed out after ${timeoutMs}ms`));
  123. setTimeout(tick, pollMs);
  124. };
  125. tick();
  126. });
  127. }
  128. function isAlive(pid: number): boolean {
  129. try { process.kill(pid, 0); return true; } catch { return false; }
  130. }
  131. function readLockPid(root: string): number | null {
  132. try {
  133. const raw = fs.readFileSync(path.join(root, '.codegraph', 'daemon.pid'), 'utf8');
  134. const info = JSON.parse(raw);
  135. return typeof info.pid === 'number' ? info.pid : null;
  136. } catch { return null; }
  137. }
  138. function readDaemonLog(root: string): string {
  139. try { return fs.readFileSync(path.join(root, '.codegraph', 'daemon.log'), 'utf8'); }
  140. catch { return ''; }
  141. }
  142. function countListeningLines(root: string): number {
  143. return readDaemonLog(root).split('\n').filter((l) => l.includes('[CodeGraph daemon] Listening on')).length;
  144. }
  145. function killTree(...procs: ChildProcessWithoutNullStreams[]): void {
  146. for (const p of procs) {
  147. if (!p.killed) { try { p.kill('SIGKILL'); } catch { /* gone */ } }
  148. }
  149. }
  150. async function waitProcessExit(pid: number, timeoutMs: number): Promise<boolean> {
  151. return waitFor(() => !isAlive(pid), timeoutMs).then(() => true).catch(() => false);
  152. }
  153. describe('Shared MCP daemon (issue #411)', () => {
  154. let tempDir: string; // the (possibly symlinked) path processes are spawned with
  155. let realRoot: string; // its canonical form — what the daemon keys paths on
  156. const servers: SpawnedServer[] = [];
  157. beforeEach(async () => {
  158. tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-mcp-daemon-'));
  159. const cg = await CodeGraph.init(tempDir);
  160. cg.close();
  161. realRoot = fs.realpathSync(tempDir);
  162. });
  163. afterEach(async () => {
  164. killTree(...servers.map((s) => s.child));
  165. // The daemon is detached (not a tracked child) — reap it explicitly via the
  166. // pid it recorded, so a test can't leak a background daemon. Guard against
  167. // our own pid: the version-mismatch test plants `pid: process.pid` in the
  168. // lockfile, and we must never SIGKILL the vitest worker.
  169. const daemonPid = readLockPid(realRoot);
  170. if (daemonPid && daemonPid !== process.pid && isAlive(daemonPid)) {
  171. try { process.kill(daemonPid, 'SIGKILL'); } catch { /* race */ }
  172. }
  173. await new Promise((r) => setTimeout(r, 50));
  174. servers.length = 0;
  175. fs.rmSync(tempDir, { recursive: true, force: true });
  176. });
  177. it('two invocations share ONE detached daemon; both attach as proxies', async () => {
  178. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
  179. const first = spawnServer(tempDir, env);
  180. servers.push(first);
  181. sendInitialize(first.child, `file://${tempDir}`, 1);
  182. const firstResp = await waitFor(() => findResponse(first.stdout, 1), 10000);
  183. expect(firstResp.result.serverInfo.name).toBe('codegraph');
  184. // The launcher is a PROXY (not the daemon itself) — that's the detach fix.
  185. await waitFor(() => first.stderr.some((l) => l.includes('Attached to shared daemon')), 8000);
  186. // A detached daemon came up and recorded itself.
  187. await waitFor(() => fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.pid')), 8000);
  188. await waitFor(() => countListeningLines(realRoot) >= 1, 8000);
  189. const daemonPid = readLockPid(realRoot);
  190. expect(daemonPid).toBeTruthy();
  191. expect(isAlive(daemonPid!)).toBe(true);
  192. // The socket exists at the path the code computes from the canonical root.
  193. // On Windows the daemon listens on a named pipe (\\.\pipe\...), which isn't
  194. // a filesystem entry — existsSync doesn't apply there, and the "Attached to
  195. // shared daemon" proof above already confirms the proxy reached it.
  196. if (process.platform !== 'win32') {
  197. expect(fs.existsSync(getDaemonSocketPath(realRoot))).toBe(true);
  198. }
  199. // Second invocation attaches as a proxy to the SAME daemon.
  200. const second = spawnServer(tempDir, env);
  201. servers.push(second);
  202. sendInitialize(second.child, `file://${tempDir}`, 2);
  203. const secondResp = await waitFor(() => findResponse(second.stdout, 2), 10000);
  204. expect(secondResp.result.serverInfo.name).toBe('codegraph');
  205. await waitFor(() => second.stderr.some((l) => l.includes('Attached to shared daemon')), 8000);
  206. // Exactly one daemon ever bound, and it's the same pid both attached to.
  207. expect(countListeningLines(realRoot)).toBe(1);
  208. expect(readLockPid(realRoot)).toBe(daemonPid);
  209. }, 40000);
  210. it('concurrent launchers converge on a single daemon (lockfile race — must-fix 1)', async () => {
  211. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
  212. // Fire three launchers as close to simultaneously as possible — this is the
  213. // race window where the old code could end up with two daemons.
  214. const procs = [spawnServer(tempDir, env), spawnServer(tempDir, env), spawnServer(tempDir, env)];
  215. procs.forEach((p, i) => { servers.push(p); sendInitialize(p.child, `file://${tempDir}`, i + 1); });
  216. // All three get a valid initialize response...
  217. for (let i = 0; i < procs.length; i++) {
  218. const resp = await waitFor(() => findResponse(procs[i].stdout, i + 1), 12000);
  219. expect(resp.result.serverInfo.name).toBe('codegraph');
  220. }
  221. // ...and all three attached as proxies (none fell back / wedged).
  222. for (const p of procs) {
  223. await waitFor(() => p.stderr.some((l) => l.includes('Attached to shared daemon')), 10000);
  224. }
  225. // The decisive assertion: exactly ONE daemon bound the socket. Losing
  226. // candidates log "already holds the lock; exiting" and never listen.
  227. expect(countListeningLines(realRoot)).toBe(1);
  228. const daemonPid = readLockPid(realRoot);
  229. expect(daemonPid).toBeTruthy();
  230. expect(isAlive(daemonPid!)).toBe(true);
  231. }, 45000);
  232. it('daemon survives the first client dying; a second client keeps working (must-fix 2 / #277)', async () => {
  233. // Idle high so the daemon doesn't reap mid-test; poll fast so proxy 1
  234. // notices its dead parent quickly.
  235. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '30000', CODEGRAPH_PPID_POLL_MS: '200' };
  236. const first = spawnServer(tempDir, env);
  237. servers.push(first);
  238. sendInitialize(first.child, `file://${tempDir}`, 1);
  239. await waitFor(() => findResponse(first.stdout, 1), 10000);
  240. await waitFor(() => (readLockPid(realRoot) ?? 0) > 0, 8000);
  241. const daemonPid = readLockPid(realRoot)!;
  242. expect(isAlive(daemonPid)).toBe(true);
  243. const second = spawnServer(tempDir, env);
  244. servers.push(second);
  245. sendInitialize(second.child, `file://${tempDir}`, 1);
  246. await waitFor(() => findResponse(second.stdout, 1), 10000);
  247. await waitFor(() => second.stderr.some((l) => l.includes('Attached to shared daemon')), 8000);
  248. // Kill the launcher that spawned the daemon. With the old in-process design
  249. // this would take the daemon (and thus the second client) down.
  250. killTree(first.child);
  251. // The daemon is detached — it must still be alive a beat later.
  252. await new Promise((r) => setTimeout(r, 1500));
  253. expect(isAlive(daemonPid)).toBe(true);
  254. // And the second client can still drive a real tool call through it.
  255. sendMessage(second.child, { jsonrpc: '2.0', id: 2, method: 'tools/list' });
  256. const toolsResp = await waitFor(() => findResponse(second.stdout, 2), 10000);
  257. expect(Array.isArray(toolsResp.result.tools)).toBe(true);
  258. expect(toolsResp.result.tools.length).toBeGreaterThan(0);
  259. }, 45000);
  260. it('CODEGRAPH_NO_DAEMON=1 keeps each process independent (no socket/pidfile)', async () => {
  261. const env = { CODEGRAPH_NO_DAEMON: '1' };
  262. const first = spawnServer(tempDir, env);
  263. servers.push(first);
  264. sendInitialize(first.child, `file://${tempDir}`, 1);
  265. await waitFor(() => findResponse(first.stdout, 1), 10000);
  266. // Direct mode — no daemon machinery touched.
  267. expect(first.stderr.some((l) => l.includes('Attached to shared daemon'))).toBe(false);
  268. expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.pid'))).toBe(false);
  269. expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.log'))).toBe(false);
  270. }, 20000);
  271. it('clears a stale (dead-pid) lockfile and a fresh daemon takes over', async () => {
  272. // Plant a lockfile pointing at a definitely-dead pid + the real socket path.
  273. fs.writeFileSync(
  274. path.join(realRoot, '.codegraph', 'daemon.pid'),
  275. JSON.stringify({
  276. pid: 999_999,
  277. version: '0.0.0-fake',
  278. socketPath: getDaemonSocketPath(realRoot),
  279. startedAt: Date.now() - 1000,
  280. }),
  281. );
  282. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '15000' };
  283. const server = spawnServer(tempDir, env);
  284. servers.push(server);
  285. sendInitialize(server.child, `file://${tempDir}`, 1);
  286. const resp = await waitFor(() => findResponse(server.stdout, 1), 10000).catch((e) => {
  287. throw new Error(`${(e as Error).message}\nstderr:\n${server.stderr.join('\n')}\ndaemon.log:\n${readDaemonLog(realRoot)}`);
  288. });
  289. expect(resp.result.serverInfo.name).toBe('codegraph');
  290. await waitFor(() => countListeningLines(realRoot) >= 1, 10000);
  291. // The pidfile now names a live daemon, not the planted-dead 999999.
  292. const livePid = readLockPid(realRoot);
  293. expect(livePid).not.toBe(999_999);
  294. expect(isAlive(livePid!)).toBe(true);
  295. }, 40000);
  296. it('proxy falls back to direct mode on a daemon version mismatch', async () => {
  297. const net = await import('net');
  298. const sockPath = getDaemonSocketPath(realRoot);
  299. // Plant a live-pid lockfile so the launcher treats the lock as held, and a
  300. // mini-server that answers with a mismatched-version hello.
  301. fs.writeFileSync(
  302. path.join(realRoot, '.codegraph', 'daemon.pid'),
  303. JSON.stringify({ pid: process.pid, version: '0.0.0-mismatch', socketPath: sockPath, startedAt: Date.now() }),
  304. );
  305. const miniServer = net.createServer((sock) => {
  306. sock.write(JSON.stringify({ codegraph: '0.0.0-mismatch', pid: 1, socketPath: sockPath, protocol: 1 }) + '\n');
  307. });
  308. await new Promise<void>((resolve) => miniServer.listen(sockPath, () => resolve()));
  309. try {
  310. const server = spawnServer(tempDir);
  311. servers.push(server);
  312. sendInitialize(server.child, `file://${tempDir}`, 1);
  313. // Despite the mismatched daemon, the client still gets an initialize
  314. // response — the proxy answers the handshake locally and, refusing to
  315. // attach across the version mismatch, serves the session in-process.
  316. const resp = await waitFor(() => findResponse(server.stdout, 1), 10000);
  317. expect(resp.result.serverInfo.name).toBe('codegraph');
  318. await waitFor(
  319. () => server.stderr.some((l) => l.includes('serving this session in-process')),
  320. 6000,
  321. );
  322. } finally {
  323. await new Promise<void>((resolve) => miniServer.close(() => resolve()));
  324. }
  325. }, 30000);
  326. // The over-the-wire client-hello → record → sweep path is covered by the
  327. // deterministic `Daemon.reapDeadClients` unit test in daemon-client-liveness
  328. // (a raw-socket variant here was flaky under heavy parallel load), plus the
  329. // client-hello round-trip exercised by every test above (the real proxy now
  330. // sends it). What stays here is the lifecycle behavior that needs real procs.
  331. it('exits on the inactivity backstop even while a client stays connected (#692)', async () => {
  332. // Backstop short, idle timeout long: with a client connected the idle timer
  333. // never arms, so only the inactivity backstop can take the daemon down.
  334. const env = { CODEGRAPH_DAEMON_MAX_IDLE_MS: '1500', CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '60000' };
  335. const server = spawnServer(tempDir, env);
  336. servers.push(server);
  337. sendInitialize(server.child, `file://${tempDir}`, 1);
  338. await waitFor(() => findResponse(server.stdout, 1), 10000);
  339. await waitFor(() => (readLockPid(realRoot) ?? 0) > 0, 8000);
  340. const daemonPid = readLockPid(realRoot)!;
  341. expect(isAlive(daemonPid)).toBe(true);
  342. // Send nothing further — the client stays connected but idle. The backstop
  343. // should fire and the daemon should exit and clean up its lockfile.
  344. expect(await waitProcessExit(daemonPid, 12000)).toBe(true);
  345. expect(readDaemonLog(realRoot)).toContain('inactivity backstop');
  346. expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.pid'))).toBe(false);
  347. }, 30000);
  348. it('daemon idle-times-out after the last client disconnects', async () => {
  349. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '800', CODEGRAPH_PPID_POLL_MS: '200' };
  350. const server = spawnServer(tempDir, env);
  351. servers.push(server);
  352. sendInitialize(server.child, `file://${tempDir}`, 1);
  353. await waitFor(() => findResponse(server.stdout, 1), 10000);
  354. await waitFor(() => (readLockPid(realRoot) ?? 0) > 0, 8000);
  355. const daemonPid = readLockPid(realRoot)!;
  356. // Close the only client's stdin → proxy exits → daemon refcount hits 0 →
  357. // idle timer fires → daemon exits and cleans up its lockfile.
  358. server.child.stdin.end();
  359. expect(await waitProcessExit(daemonPid, 10000)).toBe(true);
  360. expect(fs.existsSync(path.join(realRoot, '.codegraph', 'daemon.pid'))).toBe(false);
  361. }, 30000);
  362. it('proxy survives the daemon dying mid-session and keeps serving (#662)', async () => {
  363. // The #662 scenario: an MCP host SIGTERM's the shared daemon while a session
  364. // is live. The proxy must NOT exit (losing CodeGraph for that session) — it
  365. // falls back to an in-process engine and keeps answering.
  366. const env = { CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS: '30000', CODEGRAPH_PPID_POLL_MS: '5000' };
  367. const server = spawnServer(tempDir, env);
  368. servers.push(server);
  369. sendInitialize(server.child, `file://${tempDir}`, 1);
  370. await waitFor(() => findResponse(server.stdout, 1), 10000);
  371. await waitFor(() => server.stderr.some((l) => l.includes('Attached to shared daemon')), 8000);
  372. await waitFor(() => (readLockPid(realRoot) ?? 0) > 0, 8000);
  373. const daemonPid = readLockPid(realRoot)!;
  374. // A warm call goes through the daemon.
  375. sendMessage(server.child, { jsonrpc: '2.0', id: 2, method: 'tools/call', params: { name: 'codegraph_status', arguments: {} } });
  376. await waitFor(() => findResponse(server.stdout, 2), 10000);
  377. // Kill the daemon out from under the live proxy.
  378. process.kill(daemonPid, 'SIGTERM');
  379. expect(await waitProcessExit(daemonPid, 8000)).toBe(true);
  380. // The proxy must still be alive and still answer — served in-process now.
  381. expect(isAlive(server.child.pid!)).toBe(true);
  382. await waitFor(() => server.stderr.some((l) => l.includes('serving this session in-process')), 8000);
  383. sendMessage(server.child, { jsonrpc: '2.0', id: 3, method: 'tools/call', params: { name: 'codegraph_status', arguments: {} } });
  384. const resp = await waitFor(() => findResponse(server.stdout, 3), 15000);
  385. expect(resp.result !== undefined || resp.error !== undefined).toBe(true);
  386. expect(isAlive(server.child.pid!)).toBe(true);
  387. }, 45000);
  388. });