daemon-socket-fallback.test.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. /**
  2. * Daemon support on socket-incapable filesystems — issue #997 (and the adjacent
  3. * #974 WSL2 DrvFs hazard).
  4. *
  5. * A project on an ExFAT/FAT external volume (or some network mounts / WSL2 DrvFs)
  6. * breaks the daemon at TWO points, BOTH surfacing as ENOTSUP (verified on a real
  7. * macOS fskit ExFAT volume):
  8. *
  9. * 1. Lock acquisition `link()`s a temp file onto `.codegraph/daemon.pid` for
  10. * race-free exclusivity (#411). ExFAT has no hard links, so this throws
  11. * first — before the socket is ever reached. The fix falls back to an
  12. * O_EXCL create (`acquireLockViaExclusiveOpen`).
  13. * 2. The socket `listen()` then throws ENOTSUP regardless of path length, so
  14. * the old length-only tmpdir fallback never triggered. The fix makes the
  15. * socket path an ORDERED candidate list (in-project, then a deterministic
  16. * tmpdir path); the daemon binds the first that works and the proxy connects
  17. * the first that answers, so both converge on the fallback with zero
  18. * coordination.
  19. *
  20. * Both failures report a DIFFERENT errno per OS — ENOTSUP (macOS), EPERM (Linux),
  21. * EISDIR (Windows) — so the fix deliberately does NOT gate on an enumerated set:
  22. * the lock falls back on ANY non-EEXIST link error, the socket relocates on ANY
  23. * non-EADDRINUSE bind error. These tests pin that policy (incl. a deliberately
  24. * unanticipated errno), the candidate list, the candidate-walk binder, and the
  25. * exclusive-open lock primitive. (Throwaway scripts drove the full daemon end-to-
  26. * end on a real macOS ExFAT image, a Linux FAT loopback mount, and a Windows
  27. * exFAT VHD — relocate, serve a real client, rewrite the pidfile — none of which
  28. * can run in CI.)
  29. */
  30. import { afterEach, describe, expect, it } from 'vitest';
  31. import * as fs from 'fs';
  32. import * as net from 'net';
  33. import * as os from 'os';
  34. import * as path from 'path';
  35. import {
  36. getDaemonPidPath,
  37. getDaemonSocketCandidates,
  38. getDaemonSocketPath,
  39. } from '../src/mcp/daemon-paths';
  40. import type { DaemonLockInfo } from '../src/mcp/daemon-paths';
  41. import { decodeLockInfo } from '../src/mcp/daemon-paths';
  42. import {
  43. acquireLockViaExclusiveOpen,
  44. bindFirstUsableSocket,
  45. tryAcquireDaemonLock,
  46. } from '../src/mcp/daemon';
  47. const POSIX = process.platform !== 'win32';
  48. const tmpFiles: string[] = [];
  49. const tmpDirs: string[] = [];
  50. afterEach(() => {
  51. while (tmpFiles.length) {
  52. try { fs.rmSync(tmpFiles.pop()!, { force: true }); } catch { /* best-effort */ }
  53. }
  54. while (tmpDirs.length) {
  55. try { fs.rmSync(tmpDirs.pop()!, { recursive: true, force: true }); } catch { /* best-effort */ }
  56. }
  57. });
  58. /** A stand-in net.Server — bindFirstUsableSocket only ever passes it through. */
  59. const fakeServer = (tag: string): net.Server => ({ tag } as unknown as net.Server);
  60. /** Build an ErrnoException carrying a specific code, like a real listen() error. */
  61. function errno(code: string): NodeJS.ErrnoException {
  62. const e = new Error(`listen ${code}`) as NodeJS.ErrnoException;
  63. e.code = code;
  64. return e;
  65. }
  66. describe('getDaemonSocketCandidates (#997)', () => {
  67. it.runIf(POSIX)('returns [in-project, tmpdir] for a normal short path', () => {
  68. const root = path.join(os.tmpdir(), 'cg-cand-short');
  69. const candidates = getDaemonSocketCandidates(root);
  70. expect(candidates).toHaveLength(2);
  71. expect(candidates[0]).toBe(path.join(root, '.codegraph', 'daemon.sock'));
  72. expect(candidates[1]!.startsWith(os.tmpdir())).toBe(true);
  73. expect(path.basename(candidates[1]!)).toMatch(/^codegraph-[0-9a-f]{16}\.sock$/);
  74. });
  75. it.runIf(POSIX)('drops straight to [tmpdir] when the in-project path is too long', () => {
  76. // A deep root pushes `.codegraph/daemon.sock` past the POSIX socket limit.
  77. const root = path.join('/tmp', 'x'.repeat(120));
  78. const candidates = getDaemonSocketCandidates(root);
  79. expect(candidates).toHaveLength(1);
  80. expect(candidates[0]!.startsWith(os.tmpdir())).toBe(true);
  81. });
  82. it.runIf(POSIX)('is deterministic and project-scoped: same root → same tmpdir fallback', () => {
  83. const root = path.join(os.tmpdir(), 'cg-cand-determinism');
  84. const a = getDaemonSocketCandidates(root);
  85. const b = getDaemonSocketCandidates(root);
  86. expect(a).toEqual(b);
  87. // A different root yields a different (hashed) tmpdir fallback.
  88. const other = getDaemonSocketCandidates(root + '-other');
  89. expect(other[other.length - 1]).not.toBe(a[a.length - 1]);
  90. });
  91. it.runIf(!POSIX)('returns a single named pipe on Windows', () => {
  92. const candidates = getDaemonSocketCandidates('C:/dev/proj');
  93. expect(candidates).toHaveLength(1);
  94. expect(candidates[0]!.startsWith('\\\\.\\pipe\\codegraph-')).toBe(true);
  95. });
  96. it('getDaemonSocketPath returns the preferred candidate (index 0)', () => {
  97. const root = path.join(os.tmpdir(), 'cg-cand-primary');
  98. expect(getDaemonSocketPath(root)).toBe(getDaemonSocketCandidates(root)[0]);
  99. });
  100. });
  101. describe('bindFirstUsableSocket (#997)', () => {
  102. it('binds the first candidate when it works, without relocating', async () => {
  103. const tried: string[] = [];
  104. const relocations: string[] = [];
  105. const result = await bindFirstUsableSocket(
  106. ['/proj/.codegraph/daemon.sock', '/tmp/fallback.sock'],
  107. (p) => { tried.push(p); return Promise.resolve(fakeServer(p)); },
  108. { onRelocate: (from, to) => relocations.push(`${from}->${to}`) },
  109. );
  110. expect(result.socketPath).toBe('/proj/.codegraph/daemon.sock');
  111. expect(tried).toEqual(['/proj/.codegraph/daemon.sock']); // never touched the fallback
  112. expect(relocations).toEqual([]);
  113. });
  114. it('relocates to the tmpdir fallback when the in-project bind throws ENOTSUP', async () => {
  115. const tried: string[] = [];
  116. const relocations: Array<[string, string, string]> = [];
  117. const result = await bindFirstUsableSocket(
  118. ['/exfat/proj/.codegraph/daemon.sock', '/tmp/fallback.sock'],
  119. (p) => {
  120. tried.push(p);
  121. if (p.includes('/exfat/')) return Promise.reject(errno('ENOTSUP'));
  122. return Promise.resolve(fakeServer(p));
  123. },
  124. { onRelocate: (from, to, code) => relocations.push([from, to, code]) },
  125. );
  126. expect(result.socketPath).toBe('/tmp/fallback.sock');
  127. expect(tried).toEqual(['/exfat/proj/.codegraph/daemon.sock', '/tmp/fallback.sock']);
  128. expect(relocations).toEqual([
  129. ['/exfat/proj/.codegraph/daemon.sock', '/tmp/fallback.sock', 'ENOTSUP'],
  130. ]);
  131. });
  132. it('does NOT relocate on EADDRINUSE — it propagates even with a fallback present', async () => {
  133. const tried: string[] = [];
  134. await expect(
  135. bindFirstUsableSocket(
  136. ['/proj/.codegraph/daemon.sock', '/tmp/fallback.sock'],
  137. (p) => { tried.push(p); return Promise.reject(errno('EADDRINUSE')); },
  138. ),
  139. ).rejects.toMatchObject({ code: 'EADDRINUSE' });
  140. expect(tried).toEqual(['/proj/.codegraph/daemon.sock']); // fallback never tried
  141. });
  142. it('propagates a capability error on the LAST candidate (nowhere left to go)', async () => {
  143. // When tmpdir itself can't host a socket, the single-candidate long-path list
  144. // (or the exhausted tail of a longer one) has no fallback — the daemon must
  145. // surface the error so the launcher drops to direct mode (#974).
  146. await expect(
  147. bindFirstUsableSocket(
  148. ['/tmp/only.sock'],
  149. () => Promise.reject(errno('ENOTSUP')),
  150. ),
  151. ).rejects.toMatchObject({ code: 'ENOTSUP' });
  152. });
  153. it('walks past multiple unusable candidates to the first that binds', async () => {
  154. const tried: string[] = [];
  155. const result = await bindFirstUsableSocket(
  156. ['/a.sock', '/b.sock', '/c.sock'],
  157. (p) => {
  158. tried.push(p);
  159. if (p === '/a.sock') return Promise.reject(errno('ENOTSUP'));
  160. if (p === '/b.sock') return Promise.reject(errno('EACCES'));
  161. return Promise.resolve(fakeServer(p));
  162. },
  163. );
  164. expect(result.socketPath).toBe('/c.sock');
  165. expect(tried).toEqual(['/a.sock', '/b.sock', '/c.sock']);
  166. });
  167. it('relocates on an UNEXPECTED errno too — the policy is "anything but EADDRINUSE", not a fixed list', async () => {
  168. // ExFAT/FAT report different bind errnos per OS (ENOTSUP macOS, EPERM Linux),
  169. // so we must NOT gate relocation on an enumerated set — a code we never
  170. // anticipated must still fall through to tmpdir. 'EWEIRD' stands in for any
  171. // such surprise.
  172. const result = await bindFirstUsableSocket(
  173. ['/odd/proj/.codegraph/daemon.sock', '/tmp/fallback.sock'],
  174. (p) => p.includes('/odd/') ? Promise.reject(errno('EWEIRD')) : Promise.resolve(fakeServer(p)),
  175. );
  176. expect(result.socketPath).toBe('/tmp/fallback.sock');
  177. });
  178. });
  179. describe('lock acquisition without hard links (#997)', () => {
  180. // The hard-link-FAILS path (link() → O_EXCL fallback) can't be forced on a
  181. // normal FS — fs.linkSync's namespace export is non-configurable, so it can't
  182. // be spied. It's proven instead end-to-end on real ExFAT/FAT/exFAT volumes
  183. // (macOS ENOTSUP, Linux EPERM, Windows EISDIR — all acquire via the fallback).
  184. // Here we just guard that the refactored catch block didn't break the normal
  185. // link path: a clean acquire, and a second caller correctly sees it held.
  186. it.runIf(POSIX)('tryAcquireDaemonLock still acquires on a normal FS, and a second caller is told it is taken', () => {
  187. const root = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-lock-'));
  188. tmpDirs.push(root);
  189. const first = tryAcquireDaemonLock(root);
  190. expect(first.kind).toBe('acquired');
  191. const pidPath = getDaemonPidPath(root);
  192. expect(fs.existsSync(pidPath)).toBe(true);
  193. expect(decodeLockInfo(fs.readFileSync(pidPath, 'utf8'))?.pid).toBe(process.pid);
  194. const second = tryAcquireDaemonLock(root); // link() → EEXIST → taken
  195. expect(second.kind).toBe('taken');
  196. if (second.kind === 'taken') expect(second.existing?.pid).toBe(process.pid);
  197. });
  198. it.runIf(POSIX)('acquireLockViaExclusiveOpen creates the pidfile with a complete, parseable record', () => {
  199. const pidPath = path.join(os.tmpdir(), `cg-excl-${process.pid}-${Date.now()}.pid`);
  200. tmpFiles.push(pidPath);
  201. const info: DaemonLockInfo = {
  202. pid: 4242,
  203. version: '9.9.9-test',
  204. socketPath: '/tmp/whatever.sock',
  205. startedAt: 1_700_000_000_000,
  206. };
  207. const acquired = acquireLockViaExclusiveOpen(pidPath, info);
  208. expect(acquired).toBe(true);
  209. // The file is non-empty and decodes back to exactly what we wrote — i.e. no
  210. // empty-file window left behind for a reader to mistake for a corrupt lock.
  211. expect(decodeLockInfo(fs.readFileSync(pidPath, 'utf8'))).toEqual(info);
  212. });
  213. it.runIf(POSIX)('acquireLockViaExclusiveOpen is exclusive: the second caller loses (EEXIST → false)', () => {
  214. const pidPath = path.join(os.tmpdir(), `cg-excl2-${process.pid}-${Date.now()}.pid`);
  215. tmpFiles.push(pidPath);
  216. const winner: DaemonLockInfo = { pid: 1, version: 'a', socketPath: '/s1', startedAt: 1 };
  217. const loser: DaemonLockInfo = { pid: 2, version: 'b', socketPath: '/s2', startedAt: 2 };
  218. expect(acquireLockViaExclusiveOpen(pidPath, winner)).toBe(true);
  219. expect(acquireLockViaExclusiveOpen(pidPath, loser)).toBe(false); // does not clobber
  220. // The winner's record is intact — the loser never overwrote it.
  221. expect(decodeLockInfo(fs.readFileSync(pidPath, 'utf8'))).toEqual(winner);
  222. });
  223. });