|
|
@@ -58,6 +58,22 @@ import { CodeGraphPackageVersion } from './version';
|
|
|
/** Default idle linger after the last client disconnects. */
|
|
|
const DEFAULT_IDLE_TIMEOUT_MS = 300_000;
|
|
|
|
|
|
+/**
|
|
|
+ * Hard ceiling on how long the daemon stays up with clients connected but no
|
|
|
+ * inbound traffic. A backstop (#692): if a client's socket-close is never
|
|
|
+ * delivered (a Windows named-pipe hazard) it stays counted forever and the
|
|
|
+ * normal idle timer — which only arms at zero clients — never fires. A phantom
|
|
|
+ * client sends no traffic, so bounding on inactivity reaps the daemon anyway.
|
|
|
+ * Set generously so a real but momentarily-idle session isn't reaped mid-use.
|
|
|
+ */
|
|
|
+const DEFAULT_MAX_IDLE_MS = 1_800_000; // 30 min
|
|
|
+
|
|
|
+/** How often the daemon sweeps connected clients for a dead peer process (#692). */
|
|
|
+const DEFAULT_CLIENT_SWEEP_MS = 30_000;
|
|
|
+
|
|
|
+/** How long the daemon waits for the optional client-hello before proceeding without it. */
|
|
|
+const CLIENT_HELLO_TIMEOUT_MS = 3_000;
|
|
|
+
|
|
|
/** Bytes/parse-window for an oversized hello line — bounded against a malicious peer. */
|
|
|
const MAX_HELLO_LINE_BYTES = 4096;
|
|
|
|
|
|
@@ -74,6 +90,21 @@ export interface DaemonHello {
|
|
|
protocol: 1; // bump if the hello shape changes
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * Optional reverse-handshake line a proxy sends right after it verifies the
|
|
|
+ * daemon hello, carrying its own pids so the daemon can reap the client if its
|
|
|
+ * process dies WITHOUT the socket ever signalling close (the Windows named-pipe
|
|
|
+ * hazard behind #692). Entirely optional and fail-safe: a connection that never
|
|
|
+ * sends it (a legacy/direct client) just falls back to the socket-close
|
|
|
+ * lifecycle. The `codegraph_client` marker is what tells it apart from the
|
|
|
+ * client's first JSON-RPC message.
|
|
|
+ */
|
|
|
+export interface DaemonClientHello {
|
|
|
+ codegraph_client: 1;
|
|
|
+ pid: number; // the proxy process's own pid
|
|
|
+ hostPid: number | null; // the MCP host pid (past any launcher shim), if known
|
|
|
+}
|
|
|
+
|
|
|
export interface DaemonStartResult {
|
|
|
/** Always-non-null for a successfully-started daemon. */
|
|
|
socketPath: string;
|
|
|
@@ -95,8 +126,14 @@ export interface DaemonStartResult {
|
|
|
export class Daemon {
|
|
|
private server: net.Server | null = null;
|
|
|
private clients = new Set<MCPSession>();
|
|
|
+ /** Per-client peer pids from the optional client-hello, for the liveness sweep. */
|
|
|
+ private clientPeers = new Map<MCPSession, { pid: number | null; hostPid: number | null }>();
|
|
|
private idleTimer: NodeJS.Timeout | null = null;
|
|
|
private idleTimeoutMs: number;
|
|
|
+ private maxIdleMs: number;
|
|
|
+ private lastActivityAt = Date.now();
|
|
|
+ private maxIdleTimer: NodeJS.Timeout | null = null;
|
|
|
+ private clientSweepTimer: NodeJS.Timeout | null = null;
|
|
|
private engine: MCPEngine;
|
|
|
private stopping = false;
|
|
|
private socketPath: string;
|
|
|
@@ -104,11 +141,12 @@ export class Daemon {
|
|
|
|
|
|
constructor(
|
|
|
private projectRoot: string,
|
|
|
- opts: { idleTimeoutMs?: number } = {},
|
|
|
+ opts: { idleTimeoutMs?: number; maxIdleMs?: number } = {},
|
|
|
) {
|
|
|
this.socketPath = getDaemonSocketPath(projectRoot);
|
|
|
this.pidPath = getDaemonPidPath(projectRoot);
|
|
|
this.idleTimeoutMs = opts.idleTimeoutMs ?? resolveIdleTimeoutMs();
|
|
|
+ this.maxIdleMs = opts.maxIdleMs ?? resolveMaxIdleMs();
|
|
|
this.engine = new MCPEngine();
|
|
|
this.engine.setProjectPathHint(projectRoot);
|
|
|
}
|
|
|
@@ -161,6 +199,7 @@ export class Daemon {
|
|
|
// ever connects to (e.g. spawned then abandoned because the launcher died)
|
|
|
// doesn't pin resources forever.
|
|
|
this.armIdleTimer();
|
|
|
+ this.startLivenessTimers();
|
|
|
|
|
|
process.on('SIGINT', () => this.stop('SIGINT'));
|
|
|
process.on('SIGTERM', () => this.stop('SIGTERM'));
|
|
|
@@ -186,6 +225,14 @@ export class Daemon {
|
|
|
clearTimeout(this.idleTimer);
|
|
|
this.idleTimer = null;
|
|
|
}
|
|
|
+ if (this.maxIdleTimer) {
|
|
|
+ clearInterval(this.maxIdleTimer);
|
|
|
+ this.maxIdleTimer = null;
|
|
|
+ }
|
|
|
+ if (this.clientSweepTimer) {
|
|
|
+ clearInterval(this.clientSweepTimer);
|
|
|
+ this.clientSweepTimer = null;
|
|
|
+ }
|
|
|
process.stderr.write(`[CodeGraph daemon] Shutting down (${reason}; clients=${this.clients.size}).\n`);
|
|
|
for (const session of [...this.clients]) {
|
|
|
try { session.stop(); } catch { /* best-effort */ }
|
|
|
@@ -214,18 +261,30 @@ export class Daemon {
|
|
|
};
|
|
|
socket.write(JSON.stringify(hello) + '\n');
|
|
|
|
|
|
- const transport = new SocketTransport(socket);
|
|
|
- const session = new MCPSession(transport, this.engine, {
|
|
|
- explicitProjectPath: this.projectRoot,
|
|
|
+ // Read the optional client-hello (proxy → daemon) to learn the client's
|
|
|
+ // peer pids, then hand the socket to the session. Fail-safe: any problem —
|
|
|
+ // timeout, a non-hello first line, an early close — yields null pids and we
|
|
|
+ // fall back to the socket-close lifecycle exactly as before (#692).
|
|
|
+ void readClientHello(socket).then((peers) => {
|
|
|
+ const transport = new SocketTransport(socket);
|
|
|
+ const session = new MCPSession(transport, this.engine, {
|
|
|
+ explicitProjectPath: this.projectRoot,
|
|
|
+ });
|
|
|
+ transport.onClose(() => this.dropClient(session));
|
|
|
+ this.clients.add(session);
|
|
|
+ this.clientPeers.set(session, peers);
|
|
|
+ this.disarmIdleTimer();
|
|
|
+ session.start();
|
|
|
+ // Observe inbound bytes purely to feed the inactivity backstop — a second
|
|
|
+ // 'data' listener that reads nothing, added AFTER the transport's so the
|
|
|
+ // unshifted client-hello tail reaches the transport intact.
|
|
|
+ socket.on('data', () => { this.lastActivityAt = Date.now(); });
|
|
|
});
|
|
|
- transport.onClose(() => this.dropClient(session));
|
|
|
- this.clients.add(session);
|
|
|
- this.disarmIdleTimer();
|
|
|
- session.start();
|
|
|
}
|
|
|
|
|
|
private dropClient(session: MCPSession): void {
|
|
|
if (!this.clients.delete(session)) return;
|
|
|
+ this.clientPeers.delete(session);
|
|
|
if (this.clients.size === 0) this.armIdleTimer();
|
|
|
}
|
|
|
|
|
|
@@ -255,6 +314,58 @@ export class Daemon {
|
|
|
this.idleTimer = null;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Defense-in-depth against a daemon that outlives its clients (#692), for the
|
|
|
+ * cases the refcount + idle timer miss because a socket close never arrives:
|
|
|
+ * - **Inactivity backstop:** exit if no inbound traffic for `maxIdleMs` while
|
|
|
+ * clients are still (nominally) connected. A phantom client sends nothing,
|
|
|
+ * so it can't pin the daemon past this window.
|
|
|
+ * - **Liveness sweep:** drop any client whose peer process has died (per the
|
|
|
+ * client-hello pids), which re-arms the idle timer once the last real
|
|
|
+ * client is gone. Catches a dead peer within one sweep instead of waiting
|
|
|
+ * out the whole backstop.
|
|
|
+ * Both timers are unref'd — the listening server keeps the loop alive, and
|
|
|
+ * neither should hold it open on its own.
|
|
|
+ */
|
|
|
+ private startLivenessTimers(): void {
|
|
|
+ if (this.maxIdleMs > 0) {
|
|
|
+ const tick = Math.min(this.maxIdleMs, 60_000);
|
|
|
+ this.maxIdleTimer = setInterval(() => {
|
|
|
+ if (this.stopping || this.clients.size === 0) return; // idle timer owns the no-client case
|
|
|
+ if (Date.now() - this.lastActivityAt >= this.maxIdleMs) {
|
|
|
+ void this.stop('inactivity backstop');
|
|
|
+ }
|
|
|
+ }, tick);
|
|
|
+ this.maxIdleTimer.unref?.();
|
|
|
+ }
|
|
|
+ const sweepMs = resolveClientSweepMs();
|
|
|
+ if (sweepMs > 0) {
|
|
|
+ this.clientSweepTimer = setInterval(() => this.reapDeadClients(isProcessAlive), sweepMs);
|
|
|
+ this.clientSweepTimer.unref?.();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Drop every connected client whose peer process is gone. Returns the count
|
|
|
+ * reaped. `isAlive` is injected for testing. Clients with unknown pids (no
|
|
|
+ * client-hello) are skipped — they rely on the socket-close path.
|
|
|
+ */
|
|
|
+ reapDeadClients(isAlive: (pid: number) => boolean): number {
|
|
|
+ if (this.clients.size === 0) return 0;
|
|
|
+ let reaped = 0;
|
|
|
+ for (const session of [...this.clients]) {
|
|
|
+ const peers = this.clientPeers.get(session);
|
|
|
+ if (!peers || !peerIsDead(peers, isAlive)) continue;
|
|
|
+ process.stderr.write(
|
|
|
+ `[CodeGraph daemon] Reaping client with dead peer (pid ${peers.pid}); clients=${this.clients.size - 1}.\n`
|
|
|
+ );
|
|
|
+ try { session.stop(); } catch { /* best-effort */ }
|
|
|
+ this.dropClient(session);
|
|
|
+ reaped++;
|
|
|
+ }
|
|
|
+ return reaped;
|
|
|
+ }
|
|
|
+
|
|
|
private cleanupLockfile(): void {
|
|
|
try {
|
|
|
if (fs.existsSync(this.pidPath)) {
|
|
|
@@ -393,5 +504,115 @@ function resolveIdleTimeoutMs(): number {
|
|
|
return Math.floor(parsed);
|
|
|
}
|
|
|
|
|
|
+function resolveMaxIdleMs(): number {
|
|
|
+ const raw = process.env.CODEGRAPH_DAEMON_MAX_IDLE_MS;
|
|
|
+ if (raw === undefined || raw === '') return DEFAULT_MAX_IDLE_MS;
|
|
|
+ const parsed = Number(raw);
|
|
|
+ if (!Number.isFinite(parsed) || parsed < 0) return DEFAULT_MAX_IDLE_MS;
|
|
|
+ return Math.floor(parsed); // 0 disables the backstop
|
|
|
+}
|
|
|
+
|
|
|
+function resolveClientSweepMs(): number {
|
|
|
+ const raw = process.env.CODEGRAPH_DAEMON_CLIENT_SWEEP_MS;
|
|
|
+ if (raw === undefined || raw === '') return DEFAULT_CLIENT_SWEEP_MS;
|
|
|
+ const parsed = Number(raw);
|
|
|
+ if (!Number.isFinite(parsed) || parsed < 0) return DEFAULT_CLIENT_SWEEP_MS;
|
|
|
+ return Math.floor(parsed); // 0 disables the sweep
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Parse one client-hello line. Returns the peer pids if `line` is a well-formed
|
|
|
+ * client-hello (carries the `codegraph_client` marker), or null otherwise — in
|
|
|
+ * which case the caller treats the bytes as ordinary JSON-RPC.
|
|
|
+ */
|
|
|
+export function parseClientHelloLine(
|
|
|
+ line: string,
|
|
|
+): { pid: number; hostPid: number | null } | null {
|
|
|
+ let parsed: unknown;
|
|
|
+ try { parsed = JSON.parse(line); } catch { return null; }
|
|
|
+ if (!parsed || typeof parsed !== 'object') return null;
|
|
|
+ const o = parsed as Record<string, unknown>;
|
|
|
+ if (o.codegraph_client !== 1 || typeof o.pid !== 'number') return null;
|
|
|
+ return { pid: o.pid, hostPid: typeof o.hostPid === 'number' ? o.hostPid : null };
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * A client's peer is dead when its proxy process is gone, or when its known
|
|
|
+ * host process is gone. Unknown pid (no client-hello) is never "dead" on this
|
|
|
+ * basis — those clients rely on the socket-close path. Exported for testing.
|
|
|
+ */
|
|
|
+export function peerIsDead(
|
|
|
+ peers: { pid: number | null; hostPid: number | null },
|
|
|
+ isAlive: (pid: number) => boolean,
|
|
|
+): boolean {
|
|
|
+ if (peers.pid === null) return false;
|
|
|
+ if (!isAlive(peers.pid)) return true;
|
|
|
+ if (peers.hostPid !== null && !isAlive(peers.hostPid)) return true;
|
|
|
+ return false;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Read the optional client-hello line a proxy sends after the daemon hello.
|
|
|
+ * Always resolves (never rejects) — fail-safe by design, since every connection
|
|
|
+ * funnels through here. Resolves with the peer pids when the first line is a
|
|
|
+ * client-hello; otherwise resolves with null pids and unshifts the already-read
|
|
|
+ * bytes so the transport parses them as the client's first JSON-RPC message(s).
|
|
|
+ * Accumulates as Buffers and splits on the newline byte so a UTF-8 sequence
|
|
|
+ * straddling a chunk boundary in the unshifted tail is never corrupted.
|
|
|
+ */
|
|
|
+function readClientHello(
|
|
|
+ socket: net.Socket,
|
|
|
+): Promise<{ pid: number | null; hostPid: number | null }> {
|
|
|
+ return new Promise((resolve) => {
|
|
|
+ let chunks: Buffer[] = [];
|
|
|
+ let total = 0;
|
|
|
+ let settled = false;
|
|
|
+ const finish = (
|
|
|
+ peers: { pid: number | null; hostPid: number | null },
|
|
|
+ putBack?: Buffer,
|
|
|
+ ) => {
|
|
|
+ if (settled) return;
|
|
|
+ settled = true;
|
|
|
+ socket.removeListener('data', onData);
|
|
|
+ socket.removeListener('error', onEnd);
|
|
|
+ socket.removeListener('close', onEnd);
|
|
|
+ clearTimeout(timer);
|
|
|
+ if (putBack && putBack.length > 0 && !socket.destroyed) {
|
|
|
+ try { socket.unshift(putBack); } catch { /* stream already gone */ }
|
|
|
+ }
|
|
|
+ resolve(peers);
|
|
|
+ };
|
|
|
+ const onData = (chunk: Buffer | string) => {
|
|
|
+ const buf = typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk;
|
|
|
+ chunks.push(buf);
|
|
|
+ total += buf.length;
|
|
|
+ const all = chunks.length === 1 ? buf : Buffer.concat(chunks, total);
|
|
|
+ const nl = all.indexOf(0x0a); // '\n'
|
|
|
+ if (nl === -1) {
|
|
|
+ // No newline yet. If it's already too long to be a hello, it isn't one —
|
|
|
+ // hand the bytes back as data; otherwise keep accumulating.
|
|
|
+ if (total > MAX_HELLO_LINE_BYTES) finish({ pid: null, hostPid: null }, all);
|
|
|
+ else chunks = [all];
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ const peers = parseClientHelloLine(all.subarray(0, nl).toString('utf8'));
|
|
|
+ if (peers) {
|
|
|
+ const tail = all.subarray(nl + 1);
|
|
|
+ finish(peers, tail.length > 0 ? tail : undefined);
|
|
|
+ } else {
|
|
|
+ // First line is not a client-hello (legacy/direct client) — hand the
|
|
|
+ // whole buffer back so the transport sees the message verbatim.
|
|
|
+ finish({ pid: null, hostPid: null }, all);
|
|
|
+ }
|
|
|
+ };
|
|
|
+ const onEnd = () => finish({ pid: null, hostPid: null });
|
|
|
+ const timer = setTimeout(() => finish({ pid: null, hostPid: null }), CLIENT_HELLO_TIMEOUT_MS);
|
|
|
+ timer.unref?.();
|
|
|
+ socket.on('data', onData);
|
|
|
+ socket.on('error', onEnd);
|
|
|
+ socket.on('close', onEnd);
|
|
|
+ });
|
|
|
+}
|
|
|
+
|
|
|
/** Exported for test stubs that need to bound the hello-line read. */
|
|
|
export { MAX_HELLO_LINE_BYTES };
|