|
|
@@ -13,60 +13,77 @@
|
|
|
* const server = new MCPServer('/path/to/project');
|
|
|
* await server.start();
|
|
|
* ```
|
|
|
+ *
|
|
|
+ * Runtime modes (decided in {@link MCPServer.start}):
|
|
|
+ *
|
|
|
+ * - **Direct** — one process serves one MCP client over stdio. The pre-#411
|
|
|
+ * behavior; used when the user opts out (`CODEGRAPH_NO_DAEMON=1`), no
|
|
|
+ * `.codegraph/` is reachable, or the daemon machinery fails for any reason.
|
|
|
+ * - **Proxy** — what an MCP host actually talks to when sharing is on: a thin
|
|
|
+ * stdio↔socket pipe to the shared daemon. The proxy carries the #277 PPID
|
|
|
+ * watchdog, so a SIGKILL'd host reaps its proxy promptly. See {@link ./proxy.ts}.
|
|
|
+ * - **Daemon** — a *detached* background process (its own session/process
|
|
|
+ * group) that serves N proxies over a Unix-domain socket / named pipe,
|
|
|
+ * sharing one CodeGraph + watcher + SQLite handle. Spawned on demand; never a
|
|
|
+ * child of any host, so it survives individual sessions and is reaped by
|
|
|
+ * client-refcount + idle timeout. See {@link ./daemon.ts} and issue #411.
|
|
|
+ *
|
|
|
+ * The detached-daemon + always-proxy split is the fix for the review finding
|
|
|
+ * that the original in-process daemon (a) was the first host's child, so closing
|
|
|
+ * that terminal severed every other client, and (b) disabled the PPID watchdog,
|
|
|
+ * regressing #277 (orphaned daemons on host SIGKILL).
|
|
|
*/
|
|
|
|
|
|
+import * as fs from 'fs';
|
|
|
import * as path from 'path';
|
|
|
-import CodeGraph, { findNearestCodeGraphRoot } from '../index';
|
|
|
-import { watchDisabledReason } from '../sync';
|
|
|
-import { StdioTransport, JsonRpcRequest, JsonRpcNotification, ErrorCodes } from './transport';
|
|
|
-import { tools, ToolHandler } from './tools';
|
|
|
-import { SERVER_INSTRUCTIONS } from './server-instructions';
|
|
|
+import { spawn, StdioOptions } from 'child_process';
|
|
|
+import { findNearestCodeGraphRoot } from '../index';
|
|
|
+import { getCodeGraphDir } from '../directory';
|
|
|
+import { StdioTransport } from './transport';
|
|
|
+import { MCPEngine } from './engine';
|
|
|
+import { MCPSession } from './session';
|
|
|
+import {
|
|
|
+ Daemon,
|
|
|
+ clearStaleDaemonLock,
|
|
|
+ isProcessAlive,
|
|
|
+ tryAcquireDaemonLock,
|
|
|
+} from './daemon';
|
|
|
+import { runProxy } from './proxy';
|
|
|
+import { getDaemonSocketPath } from './daemon-paths';
|
|
|
import { HOST_PPID_ENV } from '../extraction/wasm-runtime-flags';
|
|
|
|
|
|
/**
|
|
|
- * Convert a file:// URI to a filesystem path.
|
|
|
- * Handles URL encoding and Windows drive letter paths.
|
|
|
- */
|
|
|
-function fileUriToPath(uri: string): string {
|
|
|
- try {
|
|
|
- const url = new URL(uri);
|
|
|
- let filePath = decodeURIComponent(url.pathname);
|
|
|
- // On Windows, file:///C:/path produces pathname /C:/path — strip leading /
|
|
|
- if (process.platform === 'win32' && /^\/[a-zA-Z]:/.test(filePath)) {
|
|
|
- filePath = filePath.slice(1);
|
|
|
- }
|
|
|
- return path.resolve(filePath);
|
|
|
- } catch {
|
|
|
- // Fallback for non-standard URIs
|
|
|
- return uri.replace(/^file:\/\/\/?/, '');
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-/**
|
|
|
- * MCP Server Info
|
|
|
+ * How often to poll `process.ppid` to detect parent process death (see #277).
|
|
|
+ * 5s is a deliberate trade-off: the failure mode being guarded against is rare
|
|
|
+ * (parent SIGKILL'd), and longer poll = less wakeup overhead while idle.
|
|
|
*/
|
|
|
-const SERVER_INFO = {
|
|
|
- name: 'codegraph',
|
|
|
- version: '0.1.0',
|
|
|
-};
|
|
|
+const DEFAULT_PPID_POLL_MS = 5000;
|
|
|
|
|
|
/**
|
|
|
- * MCP Protocol Version
|
|
|
+ * Env var that marks a process as the *detached daemon* itself (set by
|
|
|
+ * {@link spawnDetachedDaemon} when it re-invokes the CLI). Without it a
|
|
|
+ * `serve --mcp` invocation is a launcher that connects-or-spawns; with it, the
|
|
|
+ * process IS the daemon and must never try to spawn another (infinite spawn).
|
|
|
*/
|
|
|
-const PROTOCOL_VERSION = '2024-11-05';
|
|
|
+const DAEMON_INTERNAL_ENV = 'CODEGRAPH_DAEMON_INTERNAL';
|
|
|
|
|
|
/**
|
|
|
- * How long to wait for the client's `roots/list` response before giving up
|
|
|
- * and falling back to the process cwd.
|
|
|
+ * Retries for the detached daemon arbitrating the O_EXCL lock against a racing
|
|
|
+ * sibling. Tiny — the lock resolves on the first round in practice; the retries
|
|
|
+ * only cover clearing a genuinely stale (dead-pid) lockfile.
|
|
|
*/
|
|
|
-const ROOTS_LIST_TIMEOUT_MS = 5000;
|
|
|
+const TAKEOVER_MAX_RETRIES = 5;
|
|
|
+const TAKEOVER_RETRY_DELAY_MS = 100;
|
|
|
|
|
|
/**
|
|
|
- * How often to poll `process.ppid` to detect parent process death (see #277).
|
|
|
- * 5s is a deliberate trade-off: the failure mode being guarded against is rare
|
|
|
- * (parent SIGKILL'd), and longer poll = less wakeup overhead while idle.
|
|
|
+ * How long a launcher waits for a freshly-spawned daemon to bind its socket
|
|
|
+ * before giving up and running in-process. The daemon binds the socket *before*
|
|
|
+ * the (backgrounded) engine/grammar warm-up, so this only needs to cover node
|
|
|
+ * process startup. 60 × 100ms = 6s of headroom for a cold/slow box; on the
|
|
|
+ * common path the socket appears within a few rounds.
|
|
|
*/
|
|
|
-const DEFAULT_PPID_POLL_MS = 5000;
|
|
|
+const DAEMON_CONNECT_MAX_RETRIES = 60;
|
|
|
+const DAEMON_CONNECT_RETRY_DELAY_MS = 100;
|
|
|
|
|
|
/**
|
|
|
* Resolve the PPID watchdog poll interval from an env override. A value of
|
|
|
@@ -96,28 +113,84 @@ function parseHostPpid(raw: string | undefined): number | null {
|
|
|
return parsed;
|
|
|
}
|
|
|
|
|
|
-/** True if a process with `pid` currently exists (signal-0 probe). */
|
|
|
-function isProcessAlive(pid: number): boolean {
|
|
|
- try {
|
|
|
- process.kill(pid, 0);
|
|
|
- return true;
|
|
|
- } catch {
|
|
|
- return false;
|
|
|
- }
|
|
|
/**
 * Interpret a raw environment-variable value as a boolean flag.
 *
 * Unset and empty values are off, as are the literal `0` and any casing of
 * `false`; every other value turns the flag on. Both daemon switches below go
 * through this one rule so their parsing cannot drift apart.
 *
 * @param raw The value read from `process.env` (may be `undefined`).
 * @returns `true` when the flag should be treated as set.
 */
function envFlagSet(raw: string | undefined): boolean {
  if (!raw) return false;
  return raw !== '0' && raw.toLowerCase() !== 'false';
}

/** Whether `CODEGRAPH_NO_DAEMON` was set to a truthy value. */
function daemonOptOutSet(): boolean {
  return envFlagSet(process.env.CODEGRAPH_NO_DAEMON);
}

/** Whether this process was spawned to BE the detached daemon. */
function daemonInternalSet(): boolean {
  return envFlagSet(process.env[DAEMON_INTERNAL_ENV]);
}
|
|
|
|
|
|
/**
|
|
|
- * Extract the first usable filesystem path from a `roots/list` result.
|
|
|
- * Shape per MCP spec: `{ roots: [{ uri: "file:///path", name?: string }] }`.
|
|
|
- * Returns null if the result is empty or malformed.
|
|
|
+ * Resolve the project root the daemon machinery should key on. Returns
|
|
|
+ * `null` when no `.codegraph/` is reachable from the candidate path — in
|
|
|
+ * that case the caller must run in direct mode, since the daemon lockfile
|
|
|
+ * and socket both live under `.codegraph/`.
|
|
|
+ *
|
|
|
+ * The result is canonicalized with `realpathSync` so every client converges on
|
|
|
+ * the same socket/lock path regardless of how it expressed the path: a client
|
|
|
+ * launched with cwd under a symlink (e.g. macOS `/var` → `/private/var`, where
|
|
|
+ * spawned `process.cwd()` is already realpath'd) and one that passed a
|
|
|
+ * symlinked `rootUri` would otherwise hash to different sockets and silently
|
|
|
+ * fail to share the daemon.
|
|
|
*/
|
|
|
-function firstRootPath(result: unknown): string | null {
|
|
|
- if (!result || typeof result !== 'object') return null;
|
|
|
- const roots = (result as { roots?: unknown }).roots;
|
|
|
- if (!Array.isArray(roots) || roots.length === 0) return null;
|
|
|
- const first = roots[0] as { uri?: unknown };
|
|
|
- if (typeof first?.uri !== 'string') return null;
|
|
|
- return fileUriToPath(first.uri);
|
|
|
function resolveDaemonRoot(explicitPath: string | null): string | null {
  // Start the upward search from the caller-supplied path, or from the current
  // working directory when none was given.
  const nearest = findNearestCodeGraphRoot(explicitPath ?? process.cwd());
  if (nearest) {
    try {
      // Canonical form, so different spellings of one directory (symlinked vs.
      // resolved) yield the same string.
      return fs.realpathSync(nearest);
    } catch {
      // realpath failed — hand back the root exactly as it was found.
      return nearest;
    }
  }
  // Nothing found: signal the caller with null.
  return null;
}
|
|
|
+
|
|
|
/**
 * Spawn the shared daemon as a detached background process.
 *
 * The child is started with `detached: true` (on POSIX that makes it the
 * leader of its own session/process group, so a SIGHUP/SIGINT aimed at the
 * launcher's terminal does not reach it) and with stdio decoupled from the
 * launcher: stdin is ignored and stdout/stderr are appended to `daemon.log`
 * inside the directory returned by `getCodeGraphDir(root)`. If that log file
 * cannot be opened the daemon's output is discarded instead.
 *
 * The same CLI is re-invoked: `process.execPath` (the running node binary),
 * the current `process.execArgv` (per the original note this carries
 * `--liftoff-only`, so the daemon never re-execs) and `process.argv[1]` (this
 * script), followed by `serve --mcp --path <root>`. `DAEMON_INTERNAL_ENV` is
 * set in the child's environment so it knows it IS the daemon. The spawned
 * process self-arbitrates the O_EXCL lock, so racing launchers may each spawn
 * one — losers exit and every launcher proxies through the single winner.
 *
 * @param root Project root handed to the child via `--path`.
 * @throws Error when `process.argv[1]` is empty, plus anything `spawn` throws
 *   synchronously. Asynchronous spawn failures are logged to stderr rather
 *   than thrown.
 */
function spawnDetachedDaemon(root: string): void {
  const scriptPath = process.argv[1];
  if (!scriptPath) {
    // No resolvable CLI entry point to re-invoke — let the caller fall back to
    // direct mode rather than spawn something broken.
    throw new Error('cannot resolve CLI script path to spawn the daemon');
  }

  let logFd: number | null = null;
  let stdio: StdioOptions = 'ignore';
  try {
    logFd = fs.openSync(path.join(getCodeGraphDir(root), 'daemon.log'), 'a');
    stdio = ['ignore', logFd, logFd];
  } catch {
    stdio = 'ignore'; // no log file — discard daemon output rather than fail
  }
  try {
    const child = spawn(
      process.execPath,
      [...process.execArgv, scriptPath, 'serve', '--mcp', '--path', root],
      {
        detached: true,
        stdio,
        windowsHide: true,
        env: { ...process.env, [DAEMON_INTERNAL_ENV]: '1' },
      },
    );
    // A failed spawn is reported asynchronously as an 'error' event. With no
    // listener that event becomes an uncaught exception and takes the launcher
    // down; log it instead so the launcher stays alive.
    child.on('error', (err: Error) => {
      process.stderr.write(`[CodeGraph MCP] Failed to spawn daemon (${err.message}).\n`);
    });
    child.unref();
  } finally {
    // The child holds its own dup of the log fd now; the launcher doesn't need it.
    if (logFd !== null) {
      try { fs.closeSync(logFd); } catch { /* ignore */ }
    }
  }
}
|
|
|
|
|
|
/**
|
|
|
@@ -125,281 +198,88 @@ function firstRootPath(result: unknown): string | null {
|
|
|
*
|
|
|
* Implements the Model Context Protocol to expose CodeGraph
|
|
|
* functionality as tools that can be called by AI assistants.
|
|
|
+ *
|
|
|
+ * The constructor and `start()` signature are backwards-compatible with the
|
|
|
+ * pre-issue-#411 implementation: callers continue to do
|
|
|
+ * `new MCPServer(path).start()`. Internally we now pick from direct / proxy /
|
|
|
+ * daemon at start time.
|
|
|
*/
|
|
|
export class MCPServer {
|
|
|
- private transport: StdioTransport;
|
|
|
- private cg: CodeGraph | null = null;
|
|
|
- private toolHandler: ToolHandler;
|
|
|
private projectPath: string | null;
|
|
|
- // In-flight background init kicked off from handleInitialize. Tracked so the
|
|
|
- // sync retry path doesn't race against it (double-opening the SQLite file).
|
|
|
- private initPromise: Promise<void> | null = null;
|
|
|
- // Whether the client advertised the MCP `roots` capability during initialize.
|
|
|
- // If so, and no explicit project path was given, we ask it for the workspace
|
|
|
- // root via roots/list rather than guessing from the (often wrong) cwd.
|
|
|
- private clientSupportsRoots = false;
|
|
|
- // Guards the one-shot deferred resolution (roots/list or cwd) so we don't
|
|
|
- // re-issue roots/list on every tool call.
|
|
|
- private rootsAttempted = false;
|
|
|
- // PPID watchdog — see start(). Captured at construction so we always have a
|
|
|
+ // Direct-mode-only state. In daemon mode the per-connection sessions live
|
|
|
+ // inside the Daemon class; in proxy mode there is no session at all.
|
|
|
+ private session: MCPSession | null = null;
|
|
|
+ private engine: MCPEngine | null = null;
|
|
|
+ private daemon: Daemon | null = null;
|
|
|
+ private ppidWatchdog: ReturnType<typeof setInterval> | null = null;
|
|
|
+ // PPID watchdog baseline — captured at construction so we always have a
|
|
|
// baseline, even if start() runs after a fork-style reparent.
|
|
|
private originalPpid: number = process.ppid;
|
|
|
- // The MCP host's PID, propagated across the `--liftoff-only` re-exec (see
|
|
|
- // HOST_PPID_ENV). When set, the watchdog polls it directly: the re-exec
|
|
|
- // inserts an intermediate process whose *death* — not just our reparenting —
|
|
|
- // is what we'd otherwise miss. null on the direct (bundled) launch path.
|
|
|
private hostPpid: number | null = parseHostPpid(process.env[HOST_PPID_ENV]);
|
|
|
- private ppidWatchdog: ReturnType<typeof setInterval> | null = null;
|
|
|
- // Idempotency guard for stop(). Without it, the watchdog can race with the
|
|
|
- // stdin `end`/`close` handlers (or SIGTERM/SIGINT) and double-close cg and
|
|
|
- // the transport before process.exit() lands.
|
|
|
+ // Idempotency guard for stop().
|
|
|
private stopped = false;
|
|
|
+ private mode: 'unstarted' | 'direct' | 'proxy' | 'daemon' = 'unstarted';
|
|
|
|
|
|
constructor(projectPath?: string) {
|
|
|
this.projectPath = projectPath || null;
|
|
|
- this.transport = new StdioTransport();
|
|
|
- // Create ToolHandler eagerly — cross-project queries work even without a default project
|
|
|
- this.toolHandler = new ToolHandler(null);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Start the MCP server
|
|
|
+ * Start the MCP server.
|
|
|
*
|
|
|
- * Note: CodeGraph initialization is deferred until the initialize request
|
|
|
- * is received, which includes the rootUri from the client.
|
|
|
- */
|
|
|
- async start(): Promise<void> {
|
|
|
- // Start listening for messages immediately - don't check initialization yet
|
|
|
- // We'll get the project path from the initialize request's rootUri
|
|
|
- this.transport.start(this.handleMessage.bind(this));
|
|
|
-
|
|
|
- // Keep the process running
|
|
|
- process.on('SIGINT', () => this.stop());
|
|
|
- process.on('SIGTERM', () => this.stop());
|
|
|
-
|
|
|
- // When the parent process (Claude Code) exits, stdin closes.
|
|
|
- // Detect this and shut down gracefully to prevent orphaned processes.
|
|
|
- process.stdin.on('end', () => this.stop());
|
|
|
- process.stdin.on('close', () => this.stop());
|
|
|
-
|
|
|
- // PPID watchdog (#277). Linux doesn't propagate parent death to children,
|
|
|
- // so when the MCP host (Claude Code, opencode, …) is SIGKILL'd by the OOM
|
|
|
- // killer / a force-quit / a container teardown, the child is reparented to
|
|
|
- // init/systemd and the stdin `end`/`close` events don't always fire. The
|
|
|
- // server would then linger indefinitely, holding inotify watches, file
|
|
|
- // descriptors, and the SQLite WAL. Poll `process.ppid` and shut down the
|
|
|
- // moment it changes from what we observed at startup. Cross-platform:
|
|
|
- // reparenting changes ppid on Linux *and* macOS; on Windows the value can
|
|
|
- // also drop to 0 once the parent is gone. When the CLI re-execs itself for
|
|
|
- // `--liftoff-only`, an intermediate process sits between us and the host and
|
|
|
- // outlives it, so our own ppid wouldn't change — in that case we poll the
|
|
|
- // host PID (propagated via HOST_PPID_ENV) for liveness instead. The watchdog
|
|
|
- // is `.unref()`'d so it never holds the event loop open on its own.
|
|
|
- const pollMs = parsePpidPollMs(process.env.CODEGRAPH_PPID_POLL_MS);
|
|
|
- if (pollMs > 0) {
|
|
|
- this.ppidWatchdog = setInterval(() => {
|
|
|
- const current = process.ppid;
|
|
|
- const ppidChanged = current !== this.originalPpid;
|
|
|
- const hostGone = this.hostPpid !== null && !isProcessAlive(this.hostPpid);
|
|
|
- if (ppidChanged || hostGone) {
|
|
|
- const reason = ppidChanged
|
|
|
- ? `ppid ${this.originalPpid} -> ${current}`
|
|
|
- : `host pid ${this.hostPpid} exited`;
|
|
|
- process.stderr.write(
|
|
|
- `[CodeGraph MCP] Parent process exited (${reason}); shutting down.\n`
|
|
|
- );
|
|
|
- this.stop();
|
|
|
- }
|
|
|
- }, pollMs);
|
|
|
- this.ppidWatchdog.unref();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Try to initialize CodeGraph for the default project.
|
|
|
+ * Decision order:
|
|
|
+ * 1. `CODEGRAPH_DAEMON_INTERNAL=1` → we ARE the detached daemon; listen.
|
|
|
+ * 2. `CODEGRAPH_NO_DAEMON=1` → direct mode (unchanged pre-#411 behavior).
|
|
|
+ * 3. No `.codegraph/` reachable → direct mode (the daemon's lockfile and
|
|
|
+ * socket both live under `.codegraph/`).
|
|
|
+ * 4. Otherwise connect to (or spawn) the shared daemon and proxy to it.
|
|
|
*
|
|
|
- * Walks up parent directories to find the nearest .codegraph/ folder,
|
|
|
- * similar to how git finds .git/ directories.
|
|
|
- *
|
|
|
- * If initialization fails, the error is recorded but the server continues
|
|
|
- * to work — cross-project queries and retries on subsequent tool calls
|
|
|
- * are still possible.
|
|
|
+ * On any unexpected failure in step 4 we transparently fall back to direct
|
|
|
+ * mode — a misbehaving daemon must never block a session from starting.
|
|
|
*/
|
|
|
- private async tryInitializeDefault(projectPath: string): Promise<void> {
|
|
|
- // Record where we searched so a later "not initialized" error can name it.
|
|
|
- this.toolHandler.setDefaultProjectHint(projectPath);
|
|
|
-
|
|
|
- // Walk up parent directories to find nearest .codegraph/
|
|
|
- const resolvedRoot = findNearestCodeGraphRoot(projectPath);
|
|
|
-
|
|
|
- if (!resolvedRoot) {
|
|
|
- this.projectPath = projectPath;
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- this.projectPath = resolvedRoot;
|
|
|
-
|
|
|
- try {
|
|
|
- this.cg = await CodeGraph.open(resolvedRoot);
|
|
|
- this.toolHandler.setDefaultCodeGraph(this.cg);
|
|
|
- this.startWatching();
|
|
|
- this.catchUpSync();
|
|
|
- } catch (err) {
|
|
|
- // Log the error so transient failures are diagnosable (see issue #47)
|
|
|
- const msg = err instanceof Error ? err.message : String(err);
|
|
|
- process.stderr.write(`[CodeGraph MCP] Failed to open project at ${resolvedRoot}: ${msg}\n`);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Retry initialization of the default project if it previously failed.
|
|
|
- * Called lazily on tool calls that need the default project.
|
|
|
- * Re-walks parent directories each time so it picks up projects
|
|
|
- * initialized after the MCP server started.
|
|
|
- *
|
|
|
- * Awaits any in-flight background init (kicked off by handleInitialize) so
|
|
|
- * we never open the SQLite file twice concurrently.
|
|
|
- */
|
|
|
- private async retryInitIfNeeded(): Promise<void> {
|
|
|
- // Wait for the background init started during handleInitialize, if any.
|
|
|
- if (this.initPromise) {
|
|
|
- try { await this.initPromise; } catch { /* errored init falls through to retry */ }
|
|
|
+ async start(): Promise<void> {
|
|
|
+ // The detached daemon process itself. Checked before the opt-out so the
|
|
|
+ // daemon honors the same env it was spawned with (it never sets NO_DAEMON).
|
|
|
+ if (daemonInternalSet()) {
|
|
|
+ return this.startDaemonProcess();
|
|
|
}
|
|
|
|
|
|
- // Already initialized successfully
|
|
|
- if (this.toolHandler.hasDefaultCodeGraph()) return;
|
|
|
-
|
|
|
- // No explicit path was given at initialize. Resolve it now, exactly once:
|
|
|
- // ask the client via roots/list (if it advertised roots), else use cwd.
|
|
|
- // Deferring to here lets a roots answer override the wrong cwd, and the
|
|
|
- // one-shot guard means we never re-issue roots/list per tool call.
|
|
|
- if (!this.projectPath && !this.rootsAttempted) {
|
|
|
- this.rootsAttempted = true;
|
|
|
- this.initPromise = (
|
|
|
- this.clientSupportsRoots
|
|
|
- ? this.initFromRoots()
|
|
|
- : this.tryInitializeDefault(process.cwd())
|
|
|
- ).finally(() => { this.initPromise = null; });
|
|
|
- try { await this.initPromise; } catch { /* fall through to last-resort below */ }
|
|
|
- if (this.toolHandler.hasDefaultCodeGraph()) return;
|
|
|
+ // Direct mode if the user opted out. Setting the env var is sufficient to
|
|
|
+ // get the pre-#411 single-process behavior.
|
|
|
+ if (daemonOptOutSet()) {
|
|
|
+ return this.startDirect('CODEGRAPH_NO_DAEMON set');
|
|
|
}
|
|
|
|
|
|
- // Last resort: re-walk from the best candidate we have. Picks up projects
|
|
|
- // initialized after the server started, and covers clients that sent no
|
|
|
- // usable initialize signal at all.
|
|
|
- const candidate = this.projectPath ?? process.cwd();
|
|
|
- this.toolHandler.setDefaultProjectHint(candidate);
|
|
|
- const resolvedRoot = findNearestCodeGraphRoot(candidate);
|
|
|
- if (!resolvedRoot) return;
|
|
|
-
|
|
|
- try {
|
|
|
- // Close any previously failed instance to avoid leaking resources
|
|
|
- if (this.cg) {
|
|
|
- try { this.cg.close(); } catch { /* ignore */ }
|
|
|
- this.cg = null;
|
|
|
- }
|
|
|
- this.cg = CodeGraph.openSync(resolvedRoot);
|
|
|
- this.projectPath = resolvedRoot;
|
|
|
- this.toolHandler.setDefaultCodeGraph(this.cg);
|
|
|
- this.startWatching();
|
|
|
- this.catchUpSync();
|
|
|
- } catch {
|
|
|
- // Still failing — will retry on next tool call
|
|
|
+ const root = resolveDaemonRoot(this.projectPath);
|
|
|
+ if (!root) {
|
|
|
+ // No initialized project found — daemon mode has nowhere to put its
|
|
|
+ // socket. The fresh-checkout / outside-project case; behave as before.
|
|
|
+ return this.startDirect('no .codegraph/ root found');
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- /**
|
|
|
- * Resolve the project root via the MCP `roots/list` request and initialize
|
|
|
- * from the first root the client reports. Falls back to the process cwd if
|
|
|
- * the client returns no usable root or doesn't answer in time. See issue #196.
|
|
|
- */
|
|
|
- private async initFromRoots(): Promise<void> {
|
|
|
- let target = process.cwd();
|
|
|
try {
|
|
|
- const result = await this.transport.request('roots/list', undefined, ROOTS_LIST_TIMEOUT_MS);
|
|
|
- const rootPath = firstRootPath(result);
|
|
|
- if (rootPath) {
|
|
|
- target = rootPath;
|
|
|
- } else {
|
|
|
- process.stderr.write('[CodeGraph MCP] Client returned no workspace roots; falling back to process cwd.\n');
|
|
|
+ const mode = await this.connectOrSpawnDaemon(root);
|
|
|
+ if (mode === 'fallback') {
|
|
|
+ return this.startDirect('daemon unavailable; fallback to direct');
|
|
|
}
|
|
|
+ // 'proxy': connectOrSpawnDaemon ran the stdio↔socket pipe to completion
|
|
|
+ // (it only returns once the host disconnected). The process is now
|
|
|
+ // expected to terminate naturally — the proxy installed its own watchdog.
|
|
|
+ this.mode = 'proxy';
|
|
|
+ return;
|
|
|
} catch (err) {
|
|
|
+ // Belt-and-braces: if anything throws inside the daemon machinery,
|
|
|
+ // never wedge the user — fall back to a working direct-mode session.
|
|
|
const msg = err instanceof Error ? err.message : String(err);
|
|
|
- process.stderr.write(`[CodeGraph MCP] roots/list request failed (${msg}); falling back to process cwd.\n`);
|
|
|
+ process.stderr.write(`[CodeGraph MCP] Daemon path failed (${msg}); falling back to direct mode.\n`);
|
|
|
+ return this.startDirect('daemon path threw');
|
|
|
}
|
|
|
- await this.tryInitializeDefault(target);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Start file watching on the active CodeGraph instance.
|
|
|
- * Logs sync activity to stderr for diagnostics.
|
|
|
- */
|
|
|
- private startWatching(): void {
|
|
|
- if (!this.cg) return;
|
|
|
-
|
|
|
- // When the watcher is intentionally disabled (e.g. WSL2 /mnt drives, or
|
|
|
- // CODEGRAPH_NO_WATCH=1), say so explicitly and tell the user how to keep
|
|
|
- // the graph fresh — otherwise the silent staleness is hard to diagnose.
|
|
|
- const disabledReason = watchDisabledReason(this.projectPath ?? process.cwd());
|
|
|
- if (disabledReason) {
|
|
|
- process.stderr.write(
|
|
|
- `[CodeGraph MCP] File watcher disabled — ${disabledReason}. ` +
|
|
|
- `The graph will not auto-update; run \`codegraph sync\` (or install the git sync hooks via \`codegraph init\`) to refresh.\n`
|
|
|
- );
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- const started = this.cg.watch({
|
|
|
- onSyncComplete: (result) => {
|
|
|
- if (result.filesChanged > 0) {
|
|
|
- process.stderr.write(
|
|
|
- `[CodeGraph MCP] Auto-synced ${result.filesChanged} file(s) in ${result.durationMs}ms\n`
|
|
|
- );
|
|
|
- }
|
|
|
- },
|
|
|
- onSyncError: (err) => {
|
|
|
- process.stderr.write(`[CodeGraph MCP] Auto-sync error: ${err.message}\n`);
|
|
|
- },
|
|
|
- });
|
|
|
-
|
|
|
- if (started) {
|
|
|
- process.stderr.write('[CodeGraph MCP] File watcher active — graph will auto-sync on changes\n');
|
|
|
- } else {
|
|
|
- // start() can also return false when recursive fs.watch isn't supported.
|
|
|
- process.stderr.write(
|
|
|
- '[CodeGraph MCP] File watcher unavailable on this platform — run `codegraph sync` to refresh the graph after changes.\n'
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Reconcile the index with the current filesystem once, right after connect —
|
|
|
- * catches edits, adds, deletes, and `git pull`/`checkout` changes made while
|
|
|
- * no watcher was running. Runs in the background so it never delays the
|
|
|
- * `initialize` response; `sync()` is incremental (a stat pre-filter skips
|
|
|
- * unchanged files) and mutex-guarded, so it can't collide with the live
|
|
|
- * watcher or a git-hook sync. Runs even when the watcher is unavailable
|
|
|
- * (e.g. WSL2 /mnt drives), where catch-up matters most.
|
|
|
- */
|
|
|
- private catchUpSync(): void {
|
|
|
- const cg = this.cg;
|
|
|
- if (!cg) return;
|
|
|
- void cg
|
|
|
- .sync()
|
|
|
- .then((result) => {
|
|
|
- const changed = result.filesAdded + result.filesModified + result.filesRemoved;
|
|
|
- if (changed > 0) {
|
|
|
- process.stderr.write(`[CodeGraph MCP] Caught up ${changed} file(s) changed since last run\n`);
|
|
|
- }
|
|
|
- })
|
|
|
- .catch((err) => {
|
|
|
- const msg = err instanceof Error ? err.message : String(err);
|
|
|
- process.stderr.write(`[CodeGraph MCP] Catch-up sync failed: ${msg}\n`);
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Stop the server
|
|
|
+ * Stop the server. In daemon mode this triggers graceful shutdown of every
|
|
|
+ * connected session; in direct mode it mirrors the pre-#411 behavior (close
|
|
|
+ * cg, exit). Proxy mode never routes through here — the proxy exits itself.
|
|
|
*/
|
|
|
stop(): void {
|
|
|
if (this.stopped) return;
|
|
|
@@ -408,181 +288,166 @@ export class MCPServer {
|
|
|
clearInterval(this.ppidWatchdog);
|
|
|
this.ppidWatchdog = null;
|
|
|
}
|
|
|
- // Close all cached cross-project connections first
|
|
|
- this.toolHandler.closeAll();
|
|
|
- // Close the main CodeGraph instance
|
|
|
- if (this.cg) {
|
|
|
- this.cg.close();
|
|
|
- this.cg = null;
|
|
|
+ if (this.daemon) {
|
|
|
+ void this.daemon.stop('stop()');
|
|
|
+ // Daemon.stop calls process.exit; nothing else to do.
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ if (this.session) {
|
|
|
+ this.session.stop();
|
|
|
+ this.session = null;
|
|
|
+ }
|
|
|
+ if (this.engine) {
|
|
|
+ this.engine.stop();
|
|
|
+ this.engine = null;
|
|
|
}
|
|
|
- this.transport.stop();
|
|
|
process.exit(0);
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Handle incoming JSON-RPC messages
|
|
|
- */
|
|
|
- private async handleMessage(message: JsonRpcRequest | JsonRpcNotification): Promise<void> {
|
|
|
- // Check if it's a request (has id) or notification (no id)
|
|
|
- const isRequest = 'id' in message;
|
|
|
-
|
|
|
- switch (message.method) {
|
|
|
- case 'initialize':
|
|
|
- if (isRequest) {
|
|
|
- await this.handleInitialize(message as JsonRpcRequest);
|
|
|
- }
|
|
|
- break;
|
|
|
-
|
|
|
- case 'initialized':
|
|
|
- // Notification that client has finished initialization
|
|
|
- // No action needed - the client is ready
|
|
|
- break;
|
|
|
-
|
|
|
- case 'tools/list':
|
|
|
- if (isRequest) {
|
|
|
- await this.handleToolsList(message as JsonRpcRequest);
|
|
|
- }
|
|
|
- break;
|
|
|
-
|
|
|
- case 'tools/call':
|
|
|
- if (isRequest) {
|
|
|
- await this.handleToolsCall(message as JsonRpcRequest);
|
|
|
- }
|
|
|
- break;
|
|
|
-
|
|
|
- case 'ping':
|
|
|
- if (isRequest) {
|
|
|
- this.transport.sendResult((message as JsonRpcRequest).id, {});
|
|
|
- }
|
|
|
- break;
|
|
|
-
|
|
|
- default:
|
|
|
- if (isRequest) {
|
|
|
- this.transport.sendError(
|
|
|
- (message as JsonRpcRequest).id,
|
|
|
- ErrorCodes.MethodNotFound,
|
|
|
- `Method not found: ${message.method}`
|
|
|
- );
|
|
|
- }
|
|
|
+ /** Single-process stdio MCP session — the pre-issue-#411 code path. */
|
|
|
+ private async startDirect(reason: string): Promise<void> {
|
|
|
+ if (reason && process.env.CODEGRAPH_MCP_DEBUG) {
|
|
|
+ process.stderr.write(`[CodeGraph MCP] Direct mode: ${reason}.\n`);
|
|
|
}
|
|
|
- }
|
|
|
+ this.engine = new MCPEngine();
|
|
|
+ const transport = new StdioTransport();
|
|
|
+ this.session = new MCPSession(transport, this.engine, {
|
|
|
+ explicitProjectPath: this.projectPath,
|
|
|
+ });
|
|
|
|
|
|
- /**
|
|
|
- * Handle initialize request
|
|
|
- */
|
|
|
- private async handleInitialize(request: JsonRpcRequest): Promise<void> {
|
|
|
- const params = request.params as {
|
|
|
- rootUri?: string;
|
|
|
- workspaceFolders?: Array<{ uri: string; name: string }>;
|
|
|
- capabilities?: { roots?: unknown };
|
|
|
- } | undefined;
|
|
|
-
|
|
|
- // Does the client support the MCP `roots` protocol? If so, and we have no
|
|
|
- // explicit path, we ask it for the workspace root after the handshake
|
|
|
- // instead of falling back to the (frequently wrong) cwd. See issue #196.
|
|
|
- this.clientSupportsRoots = !!params?.capabilities?.roots;
|
|
|
-
|
|
|
- // Explicit project signal, strongest first: a client-provided rootUri /
|
|
|
- // workspaceFolders (LSP-style, non-standard but some clients send it), else
|
|
|
- // the --path the server was launched with. cwd is NOT used here — we defer
|
|
|
- // it so a roots/list answer can win over it.
|
|
|
- let explicitPath: string | null = null;
|
|
|
- if (params?.rootUri) {
|
|
|
- explicitPath = fileUriToPath(params.rootUri);
|
|
|
- } else if (params?.workspaceFolders?.[0]?.uri) {
|
|
|
- explicitPath = fileUriToPath(params.workspaceFolders[0].uri);
|
|
|
- } else if (this.projectPath) {
|
|
|
- explicitPath = this.projectPath;
|
|
|
+ if (this.projectPath) {
|
|
|
+ // Background init so the initialize response stays fast (#172).
|
|
|
+ void this.engine.ensureInitialized(this.projectPath);
|
|
|
}
|
|
|
|
|
|
- // Respond to the handshake BEFORE doing any heavy initialization. Loading
|
|
|
- // the SQLite DB and the tree-sitter WASM runtime can take many seconds on
|
|
|
- // slow filesystems (Docker Desktop VirtioFS on macOS, WSL2). Clients like
|
|
|
- // Claude Code time out the handshake at ~30s, which manifested as
|
|
|
- // "MCP tools never appear" — the child was alive and had received the
|
|
|
- // initialize but was still awaiting initGrammars(). See issue #172.
|
|
|
- //
|
|
|
- // We accept the client's protocol version but respond with our supported
|
|
|
- // version. The `instructions` field is surfaced by MCP clients in the
|
|
|
- // agent's system prompt automatically — it's the right place for the
|
|
|
- // universal tool-selection playbook, ahead of individual tool descriptions.
|
|
|
- this.transport.sendResult(request.id, {
|
|
|
- protocolVersion: PROTOCOL_VERSION,
|
|
|
- capabilities: {
|
|
|
- tools: {},
|
|
|
- },
|
|
|
- serverInfo: SERVER_INFO,
|
|
|
- instructions: SERVER_INSTRUCTIONS,
|
|
|
- });
|
|
|
+ this.session.start();
|
|
|
|
|
|
- // If we know the project dir, kick off init in the background now. Tool
|
|
|
- // calls that arrive before it finishes fall through to `retryInitIfNeeded`,
|
|
|
- // which waits for this promise rather than racing it with a second open.
|
|
|
- //
|
|
|
- // If we DON'T know it (no rootUri, no --path), defer: the first tool call
|
|
|
- // resolves it via roots/list (when the client supports roots) or cwd. This
|
|
|
- // is the fix for issue #196 — clients that launch the server outside the
|
|
|
- // project and don't pass a rootUri previously got a misleading "not
|
|
|
- // initialized" error on every call.
|
|
|
- if (explicitPath) {
|
|
|
- this.initPromise = this.tryInitializeDefault(explicitPath).finally(() => {
|
|
|
- this.initPromise = null;
|
|
|
- });
|
|
|
- }
|
|
|
+ // Detect parent-process death — same logic as pre-refactor. When stdin
|
|
|
+ // closes we go through StdioTransport's `process.exit(0)` already, but
|
|
|
+ // SIGKILL of the parent doesn't reliably close stdin on Linux (#277).
|
|
|
+ process.stdin.on('end', () => this.stop());
|
|
|
+ process.stdin.on('close', () => this.stop());
|
|
|
+
|
|
|
+ this.mode = 'direct';
|
|
|
+ this.installSignalHandlers();
|
|
|
+ this.installPpidWatchdog();
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Handle tools/list request
|
|
|
+ * Run as the detached shared daemon (process spawned with
|
|
|
+ * `CODEGRAPH_DAEMON_INTERNAL=1`). Arbitrate the O_EXCL lock, then either
|
|
|
+ * become the daemon (bind the socket, serve forever) or — if a live daemon
|
|
|
+ * already holds the lock — exit so we don't leak a redundant process.
|
|
|
+ *
|
|
|
+ * No PPID watchdog and no stdin handlers: the daemon is detached on purpose
|
|
|
+ * and reaps itself via client-refcount + idle timeout (see {@link Daemon}).
|
|
|
*/
|
|
|
- private async handleToolsList(request: JsonRpcRequest): Promise<void> {
|
|
|
- await this.retryInitIfNeeded();
|
|
|
- this.transport.sendResult(request.id, {
|
|
|
- tools: this.toolHandler.getTools(),
|
|
|
- });
|
|
|
+ private async startDaemonProcess(): Promise<void> {
|
|
|
+ const root = resolveDaemonRoot(this.projectPath) ?? this.projectPath ?? process.cwd();
|
|
|
+ for (let attempt = 0; attempt < TAKEOVER_MAX_RETRIES; attempt++) {
|
|
|
+ const lock = tryAcquireDaemonLock(root);
|
|
|
+
|
|
|
+ if (lock.kind === 'acquired') {
|
|
|
+ const daemon = new Daemon(root);
|
|
|
+ await daemon.start();
|
|
|
+ this.daemon = daemon;
|
|
|
+ this.mode = 'daemon';
|
|
|
+ return; // the net.Server keeps the process alive
|
|
|
+ }
|
|
|
+
|
|
|
+ // Taken. If the holder is alive, another daemon already serves (or is
|
|
|
+ // binding) — we're redundant; exit cleanly so the launcher proxies to it.
|
|
|
+ const existing = lock.existing;
|
|
|
+ if (existing && existing.pid > 0 && isProcessAlive(existing.pid)) {
|
|
|
+ process.stderr.write(
|
|
|
+ `[CodeGraph daemon] Another daemon (pid ${existing.pid}) already holds the lock; exiting.\n`
|
|
|
+ );
|
|
|
+ process.exit(0);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Holder is dead (or the record is unreadable) — clear it (pid-verified,
|
|
|
+ // so we never delete a live daemon's lock) and retry the acquire.
|
|
|
+ clearStaleDaemonLock(lock.pidPath, existing?.pid);
|
|
|
+ await sleep(TAKEOVER_RETRY_DELAY_MS);
|
|
|
+ }
|
|
|
+
|
|
|
+ process.stderr.write('[CodeGraph daemon] Could not acquire the daemon lock; exiting.\n');
|
|
|
+ process.exit(0);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Handle tools/call request
|
|
|
+ * Become a proxy to the shared daemon, spawning the daemon first if none is
|
|
|
+ * reachable. Returns 'proxy' once the proxied session has run to completion
|
|
|
+ * (the host disconnected), or 'fallback' if the caller should run in-process.
|
|
|
*/
|
|
|
- private async handleToolsCall(request: JsonRpcRequest): Promise<void> {
|
|
|
- const params = request.params as {
|
|
|
- name: string;
|
|
|
- arguments?: Record<string, unknown>;
|
|
|
- };
|
|
|
-
|
|
|
- if (!params || !params.name) {
|
|
|
- this.transport.sendError(
|
|
|
- request.id,
|
|
|
- ErrorCodes.InvalidParams,
|
|
|
- 'Missing tool name'
|
|
|
- );
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- const toolName = params.name;
|
|
|
- const toolArgs = params.arguments || {};
|
|
|
-
|
|
|
- // Validate tool exists
|
|
|
- const tool = tools.find(t => t.name === toolName);
|
|
|
- if (!tool) {
|
|
|
- this.transport.sendError(
|
|
|
- request.id,
|
|
|
- ErrorCodes.InvalidParams,
|
|
|
- `Unknown tool: ${toolName}`
|
|
|
- );
|
|
|
- return;
|
|
|
+ private async connectOrSpawnDaemon(root: string): Promise<'proxy' | 'fallback'> {
|
|
|
+ const socketPath = getDaemonSocketPath(root);
|
|
|
+
|
|
|
+ // Fast path: a daemon may already be listening. On success runProxy pipes
|
|
|
+ // stdio until the host disconnects, so a 'proxied' outcome means this
|
|
|
+ // process has finished its entire job.
|
|
|
+ let probe = await runProxy(socketPath);
|
|
|
+ if (probe.outcome === 'proxied') return 'proxy';
|
|
|
+ if (probe.reason === 'version mismatch') return 'fallback';
|
|
|
+
|
|
|
+ // No reachable daemon — spawn one (detached) and wait for it to bind.
|
|
|
+ spawnDetachedDaemon(root);
|
|
|
+
|
|
|
+ for (let attempt = 0; attempt < DAEMON_CONNECT_MAX_RETRIES; attempt++) {
|
|
|
+ await sleep(DAEMON_CONNECT_RETRY_DELAY_MS);
|
|
|
+ probe = await runProxy(socketPath);
|
|
|
+ if (probe.outcome === 'proxied') return 'proxy';
|
|
|
+ if (probe.reason === 'version mismatch') return 'fallback';
|
|
|
}
|
|
|
|
|
|
- // If the default project isn't initialized yet, retry in case it was
|
|
|
- // initialized after the MCP server started (e.g. user ran codegraph init)
|
|
|
- await this.retryInitIfNeeded();
|
|
|
+ // Daemon never came up in time — run in-process so the user is never blocked.
|
|
|
+ return 'fallback';
|
|
|
+ }
|
|
|
|
|
|
- const result = await this.toolHandler.execute(toolName, toolArgs);
|
|
|
+ /** Standard SIGINT/SIGTERM handlers that route to our `stop()` (direct mode). */
|
|
|
+ private installSignalHandlers(): void {
|
|
|
+ process.on('SIGINT', () => this.stop());
|
|
|
+ process.on('SIGTERM', () => this.stop());
|
|
|
+ }
|
|
|
|
|
|
- this.transport.sendResult(request.id, result);
|
|
|
+ /**
|
|
|
+ * PPID watchdog (#277) — direct mode only. Daemon mode is detached on purpose
|
|
|
+ * and reaps via idle timeout; proxy mode installs its own watchdog inside
|
|
|
+ * {@link runProxy}. So this only ever runs for an in-process direct session.
|
|
|
+ */
|
|
|
+ private installPpidWatchdog(): void {
|
|
|
+ if (this.mode !== 'direct') return;
|
|
|
+ const pollMs = parsePpidPollMs(process.env.CODEGRAPH_PPID_POLL_MS);
|
|
|
+ if (pollMs <= 0) return;
|
|
|
+ this.ppidWatchdog = setInterval(() => {
|
|
|
+ const current = process.ppid;
|
|
|
+ const ppidChanged = current !== this.originalPpid;
|
|
|
+ const hostGone = this.hostPpid !== null && !isProcessAlive(this.hostPpid);
|
|
|
+ if (ppidChanged || hostGone) {
|
|
|
+ const reason = ppidChanged
|
|
|
+ ? `ppid ${this.originalPpid} -> ${current}`
|
|
|
+ : `host pid ${this.hostPpid} exited`;
|
|
|
+ process.stderr.write(
|
|
|
+ `[CodeGraph MCP] Parent process exited (${reason}); shutting down.\n`
|
|
|
+ );
|
|
|
+ this.stop();
|
|
|
+ }
|
|
|
+ }, pollMs);
|
|
|
+ this.ppidWatchdog.unref();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+function sleep(ms: number): Promise<void> {
|
|
|
+ // Deliberately NOT unref'd. During the daemon connect/takeover retry loop we
|
|
|
+ // may be between processes — no socket bound yet, no transport, no listener
|
|
|
+ // pinning the event loop. An unref'd timer would let Node drain the loop and
|
|
|
+ // exit silently before we get a chance to try again.
|
|
|
+ return new Promise((resolve) => { setTimeout(resolve, ms); });
|
|
|
+}
|
|
|
+
|
|
|
// Export for use in CLI
|
|
|
export { StdioTransport } from './transport';
|
|
|
export { tools, ToolHandler } from './tools';
|
|
|
+// Surface a few daemon-mode bits for tests + diagnostics.
|
|
|
+export { Daemon } from './daemon';
|
|
|
+export { CodeGraphPackageVersion } from './version';
|