Sfoglia il codice sorgente

fix: Stale lock recovery and MCP init retry

Fixes #47 — "database is locked" after crash and MCP "not initialized"
when project IS initialized.

- FileLock: treat locks whose file mtime is older than 10 minutes as stale,
  regardless of PID status, covering cases where the PID was reused or the
  kill-signal check fails
- MCP server: log errors from tryInitializeDefault() to stderr instead of
  silently swallowing them, so transient open failures are diagnosable
- MCP server: retryInitIfNeeded() properly cleans up failed instances
  before retrying, preventing resource leaks
- CLI: add 'codegraph unlock' command for manual lock file removal
Colby McHenry 3 mesi fa
parent
commit
b964d5909a
3 file modificati con 51 aggiunte e 4 eliminazioni
  1. 31 0
      src/bin/codegraph.ts
  2. 10 0
      src/mcp/index.ts
  3. 10 4
      src/utils.ts

+ 31 - 0
src/bin/codegraph.ts

@@ -1036,6 +1036,37 @@ program
     process.exit(0);
   });
 
+/**
+ * codegraph unlock [path]
+ */
+program
+  .command('unlock [path]')
+  .description('Remove a stale lock file that is blocking indexing')
+  .action(async (pathArg: string | undefined) => {
+    const projectPath = resolveProjectPath(pathArg);
+
+    try {
+      if (!isInitialized(projectPath)) {
+        error(`CodeGraph not initialized in ${projectPath}`);
+        return;
+      }
+
+      const lockPath = path.join(getCodeGraphDir(projectPath), 'codegraph.lock');
+
+      if (!fs.existsSync(lockPath)) {
+        info('No lock file found — nothing to do');
+        return;
+      }
+
+      fs.unlinkSync(lockPath);
+      success('Removed lock file. You can now run indexing again.');
+    } catch (err) {
+      captureException(err);
+      error(`Failed to remove lock: ${err instanceof Error ? err.message : String(err)}`);
+      process.exit(1);
+    }
+  });
+
 /**
  * codegraph install
  */

+ 10 - 0
src/mcp/index.ts

@@ -120,13 +120,18 @@ export class MCPServer {
       this.cg = await CodeGraph.open(resolvedRoot);
       this.toolHandler.setDefaultCodeGraph(this.cg);
     } catch (err) {
+      // Log the error so transient failures are diagnosable (see issue #47)
       captureException(err);
+      const msg = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`[CodeGraph MCP] Failed to open project at ${resolvedRoot}: ${msg}\n`);
     }
   }
 
   /**
    * Retry initialization of the default project if it previously failed.
    * Called lazily on tool calls that need the default project.
+   * Re-walks parent directories each time so it picks up projects
+   * initialized after the MCP server started.
    */
   private retryInitIfNeeded(): void {
     // Already initialized successfully
@@ -138,6 +143,11 @@ export class MCPServer {
     if (!resolvedRoot) return;
 
     try {
+      // Close any previously failed instance to avoid leaking resources
+      if (this.cg) {
+        try { this.cg.close(); } catch { /* ignore */ }
+        this.cg = null;
+      }
       this.cg = CodeGraph.openSync(resolvedRoot);
       this.projectPath = resolvedRoot;
       this.toolHandler.setDefaultCodeGraph(this.cg);

+ 10 - 4
src/utils.ts

@@ -184,6 +184,9 @@ export class FileLock {
   private lockPath: string;
   private held = false;
 
+  /** Locks older than this are considered stale regardless of PID status */
+  private static readonly STALE_TIMEOUT_MS = 10 * 60 * 1000; // 10 minutes
+
   constructor(lockPath: string) {
     this.lockPath = lockPath;
   }
@@ -197,15 +200,18 @@ export class FileLock {
       try {
         const content = fs.readFileSync(this.lockPath, 'utf-8').trim();
         const pid = parseInt(content, 10);
+        const stat = fs.statSync(this.lockPath);
+        const lockAge = Date.now() - stat.mtimeMs;
 
-        if (!isNaN(pid) && this.isProcessAlive(pid)) {
+        // Treat locks older than the timeout as stale, regardless of PID
+        if (lockAge < FileLock.STALE_TIMEOUT_MS && !isNaN(pid) && this.isProcessAlive(pid)) {
           throw new Error(
             `CodeGraph database is locked by another process (PID ${pid}). ` +
-            `If this is stale, delete ${this.lockPath}`
+            `If this is stale, run 'codegraph unlock' or delete ${this.lockPath}`
           );
         }
 
-        // Stale lock - remove it
+        // Stale lock (dead process or timed out) - remove it
         fs.unlinkSync(this.lockPath);
       } catch (err) {
         if (err instanceof Error && err.message.includes('locked by another')) {
@@ -225,7 +231,7 @@ export class FileLock {
         // Race condition: another process grabbed the lock between our check and write
         throw new Error(
           'CodeGraph database is locked by another process. ' +
-          `If this is stale, delete ${this.lockPath}`
+          `If this is stale, run 'codegraph unlock' or delete ${this.lockPath}`
         );
       }
       throw err;