2 months ago · 1271ad9161
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -45,7 +45,7 @@ const PARSE_TIMEOUT_MS = 10_000;
 
															  * V8 isolate by terminating the worker thread and spawning a fresh one.
														
 
															  * This interval balances memory usage against the cost of reloading grammars.
														
 
															  */
														
 
															-const WORKER_RECYCLE_INTERVAL = 500;
														
 
															+const WORKER_RECYCLE_INTERVAL = 250;
														
 
															 /**
														
 
															  * Progress callback for indexing operations
														
@@ -521,8 +521,12 @@ export class ExtractionOrchestrator {
 
															           logWarn('Parse worker exited unexpectedly', { code });
														
 
															           rejectAllPending(`Worker exited with code ${code}`);
														
 
															         }
														
 
															-        // Clear reference so we know to respawn
														
 
															-        if (parseWorker === w) parseWorker = null;
														
 
															+        // Clear reference so we know to respawn, reset count so
														
 
															+        // the fresh worker gets a full cycle before recycling.
														
 
															+        if (parseWorker === w) {
														
 
															+          parseWorker = null;
														
 
															+          workerParseCount = 0;
														
 
															+        }
														
 
															       });
														
 
															     }
														
@@ -580,17 +584,20 @@ export class ExtractionOrchestrator {
 
															       const id = nextId++;
														
 
															       workerParseCount++;
														
 
															+      // Scale timeout for large files: base 10s + 10s per 100KB
														
 
															+      const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
														
 
															+
														
 
															       return new Promise<ExtractionResult>((resolve, reject) => {
														
 
															         const timer = setTimeout(() => {
														
 
															           pendingParses.delete(id);
														
 
															-          log(`TIMEOUT: ${filePath} exceeded ${PARSE_TIMEOUT_MS}ms — killing worker`);
														
 
															+          log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
														
 
															           // Reject FIRST — worker.terminate() can hang if WASM is stuck
														
 
															           parseWorker = null;
														
 
															           workerParseCount = 0;
														
 
															-          reject(new Error(`Parse timed out after ${PARSE_TIMEOUT_MS}ms`));
														
 
															+          reject(new Error(`Parse timed out after ${timeoutMs}ms`));
														
 
															           // Fire-and-forget: kill the stuck worker in the background
														
 
															           worker.terminate().catch(() => {});
														
 
															-        }, PARSE_TIMEOUT_MS);
														
 
															+        }, timeoutMs);
														
 
															         pendingParses.set(id, { resolve, reject, timer });
														
 
															         worker.postMessage({ type: 'parse', id, filePath, content });
														
@@ -712,6 +719,58 @@ export class ExtractionOrchestrator {
 
															       }
														
 
															     }
														
 
															+    // Retry pass: files that failed due to WASM memory corruption may succeed
														
 
															+    // on a fresh worker with a clean heap. Collect retryable failures, recycle
														
 
															+    // the worker, and try each one individually.
														
 
															+    const retryableErrors = errors.filter(
														
 
															+      (e) => e.code === 'parse_error' && e.filePath &&
														
 
															+        (e.message.includes('Worker exited') || e.message.includes('memory access out of bounds'))
														
 
															+    );
														
 
															+
														
 
															+    if (retryableErrors.length > 0 && WorkerClass) {
														
 
															+      log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
														
 
															+
														
 
															+      // Force a fresh worker
														
 
															+      recycleWorker();
														
 
															+
														
 
															+      for (const errEntry of retryableErrors) {
														
 
															+        const filePath = errEntry.filePath!;
														
 
															+        if (signal?.aborted) break;
														
 
															+
														
 
															+        let content: string;
														
 
															+        try {
														
 
															+          const fullPath = validatePathWithinRoot(this.rootDir, filePath);
														
 
															+          if (!fullPath) continue;
														
 
															+          content = await fsp.readFile(fullPath, 'utf-8');
														
 
															+        } catch {
														
 
															+          continue; // Skip files we can't read
														
 
															+        }
														
 
															+
														
 
															+        let result: ExtractionResult;
														
 
															+        try {
														
 
															+          result = await requestParse(filePath, content);
														
 
															+        } catch {
														
 
															+          continue; // Still failing — leave as errored
														
 
															+        }
														
 
															+
														
 
															+        if (result.nodes.length > 0 || result.errors.length === 0) {
														
 
															+          // Success on retry — store result and fix counts
														
 
															+          const language = detectLanguage(filePath);
														
 
															+          const stats = await fsp.stat(path.join(this.rootDir, filePath));
														
 
															+          this.storeExtractionResult(filePath, content, language, stats, result);
														
 
															+
														
 
															+          // Remove the original error and update counts
														
 
															+          const idx = errors.indexOf(errEntry);
														
 
															+          if (idx >= 0) errors.splice(idx, 1);
														
 
															+          filesErrored--;
														
 
															+          filesIndexed++;
														
 
															+          totalNodes += result.nodes.length;
														
 
															+          totalEdges += result.edges.length;
														
 
															+          log(`Retry OK: ${filePath} (${result.nodes.length} nodes)`);
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+
														
 
															     // Shut down parse worker and clear any pending timers
														
 
															     rejectAllPending('Indexing complete');
														
 
															     if (parseWorker) {
														
--- a/src/extraction/parse-worker.ts
+++ b/src/extraction/parse-worker.ts
@@ -33,6 +33,14 @@ parentPort!.on('message', async (msg: { type: string; id?: number; filePath?: st
 
															       parentPort!.postMessage({ type: 'parse-result', id, result });
														
 
															     } catch (err) {
														
 
															       const message = err instanceof Error ? err.message : String(err);
														
 
															+
														
 
															+      // WASM memory errors leave the module in a corrupted state — all
														
 
															+      // subsequent parses would also fail (cascading failures). Crash the
														
 
															+      // worker so the main thread spawns a fresh one with a clean heap.
														
 
															+      if (message.includes('memory access out of bounds') || message.includes('out of memory')) {
														
 
															+        process.exit(1);
														
 
															+      }
														
 
															+
														
 
															       parentPort!.postMessage({
														
 
															         type: 'parse-result',
														
 
															         id,
														
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -172,8 +172,17 @@ export class TreeSitterExtractor {
 
															       this.visitNode(this.tree.rootNode);
														
 
															       this.nodeStack.pop();
														
 
															     } catch (error) {
														
 
															+      const msg = error instanceof Error ? error.message : String(error);
														
 
															+
														
 
															+      // WASM memory errors leave the module in a corrupted state — all subsequent
														
 
															+      // parses would also fail. Re-throw so the worker can detect and crash,
														
 
															+      // forcing a clean restart with a fresh heap.
														
 
															+      if (msg.includes('memory access out of bounds') || msg.includes('out of memory')) {
														
 
															+        throw error;
														
 
															+      }
														
 
															+
														
 
															       this.errors.push({
														
 
															-        message: `Parse error: ${error instanceof Error ? error.message : String(error)}`,
														
 
															+        message: `Parse error: ${msg}`,
														
 
															         filePath: this.filePath,
														
 
															         severity: 'error',
														
 
															         code: 'parse_error',