Jelajahi Sumber

fix: Improve CLI progress display and prevent tree-sitter WASM memory crashes

Replaces fixed-width padding with terminal escape sequences for proper progress line clearing across different terminal widths. Adds periodic parser reset every 5000 parses per language to prevent WASM heap fragmentation that causes "memory access out of bounds" crashes in large repositories. Includes filename truncation to fit available terminal width.
Colby McHenry 2 bulan lalu
induk
melakukan
3a44d5c4d1
5 mengubah file dengan 64 tambahan dan 10 penghapusan
  1. 25 6
      src/bin/codegraph.ts
  2. 18 0
      src/extraction/grammars.ts
  3. 18 1
      src/extraction/index.ts
  4. 2 2
      src/installer/index.ts
  5. 1 1
      src/vectors/embedder.ts

+ 25 - 6
src/bin/codegraph.ts

@@ -180,6 +180,16 @@ function progressBar(current: number, total: number, width: number = 30): string
   return `${bar} ${percentStr}`;
   return `${bar} ${percentStr}`;
 }
 }
 
 
+/**
+ * Truncate a string to fit a given visible width, adding ellipsis if needed.
+ *
+ * The tail of the string is kept and the ellipsis is placed at the front.
+ * Width is measured with `str.length` (UTF-16 code units), not terminal
+ * display columns.
+ *
+ * @param str - Text to shorten.
+ * @param maxWidth - Maximum length of the returned string.
+ * @returns `''` when `maxWidth <= 0`; `str` unchanged when its length is at
+ *   most `maxWidth`; the first character of `str` (no ellipsis) when
+ *   `maxWidth <= 1`; otherwise `'\u2026'` followed by the last
+ *   `maxWidth - 1` characters of `str`.
+ */
+function truncate(str: string, maxWidth: number): string {
+  if (maxWidth <= 0) return '';
+  if (str.length <= maxWidth) return str;
+  if (maxWidth <= 1) return str.charAt(0);
+  return '\u2026' + str.slice(-(maxWidth - 1));
+}
+
 /**
 /**
  * Print a progress update (overwrites current line)
  * Print a progress update (overwrites current line)
  */
  */
@@ -192,15 +202,24 @@ function printProgress(progress: IndexProgress): void {
   };
   };
 
 
   const phaseName = phaseNames[progress.phase] || progress.phase;
   const phaseName = phaseNames[progress.phase] || progress.phase;
-  const file = progress.currentFile ? chalk.dim(` ${progress.currentFile}`) : '';
+  const cols = process.stdout.columns || 80;
 
 
   if (progress.total > 0) {
   if (progress.total > 0) {
     const bar = progressBar(progress.current, progress.total);
     const bar = progressBar(progress.current, progress.total);
-    process.stdout.write(`\r${chalk.cyan(phaseName)}: ${bar}${file}`.padEnd(100));
+    // "Phase: [bar] XX% filename" — calculate space left for filename
+    // phaseName + ": " = phaseName.length + 2, bar visible = 30 + 1 + 4 = 35, space before file = 1
+    const prefixWidth = phaseName.length + 2 + 35;
+    const fileMaxWidth = cols - prefixWidth - 1;
+    const file = progress.currentFile ? chalk.dim(` ${truncate(progress.currentFile, fileMaxWidth)}`) : '';
+    process.stdout.write(`\r${chalk.cyan(phaseName)}: ${bar}${file}\x1b[K`);
   } else {
   } else {
     // No known total (e.g. scanning) — show a running count
     // No known total (e.g. scanning) — show a running count
+    const countStr = progress.current > 0 ? ` ${formatNumber(progress.current)} found` : '';
+    const prefixWidth = phaseName.length + 1 + countStr.length;
+    const fileMaxWidth = cols - prefixWidth - 1;
+    const file = progress.currentFile ? chalk.dim(` ${truncate(progress.currentFile, fileMaxWidth)}`) : '';
     const count = progress.current > 0 ? ` ${chalk.green(formatNumber(progress.current))} found` : '';
     const count = progress.current > 0 ? ` ${chalk.green(formatNumber(progress.current))} found` : '';
-    process.stdout.write(`\r${chalk.cyan(phaseName)}:${count}${file}`.padEnd(100));
+    process.stdout.write(`\r${chalk.cyan(phaseName)}:${count}${file}\x1b[K`);
   }
   }
 }
 }
 
 
@@ -390,7 +409,7 @@ program
         });
         });
 
 
         // Clear progress line
         // Clear progress line
-        process.stdout.write('\r' + ' '.repeat(100) + '\r');
+        process.stdout.write('\r\x1b[K');
 
 
         printIndexResult(result, projectPath);
         printIndexResult(result, projectPath);
       } else {
       } else {
@@ -488,7 +507,7 @@ program
 
 
       // Clear progress line
       // Clear progress line
       if (!options.quiet) {
       if (!options.quiet) {
-        process.stdout.write('\r' + ' '.repeat(100) + '\r');
+        process.stdout.write('\r\x1b[K');
       }
       }
 
 
       if (!options.quiet) {
       if (!options.quiet) {
@@ -533,7 +552,7 @@ program
 
 
       // Clear progress line
       // Clear progress line
       if (!options.quiet) {
       if (!options.quiet) {
-        process.stdout.write('\r' + ' '.repeat(100) + '\r');
+        process.stdout.write('\r\x1b[K');
       }
       }
 
 
       const totalChanges = result.filesAdded + result.filesModified + result.filesRemoved;
       const totalChanges = result.filesAdded + result.filesModified + result.filesRemoved;

+ 18 - 0
src/extraction/grammars.ts

@@ -205,10 +205,28 @@ export function getSupportedLanguages(): Language[] {
   return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'liquid'];
   return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'liquid'];
 }
 }
 
 
+/**
+ * Reset the cached parser for a language to reclaim WASM heap memory.
+ * The tree-sitter WASM runtime accumulates fragmented memory over thousands
+ * of parses. Deleting and recreating the Parser instance forces the WASM
+ * heap to reset, preventing "memory access out of bounds" crashes in
+ * large repos.
+ *
+ * Does nothing when `parserCache` holds no parser for `language`. Otherwise
+ * calls `delete()` on the cached parser and removes its entry from
+ * `parserCache`. This function does not create the replacement parser
+ * itself; presumably the next parser lookup for the language does
+ * (not visible here; confirm against the parser accessor).
+ *
+ * @param language - Language whose cached parser should be discarded.
+ */
+export function resetParser(language: Language): void {
+  const old = parserCache.get(language);
+  if (old) {
+    old.delete();
+    parserCache.delete(language);
+  }
+}
+
 /**
 /**
  * Clear parser/grammar caches (useful for testing)
  * Clear parser/grammar caches (useful for testing)
  */
  */
 export function clearParserCache(): void {
 export function clearParserCache(): void {
+  for (const parser of parserCache.values()) {
+    parser.delete();
+  }
   parserCache.clear();
   parserCache.clear();
   // Note: languageCache is NOT cleared — WASM languages persist.
   // Note: languageCache is NOT cleared — WASM languages persist.
   // To fully re-init, set parserInitialized = false and call initGrammars() again.
   // To fully re-init, set parserInitialized = false and call initGrammars() again.

+ 18 - 1
src/extraction/index.ts

@@ -18,7 +18,7 @@ import {
 } from '../types';
 } from '../types';
 import { QueryBuilder } from '../db/queries';
 import { QueryBuilder } from '../db/queries';
 import { extractFromSource } from './tree-sitter';
 import { extractFromSource } from './tree-sitter';
-import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
+import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages, resetParser } from './grammars';
 import { logDebug, logWarn } from '../errors';
 import { logDebug, logWarn } from '../errors';
 import { validatePathWithinRoot, normalizePath } from '../utils';
 import { validatePathWithinRoot, normalizePath } from '../utils';
 import picomatch from 'picomatch';
 import picomatch from 'picomatch';
@@ -29,6 +29,12 @@ import picomatch from 'picomatch';
  */
  */
 const FILE_IO_BATCH_SIZE = 10;
 const FILE_IO_BATCH_SIZE = 10;
 
 
+/**
+ * Reset tree-sitter parser after this many parses per language to reclaim
+ * WASM heap memory and prevent "memory access out of bounds" crashes.
+ */
+const PARSER_RESET_INTERVAL = 5000;
+
 /**
 /**
  * Progress callback for indexing operations
  * Progress callback for indexing operations
  */
  */
@@ -412,6 +418,7 @@ export class ExtractionOrchestrator {
     // Phase 2: Parse files (read in parallel batches, parse/store sequentially)
     // Phase 2: Parse files (read in parallel batches, parse/store sequentially)
     const total = files.length;
     const total = files.length;
     let processed = 0;
     let processed = 0;
+    const parseCounts = new Map<Language, number>(); // track parses per language for WASM reset
 
 
     for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
     for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
       if (signal?.aborted) {
       if (signal?.aborted) {
@@ -483,6 +490,16 @@ export class ExtractionOrchestrator {
 
 
         const result = await this.indexFileWithContent(filePath, content, stats);
         const result = await this.indexFileWithContent(filePath, content, stats);
 
 
+        // Periodically reset the parser to reclaim WASM heap memory.
+        // Without this, tree-sitter's WASM runtime fragments its heap
+        // across thousands of parses and eventually crashes.
+        const lang = detectLanguage(filePath);
+        const count = (parseCounts.get(lang) ?? 0) + 1;
+        parseCounts.set(lang, count);
+        if (count % PARSER_RESET_INTERVAL === 0) {
+          resetParser(lang);
+        }
+
         if (result.errors.length > 0) {
         if (result.errors.length > 0) {
           // Annotate errors with file path if not already set
           // Annotate errors with file path if not already set
           for (const err of result.errors) {
           for (const err of result.errors) {

+ 2 - 2
src/installer/index.ts

@@ -160,12 +160,12 @@ async function initializeLocalProject(): Promise<void> {
       };
       };
       const phaseName = phaseNames[progress.phase] || progress.phase;
       const phaseName = phaseNames[progress.phase] || progress.phase;
       const percent = progress.total > 0 ? Math.round((progress.current / progress.total) * 100) : 0;
       const percent = progress.total > 0 ? Math.round((progress.current / progress.total) * 100) : 0;
-      process.stdout.write(`\r  ${chalk.dim(phaseName)}... ${percent}%   `);
+      process.stdout.write(`\r  ${chalk.dim(phaseName)}... ${percent}%\x1b[K`);
     },
     },
   });
   });
 
 
   // Clear progress line
   // Clear progress line
-  process.stdout.write('\r' + ' '.repeat(50) + '\r');
+  process.stdout.write('\r\x1b[K');
 
 
   if (result.filesErrored > 0) {
   if (result.filesErrored > 0) {
     success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} files failed, ${formatNumber(result.nodesCreated)} symbols)`);
     success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} files failed, ${formatNumber(result.nodesCreated)} symbols)`);

+ 1 - 1
src/vectors/embedder.ts

@@ -135,7 +135,7 @@ export class TextEmbedder {
         ? (progress: { status: string; file?: string; progress?: number }) => {
         ? (progress: { status: string; file?: string; progress?: number }) => {
             if (progress.status === 'progress' && progress.file && progress.progress) {
             if (progress.status === 'progress' && progress.file && progress.progress) {
               const pct = Math.round(progress.progress);
               const pct = Math.round(progress.progress);
-              process.stdout.write(`\rDownloading ${progress.file}: ${pct}%`);
+              process.stdout.write(`\rDownloading ${progress.file}: ${pct}%\x1b[K`);
             } else if (progress.status === 'done') {
             } else if (progress.status === 'done') {
               process.stdout.write('\n');
               process.stdout.write('\n');
             }
             }