Kaynağa Gözat

Fix performance issues.

Colby McHenry 4 ay önce
ebeveyn
işleme
ce504c642a
6 değiştirilmiş dosya ile 387 ekleme ve 74 silme
  1. 109 0
      __tests__/sync.test.ts
  2. 2 2
      package-lock.json
  3. 1 1
      package.json
  4. 24 0
      src/db/queries.ts
  5. 245 68
      src/extraction/index.ts
  6. 6 3
      src/index.ts

+ 109 - 0
__tests__/sync.test.ts

@@ -10,6 +10,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
+import { execFileSync } from 'child_process';
 import CodeGraph from '../src/index';
 
 describe('Sync Module', () => {
@@ -150,4 +151,112 @@ describe('Sync Module', () => {
       });
     });
   });
+
+  describe('Git-based sync', () => {
+    let testDir: string;
+    let cg: CodeGraph;
+
+    function git(...args: string[]) {
+      execFileSync('git', args, { cwd: testDir, stdio: 'pipe' });
+    }
+
+    beforeEach(async () => {
+      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-git-sync-'));
+
+      // Initialize a git repo with an initial commit
+      git('init');
+      git('config', 'user.email', 'test@test.com');
+      git('config', 'user.name', 'Test');
+
+      const srcDir = path.join(testDir, 'src');
+      fs.mkdirSync(srcDir);
+      fs.writeFileSync(
+        path.join(srcDir, 'index.ts'),
+        `export function hello() { return 'world'; }`
+      );
+
+      git('add', '-A');
+      git('commit', '-m', 'initial');
+
+      // Initialize CodeGraph and index
+      cg = CodeGraph.initSync(testDir, {
+        config: {
+          include: ['**/*.ts'],
+          exclude: [],
+        },
+      });
+      await cg.indexAll();
+    });
+
+    afterEach(() => {
+      if (cg) {
+        cg.destroy();
+      }
+      if (fs.existsSync(testDir)) {
+        fs.rmSync(testDir, { recursive: true, force: true });
+      }
+    });
+
+    it('should detect modified files via git', async () => {
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'index.ts'),
+        `export function hello() { return 'modified'; }`
+      );
+
+      const result = await cg.sync();
+
+      expect(result.filesModified).toBe(1);
+      expect(result.changedFilePaths).toContain('src/index.ts');
+    });
+
+    it('should detect new untracked files via git', async () => {
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'new.ts'),
+        `export function newFunc() { return 42; }`
+      );
+
+      const result = await cg.sync();
+
+      expect(result.filesAdded).toBe(1);
+      expect(result.changedFilePaths).toContain('src/new.ts');
+
+      // Verify the function was indexed
+      const nodes = cg.searchNodes('newFunc');
+      expect(nodes.length).toBeGreaterThan(0);
+    });
+
+    it('should detect deleted files via git', async () => {
+      fs.unlinkSync(path.join(testDir, 'src', 'index.ts'));
+
+      const result = await cg.sync();
+
+      expect(result.filesRemoved).toBe(1);
+
+      // Verify function is gone
+      const nodes = cg.searchNodes('hello');
+      expect(nodes.length).toBe(0);
+    });
+
+    it('should skip files not matching config', async () => {
+      // Create a .js file which doesn't match **/*.ts
+      fs.writeFileSync(
+        path.join(testDir, 'src', 'ignored.js'),
+        `function ignored() {}`
+      );
+
+      const result = await cg.sync();
+
+      expect(result.filesAdded).toBe(0);
+      expect(result.filesModified).toBe(0);
+    });
+
+    it('should report no changes on clean working tree', async () => {
+      const result = await cg.sync();
+
+      expect(result.filesAdded).toBe(0);
+      expect(result.filesModified).toBe(0);
+      expect(result.filesRemoved).toBe(0);
+      expect(result.changedFilePaths).toBeUndefined();
+    });
+  });
 });

+ 2 - 2
package-lock.json

@@ -1,12 +1,12 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@colbymchenry/codegraph",
-      "version": "0.5.1",
+      "version": "0.5.2",
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "description": "Supercharge Claude Code with semantic code intelligence. 30% fewer tokens, 25% fewer tool calls, 100% local.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

+ 24 - 0
src/db/queries.ts

@@ -896,6 +896,30 @@ export class QueryBuilder {
     }));
   }
 
+  /**
+   * Get unresolved references scoped to specific file paths.
+   * Intended to be served by the idx_unresolved_file_path index.
+   *
+   * The paths are de-duplicated and queried in fixed-size chunks so that a
+   * very large changed-file set cannot exceed SQLite's limit on bound
+   * parameters per statement (which would make the query throw).
+   *
+   * @param filePaths - File paths whose unresolved references should be returned.
+   * @returns The matching unresolved references; empty when `filePaths` is empty.
+   */
+  getUnresolvedReferencesByFiles(filePaths: string[]): UnresolvedReference[] {
+    if (filePaths.length === 0) return [];
+
+    // Conservative bound: older SQLite builds cap bound parameters at 999.
+    const CHUNK_SIZE = 500;
+    // De-duplicate first; otherwise a path landing in two chunks would
+    // return its rows twice.
+    const uniquePaths = [...new Set(filePaths)];
+    const rows: UnresolvedRefRow[] = [];
+
+    for (let start = 0; start < uniquePaths.length; start += CHUNK_SIZE) {
+      const chunk = uniquePaths.slice(start, start + CHUNK_SIZE);
+      const placeholders = chunk.map(() => '?').join(',');
+      const chunkRows = this.db
+        .prepare(`SELECT * FROM unresolved_refs WHERE file_path IN (${placeholders})`)
+        .all(...chunk) as UnresolvedRefRow[];
+      for (const row of chunkRows) {
+        rows.push(row);
+      }
+    }
+
+    return rows.map((row) => ({
+      fromNodeId: row.from_node_id,
+      referenceName: row.reference_name,
+      referenceKind: row.reference_kind as EdgeKind,
+      line: row.line,
+      column: row.col,
+      candidates: row.candidates ? safeJsonParse(row.candidates, undefined) : undefined,
+      filePath: row.file_path,
+      language: row.language as Language,
+    }));
+  }
+
   /**
    * Delete all unresolved references (after resolution)
    */

+ 245 - 68
src/extraction/index.ts

@@ -63,6 +63,7 @@ export interface SyncResult {
   filesRemoved: number;
   nodesUpdated: number;
   durationMs: number;
+  changedFilePaths?: string[];
 }
 
 /**
@@ -105,27 +106,80 @@ export function shouldIncludeFile(
 }
 
 /**
- * Get directories ignored by .gitignore using git ls-files.
- * Returns a Set of normalized relative directory paths (forward slashes, no trailing slash).
- * Gracefully returns empty Set on any failure.
+ * Get all files visible to git (tracked + untracked but not ignored).
+ * Respects .gitignore at all levels (root, subdirectories).
+ * Returns null on failure (non-git project) so callers can fall back.
  */
-function getGitIgnoredDirectories(rootDir: string): Set<string> {
+function getGitVisibleFiles(rootDir: string): Set<string> | null {
   try {
+    // -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
     const output = execFileSync(
       'git',
-      ['ls-files', '-oi', '--exclude-standard', '--directory'],
-      { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] }
+      ['ls-files', '-co', '--exclude-standard'],
+      { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] }
     );
-    const dirs = new Set<string>();
+    const files = new Set<string>();
     for (const line of output.split('\n')) {
       const trimmed = line.trim();
-      if (trimmed.endsWith('/')) {
-        dirs.add(normalizePath(trimmed.slice(0, -1)));
+      if (trimmed) {
+        files.add(normalizePath(trimmed));
       }
     }
-    return dirs;
+    return files;
   } catch {
-    return new Set<string>();
+    return null;
+  }
+}
+
+/**
+ * Changed files reported by git, grouped by the action sync must take.
+ * getGitChangedFiles returns null instead of this shape when git is unavailable
+ * (non-git project or command failure), so the caller falls back to a full scan.
+ */
+interface GitChanges {
+  modified: string[];  // M, MM, AM — files to re-hash + re-index
+  added: string[];     // ?? — new untracked files to index
+  deleted: string[];   // D — files to remove from DB
+}
+
+/**
+ * Use `git status` to detect changed files instead of scanning every file.
+ *
+ * Status is requested with:
+ *  - `-z` so paths are NUL-separated and never C-quoted (names containing
+ *    spaces, quotes or non-ASCII characters come through verbatim);
+ *  - `--untracked-files=all` so files inside a brand-new directory are listed
+ *    individually instead of being collapsed into a single `dir/` entry that
+ *    no include pattern would match;
+ *  - `-- .` so only entries under `rootDir` are reported.
+ *
+ * Porcelain paths are always relative to the repository root, so the prefix
+ * of `rootDir` inside the repository is stripped to yield paths relative to
+ * `rootDir`.
+ *
+ * @param rootDir - Directory the git commands are run in.
+ * @param config - Include/exclude config; paths rejected by shouldIncludeFile are dropped.
+ * @returns Changed paths bucketed as modified/added/deleted, or null when a
+ *          git command fails so callers fall back to full scan.
+ */
+function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChanges | null {
+  try {
+    // Path of rootDir relative to the repository root: '' at the root,
+    // otherwise something like 'packages/app/'.
+    const prefix = execFileSync(
+      'git',
+      ['rev-parse', '--show-prefix'],
+      { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] }
+    ).replace(/\r?\n$/, '');
+
+    const output = execFileSync(
+      'git',
+      ['status', '--porcelain', '-z', '--no-renames', '--untracked-files=all', '--', '.'],
+      { cwd: rootDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] }
+    );
+
+    const modified: string[] = [];
+    const added: string[] = [];
+    const deleted: string[] = [];
+
+    for (const entry of output.split('\0')) {
+      if (entry.length < 4) continue; // Minimum: "XY f"
+
+      const statusCode = entry.substring(0, 2);
+      const repoRelativePath = entry.substring(3);
+
+      // Ignore anything outside rootDir, then make the path rootDir-relative
+      if (!repoRelativePath.startsWith(prefix)) continue;
+      const filePath = normalizePath(repoRelativePath.slice(prefix.length));
+
+      // Skip files that don't match include/exclude config
+      if (!shouldIncludeFile(filePath, config)) continue;
+
+      if (statusCode === '??') {
+        added.push(filePath);
+      } else if (statusCode.includes('D')) {
+        deleted.push(filePath);
+      } else {
+        // M, MM, AM, A (staged), etc. — treat as modified
+        modified.push(filePath);
+      }
+    }
+
+    return { modified, added, deleted };
+  } catch {
+    return null;
   }
 }
 
@@ -135,21 +189,49 @@ function getGitIgnoredDirectories(rootDir: string): Set<string> {
 const CODEGRAPH_IGNORE_MARKER = '.codegraphignore';
 
 /**
- * Recursively scan directory for source files
+ * Recursively scan directory for source files.
+ *
+ * In git repos, uses `git ls-files` to get the file list (inherently
+ * respects .gitignore at all levels), then filters by config include patterns.
+ * Falls back to filesystem walk for non-git projects.
  */
 export function scanDirectory(
   rootDir: string,
   config: CodeGraphConfig,
   onProgress?: (current: number, file: string) => void
+): string[] {
+  // Fast path: use git to get all visible files (respects .gitignore everywhere)
+  const gitFiles = getGitVisibleFiles(rootDir);
+  if (gitFiles) {
+    // The filesystem-walk fallback skips any directory tree containing a
+    // .codegraphignore marker. Apply the same rule here so the result does
+    // not depend on whether git is available. Answers are memoized per
+    // directory, so each directory is probed on disk at most once.
+    const markerCache = new Map<string, boolean>();
+    const isMarkedIgnored = (relDir: string): boolean => {
+      const cached = markerCache.get(relDir);
+      if (cached !== undefined) return cached;
+
+      let ignored = fs.existsSync(path.join(rootDir, relDir, CODEGRAPH_IGNORE_MARKER));
+      if (!ignored && relDir !== '') {
+        // A marker in any ancestor directory ignores this directory as well
+        const slash = relDir.lastIndexOf('/');
+        ignored = isMarkedIgnored(slash === -1 ? '' : relDir.slice(0, slash));
+      }
+      markerCache.set(relDir, ignored);
+      return ignored;
+    };
+
+    const files: string[] = [];
+    for (const filePath of gitFiles) {
+      if (!shouldIncludeFile(filePath, config)) continue;
+
+      const slash = filePath.lastIndexOf('/');
+      const parentDir = slash === -1 ? '' : filePath.slice(0, slash);
+      if (isMarkedIgnored(parentDir)) continue;
+
+      files.push(filePath);
+      onProgress?.(files.length, filePath);
+    }
+    return files;
+  }
+
+  // Fallback: walk filesystem for non-git projects
+  return scanDirectoryWalk(rootDir, config, onProgress);
+}
+
+/**
+ * Filesystem walk fallback for non-git projects.
+ */
+function scanDirectoryWalk(
+  rootDir: string,
+  config: CodeGraphConfig,
+  onProgress?: (current: number, file: string) => void
 ): string[] {
   const files: string[] = [];
   let count = 0;
-  // Track visited real paths to detect symlink cycles
   const visitedDirs = new Set<string>();
-  const gitIgnoredDirs = getGitIgnoredDirectories(rootDir);
 
   function walk(dir: string): void {
-    // Resolve real path to detect symlink cycles
     let realDir: string;
     try {
       realDir = fs.realpathSync(dir);
@@ -164,7 +246,7 @@ export function scanDirectory(
     }
     visitedDirs.add(realDir);
 
-    // Check for .codegraphignore marker file - skip entire directory tree if present
+    // Check for .codegraphignore marker file
     const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER);
     if (fs.existsSync(ignoreMarker)) {
       logDebug('Skipping directory due to .codegraphignore marker', { dir });
@@ -184,17 +266,11 @@ export function scanDirectory(
       const fullPath = path.join(dir, entry.name);
       const relativePath = normalizePath(path.relative(rootDir, fullPath));
 
-      // Follow symlinked directories, but skip symlinked files to non-project targets
       if (entry.isSymbolicLink()) {
         try {
           const realTarget = fs.realpathSync(fullPath);
           const stat = fs.statSync(realTarget);
           if (stat.isDirectory()) {
-            // Check gitignore first (fast O(1) lookup)
-            if (gitIgnoredDirs.has(relativePath)) {
-              continue;
-            }
-            // Check exclusion, then recurse (cycle detection handles the rest)
             const dirPattern = relativePath + '/';
             let excluded = false;
             for (const pattern of config.exclude) {
@@ -210,9 +286,7 @@ export function scanDirectory(
             if (shouldIncludeFile(relativePath, config)) {
               files.push(relativePath);
               count++;
-              if (onProgress) {
-                onProgress(count, relativePath);
-              }
+              onProgress?.(count, relativePath);
             }
           }
         } catch {
@@ -222,11 +296,6 @@ export function scanDirectory(
       }
 
       if (entry.isDirectory()) {
-        // Check gitignore first (fast O(1) lookup)
-        if (gitIgnoredDirs.has(relativePath)) {
-          continue;
-        }
-        // Check if directory should be excluded
         const dirPattern = relativePath + '/';
         let excluded = false;
         for (const pattern of config.exclude) {
@@ -242,9 +311,7 @@ export function scanDirectory(
         if (shouldIncludeFile(relativePath, config)) {
           files.push(relativePath);
           count++;
-          if (onProgress) {
-            onProgress(count, relativePath);
-          }
+          onProgress?.(count, relativePath);
         }
       }
     }
@@ -611,7 +678,8 @@ export class ExtractionOrchestrator {
   }
 
   /**
-   * Sync with current file state
+   * Sync with current file state.
+   * Uses git status as a fast path when available, falling back to full scan.
    */
   async sync(onProgress?: (progress: IndexProgress) => void): Promise<SyncResult> {
     const startTime = Date.now();
@@ -620,53 +688,107 @@ export class ExtractionOrchestrator {
     let filesModified = 0;
     let filesRemoved = 0;
     let nodesUpdated = 0;
+    const changedFilePaths: string[] = [];
 
-    // Get current files on disk
     onProgress?.({
       phase: 'scanning',
       current: 0,
       total: 0,
     });
 
-    const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
-    filesChecked = currentFiles.size;
+    const filesToIndex: string[] = [];
+    const gitChanges = getGitChangedFiles(this.rootDir, this.config);
+
+    if (gitChanges) {
+      // === Git fast path ===
+      // Only inspect the files git reports as changed instead of scanning everything.
+      filesChecked = gitChanges.modified.length + gitChanges.added.length + gitChanges.deleted.length;
+
+      // Handle deleted files
+      for (const filePath of gitChanges.deleted) {
+        const tracked = this.queries.getFileByPath(filePath);
+        if (tracked) {
+          this.queries.deleteFile(filePath);
+          filesRemoved++;
+        }
+      }
 
-    // Get tracked files from database
-    const trackedFiles = this.queries.getAllFiles();
+      // Handle modified files — read + hash only these files
+      for (const filePath of gitChanges.modified) {
+        const fullPath = path.join(this.rootDir, filePath);
+        let content: string;
+        try {
+          content = fs.readFileSync(fullPath, 'utf-8');
+        } catch (error) {
+          captureException(error, { operation: 'sync-read-file', filePath });
+          logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
+          continue;
+        }
 
-    // Find files to remove (in DB but not on disk)
-    for (const tracked of trackedFiles) {
-      if (!currentFiles.has(tracked.path)) {
-        this.queries.deleteFile(tracked.path);
-        filesRemoved++;
+        const contentHash = hashContent(content);
+        const tracked = this.queries.getFileByPath(filePath);
+
+        if (!tracked) {
+          filesToIndex.push(filePath);
+          changedFilePaths.push(filePath);
+          filesAdded++;
+        } else if (tracked.contentHash !== contentHash) {
+          filesToIndex.push(filePath);
+          changedFilePaths.push(filePath);
+          filesModified++;
+        }
       }
-    }
 
-    // Find files to add or update
-    const filesToIndex: string[] = [];
+      // Handle added (untracked) files
+      for (const filePath of gitChanges.added) {
+        filesToIndex.push(filePath);
+        changedFilePaths.push(filePath);
+        filesAdded++;
+      }
+    } else {
+      // === Fallback: full scan (non-git project or git failure) ===
+      const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
+      filesChecked = currentFiles.size;
+
+      // Build Map for O(1) lookups instead of .find() per file
+      const trackedFiles = this.queries.getAllFiles();
+      const trackedMap = new Map<string, FileRecord>();
+      for (const f of trackedFiles) {
+        trackedMap.set(f.path, f);
+      }
 
-    for (const filePath of currentFiles) {
-      const fullPath = path.join(this.rootDir, filePath);
-      let content: string;
-      try {
-        content = fs.readFileSync(fullPath, 'utf-8');
-      } catch (error) {
-        captureException(error, { operation: 'sync-read-file', filePath });
-        logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
-        continue;
+      // Find files to remove (in DB but not on disk)
+      for (const tracked of trackedFiles) {
+        if (!currentFiles.has(tracked.path)) {
+          this.queries.deleteFile(tracked.path);
+          filesRemoved++;
+        }
       }
 
-      const contentHash = hashContent(content);
-      const tracked = trackedFiles.find((f) => f.path === filePath);
+      // Find files to add or update
+      for (const filePath of currentFiles) {
+        const fullPath = path.join(this.rootDir, filePath);
+        let content: string;
+        try {
+          content = fs.readFileSync(fullPath, 'utf-8');
+        } catch (error) {
+          captureException(error, { operation: 'sync-read-file', filePath });
+          logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
+          continue;
+        }
 
-      if (!tracked) {
-        // New file
-        filesToIndex.push(filePath);
-        filesAdded++;
-      } else if (tracked.contentHash !== contentHash) {
-        // Modified file
-        filesToIndex.push(filePath);
-        filesModified++;
+        const contentHash = hashContent(content);
+        const tracked = trackedMap.get(filePath);
+
+        if (!tracked) {
+          filesToIndex.push(filePath);
+          changedFilePaths.push(filePath);
+          filesAdded++;
+        } else if (tracked.contentHash !== contentHash) {
+          filesToIndex.push(filePath);
+          changedFilePaths.push(filePath);
+          filesModified++;
+        }
       }
     }
 
@@ -692,16 +814,71 @@ export class ExtractionOrchestrator {
       filesRemoved,
       nodesUpdated,
       durationMs: Date.now() - startTime,
+      changedFilePaths: changedFilePaths.length > 0 ? changedFilePaths : undefined,
     };
   }
 
   /**
-   * Get files that have changed since last index
+   * Get files that have changed since last index.
+   * Uses git status as a fast path when available, falling back to full scan.
    */
   getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } {
+    const gitChanges = getGitChangedFiles(this.rootDir, this.config);
+
+    if (gitChanges) {
+      // === Git fast path ===
+      const added: string[] = [];
+      const modified: string[] = [];
+      const removed: string[] = [];
+
+      // Deleted files — only report if tracked in DB
+      for (const filePath of gitChanges.deleted) {
+        const tracked = this.queries.getFileByPath(filePath);
+        if (tracked) {
+          removed.push(filePath);
+        }
+      }
+
+      // Modified files — read + hash only these, compare with DB
+      for (const filePath of gitChanges.modified) {
+        const fullPath = path.join(this.rootDir, filePath);
+        let content: string;
+        try {
+          content = fs.readFileSync(fullPath, 'utf-8');
+        } catch (error) {
+          captureException(error, { operation: 'detect-changes-read-file', filePath });
+          logDebug('Skipping unreadable file while detecting changes', { filePath, error: String(error) });
+          continue;
+        }
+
+        const contentHash = hashContent(content);
+        const tracked = this.queries.getFileByPath(filePath);
+
+        if (!tracked) {
+          added.push(filePath);
+        } else if (tracked.contentHash !== contentHash) {
+          modified.push(filePath);
+        }
+      }
+
+      // Added (untracked) files
+      for (const filePath of gitChanges.added) {
+        added.push(filePath);
+      }
+
+      return { added, modified, removed };
+    }
+
+    // === Fallback: full scan (non-git project or git failure) ===
     const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
     const trackedFiles = this.queries.getAllFiles();
 
+    // Build Map for O(1) lookups
+    const trackedMap = new Map<string, FileRecord>();
+    for (const f of trackedFiles) {
+      trackedMap.set(f.path, f);
+    }
+
     const added: string[] = [];
     const modified: string[] = [];
     const removed: string[] = [];
@@ -726,7 +903,7 @@ export class ExtractionOrchestrator {
       }
 
       const contentHash = hashContent(content);
-      const tracked = trackedFiles.find((f) => f.path === filePath);
+      const tracked = trackedMap.get(filePath);
 
       if (!tracked) {
         added.push(filePath);

+ 6 - 3
src/index.ts

@@ -449,15 +449,18 @@ export class CodeGraph {
 
         // Resolve references if files were updated
         if (result.filesAdded > 0 || result.filesModified > 0) {
-          const unresolvedCount = this.queries.getUnresolvedReferences().length;
+          // Scope resolution to changed files when available (git fast path)
+          const unresolvedRefs = result.changedFilePaths
+            ? this.queries.getUnresolvedReferencesByFiles(result.changedFilePaths)
+            : this.queries.getUnresolvedReferences();
 
           options.onProgress?.({
             phase: 'resolving',
             current: 0,
-            total: unresolvedCount,
+            total: unresolvedRefs.length,
           });
 
-          this.resolveReferences((current, total) => {
+          this.resolver.resolveAndPersist(unresolvedRefs, (current, total) => {
             options.onProgress?.({
               phase: 'resolving',
               current,