Просмотр исходного кода

Fix .gitignore support and Windows path separator bug in scanner

Normalize paths to forward slashes in matchesGlob(), shouldIncludeFile()
and scanDirectory() so glob exclude patterns work on Windows. Add
getGitIgnoredDirectories(), which uses git ls-files to skip .gitignore'd
directories during indexing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Colby McHenry 4 месяцев назад
Родитель
Коммит
62a6cf38fe
4 измененных файлов с 158 добавлено и 3 удалено
  1. 106 1
      __tests__/extraction.test.ts
  2. 4 0
      src/config.ts
  3. 40 2
      src/extraction/index.ts
  4. 8 0
      src/utils.ts

+ 106 - 1
__tests__/extraction.test.ts

@@ -9,8 +9,10 @@ import * as fs from 'fs';
 import * as path from 'path';
 import * as path from 'path';
 import * as os from 'os';
 import * as os from 'os';
 import { CodeGraph } from '../src';
 import { CodeGraph } from '../src';
-import { extractFromSource } from '../src/extraction';
+import { extractFromSource, scanDirectory, shouldIncludeFile } from '../src/extraction';
 import { detectLanguage, isLanguageSupported, getSupportedLanguages } from '../src/extraction/grammars';
 import { detectLanguage, isLanguageSupported, getSupportedLanguages } from '../src/extraction/grammars';
+import { normalizePath } from '../src/utils';
+import { DEFAULT_CONFIG } from '../src/types';
 
 
 // Create a temporary directory for each test
 // Create a temporary directory for each test
 function createTempDir(): string {
 function createTempDir(): string {
@@ -1880,3 +1882,106 @@ export function multiply(a: number, b: number): number {
     cg.close();
     cg.close();
   });
   });
 });
 });
+
+describe('Path Normalization', () => {
+  it('should convert backslashes to forward slashes', () => {
+    const windowsPath = ['src', 'components', 'Button.tsx'].join('\\');
+    expect(normalizePath('gui\\node_modules\\foo')).toBe('gui/node_modules/foo');
+    expect(normalizePath(windowsPath)).toBe('src/components/Button.tsx');
+  });
+  it('should leave forward-slash paths unchanged', () => {
+    const posixPath = 'src/components/Button.tsx';
+    expect(normalizePath(posixPath)).toBe(posixPath);
+  });
+  it('should handle empty string', () => {
+    expect(normalizePath('')).toBe('');
+  });
+});
+
+describe('Directory Exclusion', () => {
+  // Each test builds a small file tree inside a fresh temp directory,
+  // scans it, and checks which relative paths come back.
+  let tempDir: string;
+
+  beforeEach(() => {
+    tempDir = createTempDir();
+  });
+
+  afterEach(() => {
+    cleanupTempDir(tempDir);
+  });
+
+  /**
+   * Write `content` to the file at tempDir/<segments...>,
+   * creating any missing parent directories first.
+   */
+  function writeFixture(segments: string[], content: string): void {
+    const target = path.join(tempDir, ...segments);
+    fs.mkdirSync(path.dirname(target), { recursive: true });
+    fs.writeFileSync(target, content);
+  }
+
+  /**
+   * Scan tempDir with DEFAULT_CONFIG, overriding only rootDir,
+   * and return whatever scanDirectory reports.
+   */
+  function scanTempDir() {
+    return scanDirectory(tempDir, { ...DEFAULT_CONFIG, rootDir: tempDir });
+  }
+
+  it('should exclude node_modules directories', () => {
+    // Layout: src/index.ts + node_modules/pkg/index.js
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['node_modules', 'pkg', 'index.js'], 'module.exports = {};');
+
+    const found = scanTempDir();
+
+    expect(found).toContain('src/index.ts');
+    expect(found.some((p) => p.includes('node_modules'))).toBe(false);
+  });
+
+  it('should exclude nested node_modules directories', () => {
+    // Layout: packages/app/src/index.ts + packages/app/node_modules/pkg/index.js
+    writeFixture(['packages', 'app', 'src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['packages', 'app', 'node_modules', 'pkg', 'index.js'], 'module.exports = {};');
+
+    const found = scanTempDir();
+
+    expect(found).toContain('packages/app/src/index.ts');
+    expect(found.some((p) => p.includes('node_modules'))).toBe(false);
+  });
+
+  it('should exclude .git directories', () => {
+    // Layout: src/index.ts + .git/objects/pack.ts
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['.git', 'objects', 'pack.ts'], 'export const y = 2;');
+
+    const found = scanTempDir();
+
+    expect(found).toContain('src/index.ts');
+    expect(found.some((p) => p.includes('.git'))).toBe(false);
+  });
+
+  it('should return forward-slash paths on all platforms', () => {
+    // Layout: src/components/Button.tsx (the only file)
+    writeFixture(['src', 'components', 'Button.tsx'], 'export function Button() {}');
+
+    const found = scanTempDir();
+
+    expect(found.length).toBe(1);
+    expect(found[0]).toBe('src/components/Button.tsx');
+    expect(found[0]).not.toContain('\\');
+  });
+
+  it('should respect .codegraphignore marker', () => {
+    // Layout: src/index.ts + vendor/lib.ts, with an empty marker file in vendor/
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['vendor', 'lib.ts'], 'export const y = 2;');
+    writeFixture(['vendor', '.codegraphignore'], '');
+
+    const found = scanTempDir();
+
+    expect(found).toContain('src/index.ts');
+    expect(found.some((p) => p.includes('vendor'))).toBe(false);
+  });
+});

+ 4 - 0
src/config.ts

@@ -7,6 +7,7 @@
 import * as fs from 'fs';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as path from 'path';
 import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types';
 import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types';
+import { normalizePath } from './utils';
 
 
 /**
 /**
  * Configuration filename
  * Configuration filename
@@ -240,6 +241,9 @@ export function addCustomPattern(
  * Check if a file path matches the include/exclude patterns
  * Check if a file path matches the include/exclude patterns
  */
  */
 export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean {
 export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean {
+  // Normalize to forward slashes so Windows backslash paths match glob patterns
+  filePath = normalizePath(filePath);
+
   // Simple glob matching (for now, just check if any pattern matches)
   // Simple glob matching (for now, just check if any pattern matches)
   // A full implementation would use a proper glob library
   // A full implementation would use a proper glob library
 
 

+ 40 - 2
src/extraction/index.ts

@@ -8,6 +8,7 @@ import * as fs from 'fs';
 import * as fsp from 'fs/promises';
 import * as fsp from 'fs/promises';
 import * as path from 'path';
 import * as path from 'path';
 import * as crypto from 'crypto';
 import * as crypto from 'crypto';
+import { execFileSync } from 'child_process';
 import {
 import {
   Language,
   Language,
   FileRecord,
   FileRecord,
@@ -20,7 +21,7 @@ import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported } from './grammars';
 import { detectLanguage, isLanguageSupported } from './grammars';
 import { logDebug } from '../errors';
 import { logDebug } from '../errors';
 import { captureException } from '../sentry';
 import { captureException } from '../sentry';
-import { validatePathWithinRoot } from '../utils';
+import { validatePathWithinRoot, normalizePath } from '../utils';
 
 
 /**
 /**
  * Progress callback for indexing operations
  * Progress callback for indexing operations
@@ -68,6 +69,9 @@ export function hashContent(content: string): string {
  * Check if a path matches any glob pattern (simplified)
  * Check if a path matches any glob pattern (simplified)
  */
  */
 function matchesGlob(filePath: string, pattern: string): boolean {
 function matchesGlob(filePath: string, pattern: string): boolean {
+  // Normalize to forward slashes so Windows backslash paths match glob patterns
+  filePath = normalizePath(filePath);
+
   // Convert glob to regex using placeholders to avoid conflicts
   // Convert glob to regex using placeholders to avoid conflicts
   let regexStr = pattern;
   let regexStr = pattern;
 
 
@@ -114,6 +118,31 @@ export function shouldIncludeFile(
   return false;
   return false;
 }
 }
 
 
+/**
+ * Collect the directories git ignores (.gitignore, .git/info/exclude, global excludes).
+ * Runs `git ls-files -oi --exclude-standard --directory -z` in rootDir; -z emits raw,
+ * NUL-separated paths, so names with quotes, non-ASCII characters or edge whitespace survive.
+ * @param rootDir Directory git is run in (used as the child process cwd).
+ * @returns Relative directory paths (forward slashes, no trailing slash); empty Set on any failure.
+ */
+function getGitIgnoredDirectories(rootDir: string): Set<string> {
+  const dirs = new Set<string>();
+  try {
+    const output = execFileSync(
+      'git',
+      ['ls-files', '-oi', '--exclude-standard', '--directory', '-z'],
+      // maxBuffer: the 1 MiB default overflows (and throws) on repos with many ignored entries
+      { cwd: rootDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 64 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] }
+    );
+    for (const entry of output.split('\0')) {
+      if (entry.endsWith('/')) dirs.add(normalizePath(entry.slice(0, -1)));
+    }
+  } catch {
+    // Best effort: not a git repo, git missing, or timeout -> no gitignore filtering
+  }
+  return dirs;
+}
+
 /**
 /**
  * Marker file name that indicates a directory (and all children) should be skipped
  * Marker file name that indicates a directory (and all children) should be skipped
  */
  */
@@ -130,6 +159,7 @@ export function scanDirectory(
   const files: string[] = [];
   const files: string[] = [];
   let count = 0;
   let count = 0;
   const visitedRealPaths = new Set<string>(); // Symlink cycle detection
   const visitedRealPaths = new Set<string>(); // Symlink cycle detection
+  const gitIgnoredDirs = getGitIgnoredDirectories(rootDir);
 
 
   function walk(dir: string): void {
   function walk(dir: string): void {
     // Symlink cycle detection: resolve real path and skip if already visited
     // Symlink cycle detection: resolve real path and skip if already visited
@@ -163,7 +193,7 @@ export function scanDirectory(
 
 
     for (const entry of entries) {
     for (const entry of entries) {
       const fullPath = path.join(dir, entry.name);
       const fullPath = path.join(dir, entry.name);
-      const relativePath = path.relative(rootDir, fullPath);
+      const relativePath = normalizePath(path.relative(rootDir, fullPath));
 
 
       // Follow symlinked directories, but skip symlinked files to non-project targets
       // Follow symlinked directories, but skip symlinked files to non-project targets
       if (entry.isSymbolicLink()) {
       if (entry.isSymbolicLink()) {
@@ -171,6 +201,10 @@ export function scanDirectory(
           const realTarget = fs.realpathSync(fullPath);
           const realTarget = fs.realpathSync(fullPath);
           const stat = fs.statSync(realTarget);
           const stat = fs.statSync(realTarget);
           if (stat.isDirectory()) {
           if (stat.isDirectory()) {
+            // Check gitignore first (fast O(1) lookup)
+            if (gitIgnoredDirs.has(relativePath)) {
+              continue;
+            }
             // Check exclusion, then recurse (cycle detection handles the rest)
             // Check exclusion, then recurse (cycle detection handles the rest)
             const dirPattern = relativePath + '/';
             const dirPattern = relativePath + '/';
             let excluded = false;
             let excluded = false;
@@ -199,6 +233,10 @@ export function scanDirectory(
       }
       }
 
 
       if (entry.isDirectory()) {
       if (entry.isDirectory()) {
+        // Check gitignore first (fast O(1) lookup)
+        if (gitIgnoredDirs.has(relativePath)) {
+          continue;
+        }
         // Check if directory should be excluded
         // Check if directory should be excluded
         const dirPattern = relativePath + '/';
         const dirPattern = relativePath + '/';
         let excluded = false;
         let excluded = false;

+ 8 - 0
src/utils.ts

@@ -74,6 +74,14 @@ export function clamp(value: number, min: number, max: number): number {
   return Math.max(min, Math.min(max, value));
   return Math.max(min, Math.min(max, value));
 }
 }
 
 
+/**
+ * Convert every backslash in a path to a forward slash.
+ * Keeps Windows-style paths comparable with forward-slash glob patterns.
+ */
+export function normalizePath(filePath: string): string {
+  return filePath.split('\\').join('/');
+}
+
 /**
 /**
  * Cross-process file lock using lock files.
  * Cross-process file lock using lock files.
  * Prevents concurrent database writes from CLI, MCP server, and git hooks.
  * Prevents concurrent database writes from CLI, MCP server, and git hooks.