Переглянути джерело

Resolve merge conflict with main (gitignore + symlink rename)

Keeps the PR's visitedDirs rename and main's gitIgnoredDirs addition.
Colby McHenry 4 місяців тому
батько
коміт
ce220a573c
4 змінених файлів з 158 додано та 3 видалено
  1. 106 1
      __tests__/extraction.test.ts
  2. 4 0
      src/config.ts
  3. 40 2
      src/extraction/index.ts
  4. 8 0
      src/utils.ts

+ 106 - 1
__tests__/extraction.test.ts

@@ -9,8 +9,10 @@ import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { CodeGraph } from '../src';
-import { extractFromSource } from '../src/extraction';
+import { extractFromSource, scanDirectory, shouldIncludeFile } from '../src/extraction';
 import { detectLanguage, isLanguageSupported, getSupportedLanguages } from '../src/extraction/grammars';
+import { normalizePath } from '../src/utils';
+import { DEFAULT_CONFIG } from '../src/types';
 
 // Create a temporary directory for each test
 function createTempDir(): string {
@@ -1981,3 +1983,106 @@ export function multiply(a: number, b: number): number {
     cg.close();
   });
 });
+
+// normalizePath: backslashes become forward slashes; everything else is untouched.
+describe('Path Normalization', () => {
+  it('should convert backslashes to forward slashes', () => {
+    // [input, expected] pairs using Windows-style separators
+    const windowsStyle: Array<[string, string]> = [
+      ['gui\\node_modules\\foo', 'gui/node_modules/foo'],
+      ['src\\components\\Button.tsx', 'src/components/Button.tsx'],
+    ];
+    for (const [input, expected] of windowsStyle) {
+      expect(normalizePath(input)).toBe(expected);
+    }
+  });
+
+  it('should leave forward-slash paths unchanged', () => {
+    const posixPath = 'src/components/Button.tsx';
+    expect(normalizePath(posixPath)).toBe('src/components/Button.tsx');
+  });
+
+  it('should handle empty string', () => {
+    const result = normalizePath('');
+    expect(result).toBe('');
+  });
+});
+
+// scanDirectory: excluded directories are skipped and results use forward slashes.
+describe('Directory Exclusion', () => {
+  let tempDir: string;
+
+  /** Write `content` to <tempDir>/<segments...>, creating parent directories first. */
+  const writeFixture = (segments: string[], content: string): void => {
+    const target = path.join(tempDir, ...segments);
+    fs.mkdirSync(path.dirname(target), { recursive: true });
+    fs.writeFileSync(target, content);
+  };
+
+  /** Scan the temp directory using the default config rooted at it. */
+  const scanTempDir = () => scanDirectory(tempDir, { ...DEFAULT_CONFIG, rootDir: tempDir });
+
+  beforeEach(() => {
+    tempDir = createTempDir();
+  });
+
+  afterEach(() => {
+    cleanupTempDir(tempDir);
+  });
+
+  it('should exclude node_modules directories', () => {
+    // Layout: src/index.ts alongside node_modules/pkg/index.js
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['node_modules', 'pkg', 'index.js'], 'module.exports = {};');
+
+    const files = scanTempDir();
+
+    expect(files).toContain('src/index.ts');
+    expect(files.some((f) => f.includes('node_modules'))).toBe(false);
+  });
+
+  it('should exclude nested node_modules directories', () => {
+    // Layout: packages/app/src/index.ts alongside packages/app/node_modules/pkg/index.js
+    writeFixture(['packages', 'app', 'src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['packages', 'app', 'node_modules', 'pkg', 'index.js'], 'module.exports = {};');
+
+    const files = scanTempDir();
+
+    expect(files).toContain('packages/app/src/index.ts');
+    expect(files.some((f) => f.includes('node_modules'))).toBe(false);
+  });
+
+  it('should exclude .git directories', () => {
+    // Layout: src/index.ts alongside .git/objects/pack.ts
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['.git', 'objects', 'pack.ts'], 'export const y = 2;');
+
+    const files = scanTempDir();
+
+    expect(files).toContain('src/index.ts');
+    expect(files.some((f) => f.includes('.git'))).toBe(false);
+  });
+
+  it('should return forward-slash paths on all platforms', () => {
+    // A single nested file, so the separator used in the result is observable
+    writeFixture(['src', 'components', 'Button.tsx'], 'export function Button() {}');
+
+    const files = scanTempDir();
+
+    expect(files.length).toBe(1);
+    const [onlyFile] = files;
+    expect(onlyFile).toBe('src/components/Button.tsx');
+    expect(onlyFile).not.toContain('\\');
+  });
+
+  it('should respect .codegraphignore marker', () => {
+    // Layout: src/index.ts alongside vendor/lib.ts, with an empty marker file in vendor/
+    writeFixture(['src', 'index.ts'], 'export const x = 1;');
+    writeFixture(['vendor', 'lib.ts'], 'export const y = 2;');
+    writeFixture(['vendor', '.codegraphignore'], '');
+
+    const files = scanTempDir();
+
+    expect(files).toContain('src/index.ts');
+    expect(files.some((f) => f.includes('vendor'))).toBe(false);
+  });
+});

+ 4 - 0
src/config.ts

@@ -7,6 +7,7 @@
 import * as fs from 'fs';
 import * as path from 'path';
 import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types';
+import { normalizePath } from './utils';
 
 /**
  * Configuration filename
@@ -240,6 +241,9 @@ export function addCustomPattern(
  * Check if a file path matches the include/exclude patterns
  */
 export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean {
+  // Normalize to forward slashes so Windows backslash paths match glob patterns
+  filePath = normalizePath(filePath);
+
   // Simple glob matching (for now, just check if any pattern matches)
   // A full implementation would use a proper glob library
 

+ 40 - 2
src/extraction/index.ts

@@ -8,6 +8,7 @@ import * as fs from 'fs';
 import * as fsp from 'fs/promises';
 import * as path from 'path';
 import * as crypto from 'crypto';
+import { execFileSync } from 'child_process';
 import {
   Language,
   FileRecord,
@@ -20,7 +21,7 @@ import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported } from './grammars';
 import { logDebug, logWarn } from '../errors';
 import { captureException } from '../sentry';
-import { validatePathWithinRoot } from '../utils';
+import { validatePathWithinRoot, normalizePath } from '../utils';
 
 /**
  * Number of files to read in parallel during indexing.
@@ -74,6 +75,9 @@ export function hashContent(content: string): string {
  * Check if a path matches any glob pattern (simplified)
  */
 function matchesGlob(filePath: string, pattern: string): boolean {
+  // Normalize to forward slashes so Windows backslash paths match glob patterns
+  filePath = normalizePath(filePath);
+
   // Convert glob to regex using placeholders to avoid conflicts
   let regexStr = pattern;
 
@@ -120,6 +124,31 @@ export function shouldIncludeFile(
   return false;
 }
 
+/**
+ * Collect the directories that git reports as ignored beneath `rootDir`.
+ *
+ * Runs `git ls-files -oi --exclude-standard --directory -z` with `rootDir` as the
+ * working directory and keeps only the entries that end in `/` (directories).
+ * `-z` makes git emit NUL-terminated, unquoted paths, so directory names that
+ * contain non-ASCII characters, quotes or surrounding whitespace are returned
+ * verbatim instead of being C-quoted by git or altered by trimming.
+ *
+ * @param rootDir - Directory in which git is invoked (used as `cwd`).
+ * @returns Set of directory paths as printed by git, passed through
+ *   `normalizePath` and without the trailing slash. Empty when the git call
+ *   fails for any reason (e.g. not a repository, git missing, timeout).
+ */
+function getGitIgnoredDirectories(rootDir: string): Set<string> {
+  const dirs = new Set<string>();
+  let output: string;
+  try {
+    output = execFileSync(
+      'git',
+      ['ls-files', '-oi', '--exclude-standard', '--directory', '-z'],
+      {
+        cwd: rootDir,
+        encoding: 'utf-8',
+        timeout: 10000,
+        // Node's default limit is 1 MiB; exceeding it throws, which would
+        // silently disable gitignore handling for repos with many ignored paths.
+        maxBuffer: 64 * 1024 * 1024,
+        stdio: ['pipe', 'pipe', 'pipe'],
+      }
+    );
+  } catch {
+    // Deliberate best effort: without usable git output nothing is treated as ignored.
+    return dirs;
+  }
+
+  // Entries are NUL-separated; only directory entries carry a trailing slash.
+  for (const entry of output.split('\0')) {
+    if (entry.endsWith('/')) {
+      dirs.add(normalizePath(entry.slice(0, -1)));
+    }
+  }
+  return dirs;
+}
+
 /**
  * Marker file name that indicates a directory (and all children) should be skipped
  */
@@ -137,6 +166,7 @@ export function scanDirectory(
   let count = 0;
   // Track visited real paths to detect symlink cycles
   const visitedDirs = new Set<string>();
+  const gitIgnoredDirs = getGitIgnoredDirectories(rootDir);
 
   function walk(dir: string): void {
     // Resolve real path to detect symlink cycles
@@ -172,7 +202,7 @@ export function scanDirectory(
 
     for (const entry of entries) {
       const fullPath = path.join(dir, entry.name);
-      const relativePath = path.relative(rootDir, fullPath);
+      const relativePath = normalizePath(path.relative(rootDir, fullPath));
 
       // Follow symlinked directories, but skip symlinked files to non-project targets
       if (entry.isSymbolicLink()) {
@@ -180,6 +210,10 @@ export function scanDirectory(
           const realTarget = fs.realpathSync(fullPath);
           const stat = fs.statSync(realTarget);
           if (stat.isDirectory()) {
+            // Check gitignore first (fast O(1) lookup)
+            if (gitIgnoredDirs.has(relativePath)) {
+              continue;
+            }
             // Check exclusion, then recurse (cycle detection handles the rest)
             const dirPattern = relativePath + '/';
             let excluded = false;
@@ -208,6 +242,10 @@ export function scanDirectory(
       }
 
       if (entry.isDirectory()) {
+        // Check gitignore first (fast O(1) lookup)
+        if (gitIgnoredDirs.has(relativePath)) {
+          continue;
+        }
         // Check if directory should be excluded
         const dirPattern = relativePath + '/';
         let excluded = false;

+ 8 - 0
src/utils.ts

@@ -74,6 +74,14 @@ export function clamp(value: number, min: number, max: number): number {
   return Math.max(min, Math.min(max, value));
 }
 
+/**
+ * Rewrite a path so that every backslash becomes a forward slash.
+ * Lets Windows-style paths be compared against forward-slash glob patterns.
+ */
+export function normalizePath(filePath: string): string {
+  const segments = filePath.split('\\');
+  return segments.join('/');
+}
+
 /**
  * Cross-process file lock using lock files.
  * Prevents concurrent database writes from CLI, MCP server, and git hooks.