Просмотр исходного кода

fix(extraction): recurse into git submodules when listing files (#150)

`git ls-files -co --exclude-standard` only sees the submodule pointer in
the main repo's index, so projects using submodules indexed 0 files. Now
the tracked list runs with `-c --recurse-submodules` so submodule
contents are included; untracked files are gathered with a separate
`-o --exclude-standard` call (the two flags can't be combined — git only
supports --recurse-submodules with --cached/--stage).

Fixes #147.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Colby Mchenry 1 месяц назад
Родитель
Коммит
1cbd5a8123
2 изменённых файла: 71 добавлено и 7 удалено
  1. 52 0
      __tests__/extraction.test.ts
  2. 19 7
      src/extraction/index.ts

+ 52 - 0
__tests__/extraction.test.ts

@@ -3080,6 +3080,58 @@ describe('Directory Exclusion', () => {
   });
 });
 
+describe('Git Submodules', () => {
+  let tempDir: string;
+
+  beforeEach(() => {
+    tempDir = createTempDir();
+  });
+
+  afterEach(() => {
+    cleanupTempDir(tempDir);
+  });
+
+  it('should index files inside git submodules (issue #147)', async () => {
+    const { execFileSync } = await import('child_process');
+    const git = (cwd: string, ...args: string[]) =>
+      execFileSync('git', args, { cwd, stdio: 'pipe' });
+
+    // Build a separate "library" repo to use as a submodule source.
+    const libDir = path.join(tempDir, '_lib');
+    fs.mkdirSync(libDir, { recursive: true });
+    git(libDir, 'init', '-q');
+    git(libDir, 'config', 'user.email', 'test@test.com');
+    git(libDir, 'config', 'user.name', 'Test');
+    fs.writeFileSync(path.join(libDir, 'lib.ts'), 'export const fromSubmodule = 1;');
+    git(libDir, 'add', '-A');
+    git(libDir, 'commit', '-q', '-m', 'lib init');
+
+    // Build the main repo and add the lib repo as a submodule.
+    const mainDir = path.join(tempDir, 'main');
+    fs.mkdirSync(mainDir, { recursive: true });
+    git(mainDir, 'init', '-q');
+    git(mainDir, 'config', 'user.email', 'test@test.com');
+    git(mainDir, 'config', 'user.name', 'Test');
+    fs.writeFileSync(path.join(mainDir, 'app.ts'), 'export const app = 1;');
+    git(mainDir, 'add', '-A');
+    git(mainDir, 'commit', '-q', '-m', 'app init');
+    // protocol.file.allow=always is required to add a local-path submodule on
+    // recent git versions (CVE-2022-39253 mitigation).
+    execFileSync(
+      'git',
+      ['-c', 'protocol.file.allow=always', 'submodule', 'add', '-q', libDir, 'libs/lib'],
+      { cwd: mainDir, stdio: 'pipe' }
+    );
+    git(mainDir, 'commit', '-q', '-m', 'add submodule');
+
+    const config = { ...DEFAULT_CONFIG, rootDir: mainDir };
+    const files = scanDirectory(mainDir, config);
+
+    expect(files).toContain('app.ts');
+    expect(files).toContain('libs/lib/lib.ts');
+  });
+});
+
 // =============================================================================
 // Scala
 // =============================================================================

+ 19 - 7
src/extraction/index.ts

@@ -156,19 +156,31 @@ function getGitVisibleFiles(rootDir: string): Set<string> | null {
       }
     }
 
-    // -c = cached (tracked), -o = others (untracked), --exclude-standard = respect .gitignore
-    const output = execFileSync(
-      'git',
-      ['ls-files', '-co', '--exclude-standard'],
-      { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] }
-    );
     const files = new Set<string>();
-    for (const line of output.split('\n')) {
+    const gitOpts = { cwd: rootDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'] };
+
+    // Tracked files. --recurse-submodules pulls in files from active submodules,
+    // which the main repo's index would otherwise represent only as a commit pointer.
+    // Without this, monorepos using submodules index 0 files. (See issue #147.)
+    // Note: --recurse-submodules only supports -c/--cached and --stage modes — it
+    // can't be combined with -o, so untracked files are gathered separately below.
+    const tracked = execFileSync('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
+    for (const line of tracked.split('\n')) {
       const trimmed = line.trim();
       if (trimmed) {
         files.add(normalizePath(trimmed));
       }
     }
+
+    // Untracked files in the main repo (submodules manage their own untracked state).
+    const untracked = execFileSync('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
+    for (const line of untracked.split('\n')) {
+      const trimmed = line.trim();
+      if (trimmed) {
+        files.add(normalizePath(trimmed));
+      }
+    }
+
     return files;
   } catch {
     return null;