multi-repo-workspace.test.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /**
  2. * Multi-repo workspaces (#514): a directory holding several independent git
  3. * repositories must index as a whole.
  4. *
  5. * Two enumeration paths are exercised:
  6. * - git path: the workspace root is itself a git repo (a "super-repo") whose
  7. * `.gitignore` hides the child repos to keep `git status` quiet. git never
  8. * lists ignored dirs, so the embedded repos were invisible (0 files). They
  9. * are now discovered via the ignored-directories listing and enumerated by
  10. * their own `git ls-files`. (#193 covered the *untracked* embedded case.)
  11. * - sync path: `git status` in the parent says nothing about embedded repos;
  12. * change detection now recurses into them.
  13. *
  14. * The non-git-parent case (plain folder of repos) already worked via the
  15. * filesystem walk — locked in here so it stays that way.
  16. */
  17. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  18. import * as fs from 'fs';
  19. import * as path from 'path';
  20. import * as os from 'os';
  21. import { execFileSync } from 'child_process';
  22. import CodeGraph from '../src/index';
  23. import { scanDirectory, buildScopeIgnore, discoverEmbeddedRepoRoots } from '../src/extraction';
  /**
   * Run `git <args>` synchronously inside `cwd`.
   *
   * stdin and stdout are discarded so test output stays quiet. stderr is
   * captured instead of discarded: because `stdio` is given explicitly it is
   * not echoed to the parent process, but when git exits non-zero the error
   * thrown by `execFileSync` carries what git printed, so a failing fixture
   * step reports why it failed rather than a bare "Command failed".
   *
   * @param cwd  Directory to run git in.
   * @param args Arguments handed to git verbatim (no shell is involved).
   * @throws If git cannot be spawned or exits with a non-zero status.
   */
  function git(cwd: string, ...args: string[]): void {
    execFileSync('git', args, { cwd, stdio: ['ignore', 'ignore', 'pipe'] });
  }
  /**
   * Turn `dir` into a git repository whose single commit contains whatever is
   * currently on disk there. The commit is made with a throwaway identity and
   * `--allow-empty`, so it is still created when nothing was staged.
   */
  function makeRepo(dir: string): void {
    const identity = ['-c', 'user.email=t@t', '-c', 'user.name=t'];
    const steps: string[][] = [
      ['init', '-q'],
      ['add', '-A'],
      [...identity, 'commit', '-qm', 'init', '--allow-empty'],
    ];
    for (const step of steps) {
      git(dir, ...step);
    }
  }
  /** Write `content` to `file`, creating any missing parent directories first. */
  function write(file: string, content: string): void {
    const parentDir = path.dirname(file);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(file, content);
  }
  37. describe('multi-repo workspaces (#514)', () => {
  38. let ws: string;
  // Each test gets its own freshly created temporary workspace directory.
  beforeEach(() => {
    const prefix = path.join(os.tmpdir(), 'cg-multirepo-');
    ws = fs.mkdtempSync(prefix);
  });

  // Remove the workspace and everything the test created beneath it.
  afterEach(() => {
    fs.rmSync(ws, { force: true, recursive: true });
  });
  it('indexes embedded repos hidden by the super-repo .gitignore', () => {
    // Two independent child repos, each holding one source file.
    const projA = path.join(ws, 'packages/proj-a');
    const projB = path.join(ws, 'packages/proj-b');
    write(path.join(projA, 'src/auth.ts'), 'export function login() { return 1; }\n');
    write(path.join(projB, 'src/billing.ts'), 'export function charge() { return 2; }\n');
    makeRepo(projA);
    makeRepo(projB);

    // The super-repo ignores the whole packages/ tree and tracks its own code.
    write(path.join(ws, '.gitignore'), '/packages/\n');
    write(path.join(ws, 'tools.ts'), 'export function tool() { return 0; }\n');
    makeRepo(ws);

    const indexed = scanDirectory(ws);

    const mustBeIndexed = [
      'packages/proj-a/src/auth.ts',
      'packages/proj-b/src/billing.ts',
      'tools.ts', // the parent's own tracked code still indexes
    ];
    for (const relPath of mustBeIndexed) {
      expect(indexed).toContain(relPath);
    }
  });
  it('keeps respecting the parent .gitignore for the parent own (non-repo) dirs', () => {
    write(path.join(ws, 'scratch/junk.ts'), 'export function junk() { return 9; }\n');
    write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n');
    write(path.join(ws, '.gitignore'), '/scratch/\n');
    makeRepo(ws);

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('src/app.ts');
    // scratch/ is gitignored and holds NO embedded repo, so it stays excluded.
    const nothingUnderScratch = indexed.every((entry) => !entry.startsWith('scratch/'));
    expect(nothingUnderScratch).toBe(true);
  });
  it('never descends into git repos inside node_modules (npm git-dependencies)', () => {
    // Order mirrors reality: the embedded repo is committed clean, and only
    // afterwards does node_modules appear, since it is never committed.
    const projA = path.join(ws, 'packages/proj-a');
    write(path.join(projA, 'src/auth.ts'), 'export function login() {}\n');
    makeRepo(projA);

    // An npm git-dependency inside the embedded repo: a real repo with commits.
    const innerDep = path.join(projA, 'node_modules/inner');
    write(path.join(innerDep, 'src/evil2.ts'), 'export function evil2() {}\n');
    makeRepo(innerDep);

    // A second git-dependency, this one at the workspace level.
    const rootDep = path.join(ws, 'node_modules/git-dep');
    write(path.join(rootDep, 'src/evil.ts'), 'export function evil() {}\n');
    makeRepo(rootDep);

    write(path.join(ws, '.gitignore'), '/packages/\nnode_modules\n');
    makeRepo(ws);

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('packages/proj-a/src/auth.ts');
    const freeOfNodeModules = indexed.every((entry) => !entry.includes('node_modules'));
    expect(freeOfNodeModules).toBe(true);
  });
  it('still indexes UNTRACKED embedded repos (#193 regression)', () => {
    const vendoredLib = path.join(ws, 'vendor-src/lib');
    write(path.join(vendoredLib, 'src/util.ts'), 'export function util() {}\n');
    makeRepo(vendoredLib);

    write(path.join(ws, 'main.ts'), 'export function main() {}\n');
    makeRepo(ws);

    // makeRepo stages with `add -A`, so vendor-src ended up in that commit too.
    // Drop it from the index and commit again to recreate the state under
    // test: vendor-src/ present on disk, untracked, and not ignored.
    const asTestUser = ['-c', 'user.email=t@t', '-c', 'user.name=t'];
    git(ws, 'rm', '-r', '--cached', '-q', 'vendor-src');
    git(ws, ...asTestUser, 'commit', '-qm', 'untrack');

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('vendor-src/lib/src/util.ts');
    expect(indexed).toContain('main.ts');
  });
  it('skips nested git worktrees instead of indexing them as duplicate embedded repos (#848)', () => {
    // Tools such as Claude Code create worktrees under a gitignored path like
    // `.claude/worktrees/<name>/`. A worktree's `.git` is a FILE that points
    // into the host repo's own `.git/worktrees/`, i.e. it is the SAME repo that
    // is already indexed; treating it as an embedded repo multiplies the whole
    // graph. A genuine embedded clone (a `.git` *directory*) must still index.
    write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n');
    write(path.join(ws, '.gitignore'), '.claude/\nvendored/\n');
    makeRepo(ws);

    // A real linked worktree beneath the gitignored .claude/worktrees/.
    git(ws, 'worktree', 'add', '-q', '.claude/worktrees/feature', '-b', 'feature');

    // A genuine embedded clone, gitignored as well; it must STAY indexed (#514).
    const vendored = path.join(ws, 'vendored');
    write(path.join(vendored, 'lib.ts'), 'export function vendoredFn() { return 9; }\n');
    makeRepo(vendored);

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('src/app.ts');
    // The worktree is a duplicate working view and is never indexed.
    const worktreeAbsent = indexed.every((entry) => !entry.includes('.claude/worktrees'));
    expect(worktreeAbsent).toBe(true);
    // The genuine embedded clone is still indexed (#514/#622 preserved).
    expect(indexed).toContain('vendored/lib.ts');
  });
  it('skips a submodule worktree instead of indexing it as a duplicate (#945)', () => {
    // The `.git` of a worktree OF A SUBMODULE points into
    // `.git/modules/<module>/worktrees/<name>` rather than the top-level repo's
    // `.git/worktrees/`. The detector used to miss the extra `modules/<name>`
    // segment, so such a worktree fell through to "embedded" and every symbol
    // it shared with the real submodule checkout was indexed twice. The
    // submodule's own checkout (`.git/modules/<module>`, no `worktrees/`) is
    // distinct code and must remain indexed (#514).
    const upstream = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-945-up-'));
    try {
      // This repo becomes the submodule's origin.
      write(path.join(upstream, 'lib.ts'), 'export function libFn() { return 1; }\n');
      makeRepo(upstream);

      write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n');
      write(path.join(ws, '.gitignore'), '.worktrees/\n');
      git(ws, 'init', '-q');
      // Modern git refuses a local-path submodule unless protocol.file.allow=always.
      const allowLocalClone = ['-c', 'protocol.file.allow=always'];
      git(ws, ...allowLocalClone, 'submodule', 'add', '-q', upstream, 'common');
      const asTestUser = ['-c', 'user.email=t@t', '-c', 'user.name=t'];
      git(ws, ...asTestUser, 'commit', '-qm', 'add submodule');

      // A worktree of the submodule under the gitignored .worktrees/; its
      // `.git` points into `.git/modules/common/worktrees/<name>`.
      const submoduleDir = path.join(ws, 'common');
      git(submoduleDir, 'worktree', 'add', '-q', '../.worktrees/common-feature', '-b', 'feature');

      const indexed = scanDirectory(ws);

      expect(indexed).toContain('src/app.ts');
      // The real submodule checkout is distinct code and still indexed (#514).
      expect(indexed).toContain('common/lib.ts');
      // The submodule worktree is a duplicate working view, never indexed (#945).
      const worktreeAbsent = indexed.every((entry) => !entry.includes('.worktrees'));
      expect(worktreeAbsent).toBe(true);
    } finally {
      fs.rmSync(upstream, { force: true, recursive: true });
    }
  });
  it('non-git workspace: walks children and respects each child own .gitignore', () => {
    const projA = path.join(ws, 'proj-a');
    const projB = path.join(ws, 'proj-b');

    write(path.join(projA, 'src/auth.ts'), 'export function login() {}\n');
    write(path.join(projA, 'build/out.ts'), 'export function generated() {}\n');
    write(path.join(projA, '.gitignore'), 'build/\n');
    write(path.join(projB, 'src/billing.ts'), 'export function charge() {}\n');
    makeRepo(projA);
    makeRepo(projB);
    // Note that ws itself is deliberately NOT turned into a git repo.

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('proj-a/src/auth.ts');
    expect(indexed).toContain('proj-b/src/billing.ts');
    const buildOutputAbsent = indexed.every((entry) => !entry.includes('build/'));
    expect(buildOutputAbsent).toBe(true);
  });
  it('does not search beyond the embedded-repo depth cap', () => {
    // The repo sits 5 levels below the ignored dir, which is past
    // EMBEDDED_REPO_SEARCH_DEPTH (4).
    const buriedRepo = path.join(ws, 'pkgs/a/b/c/d/e');
    write(path.join(buriedRepo, 'src/deep.ts'), 'export function deep() {}\n');
    makeRepo(buriedRepo);

    write(path.join(ws, 'main.ts'), 'export function main() {}\n');
    write(path.join(ws, '.gitignore'), '/pkgs/\n');
    makeRepo(ws);

    const indexed = scanDirectory(ws);

    expect(indexed).toContain('main.ts');
    const buriedFileAbsent = indexed.every((entry) => !entry.includes('deep.ts'));
    expect(buriedFileAbsent).toBe(true);
  });
  it('discovers embedded roots (ignored + untracked kinds); none for non-git roots', () => {
    // Ignored kind: packages/proj-a is hidden by the parent's .gitignore.
    const ignoredRepo = path.join(ws, 'packages/proj-a');
    write(path.join(ignoredRepo, 'src/auth.ts'), 'export function login() {}\n');
    makeRepo(ignoredRepo);

    // Untracked kind: vendor-src/lib is not mentioned in .gitignore at all.
    const untrackedRepo = path.join(ws, 'vendor-src/lib');
    write(path.join(untrackedRepo, 'util.ts'), 'export function util() {}\n');
    makeRepo(untrackedRepo);

    write(path.join(ws, '.gitignore'), '/packages/\n'); // vendor-src stays untracked
    makeRepo(ws);
    const asTestUser = ['-c', 'user.email=t@t', '-c', 'user.name=t'];
    git(ws, 'rm', '-r', '--cached', '-q', 'vendor-src');
    git(ws, ...asTestUser, 'commit', '-qm', 'untrack');

    const roots = discoverEmbeddedRepoRoots(ws);
    expect(roots).toContain('packages/proj-a/');
    expect(roots).toContain('vendor-src/lib/');

    // A plain directory that is not a git repo yields no embedded roots.
    const plain = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nongit-'));
    try {
      const rootsOfPlainDir = discoverEmbeddedRepoRoots(plain);
      expect(rootsOfPlainDir).toEqual([]);
    } finally {
      fs.rmSync(plain, { force: true, recursive: true });
    }
  });
  it('ScopeIgnore: embedded files use the child rules; the watcher can descend to them', () => {
    const projA = path.join(ws, 'packages/proj-a');
    write(path.join(projA, 'src/auth.ts'), 'export function login() {}\n');
    write(path.join(projA, '.gitignore'), 'build/\n');
    makeRepo(projA);
    write(path.join(ws, '.gitignore'), '/packages/\n');
    makeRepo(ws);

    const scope = buildScopeIgnore(ws);

    // Each entry pairs a workspace-relative path with whether it must be ignored.
    const expectations: Array<[string, boolean]> = [
      // Inside the embedded repo the CHILD's rules decide.
      ['packages/proj-a/src/auth.ts', false],
      ['packages/proj-a/build/out.ts', true],
      // Under the ignored dir but NOT in any embedded repo: parent rules apply.
      ['packages/stray.ts', true],
      // Directory form: ancestors of an embedded root are never pruned, because
      // the Linux per-directory watcher must descend through `packages/`.
      ['packages/', false],
      // Ordinary paths keep their usual semantics.
      ['node_modules/dep/index.ts', true],
      ['src/app.ts', false],
    ];
    for (const [candidate, shouldBeIgnored] of expectations) {
      expect(scope.ignores(candidate)).toBe(shouldBeIgnored);
    }
  });
  it('buildScopeIgnore: indexed root is itself a gitignored subdir of an enclosing repo (#936)', () => {
    // `child/` is NOT a repo of its own, so from inside it `git` resolves the
    // ENCLOSING repo. `git ls-files --directory`, now running with a wholly
    // ignored directory as cwd, emits the literal `./` ("this entire
    // directory"). That sentinel used to reach the `ignore` matcher, which
    // threw ("path should be a `path.relative()`d string, but got "./""). The
    // throw aborted buildScopeIgnore, so the MCP daemon's watcher never started
    // and auto-sync silently stalled until a manual `codegraph sync`.
    const child = path.join(ws, 'child');
    write(path.join(child, 'src/a.ts'), 'export const x = 1;\n');
    write(path.join(ws, '.gitignore'), '/child/\n');
    makeRepo(ws);

    // The crux: building the scope for the ignored subdir must not throw.
    const scope = buildScopeIgnore(child);

    // The subdir's own source is watchable/indexable rather than ignored.
    const sourceIgnored = scope.ignores('src/a.ts');
    expect(sourceIgnored).toBe(false);

    // The `./` self entry must not be mistaken for a nested embedded repo.
    const embeddedRoots = discoverEmbeddedRepoRoots(child);
    expect(embeddedRoots).toEqual([]);
  });
  it('sync picks up a change inside a gitignored embedded repo', async () => {
    const embeddedRepo = path.join(ws, 'packages/proj-a');
    const authFile = path.join(embeddedRepo, 'src/auth.ts');

    write(authFile, 'export function login() { return 1; }\n');
    makeRepo(embeddedRepo);
    write(path.join(ws, '.gitignore'), '/packages/\n');
    makeRepo(ws);

    const cg = CodeGraph.initSync(ws, { config: { include: ['**/*.ts'], exclude: [] } });
    try {
      await cg.indexAll();
      const loginHits = cg.searchNodes('login', { limit: 5 });
      expect(loginHits.length).toBeGreaterThan(0);

      // Edit a file inside the embedded repo; the parent's `git status` cannot
      // see this change.
      write(
        authFile,
        'export function login() { return 1; }\nexport function logout() { return 0; }\n',
      );
      await cg.sync();

      const logoutHits = cg.searchNodes('logout', { limit: 5 });
      expect(logoutHits.length).toBeGreaterThan(0);
    } finally {
      cg.destroy();
    }
  });
  249. });