// multi-repo-workspace.test.ts (18 KB)
  /**
   * Multi-repo workspaces (#514) — and the `.gitignore`-respect default (#970, #976).
   *
   * A directory holding several independent git repositories can be indexed as a
   * whole, but ONLY when the project opts the gitignored directories in. The
   * default is the universal one: `.gitignore` excludes. Walking into a gitignored
   * directory to index embedded repos there is OPT-IN via `codegraph.json`
   * `includeIgnored` (#622, #699) — without it a gitignored `node_modules`-style
   * reference/data dir full of nested clones is left untouched, instead of blowing
   * the graph up or stalling the scan (#970, #976).
   *
   * Two enumeration paths are exercised under opt-in:
   *  - git path: the workspace root is itself a git repo (a "super-repo") whose
   *    `.gitignore` hides the child repos. They are discovered via the ignored-
   *    directories listing and enumerated by their own `git ls-files`. (#193
   *    covered the *untracked* embedded case, which stays on by default.)
   *  - sync path: `git status` in the parent says nothing about embedded repos;
   *    change detection recurses into the opted-in ones.
   *
   * The non-git-parent case (plain folder of repos) works via the filesystem walk
   * regardless — locked in here so it stays that way.
   */
  23. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  24. import * as fs from 'fs';
  25. import * as path from 'path';
  26. import * as os from 'os';
  27. import { execFileSync } from 'child_process';
  28. import CodeGraph from '../src/index';
  29. import { scanDirectory, buildScopeIgnore, discoverEmbeddedRepoRoots } from '../src/extraction';
  30. import { clearProjectConfigCache } from '../src/project-config';
  /**
   * Run `git <args>` synchronously with `cwd` as the working directory.
   *
   * stdin and stdout are discarded so test output stays quiet. stderr is
   * captured instead of discarded: when the command exits non-zero,
   * `execFileSync` appends the captured stderr to the message of the error it
   * throws, so a broken fixture step reports git's own explanation rather than
   * a bare "Command failed: git …".
   *
   * @param cwd - Directory the command is executed in.
   * @param args - Arguments handed to the `git` executable, in order.
   * @throws Error when git cannot be spawned or exits with a non-zero status.
   */
  function git(cwd: string, ...args: string[]): void {
    execFileSync('git', args, { cwd, stdio: ['ignore', 'ignore', 'pipe'] });
  }
  /**
   * Turn `dir` into a git repository with one commit containing everything
   * currently on disk there.
   *
   * The commit runs with a throwaway identity supplied through `-c`, and with
   * `--allow-empty`.
   *
   * @param dir - Directory to initialise and commit.
   */
  function makeRepo(dir: string): void {
    const identity = ['-c', 'user.email=t@t', '-c', 'user.name=t'];
    // Executed strictly in this order: init, stage everything, commit.
    const steps: string[][] = [
      ['init', '-q'],
      ['add', '-A'],
      [...identity, 'commit', '-qm', 'init', '--allow-empty'],
    ];
    for (const step of steps) {
      git(dir, ...step);
    }
  }
  /**
   * Write `content` to `file`, creating any missing parent directories first.
   * An existing file at that path is overwritten.
   *
   * @param file - Destination path of the file.
   * @param content - Text to store in the file.
   */
  function write(file: string, content: string): void {
    const parentDir = path.dirname(file);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(file, content);
  }
  /**
   * Fixture-driven tests for multi-repo workspaces. Each test builds a throwaway
   * workspace under the OS temp directory, shapes it with `git` commands, and
   * asserts on what `scanDirectory`, `discoverEmbeddedRepoRoots`,
   * `buildScopeIgnore`, or a `CodeGraph` index/sync report for it.
   */
  describe('multi-repo workspaces (#514) + .gitignore-respect default (#970, #976)', () => {
    /** Absolute path of the temp workspace; a fresh one is created before each test. */
    let ws: string;

    beforeEach(() => {
      ws = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-multirepo-'));
      clearProjectConfigCache();
    });

    afterEach(() => {
      clearProjectConfigCache();
      fs.rmSync(ws, { recursive: true, force: true });
    });

    /** Resolve a workspace-relative path against the current temp workspace. */
    const inWs = (rel: string): string => path.join(ws, rel);

    /** Drop a `codegraph.json` at the workspace root (strings are written verbatim). */
    const writeConfig = (obj: unknown): void => {
      const payload = typeof obj === 'string' ? obj : JSON.stringify(obj);
      fs.writeFileSync(inWs('codegraph.json'), payload);
    };

    /** Commit whatever is staged in `cwd`, using the throwaway test identity. */
    const commitAs = (cwd: string, message: string): void => {
      git(cwd, '-c', 'user.email=t@t', '-c', 'user.name=t', 'commit', '-qm', message);
    };

    /**
     * Remove `rel` from the workspace repo's index (`git rm --cached`) and commit
     * that removal.
     */
    const untrack = (rel: string): void => {
      git(ws, 'rm', '-r', '--cached', '-q', rel);
      commitAs(ws, 'untrack');
    };

    describe('default: .gitignore is respected (#970, #976)', () => {
      /** Parent repo whose `.gitignore` hides `resource/`, which holds one embedded repo. */
      const seedIgnoredResourceRepo = (): void => {
        write(inWs('resource/ref/src/x.ts'), 'export const x = 1;\n');
        makeRepo(inWs('resource/ref'));
        write(inWs('.gitignore'), '/resource/\n');
        makeRepo(ws);
      };

      it('does NOT index embedded repos inside a gitignored dir without opt-in', () => {
        // The #976 layout: nested clones living under a directory the user
        // gitignored on purpose. None of them may reach the index.
        write(inWs('.repos/lib-a/src/a.ts'), 'export function fromLibA() { return 1; }\n');
        write(inWs('.repos/lib-b/src/b.ts'), 'export function fromLibB() { return 2; }\n');
        makeRepo(inWs('.repos/lib-a'));
        makeRepo(inWs('.repos/lib-b'));
        write(inWs('.gitignore'), '/.repos/\n');
        write(inWs('app.ts'), 'export function app() { return 0; }\n');
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        // The project's own code is still indexed.
        expect(scanned).toContain('app.ts');
        expect(scanned.some((file) => file.startsWith('.repos/'))).toBe(false);
      });

      it('does NOT discover gitignored embedded roots without opt-in', () => {
        seedIgnoredResourceRepo();

        // #970 perf fix: a gitignored dir of reference repos is never walked.
        expect(discoverEmbeddedRepoRoots(ws)).toEqual([]);
      });

      it('ScopeIgnore: a gitignored dir is fully pruned without opt-in', () => {
        seedIgnoredResourceRepo();

        const matcher = buildScopeIgnore(ws);

        // The directory and everything below it are ignored, so the watcher
        // has no reason to descend.
        expect(matcher.ignores('resource/')).toBe(true);
        expect(matcher.ignores('resource/ref/src/x.ts')).toBe(true);
      });
    });

    describe('opt-in: codegraph.json includeIgnored re-includes a gitignored dir (#622, #699)', () => {
      it('indexes embedded repos hidden by the super-repo .gitignore', () => {
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() { return 1; }\n');
        write(inWs('packages/proj-b/src/billing.ts'), 'export function charge() { return 2; }\n');
        makeRepo(inWs('packages/proj-a'));
        makeRepo(inWs('packages/proj-b'));
        write(inWs('.gitignore'), '/packages/\n');
        write(inWs('tools.ts'), 'export function tool() { return 0; }\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('packages/proj-a/src/auth.ts');
        expect(scanned).toContain('packages/proj-b/src/billing.ts');
        // The parent's own tracked code is still indexed.
        expect(scanned).toContain('tools.ts');
      });

      it('only re-includes the opted-in dir, not every gitignored dir', () => {
        // Both dirs hold a repo and both are gitignored, but only `packages/`
        // is listed in includeIgnored; `scratch/` is not.
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() {}\n');
        makeRepo(inWs('packages/proj-a'));
        write(inWs('scratch/throwaway/src/junk.ts'), 'export function junk() {}\n');
        makeRepo(inWs('scratch/throwaway'));
        write(inWs('.gitignore'), '/packages/\n/scratch/\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('packages/proj-a/src/auth.ts');
        expect(scanned.some((file) => file.startsWith('scratch/'))).toBe(false);
      });

      it('discovers the opted-in ignored root alongside untracked roots', () => {
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() {}\n');
        makeRepo(inWs('packages/proj-a'));
        write(inWs('vendor-src/lib/util.ts'), 'export function util() {}\n');
        makeRepo(inWs('vendor-src/lib'));
        // Only `packages/` is gitignored; vendor-src is left to be untracked.
        write(inWs('.gitignore'), '/packages/\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);
        untrack('vendor-src');

        const found = discoverEmbeddedRepoRoots(ws);

        expect(found).toContain('packages/proj-a/'); // opted-in ignored kind
        expect(found).toContain('vendor-src/lib/'); // untracked kind (always on)
      });

      it('ScopeIgnore: opted-in embedded files use the child rules; the watcher can descend', () => {
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() {}\n');
        write(inWs('packages/proj-a/.gitignore'), 'build/\n');
        makeRepo(inWs('packages/proj-a'));
        write(inWs('.gitignore'), '/packages/\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);

        const matcher = buildScopeIgnore(ws);

        // Paths inside the opted-in embedded repo are judged by the CHILD's rules.
        expect(matcher.ignores('packages/proj-a/src/auth.ts')).toBe(false);
        expect(matcher.ignores('packages/proj-a/build/out.ts')).toBe(true);
        // Below the ignored dir but outside any embedded repo: parent rules apply.
        expect(matcher.ignores('packages/stray.ts')).toBe(true);
        // Directory form: an ancestor of an embedded root is never pruned, since
        // the Linux per-directory watcher has to descend through `packages/`.
        expect(matcher.ignores('packages/')).toBe(false);
        // Ordinary paths keep their usual semantics.
        expect(matcher.ignores('node_modules/dep/index.ts')).toBe(true);
        expect(matcher.ignores('src/app.ts')).toBe(false);
      });

      it('sync picks up a change inside an opted-in gitignored embedded repo', async () => {
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() { return 1; }\n');
        makeRepo(inWs('packages/proj-a'));
        write(inWs('.gitignore'), '/packages/\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);

        const graph = CodeGraph.initSync(ws, { config: { include: ['**/*.ts'], exclude: [] } });
        try {
          await graph.indexAll();
          const loginHits = graph.searchNodes('login', { limit: 5 });
          expect(loginHits.length).toBeGreaterThan(0);

          // Edit a file inside the embedded repo; the parent's `git status`
          // cannot see this change.
          write(
            inWs('packages/proj-a/src/auth.ts'),
            'export function login() { return 1; }\nexport function logout() { return 0; }\n',
          );
          await graph.sync();

          const logoutHits = graph.searchNodes('logout', { limit: 5 });
          expect(logoutHits.length).toBeGreaterThan(0);
        } finally {
          graph.destroy();
        }
      });
    });

    describe('discovery/classifier machinery (exercised under opt-in)', () => {
      it('keeps respecting the parent .gitignore for the parent own (non-repo) dirs', () => {
        write(inWs('scratch/junk.ts'), 'export function junk() { return 9; }\n');
        write(inWs('src/app.ts'), 'export function app() { return 1; }\n');
        write(inWs('.gitignore'), '/scratch/\n');
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('src/app.ts');
        // scratch/ is gitignored and holds NO embedded repo, so it stays excluded.
        expect(scanned.some((file) => file.startsWith('scratch/'))).toBe(false);
      });

      it('never descends into git repos inside node_modules (npm git-dependencies)', () => {
        // The embedded repo is committed clean first and node_modules is dropped
        // in afterwards, mirroring reality where node_modules is never committed.
        write(inWs('packages/proj-a/src/auth.ts'), 'export function login() {}\n');
        makeRepo(inWs('packages/proj-a'));
        write(inWs('packages/proj-a/node_modules/inner/src/evil2.ts'), 'export function evil2() {}\n');
        // An npm git-dependency: a repo with commits of its own.
        makeRepo(inWs('packages/proj-a/node_modules/inner'));
        // A git-dependency at workspace level as well.
        write(inWs('node_modules/git-dep/src/evil.ts'), 'export function evil() {}\n');
        makeRepo(inWs('node_modules/git-dep'));
        write(inWs('.gitignore'), '/packages/\nnode_modules\n');
        writeConfig({ includeIgnored: ['packages/'] });
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('packages/proj-a/src/auth.ts');
        // node_modules is a built-in default exclude: it is never re-included,
        // even with `packages/` opted in and node_modules gitignored.
        expect(scanned.some((file) => file.includes('node_modules'))).toBe(false);
      });

      it('still indexes UNTRACKED embedded repos by default (#193 regression)', () => {
        write(inWs('vendor-src/lib/src/util.ts'), 'export function util() {}\n');
        makeRepo(inWs('vendor-src/lib'));
        write(inWs('main.ts'), 'export function main() {}\n');
        // `makeRepo` stages everything with `add -A`, which sweeps vendor-src
        // into the commit too. Untrack it again so vendor-src/ ends up
        // untracked (not ignored), which is the state under test.
        makeRepo(ws);
        untrack('vendor-src');

        // No codegraph.json here: the untracked path is not behind the opt-in gate.
        const scanned = scanDirectory(ws);

        expect(scanned).toContain('vendor-src/lib/src/util.ts');
        expect(scanned).toContain('main.ts');
      });

      it('skips nested git worktrees instead of indexing them as duplicate embedded repos (#848)', () => {
        // Claude Code (and others) create worktrees under a gitignored path such
        // as `.claude/worktrees/<name>/`. A worktree's `.git` is a FILE pointing
        // into the host repo's own `.git/worktrees/`, i.e. the SAME repo that is
        // already indexed, so treating it as an embedded repo would multiply the
        // whole graph. A genuine embedded clone (a `.git` *directory*) must still
        // be indexed. Both dirs are opted in so that the classifier, not the
        // gitignore gate, makes the call: worktree skipped, real clone kept.
        write(inWs('src/app.ts'), 'export function app() { return 1; }\n');
        write(inWs('.gitignore'), '.claude/\nvendored/\n');
        writeConfig({ includeIgnored: ['.claude/', 'vendored/'] });
        makeRepo(ws);
        // A real linked worktree beneath the gitignored .claude/worktrees/.
        git(ws, 'worktree', 'add', '-q', '.claude/worktrees/feature', '-b', 'feature');
        // A genuine embedded clone, gitignored as well; must STAY indexed under opt-in.
        write(inWs('vendored/lib.ts'), 'export function vendoredFn() { return 9; }\n');
        makeRepo(inWs('vendored'));

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('src/app.ts');
        // The worktree is a duplicate working view and is never indexed (#848).
        expect(scanned.some((file) => file.includes('.claude/worktrees'))).toBe(false);
        // The genuine embedded clone remains indexed under opt-in (#514/#622).
        expect(scanned).toContain('vendored/lib.ts');
      });

      it('skips a submodule worktree instead of indexing it as a duplicate (#945)', () => {
        // A worktree OF A SUBMODULE points its `.git` into
        // `.git/modules/<module>/worktrees/<name>` rather than the top-level
        // repo's `.git/worktrees/`. The detector used to miss the extra
        // `modules/<name>` segment, so the worktree was classified as "embedded"
        // and every symbol shared with the real submodule checkout was indexed
        // twice. The submodule's own checkout (`.git/modules/<module>`, without
        // `worktrees/`) is distinct code and must stay indexed. The worktree dir
        // is opted in so the classifier, not the gitignore gate, is what skips it.
        const origin = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-945-up-'));
        try {
          // The repo that will serve as the submodule's origin.
          write(path.join(origin, 'lib.ts'), 'export function libFn() { return 1; }\n');
          makeRepo(origin);

          write(inWs('src/app.ts'), 'export function app() { return 1; }\n');
          write(inWs('.gitignore'), '.worktrees/\n');
          writeConfig({ includeIgnored: ['.worktrees/'] });
          git(ws, 'init', '-q');
          // protocol.file.allow=always: modern git otherwise refuses a
          // local-path submodule.
          git(ws, '-c', 'protocol.file.allow=always', 'submodule', 'add', '-q', origin, 'common');
          commitAs(ws, 'add submodule');
          // A worktree of the submodule under the gitignored .worktrees/; its
          // `.git` points into `.git/modules/common/worktrees/<name>`.
          git(inWs('common'), 'worktree', 'add', '-q', '../.worktrees/common-feature', '-b', 'feature');

          const scanned = scanDirectory(ws);

          expect(scanned).toContain('src/app.ts');
          // The real submodule checkout is distinct code and still indexed (#514).
          expect(scanned).toContain('common/lib.ts');
          // The submodule worktree is a duplicate working view, never indexed (#945).
          expect(scanned.some((file) => file.includes('.worktrees'))).toBe(false);
        } finally {
          fs.rmSync(origin, { recursive: true, force: true });
        }
      });

      it('non-git workspace: walks children and respects each child own .gitignore', () => {
        write(inWs('proj-a/src/auth.ts'), 'export function login() {}\n');
        write(inWs('proj-a/build/out.ts'), 'export function generated() {}\n');
        write(inWs('proj-a/.gitignore'), 'build/\n');
        write(inWs('proj-b/src/billing.ts'), 'export function charge() {}\n');
        makeRepo(inWs('proj-a'));
        makeRepo(inWs('proj-b'));
        // Note that `ws` itself is deliberately NOT turned into a git repo.

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('proj-a/src/auth.ts');
        expect(scanned).toContain('proj-b/src/billing.ts');
        expect(scanned.some((file) => file.includes('build/'))).toBe(false);
      });

      it('does not search beyond the embedded-repo depth cap (opted-in dir)', () => {
        // A repo buried 5 levels below the ignored dir, which is past
        // EMBEDDED_REPO_SEARCH_DEPTH (4).
        const buried = inWs('pkgs/a/b/c/d/e');
        write(path.join(buried, 'src/deep.ts'), 'export function deep() {}\n');
        makeRepo(buried);
        write(inWs('main.ts'), 'export function main() {}\n');
        write(inWs('.gitignore'), '/pkgs/\n');
        writeConfig({ includeIgnored: ['pkgs/'] });
        makeRepo(ws);

        const scanned = scanDirectory(ws);

        expect(scanned).toContain('main.ts');
        expect(scanned.some((file) => file.includes('deep.ts'))).toBe(false);
      });

      it('buildScopeIgnore: indexed root is itself a gitignored subdir of an enclosing repo (#936)', () => {
        // `child/` is NOT a repo of its own, so `git` resolves the ENCLOSING repo
        // from inside it, and `git ls-files --directory`, whose cwd is then a
        // wholly ignored directory, emits the literal `./` ("this entire
        // directory"). That sentinel used to reach the `ignore` matcher and throw
        // ("path should be a `path.relative()`d string, but got "./""), aborting
        // buildScopeIgnore: the MCP daemon's watcher never started and auto-sync
        // silently stalled until a manual `codegraph sync`.
        write(inWs('child/src/a.ts'), 'export const x = 1;\n');
        write(inWs('.gitignore'), '/child/\n');
        makeRepo(ws);
        const ignoredChild = inWs('child');

        // The crux: building the scope for the ignored subdir must not throw.
        const matcher = buildScopeIgnore(ignoredChild);

        // The subdir's own source is watchable/indexable, not ignored.
        expect(matcher.ignores('src/a.ts')).toBe(false);
        // The `./` self entry must not be mistaken for a nested embedded repo.
        expect(discoverEmbeddedRepoRoots(ignoredChild)).toEqual([]);
      });
    });
  });