explore-output-budget.test.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /**
  2. * Adaptive output budget for codegraph_explore (#185).
  3. *
  4. * The explore tool used to apply a fixed 35KB output cap regardless of
  5. * project size, which on small codebases was a net loss vs. native
  6. * grep+Read. These tests pin the per-tier budget shape so future tuning
  7. * doesn't silently drift the small-project case back into bloat.
  8. */
  9. import { describe, it, expect, beforeAll, afterAll } from 'vitest';
  10. import * as fs from 'fs';
  11. import * as path from 'path';
  12. import * as os from 'os';
  13. import { getExploreOutputBudget, getExploreBudget, ToolHandler } from '../src/mcp/tools';
  14. import CodeGraph from '../src/index';
  describe('getExploreOutputBudget', () => {
    /** Shorthand for the total-output cap picked for a project with `fileCount` files. */
    const capFor = (fileCount: number) => getExploreOutputBudget(fileCount).maxOutputChars;

    it('returns a strictly smaller total cap for small projects than for huge ones', () => {
      const smallTier = getExploreOutputBudget(100);
      const hugeTier = getExploreOutputBudget(30000);

      // Every size-related knob must shrink together on the small tier.
      const sizeKnobs = ['maxOutputChars', 'defaultMaxFiles', 'maxCharsPerFile'] as const;
      for (const knob of sizeKnobs) {
        expect(smallTier[knob]).toBeLessThan(hugeTier[knob]);
      }
    });

    it('caps total output well under 8000 tokens (~32k chars) on small projects', () => {
      expect(capFor(100)).toBeLessThanOrEqual(20000);
    });

    it('keeps the historical 35k+ ceiling for medium-large projects so existing benchmarks do not regress', () => {
      expect(capFor(10000)).toBeGreaterThanOrEqual(35000);
    });

    it('uses tier breakpoints matching getExploreBudget so call-count and output-budget agree on a project', () => {
      // Two file counts inside one tier must resolve to the same total-output cap.
      const firstTierLow = capFor(50);
      const firstTierHigh = capFor(499);
      expect(firstTierLow).toBe(firstTierHigh);
      expect(getExploreBudget(50)).toBe(getExploreBudget(499));

      const secondTierLow = capFor(500);
      const secondTierHigh = capFor(4999);
      expect(secondTierLow).toBe(secondTierHigh);
      expect(getExploreBudget(500)).toBe(getExploreBudget(4999));

      const thirdTierLow = capFor(5000);
      const thirdTierHigh = capFor(14999);
      expect(thirdTierLow).toBe(thirdTierHigh);

      // Stepping over a breakpoint must yield a different cap.
      expect(firstTierLow).not.toBe(secondTierLow);
      expect(secondTierLow).not.toBe(thirdTierLow);
    });

    it('gates off "Additional relevant files", completeness signal, and budget note on small projects', () => {
      const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
        getExploreOutputBudget(100);

      expect(includeAdditionalFiles).toBe(false);
      expect(includeCompletenessSignal).toBe(false);
      expect(includeBudgetNote).toBe(false);
    });

    it('keeps all meta-text on for projects that earn the breadth signal (>=500 files)', () => {
      const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
        getExploreOutputBudget(1000);

      expect(includeAdditionalFiles).toBe(true);
      expect(includeCompletenessSignal).toBe(true);
      expect(includeBudgetNote).toBe(true);
    });

    it('keeps the Relationships section on for every tier — it is the cheapest structural signal', () => {
      // One representative file count per tier.
      for (const fileCount of [50, 1000, 10000, 30000]) {
        expect(getExploreOutputBudget(fileCount).includeRelationships).toBe(true);
      }
    });

    it('caps the per-file header symbol list more tightly on small projects', () => {
      // Motivating case: Alamofire's Session.swift yielded a 3.4KB symbol list
      // in its `#### path — sym, sym, ...` header when uncapped, which dwarfed
      // the per-file body cap.
      const { maxSymbolsInFileHeader: smallLimit } = getExploreOutputBudget(100);
      const { maxSymbolsInFileHeader: hugeLimit } = getExploreOutputBudget(30000);

      expect(smallLimit).toBeLessThan(hugeLimit);
      expect(smallLimit).toBeGreaterThan(0);
    });

    it('uses a tighter clustering gap threshold on small projects to break runaway single clusters', () => {
      const { gapThreshold: smallGap } = getExploreOutputBudget(100);
      const { gapThreshold: hugeGap } = getExploreOutputBudget(30000);

      expect(smallGap).toBeLessThanOrEqual(hugeGap);
    });

    it('handles the boundary file counts exactly (off-by-one regression guard)', () => {
      // Small/medium edge: 499 still counts as small, 500 is already medium.
      expect(capFor(499)).toBe(capFor(100));
      expect(capFor(500)).toBe(capFor(1000));

      // Medium/large edge: 4999 is medium, 5000 is large.
      expect(capFor(4999)).toBe(capFor(1000));
      expect(capFor(5000)).toBe(capFor(10000));

      // Large/xlarge edge: 14999 is large, 15000 is xlarge.
      expect(capFor(14999)).toBe(capFor(10000));
      expect(capFor(15000)).toBe(capFor(30000));
    });
  });
  /**
   * End-to-end check that the budget is actually applied by handleExplore.
   *
   * Builds a tiny synthetic project (<500 files, so the small tier), indexes
   * it, and confirms the output:
   * - stays under the small-tier maxOutputChars cap
   * - omits the meta-text the small tier gates off (completeness signal,
   *   budget note, "Additional relevant files")
   *
   * Regression guard for #185 — protects against future edits to handleExplore
   * silently re-introducing the fixed 35KB cap on small projects.
   */
  describe('codegraph_explore output respects the adaptive budget', () => {
    /** Query shared by every test; it matches the synthetic Session class. */
    const EXPLORE_QUERY = 'Session method helper';
    /** Environment toggle that the line-number tests flip. */
    const LINENUMS_ENV = 'CODEGRAPH_EXPLORE_LINENUMS';

    let testDir: string;
    let cg: CodeGraph;
    let handler: ToolHandler;

    /**
     * Runs codegraph_explore with the shared query and returns the text of the
     * first content item.
     *
     * Also asserts that the text is non-empty: several tests below only check
     * that something is absent or that the length is below a cap, and those
     * would pass vacuously if the tool returned no content at all.
     */
    const explore = async (): Promise<string> => {
      const result = await handler.execute('codegraph_explore', { query: EXPLORE_QUERY });
      const text = result.content?.[0]?.text ?? '';
      expect(text.length).toBeGreaterThan(0);
      return text;
    };

    /**
     * Runs `body` with the line-number toggle set to `value` (or unset when
     * `value` is undefined), then puts the variable back exactly as it was.
     *
     * Restoring matters because process.env is shared by the whole test
     * process; a value set by the developer or CI must survive this suite.
     * `delete` is used to unset because assigning undefined to a process.env
     * key stores the string "undefined" instead of removing it.
     */
    const withLineNumsEnv = async (
      value: string | undefined,
      body: () => Promise<void>
    ): Promise<void> => {
      const previous = process.env[LINENUMS_ENV];
      if (value === undefined) {
        delete process.env[LINENUMS_ENV];
      } else {
        process.env[LINENUMS_ENV] = value;
      }
      try {
        await body();
      } finally {
        if (previous === undefined) {
          delete process.env[LINENUMS_ENV];
        } else {
          process.env[LINENUMS_ENV] = previous;
        }
      }
    };

    beforeAll(async () => {
      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-explore-budget-'));
      const srcDir = path.join(testDir, 'src');
      fs.mkdirSync(srcDir);

      // A handful of files with one fat target file. The fat file mimics the
      // Alamofire Session.swift case: many methods stacked on top of each other,
      // which collapsed into one giant cluster pre-#185.
      const fatLines: string[] = ['export class Session {'];
      for (let i = 0; i < 30; i++) {
        fatLines.push(
          ` method${i}(arg: string): string {`,
          ` return this.helper${i}(arg) + "${i}";`,
          ` }`,
          ` private helper${i}(arg: string): string {`,
          ` return arg.repeat(${i + 1});`,
          ` }`
        );
      }
      fatLines.push('}');
      fs.writeFileSync(path.join(srcDir, 'session.ts'), fatLines.join('\n'));

      // A few small supporting files so the project has >1 indexed file.
      for (let i = 0; i < 5; i++) {
        fs.writeFileSync(
          path.join(srcDir, `support${i}.ts`),
          `import { Session } from './session';\nexport function callSession${i}(s: Session) { return s.method${i}('hi'); }\n`
        );
      }

      cg = CodeGraph.initSync(testDir, {
        config: { include: ['**/*.ts'], exclude: [] },
      });
      await cg.indexAll();
      handler = new ToolHandler(cg);
    });

    afterAll(() => {
      try {
        // `cg` is still unset if beforeAll failed before initSync returned.
        if (cg) cg.destroy();
      } finally {
        // Remove the temp project even when destroy() throws. `force: true`
        // makes rmSync ignore a path that no longer exists.
        if (testDir) {
          fs.rmSync(testDir, { recursive: true, force: true });
        }
      }
    });

    it('keeps total output under the small-project cap', async () => {
      const text = await explore();
      const smallBudget = getExploreOutputBudget(100);
      // Allow a small overshoot for the trailing markers — the cap is enforced
      // per-file rather than as an absolute output ceiling.
      expect(text.length).toBeLessThan(smallBudget.maxOutputChars + 500);
    });

    it('omits the meta-text gated off for small projects', async () => {
      const text = await explore();
      expect(text).not.toContain('### Additional relevant files');
      expect(text).not.toContain('Complete source code is included above');
      expect(text).not.toContain('Explore budget:');
    });

    it('still includes the Relationships section — it is the cheapest structural signal', async () => {
      const text = await explore();
      // Either there are relationships, or no edges were significant — both are fine.
      // We just want to confirm we did not accidentally gate it off.
      const hasRelationships = text.includes('### Relationships');
      const sourceFollowsHeader = text.indexOf('### Source Code') > 0;
      expect(hasRelationships || sourceFollowsHeader).toBe(true);
    });

    it('prefixes source lines with line numbers by default (cat -n style)', async () => {
      // Unset the toggle so the default behaviour is what gets exercised.
      await withLineNumsEnv(undefined, async () => {
        const text = await explore();
        // At least one fenced source line should look like `<digits>\t<code>`.
        expect(/\n\d+\t/.test(text)).toBe(true);
      });
    });

    it('omits line numbers when CODEGRAPH_EXPLORE_LINENUMS=0', async () => {
      await withLineNumsEnv('0', async () => {
        const text = await explore();
        // The synthetic source has no tab-prefixed numeric lines of its own,
        // so none should appear when the toggle is off.
        expect(/\n\d+\t(?:export| )/.test(text)).toBe(false);
      });
    });

    it('uses language-neutral omission markers (no C-style // in the output)', async () => {
      // The gap/trimmed separators must not assume `//` is a comment — that's
      // wrong in Python, Ruby, etc. They render inside fenced source blocks.
      const text = await explore();
      expect(text).not.toContain('// ... (gap)');
      expect(text).not.toContain('// ... trimmed');
    });

    it('does not collapse a whole-file class into just its header (envelope filter)', async () => {
      // The synthetic `Session` class spans the entire file. Without the
      // envelope filter it would form one giant cluster that tail-trims to
      // the class declaration, hiding the methods. Confirm real method bodies
      // make it into the output. Regression guard for the #185 follow-up.
      const text = await explore();
      // A method body line (`methodN(arg: string)`) should appear, not just
      // the `export class Session {` opener.
      const hasMethodBody = /method\d+\(arg: string\)/.test(text);
      expect(hasMethodBody).toBe(true);
    });
  });