| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- /**
- * Adaptive output budget for codegraph_explore (#185).
- *
- * The explore tool used to apply a fixed 35KB output cap regardless of
- * project size, which on small codebases was a net loss vs. native
- * grep+Read. These tests pin the per-tier budget shape so future tuning
- * doesn't silently drift the small-project case back into bloat.
- */
- import { describe, it, expect, beforeAll, afterAll } from 'vitest';
- import * as fs from 'fs';
- import * as path from 'path';
- import * as os from 'os';
- import { getExploreOutputBudget, getExploreBudget, ToolHandler } from '../src/mcp/tools';
- import CodeGraph from '../src/index';
describe('getExploreOutputBudget', () => {
  /** Shorthand for the total output cap reported for a project of `fileCount` files. */
  const capFor = (fileCount: number) => getExploreOutputBudget(fileCount).maxOutputChars;

  it('returns a strictly smaller total cap for small projects than for huge ones', () => {
    const lowEnd = getExploreOutputBudget(100);
    const highEnd = getExploreOutputBudget(30000);

    // Every size-related knob must shrink together on the small end.
    for (const knob of ['maxOutputChars', 'defaultMaxFiles', 'maxCharsPerFile'] as const) {
      expect(lowEnd[knob]).toBeLessThan(highEnd[knob]);
    }
  });

  it('caps total output well under 8000 tokens (~32k chars) on small projects', () => {
    expect(capFor(100)).toBeLessThanOrEqual(20000);
  });

  it('caps medium-large projects at the inline tool-result ceiling (~24k) so the result is never externalized', () => {
    // Oversized single responses get spilled by the host into a file that the
    // agent then has to Read back (observed with a 35k vscode explore in the
    // n=4 A/B), which costs an extra read plus a cache write. Big repos are
    // therefore given MORE CALLS via getExploreBudget rather than a larger
    // payload, and the output cap is held below the inline limit.
    const largeCap = capFor(10000);
    expect(largeCap).toBeLessThanOrEqual(25000);
    expect(largeCap).toBeGreaterThanOrEqual(20000);
  });

  it('uses tier breakpoints matching getExploreBudget so call-count and output-budget agree on a project', () => {
    // Inclusive [first, last] file counts of each output tier. The very-tiny
    // tier (<150 files) is tighter than small (150-499); it is paired with tool
    // gating for the regime where MCP overhead dominates.
    const tierRanges = [
      [50, 149],
      [150, 499],
      [500, 4999],
      [5000, 14999],
    ] as const;

    // Both ends of a tier must report the same output cap.
    for (const [first, last] of tierRanges) {
      expect(capFor(first)).toBe(capFor(last));
    }

    // The <500 explore-call budget spans both the very-tiny and small tiers.
    expect(getExploreBudget(50)).toBe(getExploreBudget(499));
    expect(getExploreBudget(500)).toBe(getExploreBudget(4999));

    // Small tiers step up (13k → 18k → 24k); medium and large SHARE the ~24k
    // inline ceiling, because scaling with repo size is handled by the CALL
    // budget (getExploreBudget) instead of a fatter single response.
    const [veryTinyCap, smallCap, mediumCap, largeCap] = tierRanges.map(([first]) => capFor(first));
    expect(veryTinyCap).not.toBe(smallCap); // <150 vs <500
    expect(smallCap).not.toBe(mediumCap); // <500 vs <5000
    expect(mediumCap).toBe(largeCap); // <5000 == <15000 (inline cap)
    expect(getExploreBudget(5000)).toBeGreaterThan(getExploreBudget(4999)); // calls scale instead
  });

  it('gates off "Additional relevant files", completeness signal, and budget note on small projects', () => {
    const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
      getExploreOutputBudget(100);

    expect(includeAdditionalFiles).toBe(false);
    expect(includeCompletenessSignal).toBe(false);
    expect(includeBudgetNote).toBe(false);
  });

  it('keeps all meta-text on for projects that earn the breadth signal (>=500 files)', () => {
    const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
      getExploreOutputBudget(1000);

    expect(includeAdditionalFiles).toBe(true);
    expect(includeCompletenessSignal).toBe(true);
    expect(includeBudgetNote).toBe(true);
  });

  it('keeps the Relationships section on for medium+ tiers — small tiers drop it to maximize body density', () => {
    // ITER2: the section is dropped below 500 files. On tiny repos the per-call
    // payload is what drives cost, so even "cheap" structural signal accumulates
    // over follow-up turns. From 500 files up the body budget is roomy enough to
    // absorb the 1-2KB overhead, so it is switched back on.
    expect(getExploreOutputBudget(50).includeRelationships).toBe(false);

    for (const fileCount of [1000, 10000, 30000]) {
      expect(getExploreOutputBudget(fileCount).includeRelationships).toBe(true);
    }
  });

  it('caps the per-file header symbol list more tightly on small projects', () => {
    // Uncapped, a file such as Alamofire's Session.swift yielded a 3.4KB symbol
    // list in its `#### path — sym, sym, ...` header, which dwarfed the
    // per-file body cap.
    const smallLimit = getExploreOutputBudget(100).maxSymbolsInFileHeader;
    const hugeLimit = getExploreOutputBudget(30000).maxSymbolsInFileHeader;

    expect(smallLimit).toBeLessThan(hugeLimit);
    expect(smallLimit).toBeGreaterThan(0);
  });

  it('uses a tighter clustering gap threshold on small projects to break runaway single clusters', () => {
    const smallGap = getExploreOutputBudget(100).gapThreshold;
    const hugeGap = getExploreOutputBudget(30000).gapThreshold;

    expect(smallGap).toBeLessThanOrEqual(hugeGap);
  });

  it('handles the boundary file counts exactly (off-by-one regression guard)', () => {
    // Each row is [file count sitting on a tier edge, file count well inside
    // the tier that edge must belong to].
    const edgeCases = [
      [149, 50], // 149 -> very-tiny
      [150, 200], // 150 -> small
      [499, 200], // 499 -> small
      [500, 1000], // 500 -> medium
      [4999, 1000], // 4999 -> medium
      [5000, 10000], // 5000 -> large
      [14999, 10000], // 14999 -> large
      [15000, 30000], // 15000 -> xlarge
    ] as const;

    for (const [edge, interior] of edgeCases) {
      expect(capFor(edge)).toBe(capFor(interior));
    }
  });
});
/**
 * End-to-end check that the budget is actually applied when the
 * codegraph_explore tool runs.
 *
 * Builds a tiny synthetic project (6 files — below the 150-file breakpoint, so
 * it lands in the tightest budget tier), indexes it, and confirms the output:
 *   - stays under that tier's maxOutputChars cap
 *   - omits the meta-text that tier gates off (completeness signal, budget
 *     note, "Additional relevant files", Relationships)
 *
 * Regression guard for #185 — protects against future edits to handleExplore
 * silently re-introducing the fixed 35KB cap on small projects.
 */
describe('codegraph_explore output respects the adaptive budget', () => {
  /** Query shared by every test in this suite. */
  const QUERY = 'Session method helper';
  /** Environment toggle that controls the line-number prefix on source lines. */
  const LINENUMS_ENV = 'CODEGRAPH_EXPLORE_LINENUMS';

  let testDir: string;
  let cg: CodeGraph;
  let handler: ToolHandler;

  /**
   * Runs the shared explore query and returns the text of the first content
   * item, or '' when the result carries none.
   */
  async function exploreText(): Promise<string> {
    const result = await handler.execute('codegraph_explore', { query: QUERY });
    return result.content?.[0]?.text ?? '';
  }

  /**
   * Runs `run` with the line-number toggle forced to `value` (`undefined`
   * means "unset"), then puts the variable back exactly as it was found so the
   * change cannot leak into other tests.
   */
  async function withLineNumsEnv<T>(value: string | undefined, run: () => Promise<T>): Promise<T> {
    const previous = process.env[LINENUMS_ENV];
    // Assigning `undefined` to process.env stores the string "undefined", so an
    // unset state has to be expressed with `delete`.
    if (value === undefined) {
      delete process.env[LINENUMS_ENV];
    } else {
      process.env[LINENUMS_ENV] = value;
    }
    try {
      return await run();
    } finally {
      if (previous === undefined) {
        delete process.env[LINENUMS_ENV];
      } else {
        process.env[LINENUMS_ENV] = previous;
      }
    }
  }

  beforeAll(async () => {
    testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-explore-budget-'));
    const srcDir = path.join(testDir, 'src');
    fs.mkdirSync(srcDir);

    // A handful of files with one fat target file. The fat file mimics the
    // Alamofire Session.swift case: many methods stacked on top of each other,
    // which collapsed into one giant cluster pre-#185.
    const fatLines: string[] = ['export class Session {'];
    for (let i = 0; i < 30; i++) {
      fatLines.push(` method${i}(arg: string): string {`);
      fatLines.push(` return this.helper${i}(arg) + "${i}";`);
      fatLines.push(` }`);
      fatLines.push(` private helper${i}(arg: string): string {`);
      fatLines.push(` return arg.repeat(${i + 1});`);
      fatLines.push(` }`);
    }
    fatLines.push('}');
    fs.writeFileSync(path.join(srcDir, 'session.ts'), fatLines.join('\n'));

    // A few small supporting files so the project has >1 indexed file.
    for (let i = 0; i < 5; i++) {
      fs.writeFileSync(
        path.join(srcDir, `support${i}.ts`),
        `import { Session } from './session';\nexport function callSession${i}(s: Session) { return s.method${i}('hi'); }\n`
      );
    }

    cg = CodeGraph.initSync(testDir, {
      config: { include: ['**/*.ts'], exclude: [] },
    });
    await cg.indexAll();
    handler = new ToolHandler(cg);
  });

  afterAll(() => {
    try {
      if (cg) cg.destroy();
    } finally {
      // Remove the temp project even when destroy() throws. `force: true`
      // already tolerates a missing directory, so no existence check is needed.
      if (testDir) {
        fs.rmSync(testDir, { recursive: true, force: true });
      }
    }
  });

  it('keeps total output under the small-project cap', async () => {
    const text = await exploreText();
    const smallBudget = getExploreOutputBudget(100);
    // Allow a small overshoot for the trailing markers — the cap is enforced
    // per-file rather than as an absolute output ceiling.
    expect(text.length).toBeLessThan(smallBudget.maxOutputChars + 500);
  });

  it('omits the meta-text gated off for small projects', async () => {
    const text = await exploreText();
    expect(text).not.toContain('### Additional relevant files');
    expect(text).not.toContain('Complete source code is included above');
    expect(text).not.toContain('Explore budget:');
  });

  it('omits the Relationships section on small projects but still emits the source section', async () => {
    // The budget for projects this small reports includeRelationships === false,
    // so the section must not be rendered here. The source section is asserted
    // alongside it so the test cannot pass on an empty response.
    const text = await exploreText();
    expect(text).not.toContain('### Relationships');
    expect(text.indexOf('### Source Code')).toBeGreaterThan(0);
  });

  it('prefixes source lines with line numbers by default (cat -n style)', async () => {
    const text = await withLineNumsEnv(undefined, exploreText);
    // At least one fenced source line should look like `<digits>\t<code>`.
    expect(/\n\d+\t/.test(text)).toBe(true);
  });

  it('omits line numbers when CODEGRAPH_EXPLORE_LINENUMS=0', async () => {
    const text = await withLineNumsEnv('0', exploreText);
    // The synthetic source has no tab-prefixed numeric lines of its own,
    // so none should appear when the toggle is off.
    expect(/\n\d+\t(?:export| )/.test(text)).toBe(false);
  });

  it('uses language-neutral omission markers (no C-style // in the output)', async () => {
    // The gap/trimmed separators must not assume `//` is a comment — that's
    // wrong in Python, Ruby, etc. They render inside fenced source blocks.
    const text = await exploreText();
    expect(text).not.toContain('// ... (gap)');
    expect(text).not.toContain('// ... trimmed');
  });

  it('does not collapse a whole-file class into just its header (envelope filter)', async () => {
    // The synthetic `Session` class spans the entire file. Without the
    // envelope filter it would form one giant cluster that tail-trims to
    // the class declaration, hiding the methods. Confirm real method bodies
    // make it into the output. Regression guard for the #185 follow-up.
    const text = await exploreText();
    // A method body line (`methodN(arg: string)`) should appear, not just
    // the `export class Session {` opener.
    expect(/method\d+\(arg: string\)/.test(text)).toBe(true);
  });
});
|