1 month ago · ca36e99208
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,18 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
				 
			
 
				 ## [Unreleased]
			
 
				 
			
 
				+### Added
			
 
				+- **MCP / explore**: `codegraph_explore` source sections now carry line
			
 
				+  numbers (cat -n style `<num>\t<code>`, matching the Read tool). This lets
			
 
				+  the agent cite `file:line` straight from the explore payload instead of
			
 
				+  re-opening the file just to find a line number — the dominant residual
			
 
				+  cost on precise-tracing questions. In an isolated A/B (answer a
			
 
				+  "which exact line" question with the relevant code already in the
			
 
				+  payload), the no-line-numbers arm spent 2 file Reads + a grep recovering
			
 
				+  the line number while the line-numbered arm answered with zero follow-up
			
 
				+  tool calls. Payload cost is small (~3-5%). Set
			
 
				+  `CODEGRAPH_EXPLORE_LINENUMS=0` to disable.
			
 
				+
			
 
				 ### Changed
			
 
				 - **MCP / explore**: `codegraph_explore` output is now adaptive to project
			
 
				   size. The tool used to apply a fixed 35KB cap regardless of how large the
			
@@ -22,12 +34,16 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
				   (<5,000) caps at ~28KB; large (<15,000) keeps the historical ~35KB; very
			
 
				   large goes up to ~38KB. A new per-file char cap also prevents a single
			
 
				   file with many adjacent symbols from collapsing into one whole-file dump
			
 
				-  (the Alamofire `Session.swift` case from #185). Measured against the
			
 
				-  same repos used in the README benchmark: Alamofire ~62% smaller per call,
			
 
				-  Excalidraw ~35%, VS Code ~14%. Agent-trust floor still holds — the
			
 
				-  Relationships section, scored cluster selection, and structured-source
			
 
				-  output are all retained. Thanks to
			
 
				-  [@essopsp](https://github.com/essopsp) for the repro.
			
 
				+  (the Alamofire `Session.swift` case from #185). Per-file cluster
			
 
				+  selection ranks clusters that contain a query entry point ahead of dense
			
 
				+  declaration blocks, and whole-file "envelope" nodes (a class/struct that
			
 
				+  spans most of the file) are excluded from clustering so the methods the
			
 
				+  query asked about aren't buried under the container's opening lines.
			
 
				+  Measured against the same repos used in the README benchmark, end state
			
 
				+  with line numbers on: Alamofire ~60% smaller per call, Excalidraw ~32%,
			
 
				+  VS Code ~12%. Agent-trust floor still holds — the Relationships section,
			
 
				+  scored cluster selection, and structured-source output are all retained.
			
 
				+  Thanks to [@essopsp](https://github.com/essopsp) for the repro.
			
 
				 
			
 
				 ## [0.7.10] - 2026-05-19
			
 
				 
			
--- a/__tests__/explore-output-budget.test.ts
+++ b/__tests__/explore-output-budget.test.ts
@@ -188,4 +188,38 @@ describe('codegraph_explore output respects the adaptive budget', () => {
 
				     const sourceFollowsHeader = text.indexOf('### Source Code') > 0;
			
 
				     expect(hasRelationships || sourceFollowsHeader).toBe(true);
			
 
				   });
			
 
				+
			
 
				it('prefixes source lines with line numbers by default (cat -n style)', async () => {
				  // Clear the toggle so this run exercises the behavior with the variable unset.
				  delete process.env.CODEGRAPH_EXPLORE_LINENUMS;
				  const response = await handler.execute('codegraph_explore', { query: 'Session method helper' });
				  const payload = response.content?.[0]?.text ?? '';
				  // A numbered source line is a newline, one or more digits, then a tab.
				  const numberedLine = /\n\d+\t/;
				  expect(numberedLine.test(payload)).toBe(true);
				});
			
 
				+
			
 
				it('omits line numbers when CODEGRAPH_EXPLORE_LINENUMS=0', async () => {
				  process.env.CODEGRAPH_EXPLORE_LINENUMS = '0';
				  try {
				    const response = await handler.execute('codegraph_explore', { query: 'Session method helper' });
				    const payload = response.content?.[0]?.text ?? '';
				    // Matches `<digits>\t` followed by `export` or a two-space indent. The
				    // fixture is expected to contain no such lines of its own (TODO confirm
				    // against the fixture), so any match would mean numbering stayed on.
				    const numberedSourceLine = /\n\d+\t(?:export|  )/;
				    expect(numberedSourceLine.test(payload)).toBe(false);
				  } finally {
				    // Always clear the toggle so it cannot leak into later tests.
				    delete process.env.CODEGRAPH_EXPLORE_LINENUMS;
				  }
				});
			
 
				+
			
 
				it('does not collapse a whole-file class into just its header (envelope filter)', async () => {
				  // Regression guard for the #185 follow-up: the output must include an
				  // actual method line rather than only the `export class Session {`
				  // opener. Presumably the fixture's `Session` class spans the whole
				  // file — verify against the fixture setup.
				  const response = await handler.execute('codegraph_explore', { query: 'Session method helper' });
				  const payload = response.content?.[0]?.text ?? '';
				  // Fixture methods look like `methodN(arg: string)`.
				  const methodSignature = /method\d+\(arg: string\)/;
				  const sawMethod = methodSignature.test(payload);
				  expect(sawMethod).toBe(true);
				});
			
 
				 });
			
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -142,6 +142,38 @@ export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget {
 
				   };
			
 
				 }
			
 
				 
			
 
				/**
				 * Decide whether `codegraph_explore` prefixes each emitted source line with
				 * its line number (cat -n style: `<num>\t<code>`).
				 *
				 * Line numbers let the agent cite `file:line` straight from the explore
				 * payload instead of re-reading the file just to find a line number
				 * (#185 follow-up).
				 *
				 * Enabled by default. `CODEGRAPH_EXPLORE_LINENUMS` is read on every call.
				 * Setting it to `0` disables numbering; surrounding whitespace is ignored,
				 * so a value such as `"0 "` (what cmd.exe stores for `set VAR=0 && ...`)
				 * still counts as `0`. Any other value, including unset or empty, leaves
				 * numbering on.
				 *
				 * @returns `true` when source lines should be numbered
				 */
				function exploreLineNumbersEnabled(): boolean {
				  const raw = process.env.CODEGRAPH_EXPLORE_LINENUMS;
				  // Unset means "use the default", which is ON.
				  if (raw === undefined) return true;
				  // Trim first: shells can leave stray whitespace around the value, and a
				  // strict comparison would then silently ignore the off-switch.
				  return raw.trim() !== '0';
				}
			
 
				+
			
 
				/**
				 * Tag every line of a source excerpt with its line number, written as the
				 * number, a tab, then the original text. This is the `cat -n` convention
				 * the Read tool uses, so the agent can treat both outputs alike.
				 *
				 * The excerpt is split on `\n`; numbering starts at `firstLineNumber` and
				 * increases by one per line. An empty excerpt is treated as a single empty
				 * line and yields `<firstLineNumber>\t`.
				 *
				 * @param slice  contiguous source text (already extracted from the file)
				 * @param firstLineNumber  the 1-based line number of the slice's first line
				 * @returns the excerpt with each line prefixed, joined by `\n`
				 */
				function numberSourceLines(slice: string, firstLineNumber: number): string {
				  return slice
				    .split('\n')
				    .map((text, offset) => `${firstLineNumber + offset}\t${text}`)
				    .join('\n');
				}
			
 
				+
			
 
				 /**
			
 
				  * Mark a Claude session as having consulted MCP tools.
			
 
				  * This enables Grep/Glob/Bash commands that would otherwise be blocked.
			
@@ -940,10 +972,19 @@ export class ToolHandler {
 
				       // are worth 10, directly-connected nodes 3, peripheral nodes 1, and
			
 
				       // bare edge-source lines 2 (less than a connected node but more than
			
 
				       // a peripheral one — they hint at a reference but aren't a definition).
			
 
				+      // Container kinds whose body can span most/all of a file. When such a
			
 
				+      // node covers most of the file we drop it from the ranges: keeping it
			
 
				+      // would merge every method inside it into one giant cluster spanning
			
 
				+      // the whole file, which then tail-trims down to just the container's
			
 
				+      // opening lines (its header/declarations) and buries the methods the
			
 
				+      // query actually asked about (#185 follow-up — Session.swift in
			
 
				+      // Alamofire is the canonical case: the `Session` class spans ~1,400
			
 
				+      // lines). We want the granular symbols inside, not the envelope.
			
 
				+      const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
			
 
				       const ranges: Array<{ start: number; end: number; name: string; kind: string; importance: number }> = group.nodes
			
 
				         .filter(n => n.startLine > 0 && n.endLine > 0)
			
 
				-        // Skip file/component nodes that span the entire file — they'd create one giant cluster
			
 
				-        .filter(n => !(n.kind === 'component' && n.startLine === 1 && n.endLine >= fileLines.length - 1))
			
 
				+        // Drop whole-file envelope nodes (containers covering >50% of the file).
			
 
				+        .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
			
 
				         .map(n => {
			
 
				           let importance = 1;
			
 
				           if (entryNodeIds.has(n.id)) importance = 10;
			
@@ -975,12 +1016,13 @@ export class ToolHandler {
 
				       if (ranges.length === 0) continue;
			
 
				 
			
 
				       const gapThreshold = budget.gapThreshold;
			
 
				-      const clusters: Array<{ start: number; end: number; symbols: string[]; score: number }> = [];
			
 
				+      const clusters: Array<{ start: number; end: number; symbols: string[]; score: number; maxImportance: number }> = [];
			
 
				       let current = {
			
 
				         start: ranges[0]!.start,
			
 
				         end: ranges[0]!.end,
			
 
				         symbols: [`${ranges[0]!.name}(${ranges[0]!.kind})`],
			
 
				         score: ranges[0]!.importance,
			
 
				+        maxImportance: ranges[0]!.importance,
			
 
				       };
			
 
				 
			
 
				       for (let i = 1; i < ranges.length; i++) {
			
@@ -989,6 +1031,7 @@ export class ToolHandler {
 
				           current.end = Math.max(current.end, r.end);
			
 
				           current.symbols.push(`${r.name}(${r.kind})`);
			
 
				           current.score += r.importance;
			
 
				+          current.maxImportance = Math.max(current.maxImportance, r.importance);
			
 
				         } else {
			
 
				           clusters.push(current);
			
 
				           current = {
			
@@ -996,6 +1039,7 @@ export class ToolHandler {
 
				             end: r.end,
			
 
				             symbols: [`${r.name}(${r.kind})`],
			
 
				             score: r.importance,
			
 
				+            maxImportance: r.importance,
			
 
				           };
			
 
				         }
			
 
				       }
			
@@ -1005,25 +1049,34 @@ export class ToolHandler {
 
				       // The pathological case (#185): a file like Session.swift where every
			
 
				       // method is adjacent collapses into one cluster spanning the whole
			
 
				       // file, and dumping that into the agent's context is most of the
			
 
				-      // token cost on small projects. We pick clusters in score order
			
 
				-      // (importance per line, so we don't prefer one giant low-density
			
 
				-      // cluster over several focused ones) until the per-file char cap is
			
 
				-      // hit. Truly enormous single clusters get tail-trimmed with a marker.
			
 
				+      // token cost on small projects. We pick clusters in priority order
			
 
				+      // until the per-file char cap is hit. Truly enormous single clusters
			
 
				+      // get tail-trimmed with a marker.
			
 
				       const contextPadding = 3;
			
 
				+      const withLineNumbers = exploreLineNumbersEnabled();
			
 
				       const buildSection = (c: { start: number; end: number }): string => {
			
 
				         const startIdx = Math.max(0, c.start - 1 - contextPadding);
			
 
				         const endIdx = Math.min(fileLines.length, c.end + contextPadding);
			
 
				-        return fileLines.slice(startIdx, endIdx).join('\n');
			
 
				+        const slice = fileLines.slice(startIdx, endIdx).join('\n');
			
 
				+        // startIdx is 0-based, so the slice's first line is line startIdx + 1.
			
 
				+        return withLineNumbers ? numberSourceLines(slice, startIdx + 1) : slice;
			
 
				       };
			
 
				       const GAP_MARKER = '\n\n// ... (gap) ...\n\n';
			
 
				 
			
 
				-      // Score clusters by score-per-line (density) so a 30-line cluster
			
 
				-      // with two entry symbols outranks a 400-line cluster with two
			
 
				-      // peripheral symbols. Stable tiebreak by score, then by smaller
			
 
				-      // span (cheaper to include).
			
 
				+      // Rank clusters for inclusion under the per-file cap. Entry-point
			
 
				+      // clusters come first: a cluster containing a query entry point
			
 
				+      // (importance 10) must outrank a dense block of mere declarations,
			
 
				+      // otherwise on a large file like Session.swift the top-of-file class
			
 
				+      // header + property list (many adjacent low-importance nodes, high
			
 
				+      // density) wins the budget and buries the actual methods the query
			
 
				+      // asked about (perform/didCreateURLRequest/task live deep in the
			
 
				+      // file). Within the same importance tier, prefer density (score per
			
 
				+      // line) so we still favor focused clusters over sprawling ones, then
			
 
				+      // smaller span as a cheap-to-include tiebreak.
			
 
				       const rankedClusters = clusters
			
 
				         .map((c, i) => ({ idx: i, span: c.end - c.start + 1, c }))
			
 
				         .sort((a, b) => {
			
 
				+          if (b.c.maxImportance !== a.c.maxImportance) return b.c.maxImportance - a.c.maxImportance;
			
 
				           const densityA = a.c.score / a.span;
			
 
				           const densityB = b.c.score / b.span;
			
 
				           if (densityB !== densityA) return densityB - densityA;