Ver código fonte

feat: Add codegraph_explore tool for comprehensive single-call code exploration

Introduces a new MCP tool that performs deep code exploration in a single call, returning comprehensive context with full source code sections grouped by file and relationship mapping. Designed to replace multiple codegraph_node + file read operations for thorough understanding of code topics. Updates documentation to position explore as the primary tool for deep exploration questions.
Colby McHenry 2 meses atrás
pai
commit
b927492bc0
3 arquivos alterados com 281 adições e 4 exclusões
  1. 2 1
      CLAUDE.md
  2. 4 1
      src/installer/claude-md-template.ts
  3. 275 2
      src/mcp/tools.ts

+ 2 - 1
CLAUDE.md

@@ -132,8 +132,9 @@ Use these tools **directly in the main session** for fast code exploration (repl
 
 | Tool | Use For |
 |------|---------|
+| `codegraph_explore` | **Deep exploration** — comprehensive context for a topic in ONE call |
+| `codegraph_context` | Quick context for a task (lighter than explore) |
 | `codegraph_search` | Find symbols by name (functions, classes, types) |
-| `codegraph_context` | Get relevant code context for a task |
 | `codegraph_callers` | Find what calls a function |
 | `codegraph_callees` | Find what a function calls |
 | `codegraph_impact` | See what's affected by changing a symbol |

+ 4 - 1
src/installer/claude-md-template.ts

@@ -20,13 +20,16 @@ CodeGraph builds a semantic knowledge graph of codebases for faster, smarter cod
 
 | Tool | Use For |
 |------|---------|
-| \`codegraph_context\` | Get relevant code context for a task (great starting point) |
+| \`codegraph_explore\` | **Deep exploration** — comprehensive context for a topic in ONE call (replaces Explore agents) |
+| \`codegraph_context\` | Quick context for a task (lighter than explore) |
 | \`codegraph_search\` | Find symbols by name (functions, classes, types) |
 | \`codegraph_callers\` | Find what calls a function |
 | \`codegraph_callees\` | Find what a function calls |
 | \`codegraph_impact\` | See what's affected by changing a symbol |
 | \`codegraph_node\` | Get details + source code for a symbol |
 
+**For deep exploration questions** (e.g., "how does the undo/redo system work?"), use \`codegraph_explore\` directly. It returns full source code sections from all relevant files in a single call — no need to spawn an Explore agent.
+
 **Do NOT tell Explore agents to use codegraph tools.** Testing shows Explore agents use codegraph for discovery then still read all the same files — making them slower, not faster. Codegraph's value is in the main session where it replaces the need for exhaustive file reading.
 
 ### If \`.codegraph/\` does NOT exist

+ 275 - 2
src/mcp/tools.ts

@@ -7,8 +7,8 @@
 import CodeGraph, { findNearestCodeGraphRoot } from '../index';
 import type { Node, Edge, SearchResult, Subgraph, TaskContext, NodeKind } from '../types';
 import { createHash } from 'crypto';
-import { writeFileSync } from 'fs';
-import { clamp } from '../utils';
+import { writeFileSync, readFileSync, existsSync } from 'fs';
+import { clamp, validatePathWithinRoot } from '../utils';
 import { tmpdir } from 'os';
 import { join } from 'path';
 
@@ -207,6 +207,26 @@ export const tools: ToolDefinition[] = [
       required: ['symbol'],
     },
   },
+  {
+    name: 'codegraph_explore',
+    description: 'Deep exploration tool — returns comprehensive context for a topic in a SINGLE call. Groups all relevant source code by file (contiguous sections, not snippets), includes a relationship map, and uses deeper graph traversal. Designed to replace multiple codegraph_node + file Read calls. Use this instead of codegraph_context when you need thorough understanding.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: {
+          type: 'string',
+          description: 'What you want to understand (e.g., "undo redo system", "authentication flow", "how routing works")',
+        },
+        maxFiles: {
+          type: 'number',
+          description: 'Maximum number of files to include source code from (default: 12)',
+          default: 12,
+        },
+        projectPath: projectPathProperty,
+      },
+      required: ['query'],
+    },
+  },
   {
     name: 'codegraph_status',
     description: 'Get the status of the CodeGraph index, including statistics about indexed files, nodes, and edges.',
@@ -360,6 +380,8 @@ export class ToolHandler {
           return await this.handleCallees(args);
         case 'codegraph_impact':
           return await this.handleImpact(args);
+        case 'codegraph_explore':
+          return await this.handleExplore(args);
         case 'codegraph_node':
           return await this.handleNode(args);
         case 'codegraph_status':
@@ -579,6 +601,257 @@ export class ToolHandler {
     return this.textResult(this.truncateOutput(formatted));
   }
 
+  /**
+   * Maximum output for explore tool (much larger than standard tools).
+   *
+   * Measured in characters: handleExplore compares accumulated string lengths
+   * against this value to decide when to stop adding file sections or to trim
+   * the last one. The budget is approximate — per-file headers are estimated
+   * with a flat allowance rather than measured.
+   */
+  private static readonly EXPLORE_MAX_OUTPUT = 50000;
+
+  /**
+   * Handle codegraph_explore — deep exploration in a single call.
+   *
+   * Strategy: find relevant symbols via graph traversal, group them by file,
+   * then read contiguous file sections covering all symbols per file.
+   * This replaces multiple codegraph_node + Read calls.
+   *
+   * Report layout (markdown):
+   *  1. Header with symbol and file counts.
+   *  2. "Relationships": non-`contains` edges grouped by kind (max 15 per kind).
+   *  3. "Source Code": one fenced block per file, highest-ranked files first,
+   *     bounded by `maxFiles` and by EXPLORE_MAX_OUTPUT characters.
+   *  4. "Additional relevant files (not shown)": up to 10 files that were found
+   *     but whose source was not emitted.
+   *
+   * @param args - Raw tool arguments: `query` (required string), optional
+   *   `maxFiles` (default 12, clamped to 1–20) and optional `projectPath`.
+   * @returns A text result containing the report, or the value returned by
+   *   `validateString` when `query` is not accepted.
+   */
+  private async handleExplore(args: Record<string, unknown>): Promise<ToolResult> {
+    const query = this.validateString(args.query, 'query');
+    if (typeof query !== 'string') return query;
+
+    const cg = this.getCodeGraph(args.projectPath as string | undefined);
+
+    // Coerce maxFiles explicitly: tool arguments are untyped at runtime, so a
+    // non-numeric value must not reach clamp(). NaN and 0 are falsy and fall
+    // back to the default of 12.
+    const rawMaxFiles = args.maxFiles;
+    const requestedMaxFiles =
+      typeof rawMaxFiles === 'number' || typeof rawMaxFiles === 'string'
+        ? Number(rawMaxFiles)
+        : Number.NaN;
+    const maxFiles = clamp(requestedMaxFiles || 12, 1, 20);
+    const projectRoot = cg.getProjectRoot();
+
+    // Step 1: Find relevant context with generous parameters
+    const subgraph = await cg.findRelevantContext(query, {
+      searchLimit: 8,
+      traversalDepth: 2,
+      maxNodes: 80,
+      minScore: 0.2,
+    });
+
+    if (subgraph.nodes.size === 0) {
+      return this.textResult(`No relevant code found for "${query}"`);
+    }
+
+    // Step 2: Group nodes by file, score by relevance
+    const fileGroups = new Map<string, { nodes: Node[]; score: number }>();
+    const entryNodeIds = new Set(subgraph.roots);
+
+    // Build a set of nodes directly connected to entry points (depth 1)
+    const connectedToEntry = new Set<string>();
+    for (const edge of subgraph.edges) {
+      if (entryNodeIds.has(edge.source)) connectedToEntry.add(edge.target);
+      if (entryNodeIds.has(edge.target)) connectedToEntry.add(edge.source);
+    }
+
+    for (const node of subgraph.nodes.values()) {
+      // Skip import/export nodes — they add noise without information
+      if (node.kind === 'import' || node.kind === 'export') continue;
+
+      const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
+      group.nodes.push(node);
+      // Score: entry point nodes worth 10, directly connected worth 3, others worth 1
+      if (entryNodeIds.has(node.id)) {
+        group.score += 10;
+      } else if (connectedToEntry.has(node.id)) {
+        group.score += 3;
+      } else {
+        group.score += 1;
+      }
+      fileGroups.set(node.filePath, group);
+    }
+
+    // Files scoring below 3 are treated as peripheral: they are never expanded
+    // into source, only listed by name at the end of the report.
+    const relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3);
+
+    // Extract query terms for relevance checking (very short words are ignored)
+    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3);
+
+    // A file relates to the query when its path or any symbol name contains a term
+    const hasQueryRelevance = (filePath: string, nodes: Node[]): boolean => {
+      const fp = filePath.toLowerCase();
+      if (queryTerms.some(t => fp.includes(t))) return true;
+      return nodes.some(n => queryTerms.some(t => n.name.toLowerCase().includes(t)));
+    };
+
+    // Test, icon and i18n files are deprioritized. The directory pattern is
+    // anchored with (^|/) so root-relative paths such as "tests/foo.ts" match too.
+    const isLowValue = (p: string): boolean =>
+      /(^|\/)(tests?|__tests?__|spec)\//i.test(p) ||
+      /\bicons?\b/i.test(p) ||
+      /\bi18n\b/i.test(p);
+
+    // Sort files: query-related first, then non-low-value, then by score, then
+    // by symbol count. Sort keys are computed once per file, not per comparison.
+    const sortedFiles = relevantFiles
+      .map(entry => ({
+        entry,
+        relevant: hasQueryRelevance(entry[0], entry[1].nodes),
+        lowValue: isLowValue(entry[0]),
+      }))
+      .sort((a, b) => {
+        if (a.relevant !== b.relevant) return a.relevant ? -1 : 1;
+        if (a.lowValue !== b.lowValue) return a.lowValue ? 1 : -1;
+        const scoreDiff = b.entry[1].score - a.entry[1].score;
+        if (scoreDiff !== 0) return scoreDiff;
+        return b.entry[1].nodes.length - a.entry[1].nodes.length;
+      })
+      .map(({ entry }) => entry);
+
+    // Step 3: Build relationship map
+    const lines: string[] = [
+      `## Exploration: ${query}`,
+      '',
+      `Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
+      '',
+    ];
+
+    // Relationship map — show how symbols connect
+    const significantEdges = subgraph.edges.filter(e =>
+      e.kind !== 'contains' // skip contains — it's implied by file grouping
+    );
+
+    if (significantEdges.length > 0) {
+      lines.push('### Relationships');
+      lines.push('');
+
+      // Group edges by kind for readability
+      const byKind = new Map<string, Array<{ source: string; target: string }>>();
+      for (const edge of significantEdges) {
+        const sourceNode = subgraph.nodes.get(edge.source);
+        const targetNode = subgraph.nodes.get(edge.target);
+        // Edges whose endpoints are not both in the subgraph cannot be named
+        if (!sourceNode || !targetNode) continue;
+
+        const group = byKind.get(edge.kind) || [];
+        group.push({ source: sourceNode.name, target: targetNode.name });
+        byKind.set(edge.kind, group);
+      }
+
+      for (const [kind, edges] of byKind) {
+        // Show up to 15 relationships per kind
+        const shown = edges.slice(0, 15);
+        lines.push(`**${kind}:**`);
+        for (const e of shown) {
+          lines.push(`- ${e.source} → ${e.target}`);
+        }
+        if (edges.length > 15) {
+          lines.push(`- ... and ${edges.length - 15} more`);
+        }
+        lines.push('');
+      }
+    }
+
+    // Step 4: Read contiguous file sections
+    lines.push('### Source Code');
+    lines.push('');
+
+    // Flat per-file allowance for the heading, fence and blank lines
+    const SECTION_OVERHEAD = 200;
+    let totalChars = lines.join('\n').length;
+    // Paths whose source was actually emitted. Tracked explicitly because files
+    // can be skipped mid-list (bad path, unreadable, no line ranges), so the
+    // shown files are not necessarily a prefix of sortedFiles.
+    const shownFiles = new Set<string>();
+
+    for (const [filePath, group] of sortedFiles) {
+      if (shownFiles.size >= maxFiles) break;
+      if (totalChars > ToolHandler.EXPLORE_MAX_OUTPUT * 0.9) break;
+
+      const absPath = validatePathWithinRoot(projectRoot, filePath);
+      if (!absPath || !existsSync(absPath)) continue;
+
+      let fileContent: string;
+      try {
+        fileContent = readFileSync(absPath, 'utf-8');
+      } catch {
+        // Best effort: an unreadable file is listed as "not shown" below
+        continue;
+      }
+
+      const fileLines = fileContent.split('\n');
+      const lang = group.nodes[0]?.language || '';
+
+      // Cluster nearby symbols to avoid reading huge gaps between distant symbols.
+      // Sort by start line, then merge overlapping/adjacent ranges (within 15 lines).
+      const ranges = group.nodes
+        .filter(n => n.startLine > 0 && n.endLine > 0)
+        .map(n => ({ start: n.startLine, end: n.endLine, name: n.name, kind: n.kind }))
+        .sort((a, b) => a.start - b.start);
+
+      if (ranges.length === 0) continue;
+
+      const GAP_THRESHOLD = 15; // merge sections within 15 lines of each other
+      const clusters: Array<{ start: number; end: number; symbols: string[] }> = [];
+      let current = { start: ranges[0]!.start, end: ranges[0]!.end, symbols: [`${ranges[0]!.name}(${ranges[0]!.kind})`] };
+
+      for (let i = 1; i < ranges.length; i++) {
+        const r = ranges[i]!;
+        if (r.start <= current.end + GAP_THRESHOLD) {
+          current.end = Math.max(current.end, r.end);
+          current.symbols.push(`${r.name}(${r.kind})`);
+        } else {
+          clusters.push(current);
+          current = { start: r.start, end: r.end, symbols: [`${r.name}(${r.kind})`] };
+        }
+      }
+      clusters.push(current);
+
+      // Build file section output from clusters
+      const contextPadding = 3;
+      let fileSection = '';
+      const allSymbols: string[] = [];
+
+      for (const cluster of clusters) {
+        // Line numbers are 1-based; slice() takes a 0-based start and exclusive end
+        const startIdx = Math.max(0, cluster.start - 1 - contextPadding);
+        const endIdx = Math.min(fileLines.length, cluster.end + contextPadding);
+        const section = fileLines.slice(startIdx, endIdx).join('\n');
+
+        if (fileSection.length > 0) {
+          fileSection += '\n\n// ... (gap) ...\n\n';
+        }
+        fileSection += section;
+        allSymbols.push(...cluster.symbols);
+      }
+
+      // If this section would blow the output limit, emit a trimmed version
+      // (when at least 500 characters of budget remain) and stop adding files.
+      if (totalChars + fileSection.length + SECTION_OVERHEAD > ToolHandler.EXPLORE_MAX_OUTPUT) {
+        const budget = ToolHandler.EXPLORE_MAX_OUTPUT - totalChars - SECTION_OVERHEAD;
+        if (budget < 500) break;
+        const trimmed = fileSection.slice(0, budget) + '\n// ... trimmed ...';
+
+        lines.push(`#### ${filePath} — ${allSymbols.join(', ')}`);
+        lines.push('');
+        lines.push('```' + lang);
+        lines.push(trimmed);
+        lines.push('```');
+        lines.push('');
+        totalChars += trimmed.length + SECTION_OVERHEAD;
+        shownFiles.add(filePath);
+        break;
+      }
+
+      lines.push(`#### ${filePath} — ${allSymbols.join(', ')}`);
+      lines.push('');
+      lines.push('```' + lang);
+      lines.push(fileSection);
+      lines.push('```');
+      lines.push('');
+
+      totalChars += fileSection.length + SECTION_OVERHEAD;
+      shownFiles.add(filePath);
+    }
+
+    // Add remaining files as references (from both relevant and peripheral files).
+    // "Remaining" means every ranked file whose source was not emitted above,
+    // including those skipped because they could not be read.
+    const remainingRelevant = sortedFiles.filter(([filePath]) => !shownFiles.has(filePath));
+    const peripheralFiles = [...fileGroups.entries()]
+      .filter(([, group]) => group.score < 3)
+      .sort((a, b) => b[1].score - a[1].score);
+    const remainingFiles = [...remainingRelevant, ...peripheralFiles];
+    if (remainingFiles.length > 0) {
+      lines.push('### Additional relevant files (not shown)');
+      lines.push('');
+      for (const [filePath, group] of remainingFiles.slice(0, 10)) {
+        const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
+        lines.push(`- ${filePath}: ${symbols}`);
+      }
+      if (remainingFiles.length > 10) {
+        lines.push(`- ... and ${remainingFiles.length - 10} more files`);
+      }
+    }
+
+    return this.textResult(lines.join('\n'));
+  }
+
   /**
    * Handle codegraph_node
    */