/** * MCP Tool Definitions * * Defines the tools exposed by the CodeGraph MCP server. */ import CodeGraph, { findNearestCodeGraphRoot } from '../index'; import type { Node, Edge, SearchResult, Subgraph, TaskContext, NodeKind } from '../types'; import { createHash } from 'crypto'; import { constants as fsConstants, closeSync, existsSync, openSync, readFileSync, writeSync, } from 'fs'; import { clamp, validatePathWithinRoot } from '../utils'; import { tmpdir } from 'os'; import { join } from 'path'; /** Maximum output length to prevent context bloat (characters) */ const MAX_OUTPUT_LENGTH = 15000; /** * Rust path roots that have no file-system equivalent — `crate` is the * current crate, `super` is the parent module, `self` is the current * module. Used by `matchesSymbol` to strip these before file-path * matching so `crate::configurator::stage_apply::run` resolves the * same as `configurator::stage_apply::run`. */ const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']); /** * Node kinds that contain other symbols. For these, `codegraph_node` with * `includeCode=true` returns a structural outline (member names + signatures * + line numbers) instead of the full body, which for a large class is a * multi-thousand-character wall of source that bloats the agent's context. */ const CONTAINER_NODE_KINDS = new Set([ 'class', 'struct', 'interface', 'trait', 'protocol', 'enum', 'namespace', 'module', ]); /** Last `::` / `.` / `/`-separated segment of a qualified symbol. */ function lastQualifierPart(symbol: string): string { const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0); return parts[parts.length - 1] ?? symbol; } /** * Calculate the recommended number of codegraph_explore calls based on project size. * Larger codebases need more exploration calls to cover their surface area, * but smaller ones should use fewer to avoid unnecessary overhead. */ export function getExploreBudget(fileCount: number): number { if (fileCount < 500) return 1; if (fileCount < 5000) return 2; if (fileCount < 15000) return 3; if (fileCount < 25000) return 4; return 5; } /** * Adaptive output budget for `codegraph_explore`, scaled to project size. * * Smaller codebases get a tighter total cap, fewer default files, smaller * per-file cap, and tighter clustering — so a focused query on a 100-file * project doesn't dump a whole file's worth of source into the agent's * context. Larger codebases keep the generous defaults because the * agent's native discovery cost (grep + find + many Reads) genuinely * dwarfs a fat explore call at that scale. * * Meta-text (relationships map, "additional relevant files" list, * completeness signal, budget note) is gated off for tiny projects * where one rich call is the whole story and the extra prose is just * overhead. * * Tier breakpoints mirror `getExploreBudget` so a project sits in the * same tier across both knobs. */ export interface ExploreOutputBudget { /** Hard cap on total output characters. */ maxOutputChars: number; /** Default `maxFiles` when the caller didn't specify one. */ defaultMaxFiles: number; /** Cap on contiguous source returned per file (across all its clusters). */ maxCharsPerFile: number; /** Cluster gap threshold in lines — tighter clustering on small projects. */ gapThreshold: number; /** Max symbols listed in the per-file header (`#### path — sym(kind), ...`). */ maxSymbolsInFileHeader: number; /** Max edges shown per relationship kind in the Relationships section. */ maxEdgesPerRelationshipKind: number; /** Include the "Relationships" section. */ includeRelationships: boolean; /** Include the "Additional relevant files (not shown)" trailing list. */ includeAdditionalFiles: boolean; /** Include the "Complete source code is included above…" reminder. */ includeCompletenessSignal: boolean; /** Include the explore-budget reminder at the end. */ includeBudgetNote: boolean; } export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget { if (fileCount < 500) { return { maxOutputChars: 18000, defaultMaxFiles: 5, maxCharsPerFile: 3800, gapThreshold: 8, maxSymbolsInFileHeader: 6, maxEdgesPerRelationshipKind: 6, includeRelationships: true, includeAdditionalFiles: false, includeCompletenessSignal: false, includeBudgetNote: false, }; } if (fileCount < 5000) { return { maxOutputChars: 13000, defaultMaxFiles: 6, maxCharsPerFile: 2500, gapThreshold: 10, maxSymbolsInFileHeader: 8, maxEdgesPerRelationshipKind: 8, includeRelationships: true, includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, }; } if (fileCount < 15000) { return { maxOutputChars: 35000, defaultMaxFiles: 12, maxCharsPerFile: 7000, gapThreshold: 15, maxSymbolsInFileHeader: 15, maxEdgesPerRelationshipKind: 15, includeRelationships: true, includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, }; } return { maxOutputChars: 38000, defaultMaxFiles: 14, maxCharsPerFile: 7000, gapThreshold: 15, maxSymbolsInFileHeader: 15, maxEdgesPerRelationshipKind: 15, includeRelationships: true, includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, }; } /** * Whether `codegraph_explore` should prefix source lines with their line * numbers (cat -n style: `\t`). * * Line numbers let the agent cite `file:line` straight from the explore * payload instead of re-Reading the file just to find a line number — the * dominant residual cost on precise-tracing questions (#185 follow-up). * * Defaults ON. Set `CODEGRAPH_EXPLORE_LINENUMS=0` to disable (used by the * A/B harness to measure the payload-cost vs. read-savings tradeoff). */ function exploreLineNumbersEnabled(): boolean { return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0'; } /** * Prefix each line of a source slice with its 1-based line number, matching * the Read tool's `cat -n` convention (number + tab) so the agent treats it * the same way it treats Read output. * * @param slice contiguous source text (already extracted from the file) * @param firstLineNumber the 1-based line number of the slice's first line */ function numberSourceLines(slice: string, firstLineNumber: number): string { const out: string[] = []; const split = slice.split('\n'); for (let i = 0; i < split.length; i++) { out.push(`${firstLineNumber + i}\t${split[i]}`); } return out.join('\n'); } /** * Mark a Claude session as having consulted MCP tools. * This enables Grep/Glob/Bash commands that would otherwise be blocked. * * Why the explicit openSync + O_NOFOLLOW dance instead of plain writeFileSync: * tmpdir() is world-writable on Linux (mode 1777), so on a shared multi-user * machine any other local user can pre-create `codegraph-consulted-` as * a symlink pointing at a file the victim owns. The old `writeFileSync` would * happily follow that link and overwrite the target's contents with the ISO * timestamp string (CWE-59). The session-id hash provides the predictability * gate, but it's defense-in-depth: if a session id ever surfaces in logs, * argv, or telemetry the attack becomes trivial, and the right fix is to not * follow links from /tmp paths in the first place. */ function markSessionConsulted(sessionId: string): void { try { const hash = createHash('md5').update(sessionId).digest('hex').slice(0, 16); const markerPath = join(tmpdir(), `codegraph-consulted-${hash}`); // O_NOFOLLOW makes openSync throw ELOOP if markerPath is already a symlink. // O_CREAT + O_TRUNC keep the original "create-or-overwrite" semantics, and // mode 0o600 prevents readback by other local users (the marker payload is // benign, but narrowing the exposure costs nothing). const flags = fsConstants.O_WRONLY | fsConstants.O_CREAT | fsConstants.O_TRUNC | fsConstants.O_NOFOLLOW; const fd = openSync(markerPath, flags, 0o600); try { writeSync(fd, new Date().toISOString()); } finally { closeSync(fd); } } catch { // Silently fail - don't break MCP on marker write failure. ELOOP from a // planted symlink lands here too, which is the intended behavior: refuse // to write rather than overwrite an attacker-chosen target. } } /** * MCP Tool definition */ export interface ToolDefinition { name: string; description: string; inputSchema: { type: 'object'; properties: Record; required?: string[]; }; } interface PropertySchema { type: string; description: string; enum?: string[]; default?: unknown; } /** * Tool execution result */ export interface ToolResult { content: Array<{ type: 'text'; text: string; }>; isError?: boolean; } /** * Common projectPath property for cross-project queries */ const projectPathProperty: PropertySchema = { type: 'string', description: 'Path to a different project with .codegraph/ initialized. If omitted, uses current project. Use this to query other codebases.', }; /** * All CodeGraph MCP tools * * Designed for minimal context usage - use codegraph_context as the primary tool, * and only use other tools for targeted follow-up queries. * * All tools support cross-project queries via the optional `projectPath` parameter. */ export const tools: ToolDefinition[] = [ { name: 'codegraph_search', description: 'Quick symbol search by name. Returns locations only (no code). Use codegraph_context instead for comprehensive task context.', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Symbol name or partial name (e.g., "auth", "signIn", "UserService")', }, kind: { type: 'string', description: 'Filter by node kind', enum: ['function', 'method', 'class', 'interface', 'type', 'variable', 'route', 'component'], }, limit: { type: 'number', description: 'Maximum results (default: 10)', default: 10, }, projectPath: projectPathProperty, }, required: ['query'], }, }, { name: 'codegraph_context', description: 'PRIMARY TOOL — call this FIRST for any "how does X work", architecture, feature, or bug-context question. Composes search + node + callers + callees and returns entry points, related symbols, and key code in ONE call — usually enough to answer with no further search/Read/Grep. Prefer this over chaining codegraph_search + codegraph_node, and over codegraph_explore. NOTE: provides CODE context, not product requirements; for new features still clarify UX/edge cases with the user.', inputSchema: { type: 'object', properties: { task: { type: 'string', description: 'Description of the task, bug, or feature to build context for', }, maxNodes: { type: 'number', description: 'Maximum symbols to include (default: 20)', default: 20, }, includeCode: { type: 'boolean', description: 'Include code snippets for key symbols (default: true)', default: true, }, projectPath: projectPathProperty, }, required: ['task'], }, }, { name: 'codegraph_callers', description: 'Find all functions/methods that call a specific symbol. Useful for understanding usage patterns and impact of changes.', inputSchema: { type: 'object', properties: { symbol: { type: 'string', description: 'Name of the function, method, or class to find callers for', }, limit: { type: 'number', description: 'Maximum number of callers to return (default: 20)', default: 20, }, projectPath: projectPathProperty, }, required: ['symbol'], }, }, { name: 'codegraph_callees', description: 'Find all functions/methods that a specific symbol calls. Useful for understanding dependencies and code flow.', inputSchema: { type: 'object', properties: { symbol: { type: 'string', description: 'Name of the function, method, or class to find callees for', }, limit: { type: 'number', description: 'Maximum number of callees to return (default: 20)', default: 20, }, projectPath: projectPathProperty, }, required: ['symbol'], }, }, { name: 'codegraph_impact', description: 'Analyze the impact radius of changing a symbol. Shows what code could be affected by modifications.', inputSchema: { type: 'object', properties: { symbol: { type: 'string', description: 'Name of the symbol to analyze impact for', }, depth: { type: 'number', description: 'How many levels of dependencies to traverse (default: 2)', default: 2, }, projectPath: projectPathProperty, }, required: ['symbol'], }, }, { name: 'codegraph_node', description: 'Get detailed info about ONE symbol (location, signature, docstring). Pass includeCode=true for source: a function/method returns its body; a class/interface/struct/enum returns a compact member OUTLINE (fields + method signatures + line numbers), not every method body — Read or codegraph_node a specific member for its body. Keep includeCode=false to minimize context. For SEVERAL related symbols, make ONE codegraph_explore (or codegraph_context) call instead of many node calls — repeated node calls each re-read the whole context and cost far more.', inputSchema: { type: 'object', properties: { symbol: { type: 'string', description: 'Name of the symbol to get details for', }, includeCode: { type: 'boolean', description: 'Include full source code (default: false to minimize context)', default: false, }, projectPath: projectPathProperty, }, required: ['symbol'], }, }, { name: 'codegraph_explore', description: 'Returns source for SEVERAL related symbols grouped by file, plus a relationship map, in ONE capped call. This is the efficient way to inspect many related symbols at once — strongly prefer it over a series of codegraph_node or Read calls (each separate call re-reads the whole context, so 8 node calls cost far more than 1 explore). Use it after codegraph_context when you need to see the actual source of several symbols. Query with specific symbol/file/code terms, NOT natural-language sentences — run codegraph_search first to find names. Bad: "how are agent prompts loaded and passed to the CLI". Good: "renderStaticScene drawElementOnCanvas ShapeCache renderElement.ts".', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Symbol names, file names, or short code terms to explore (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"). Use codegraph_search first to find relevant names.', }, maxFiles: { type: 'number', description: 'Maximum number of files to include source code from (default: 12)', default: 12, }, projectPath: projectPathProperty, }, required: ['query'], }, }, { name: 'codegraph_status', description: 'Get the status of the CodeGraph index, including statistics about indexed files, nodes, and edges.', inputSchema: { type: 'object', properties: { projectPath: projectPathProperty, }, }, }, { name: 'codegraph_files', description: 'REQUIRED for file/folder exploration. Get the project file structure from the CodeGraph index. Returns a tree view of all indexed files with metadata (language, symbol count). Much faster than Glob/filesystem scanning. Use this FIRST when exploring project structure, finding files, or understanding codebase organization.', inputSchema: { type: 'object', properties: { path: { type: 'string', description: 'Filter to files under this directory path (e.g., "src/components"). Returns all files if not specified.', }, pattern: { type: 'string', description: 'Filter files matching this glob pattern (e.g., "*.tsx", "**/*.test.ts")', }, format: { type: 'string', description: 'Output format: "tree" (hierarchical, default), "flat" (simple list), "grouped" (by language)', enum: ['tree', 'flat', 'grouped'], default: 'tree', }, includeMetadata: { type: 'boolean', description: 'Include file metadata like language and symbol count (default: true)', default: true, }, maxDepth: { type: 'number', description: 'Maximum directory depth to show (default: unlimited)', }, projectPath: projectPathProperty, }, }, }, ]; /** * Tool handler that executes tools against a CodeGraph instance * * Supports cross-project queries via the projectPath parameter. * Other projects are opened on-demand and cached for performance. */ export class ToolHandler { // Cache of opened CodeGraph instances for cross-project queries private projectCache: Map = new Map(); // The directory the server last searched for a default project. Surfaced in // the "not initialized" error so users can see why detection missed. private defaultProjectHint: string | null = null; constructor(private cg: CodeGraph | null) {} /** * Update the default CodeGraph instance (e.g. after lazy initialization) */ setDefaultCodeGraph(cg: CodeGraph): void { this.cg = cg; } /** * Record the directory the server tried to resolve the default project from. * Used only to make the "no default project" error actionable. */ setDefaultProjectHint(searchedPath: string): void { this.defaultProjectHint = searchedPath; } /** * Whether a default CodeGraph instance is available */ hasDefaultCodeGraph(): boolean { return this.cg !== null; } /** * Get tool definitions with dynamic descriptions based on project size. * The codegraph_explore tool description includes a budget recommendation * scaled to the number of indexed files. */ getTools(): ToolDefinition[] { if (!this.cg) return tools; try { const stats = this.cg.getStats(); const budget = getExploreBudget(stats.fileCount); return tools.map(tool => { if (tool.name === 'codegraph_explore') { return { ...tool, description: `${tool.description} Budget: make at most ${budget} calls for this project (${stats.fileCount.toLocaleString()} files indexed).`, }; } return tool; }); } catch { return tools; } } /** * Get CodeGraph instance for a project * * If projectPath is provided, opens that project's CodeGraph (cached). * Otherwise returns the default CodeGraph instance. * * Walks up parent directories to find the nearest .codegraph/ folder, * similar to how git finds .git/ directories. */ private getCodeGraph(projectPath?: string): CodeGraph { if (!projectPath) { if (!this.cg) { const searched = this.defaultProjectHint ?? process.cwd(); throw new Error( 'No CodeGraph project is loaded for this session.\n' + `Searched for a .codegraph/ directory starting from: ${searched}\n` + 'The index is likely fine — this is a working-directory detection issue: ' + "the MCP client launched the server outside your project and didn't report the " + 'workspace root. Fix it either way:\n' + ' • Pass projectPath to the tool call, e.g. projectPath: "/absolute/path/to/your/project"\n' + ' • Or add --path to the server\'s MCP config args: ["serve", "--mcp", "--path", "/absolute/path/to/your/project"]' ); } return this.cg; } // Check cache first (using original path as key) if (this.projectCache.has(projectPath)) { return this.projectCache.get(projectPath)!; } // Walk up parent directories to find nearest .codegraph/ const resolvedRoot = findNearestCodeGraphRoot(projectPath); if (!resolvedRoot) { throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`); } // If the path resolves to the default project, reuse the already-open // default instance rather than opening a SECOND connection to the same DB. // A duplicate connection serializes reads against the watcher's auto-sync // writes; on the wasm backend (no WAL) that surfaces as intermittent // "database is locked" on concurrent tool calls. See issue #238. Deliberately // not cached under projectPath — the server owns and closes the default // instance, so routing it through projectCache.closeAll() would double-close it. if (this.cg && this.cg.getProjectRoot() === resolvedRoot) { return this.cg; } // Check if we already have this resolved root cached (different path, same project) if (this.projectCache.has(resolvedRoot)) { const cg = this.projectCache.get(resolvedRoot)!; // Cache under original path too for faster future lookups this.projectCache.set(projectPath, cg); return cg; } // Open and cache under both paths const cg = CodeGraph.openSync(resolvedRoot); this.projectCache.set(resolvedRoot, cg); if (projectPath !== resolvedRoot) { this.projectCache.set(projectPath, cg); } return cg; } /** * Close all cached project connections */ closeAll(): void { for (const cg of this.projectCache.values()) { cg.close(); } this.projectCache.clear(); } /** * Validate that a value is a non-empty string */ private validateString(value: unknown, name: string): string | ToolResult { if (typeof value !== 'string' || value.length === 0) { return this.errorResult(`${name} must be a non-empty string`); } return value; } /** * Execute a tool by name */ async execute(toolName: string, args: Record): Promise { try { switch (toolName) { case 'codegraph_search': return await this.handleSearch(args); case 'codegraph_context': return await this.handleContext(args); case 'codegraph_callers': return await this.handleCallers(args); case 'codegraph_callees': return await this.handleCallees(args); case 'codegraph_impact': return await this.handleImpact(args); case 'codegraph_explore': return await this.handleExplore(args); case 'codegraph_node': return await this.handleNode(args); case 'codegraph_status': return await this.handleStatus(args); case 'codegraph_files': return await this.handleFiles(args); default: return this.errorResult(`Unknown tool: ${toolName}`); } } catch (err) { return this.errorResult(`Tool execution failed: ${err instanceof Error ? err.message : String(err)}`); } } /** * Handle codegraph_search */ private async handleSearch(args: Record): Promise { const query = this.validateString(args.query, 'query'); if (typeof query !== 'string') return query; const cg = this.getCodeGraph(args.projectPath as string | undefined); const kind = args.kind as string | undefined; const rawLimit = Number(args.limit) || 10; const limit = clamp(rawLimit, 1, 100); const results = cg.searchNodes(query, { limit, kinds: kind ? [kind as NodeKind] : undefined, }); if (results.length === 0) { return this.textResult(`No results found for "${query}"`); } const formatted = this.formatSearchResults(results); return this.textResult(this.truncateOutput(formatted)); } /** * Handle codegraph_context */ private async handleContext(args: Record): Promise { const task = this.validateString(args.task, 'task'); if (typeof task !== 'string') return task; // Mark session as consulted (enables Grep/Glob/Bash) const sessionId = process.env.CLAUDE_SESSION_ID; if (sessionId) { markSessionConsulted(sessionId); } const cg = this.getCodeGraph(args.projectPath as string | undefined); const maxNodes = (args.maxNodes as number) || 20; const includeCode = args.includeCode !== false; const context = await cg.buildContext(task, { maxNodes, includeCode, format: 'markdown', }); // Detect if this looks like a feature request (vs bug fix or exploration) const isFeatureQuery = this.looksLikeFeatureRequest(task); const reminder = isFeatureQuery ? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria' : ''; // buildContext returns string when format is 'markdown' if (typeof context === 'string') { return this.textResult(context + reminder); } // If it returns TaskContext, format it return this.textResult(this.formatTaskContext(context) + reminder); } /** * Heuristic to detect if a query looks like a feature request */ private looksLikeFeatureRequest(task: string): boolean { const featureKeywords = [ 'add', 'create', 'implement', 'build', 'enable', 'allow', 'new feature', 'support for', 'ability to', 'want to', 'should be able', 'need to add', 'swap', 'edit', 'modify' ]; const bugKeywords = [ 'fix', 'bug', 'error', 'broken', 'crash', 'issue', 'problem', 'not working', 'fails', 'undefined', 'null' ]; const explorationKeywords = [ 'how does', 'where is', 'what is', 'find', 'show me', 'explain', 'understand', 'explore' ]; const lowerTask = task.toLowerCase(); // If it's clearly a bug or exploration, not a feature if (bugKeywords.some(k => lowerTask.includes(k))) return false; if (explorationKeywords.some(k => lowerTask.includes(k))) return false; // If it matches feature keywords, it's likely a feature request return featureKeywords.some(k => lowerTask.includes(k)); } /** * Handle codegraph_callers */ private async handleCallers(args: Record): Promise { const symbol = this.validateString(args.symbol, 'symbol'); if (typeof symbol !== 'string') return symbol; const cg = this.getCodeGraph(args.projectPath as string | undefined); const limit = clamp((args.limit as number) || 20, 1, 100); const allMatches = this.findAllSymbols(cg, symbol); if (allMatches.nodes.length === 0) { return this.textResult(`Symbol "${symbol}" not found in the codebase`); } // Aggregate callers across all matching symbols const seen = new Set(); const allCallers: Node[] = []; for (const node of allMatches.nodes) { for (const c of cg.getCallers(node.id)) { if (!seen.has(c.node.id)) { seen.add(c.node.id); allCallers.push(c.node); } } } if (allCallers.length === 0) { return this.textResult(`No callers found for "${symbol}"${allMatches.note}`); } const formatted = this.formatNodeList(allCallers.slice(0, limit), `Callers of ${symbol}`) + allMatches.note; return this.textResult(this.truncateOutput(formatted)); } /** * Handle codegraph_callees */ private async handleCallees(args: Record): Promise { const symbol = this.validateString(args.symbol, 'symbol'); if (typeof symbol !== 'string') return symbol; const cg = this.getCodeGraph(args.projectPath as string | undefined); const limit = clamp((args.limit as number) || 20, 1, 100); const allMatches = this.findAllSymbols(cg, symbol); if (allMatches.nodes.length === 0) { return this.textResult(`Symbol "${symbol}" not found in the codebase`); } // Aggregate callees across all matching symbols const seen = new Set(); const allCallees: Node[] = []; for (const node of allMatches.nodes) { for (const c of cg.getCallees(node.id)) { if (!seen.has(c.node.id)) { seen.add(c.node.id); allCallees.push(c.node); } } } if (allCallees.length === 0) { return this.textResult(`No callees found for "${symbol}"${allMatches.note}`); } const formatted = this.formatNodeList(allCallees.slice(0, limit), `Callees of ${symbol}`) + allMatches.note; return this.textResult(this.truncateOutput(formatted)); } /** * Handle codegraph_impact */ private async handleImpact(args: Record): Promise { const symbol = this.validateString(args.symbol, 'symbol'); if (typeof symbol !== 'string') return symbol; const cg = this.getCodeGraph(args.projectPath as string | undefined); const depth = clamp((args.depth as number) || 2, 1, 10); const allMatches = this.findAllSymbols(cg, symbol); if (allMatches.nodes.length === 0) { return this.textResult(`Symbol "${symbol}" not found in the codebase`); } // Aggregate impact across all matching symbols const mergedNodes = new Map(); const mergedEdges: Edge[] = []; const seenEdges = new Set(); for (const node of allMatches.nodes) { const impact = cg.getImpactRadius(node.id, depth); for (const [id, n] of impact.nodes) { mergedNodes.set(id, n); } for (const e of impact.edges) { const key = `${e.source}->${e.target}:${e.kind}`; if (!seenEdges.has(key)) { seenEdges.add(key); mergedEdges.push(e); } } } const mergedImpact = { nodes: mergedNodes, edges: mergedEdges, roots: allMatches.nodes.map(n => n.id), }; const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note; return this.textResult(this.truncateOutput(formatted)); } /** * Handle codegraph_explore — deep exploration in a single call * * Strategy: find relevant symbols via graph traversal, group by file, * then read contiguous file sections covering all symbols per file. * This replaces multiple codegraph_node + Read calls. * * Output size is adaptive to project file count via * `getExploreOutputBudget` — see #185 for why a fixed 35k cap was a * tax on small projects while earning its keep on large ones. */ private async handleExplore(args: Record): Promise { const query = this.validateString(args.query, 'query'); if (typeof query !== 'string') return query; const cg = this.getCodeGraph(args.projectPath as string | undefined); const projectRoot = cg.getProjectRoot(); // Resolve adaptive output budget from project size. Falls back to the // largest-tier defaults if stats aren't available, which preserves // pre-#185 behavior for callers that hit the rare stats failure. let budget: ExploreOutputBudget; try { budget = getExploreOutputBudget(cg.getStats().fileCount); } catch { budget = getExploreOutputBudget(Infinity); } const maxFiles = clamp((args.maxFiles as number) || budget.defaultMaxFiles, 1, 20); // Step 1: Find relevant context with generous parameters. // Use a large maxNodes budget — explore has its own 35k char output limit // that prevents context bloat, so more nodes just means better coverage // across entry points (especially for large files like Svelte components). const subgraph = await cg.findRelevantContext(query, { searchLimit: 8, traversalDepth: 3, maxNodes: 200, minScore: 0.2, }); if (subgraph.nodes.size === 0) { return this.textResult(`No relevant code found for "${query}"`); } // Step 2: Group nodes by file, score by relevance const fileGroups = new Map(); const entryNodeIds = new Set(subgraph.roots); // Build a set of nodes directly connected to entry points (depth 1) const connectedToEntry = new Set(); for (const edge of subgraph.edges) { if (entryNodeIds.has(edge.source)) connectedToEntry.add(edge.target); if (entryNodeIds.has(edge.target)) connectedToEntry.add(edge.source); } for (const node of subgraph.nodes.values()) { // Skip import/export nodes — they add noise without information if (node.kind === 'import' || node.kind === 'export') continue; const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 }; group.nodes.push(node); // Score: entry point nodes worth 10, directly connected worth 3, others worth 1 if (entryNodeIds.has(node.id)) { group.score += 10; } else if (connectedToEntry.has(node.id)) { group.score += 3; } else { group.score += 1; } fileGroups.set(node.filePath, group); } // Only include files that have entry points or nodes directly connected to entry points const relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3); // Extract query terms for relevance checking const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3); // Sort files: highest relevance first, deprioritize low-value files const sortedFiles = relevantFiles.sort((a, b) => { const aPath = a[0].toLowerCase(); const bPath = b[0].toLowerCase(); // Check if any node name or file path relates to query terms const hasQueryRelevance = (filePath: string, nodes: Node[]) => { const fp = filePath.toLowerCase(); if (queryTerms.some(t => fp.includes(t))) return true; return nodes.some(n => queryTerms.some(t => n.name.toLowerCase().includes(t))); }; const aRelevant = hasQueryRelevance(aPath, a[1].nodes); const bRelevant = hasQueryRelevance(bPath, b[1].nodes); if (aRelevant !== bRelevant) return aRelevant ? -1 : 1; // Deprioritize test files, icon files, and i18n files const isLowValue = (p: string) => /\/(tests?|__tests?__|spec)\//i.test(p) || /\bicons?\b/i.test(p) || /\bi18n\b/i.test(p); const aLow = isLowValue(aPath); const bLow = isLowValue(bPath); if (aLow !== bLow) return aLow ? 1 : -1; if (a[1].score !== b[1].score) return b[1].score - a[1].score; return b[1].nodes.length - a[1].nodes.length; }); // Step 3: Build relationship map const lines: string[] = [ `## Exploration: ${query}`, '', `Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`, '', ]; // Relationship map — show how symbols connect const significantEdges = subgraph.edges.filter(e => e.kind !== 'contains' // skip contains — it's implied by file grouping ); if (budget.includeRelationships && significantEdges.length > 0) { lines.push('### Relationships'); lines.push(''); // Group edges by kind for readability const byKind = new Map>(); for (const edge of significantEdges) { const sourceNode = subgraph.nodes.get(edge.source); const targetNode = subgraph.nodes.get(edge.target); if (!sourceNode || !targetNode) continue; const group = byKind.get(edge.kind) || []; group.push({ source: sourceNode.name, target: targetNode.name }); byKind.set(edge.kind, group); } for (const [kind, edges] of byKind) { const cap = budget.maxEdgesPerRelationshipKind; const shown = edges.slice(0, cap); lines.push(`**${kind}:**`); for (const e of shown) { lines.push(`- ${e.source} → ${e.target}`); } if (edges.length > cap) { lines.push(`- ... and ${edges.length - cap} more`); } lines.push(''); } } // Step 4: Read contiguous file sections lines.push('### Source Code'); lines.push(''); let totalChars = lines.join('\n').length; let filesIncluded = 0; let anyFileTrimmed = false; for (const [filePath, group] of sortedFiles) { if (filesIncluded >= maxFiles) break; if (totalChars > budget.maxOutputChars * 0.9) break; const absPath = validatePathWithinRoot(projectRoot, filePath); if (!absPath || !existsSync(absPath)) continue; let fileContent: string; try { fileContent = readFileSync(absPath, 'utf-8'); } catch { continue; } const fileLines = fileContent.split('\n'); const lang = group.nodes[0]?.language || ''; // Cluster nearby symbols to avoid reading huge gaps between distant symbols. // Sort by start line, then merge overlapping/adjacent ranges (within the // adaptive gap threshold). Include both node ranges AND edge source // locations so template sections with component usages/calls are // covered (not just script block symbols). // // Each range carries an `importance` score so we can rank clusters // when the per-file budget forces us to drop some: entry-point nodes // are worth 10, directly-connected nodes 3, peripheral nodes 1, and // bare edge-source lines 2 (less than a connected node but more than // a peripheral one — they hint at a reference but aren't a definition). // Container kinds whose body can span most/all of a file. When such a // node covers most of the file we drop it from the ranges: keeping it // would merge every method inside it into one giant cluster spanning // the whole file, which then tail-trims down to just the container's // opening lines (its header/declarations) and buries the methods the // query actually asked about (#185 follow-up — Session.swift in // Alamofire is the canonical case: the `Session` class spans ~1,400 // lines). We want the granular symbols inside, not the envelope. const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']); const ranges: Array<{ start: number; end: number; name: string; kind: string; importance: number }> = group.nodes .filter(n => n.startLine > 0 && n.endLine > 0) // Drop whole-file envelope nodes (containers covering >50% of the file). .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5)) .map(n => { let importance = 1; if (entryNodeIds.has(n.id)) importance = 10; else if (connectedToEntry.has(n.id)) importance = 3; return { start: n.startLine, end: n.endLine, name: n.name, kind: n.kind, importance }; }); // Add edge source locations in this file — captures template references // (component usages, event handlers) that aren't nodes themselves. // Query edges directly from the DB (not just the subgraph) because BFS // traversal may have pruned template reference targets due to node budget. const edgeLines = new Set(); // dedup by "line:name" for (const node of group.nodes) { const outgoing = cg.getOutgoingEdges(node.id); for (const edge of outgoing) { if (!edge.line || edge.line <= 0 || edge.kind === 'contains') continue; const key = `${edge.line}:${edge.target}`; if (edgeLines.has(key)) continue; edgeLines.add(key); // Look up target name from subgraph first, fall back to edge kind const targetNode = subgraph.nodes.get(edge.target); const targetName = targetNode?.name ?? edge.kind; ranges.push({ start: edge.line, end: edge.line, name: targetName, kind: edge.kind, importance: 2 }); } } ranges.sort((a, b) => a.start - b.start); if (ranges.length === 0) continue; const gapThreshold = budget.gapThreshold; const clusters: Array<{ start: number; end: number; symbols: string[]; score: number; maxImportance: number }> = []; let current = { start: ranges[0]!.start, end: ranges[0]!.end, symbols: [`${ranges[0]!.name}(${ranges[0]!.kind})`], score: ranges[0]!.importance, maxImportance: ranges[0]!.importance, }; for (let i = 1; i < ranges.length; i++) { const r = ranges[i]!; if (r.start <= current.end + gapThreshold) { current.end = Math.max(current.end, r.end); current.symbols.push(`${r.name}(${r.kind})`); current.score += r.importance; current.maxImportance = Math.max(current.maxImportance, r.importance); } else { clusters.push(current); current = { start: r.start, end: r.end, symbols: [`${r.name}(${r.kind})`], score: r.importance, maxImportance: r.importance, }; } } clusters.push(current); // Build file section output from clusters, capped by per-file budget. // The pathological case (#185): a file like Session.swift where every // method is adjacent collapses into one cluster spanning the whole // file, and dumping that into the agent's context is most of the // token cost on small projects. We pick clusters in priority order // until the per-file char cap is hit. Truly enormous single clusters // get tail-trimmed with a marker. const contextPadding = 3; const withLineNumbers = exploreLineNumbersEnabled(); const buildSection = (c: { start: number; end: number }): string => { const startIdx = Math.max(0, c.start - 1 - contextPadding); const endIdx = Math.min(fileLines.length, c.end + contextPadding); const slice = fileLines.slice(startIdx, endIdx).join('\n'); // startIdx is 0-based, so the slice's first line is line startIdx + 1. return withLineNumbers ? numberSourceLines(slice, startIdx + 1) : slice; }; // Language-neutral separator (no `//` — not a comment in Python, Ruby, // etc.). With line numbers on, the line-number jump also signals the gap. const GAP_MARKER = '\n\n... (gap) ...\n\n'; // Rank clusters for inclusion under the per-file cap. Entry-point // clusters come first: a cluster containing a query entry point // (importance 10) must outrank a dense block of mere declarations, // otherwise on a large file like Session.swift the top-of-file class // header + property list (many adjacent low-importance nodes, high // density) wins the budget and buries the actual methods the query // asked about (perform/didCreateURLRequest/task live deep in the // file). Within the same importance tier, prefer density (score per // line) so we still favor focused clusters over sprawling ones, then // smaller span as a cheap-to-include tiebreak. const rankedClusters = clusters .map((c, i) => ({ idx: i, span: c.end - c.start + 1, c })) .sort((a, b) => { if (b.c.maxImportance !== a.c.maxImportance) return b.c.maxImportance - a.c.maxImportance; const densityA = a.c.score / a.span; const densityB = b.c.score / b.span; if (densityB !== densityA) return densityB - densityA; if (b.c.score !== a.c.score) return b.c.score - a.c.score; return a.span - b.span; }); const chosenIndices = new Set(); let projectedChars = 0; for (const rc of rankedClusters) { const sectionLen = buildSection(rc.c).length + (chosenIndices.size > 0 ? GAP_MARKER.length : 0); // Always take the top-ranked cluster, even if oversize, so we don't // return an empty file section (agent would then re-Read the file, // negating the savings). if (chosenIndices.size === 0) { chosenIndices.add(rc.idx); projectedChars += sectionLen; continue; } if (projectedChars + sectionLen > budget.maxCharsPerFile) continue; chosenIndices.add(rc.idx); projectedChars += sectionLen; } // Emit chosen clusters in source order so the file reads top-to-bottom. let fileSection = ''; const allSymbols: string[] = []; let fileTrimmed = false; for (let i = 0; i < clusters.length; i++) { if (!chosenIndices.has(i)) continue; const cluster = clusters[i]!; const section = buildSection(cluster); if (fileSection.length > 0) fileSection += GAP_MARKER; fileSection += section; allSymbols.push(...cluster.symbols); } // If a single chosen cluster is still oversize (long monolithic // function), tail-trim it. Better one trimmed view than nothing. if (fileSection.length > budget.maxCharsPerFile) { fileSection = fileSection.slice(0, budget.maxCharsPerFile) + '\n... (trimmed) ...'; fileTrimmed = true; } if (chosenIndices.size < clusters.length || fileTrimmed) { anyFileTrimmed = true; } // Dedupe + cap the symbols list shown in the per-file header. Some // files (Session.swift in Alamofire) produced 3.4KB symbol lists // from cluster scoring + edge-source lines, dwarfing the per-file // body cap. Show top names by frequency, with a "+N more" tail. const symbolCounts = new Map(); for (const s of allSymbols) { symbolCounts.set(s, (symbolCounts.get(s) ?? 0) + 1); } const sortedSymbols = [...symbolCounts.entries()] .sort((a, b) => b[1] - a[1]) .map(([name]) => name); const headerCap = budget.maxSymbolsInFileHeader; const headerSymbols = sortedSymbols.slice(0, headerCap); const omittedCount = sortedSymbols.length - headerSymbols.length; const headerSuffix = omittedCount > 0 ? `${headerSymbols.join(', ')}, +${omittedCount} more` : headerSymbols.join(', '); const fileHeader = `#### ${filePath} — ${headerSuffix}`; // Respect the total output cap on a file-by-file basis. if (totalChars + fileSection.length + 200 > budget.maxOutputChars) { const remaining = budget.maxOutputChars - totalChars - 200; if (remaining < 500) break; const trimmed = fileSection.slice(0, remaining) + '\n... (trimmed) ...'; lines.push(fileHeader); lines.push(''); lines.push('```' + lang); lines.push(trimmed); lines.push('```'); lines.push(''); totalChars += trimmed.length + 200; filesIncluded++; anyFileTrimmed = true; break; } lines.push(fileHeader); lines.push(''); lines.push('```' + lang); lines.push(fileSection); lines.push('```'); lines.push(''); totalChars += fileSection.length + 200; filesIncluded++; } // Add remaining files as references (from both relevant and peripheral files). // Small projects (per budget) skip this — the relevant story already fits // in the source section, and a trailing pointer list is pure overhead. if (budget.includeAdditionalFiles) { const remainingRelevant = sortedFiles.slice(filesIncluded); const peripheralFiles = [...fileGroups.entries()] .filter(([, group]) => group.score < 3) .sort((a, b) => b[1].score - a[1].score); const remainingFiles = [...remainingRelevant, ...peripheralFiles]; if (remainingFiles.length > 0) { lines.push('### Additional relevant files (not shown)'); lines.push(''); for (const [filePath, group] of remainingFiles.slice(0, 10)) { const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', '); lines.push(`- ${filePath}: ${symbols}`); } if (remainingFiles.length > 10) { lines.push(`- ... and ${remainingFiles.length - 10} more files`); } } } // Add completeness signal so agents know they don't need to re-read these files. // On small projects the budget gates this off — but if we actually had to // trim or drop clusters, surface a brief note so the agent knows it can // still Read for more detail. if (budget.includeCompletenessSignal) { lines.push(''); lines.push('---'); lines.push(`> **Complete source code is included above for ${filesIncluded} files.** You do NOT need to re-read these files — the relevant sections are already shown in full. Only use Read/Grep for files listed under "Additional relevant files" if you need more detail.`); } else if (anyFileTrimmed) { lines.push(''); lines.push(`> Some file sections were trimmed for size. Use \`codegraph_node\` or Read for the full source if needed.`); } // Add explore budget note based on project size if (budget.includeBudgetNote) { try { const stats = cg.getStats(); const callBudget = getExploreBudget(stats.fileCount); lines.push(''); lines.push(`> **Explore budget: ${callBudget} calls max for this project (${stats.fileCount.toLocaleString()} files indexed).** Stop exploring and synthesize your answer once you've used ${callBudget} calls — do NOT make additional explore calls beyond this budget.`); } catch { // Stats unavailable — skip budget note } } // Hard-cap to the adaptive budget. The per-file loop bounds the source // sections, but the relationship map, additional-files list, and // completeness/budget notes can still push the assembled output past // maxOutputChars (observed 30k against a 28k tier cap). A fat explore // payload persists in the agent's context and is re-read as cache-input // on every subsequent turn, so the overrun is paid many times over. const output = lines.join('\n'); if (output.length > budget.maxOutputChars) { const cut = output.slice(0, budget.maxOutputChars); const lastNewline = cut.lastIndexOf('\n'); const safe = lastNewline > budget.maxOutputChars * 0.8 ? cut.slice(0, lastNewline) : cut; return this.textResult(safe + '\n\n... (explore output truncated to budget — use codegraph_node or Read for more)'); } return this.textResult(output); } /** * Handle codegraph_node */ private async handleNode(args: Record): Promise { const symbol = this.validateString(args.symbol, 'symbol'); if (typeof symbol !== 'string') return symbol; const cg = this.getCodeGraph(args.projectPath as string | undefined); // Default to false to minimize context usage const includeCode = args.includeCode === true; const match = this.findSymbol(cg, symbol); if (!match) { return this.textResult(`Symbol "${symbol}" not found in the codebase`); } let code: string | null = null; let outline: string | null = null; if (includeCode) { // For container symbols (class/interface/struct/…), the full body is the // sum of every method body — a wall of source (e.g. a 10k-char class) // that bloats context and is rarely needed in full. Return a structural // outline (members + signatures + line numbers) instead; the agent can // Read or codegraph_node a specific method for its body. Leaf symbols // (function/method/etc.) return their full body as before. if (CONTAINER_NODE_KINDS.has(match.node.kind)) { outline = this.buildContainerOutline(cg, match.node); } if (!outline) { code = await cg.getCode(match.node.id); } } const formatted = this.formatNodeDetails(match.node, code, outline) + match.note; return this.textResult(this.truncateOutput(formatted)); } /** * Handle codegraph_status */ private async handleStatus(args: Record): Promise { const cg = this.getCodeGraph(args.projectPath as string | undefined); const stats = cg.getStats(); const lines: string[] = [ '## CodeGraph Status', '', `**Files indexed:** ${stats.fileCount}`, `**Total nodes:** ${stats.nodeCount}`, `**Total edges:** ${stats.edgeCount}`, `**Database size:** ${(stats.dbSizeBytes / 1024 / 1024).toFixed(2)} MB`, ]; // Surface the active SQLite backend (node:sqlite, Node's built-in real // SQLite — full WAL + FTS5, no native build). lines.push(`**Backend:** node:sqlite (Node built-in) — full WAL + FTS5`); // Effective journal mode. 'wal' ⇒ concurrent reads never block on a writer; // anything else ⇒ they can ("database is locked"). node:sqlite supports WAL // everywhere, so a non-wal mode means the filesystem can't (network/ // virtualized mounts, WSL2 /mnt). See issue #238. const journalMode = cg.getJournalMode(); if (journalMode === 'wal') { lines.push(`**Journal mode:** wal (concurrent reads safe)`); } else { lines.push( `**Journal mode:** ⚠ ${journalMode || 'unknown'} — WAL not active, so reads ` + `can block on a concurrent write (WAL appears unsupported on this filesystem)` ); } lines.push('', '### Nodes by Kind:'); for (const [kind, count] of Object.entries(stats.nodesByKind)) { if ((count as number) > 0) { lines.push(`- ${kind}: ${count}`); } } lines.push('', '### Languages:'); for (const [lang, count] of Object.entries(stats.filesByLanguage)) { if ((count as number) > 0) { lines.push(`- ${lang}: ${count}`); } } return this.textResult(lines.join('\n')); } /** * Handle codegraph_files - get project file structure from the index */ private async handleFiles(args: Record): Promise { const cg = this.getCodeGraph(args.projectPath as string | undefined); const pathFilter = args.path as string | undefined; const pattern = args.pattern as string | undefined; const format = (args.format as 'tree' | 'flat' | 'grouped') || 'tree'; const includeMetadata = args.includeMetadata !== false; const maxDepth = args.maxDepth != null ? clamp(args.maxDepth as number, 1, 20) : undefined; // Get all files from the index const allFiles = cg.getFiles(); if (allFiles.length === 0) { return this.textResult('No files indexed. Run `codegraph index` first.'); } // Filter by path prefix let files = pathFilter ? allFiles.filter(f => f.path.startsWith(pathFilter) || f.path.startsWith('./' + pathFilter)) : allFiles; // Filter by glob pattern if (pattern) { const regex = this.globToRegex(pattern); files = files.filter(f => regex.test(f.path)); } if (files.length === 0) { return this.textResult(`No files found matching the criteria.`); } // Format output let output: string; switch (format) { case 'flat': output = this.formatFilesFlat(files, includeMetadata); break; case 'grouped': output = this.formatFilesGrouped(files, includeMetadata); break; case 'tree': default: output = this.formatFilesTree(files, includeMetadata, maxDepth); break; } return this.textResult(this.truncateOutput(output)); } /** * Convert glob pattern to regex */ private globToRegex(pattern: string): RegExp { const escaped = pattern .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape special regex chars except * and ? .replace(/\*\*/g, '{{GLOBSTAR}}') // Temp placeholder for ** .replace(/\*/g, '[^/]*') // * matches anything except / .replace(/\?/g, '[^/]') // ? matches single char except / .replace(/\{\{GLOBSTAR\}\}/g, '.*'); // ** matches anything including / return new RegExp(escaped); } /** * Format files as a flat list */ private formatFilesFlat(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string { const lines: string[] = [`## Files (${files.length})`, '']; for (const file of files.sort((a, b) => a.path.localeCompare(b.path))) { if (includeMetadata) { lines.push(`- ${file.path} (${file.language}, ${file.nodeCount} symbols)`); } else { lines.push(`- ${file.path}`); } } return lines.join('\n'); } /** * Format files grouped by language */ private formatFilesGrouped(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string { const byLang = new Map(); for (const file of files) { const existing = byLang.get(file.language) || []; existing.push(file); byLang.set(file.language, existing); } const lines: string[] = [`## Files by Language (${files.length} total)`, '']; // Sort languages by file count (descending) const sortedLangs = [...byLang.entries()].sort((a, b) => b[1].length - a[1].length); for (const [lang, langFiles] of sortedLangs) { lines.push(`### ${lang} (${langFiles.length})`); for (const file of langFiles.sort((a, b) => a.path.localeCompare(b.path))) { if (includeMetadata) { lines.push(`- ${file.path} (${file.nodeCount} symbols)`); } else { lines.push(`- ${file.path}`); } } lines.push(''); } return lines.join('\n'); } /** * Format files as a tree structure */ private formatFilesTree( files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean, maxDepth?: number ): string { // Build tree structure interface TreeNode { name: string; children: Map; file?: { language: string; nodeCount: number }; } const root: TreeNode = { name: '', children: new Map() }; for (const file of files) { const parts = file.path.split('/'); let current = root; for (let i = 0; i < parts.length; i++) { const part = parts[i]; if (!part) continue; if (!current.children.has(part)) { current.children.set(part, { name: part, children: new Map() }); } current = current.children.get(part)!; // If this is the last part, it's a file if (i === parts.length - 1) { current.file = { language: file.language, nodeCount: file.nodeCount }; } } } // Render tree const lines: string[] = [`## Project Structure (${files.length} files)`, '']; const renderNode = (node: TreeNode, prefix: string, isLast: boolean, depth: number): void => { if (maxDepth !== undefined && depth > maxDepth) return; const connector = isLast ? '└── ' : '├── '; const childPrefix = isLast ? ' ' : '│ '; if (node.name) { let line = prefix + connector + node.name; if (node.file && includeMetadata) { line += ` (${node.file.language}, ${node.file.nodeCount} symbols)`; } lines.push(line); } const children = [...node.children.values()]; // Sort: directories first, then files, both alphabetically children.sort((a, b) => { const aIsDir = a.children.size > 0 && !a.file; const bIsDir = b.children.size > 0 && !b.file; if (aIsDir !== bIsDir) return aIsDir ? -1 : 1; return a.name.localeCompare(b.name); }); for (let i = 0; i < children.length; i++) { const child = children[i]!; const nextPrefix = node.name ? prefix + childPrefix : prefix; renderNode(child, nextPrefix, i === children.length - 1, depth + 1); } }; renderNode(root, '', true, 0); return lines.join('\n'); } // ========================================================================= // Symbol resolution helpers // ========================================================================= /** * Find a symbol by name, handling disambiguation when multiple matches exist. * Returns the best match and a note about alternatives if any. */ /** * Check if a node matches a symbol query. * * Accepts simple names (`run`) and three flavors of qualifier: * - dotted `Session.request` (TS/JS/Python) * - colon-pair `stage_apply::run` (Rust, C++, Ruby) * - slash `configurator/stage_apply` (path-ish) * * Multi-level qualifiers compose: `crate::configurator::stage_apply::run` * works. Rust path prefixes (`crate`, `super`, `self`) are stripped so * the canonical `crate::module::symbol` form resolves. * * Resolution order, last part must always equal `node.name`: * 1. Suffix-match against `qualifiedName` (handles class-scoped methods * where the extractor builds the qualified name from the AST stack) * 2. File-path containment (handles file-derived modules in Rust/ * Python — `stage_apply::run` matches a `run` in `stage_apply.rs`) */ private matchesSymbol(node: Node, symbol: string): boolean { // Simple name match if (node.name === symbol) return true; // File basename match (e.g., "product-card" matches "product-card.liquid") if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol) return true; // Qualified-name lookups: split on any supported separator. `\w` keeps // identifier chars (incl. `_`) intact; everything else is treated as // a separator we tolerate. if (!/[.\/]|::/.test(symbol)) return false; const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0); if (parts.length < 2) return false; const lastPart = parts[parts.length - 1]!; if (node.name !== lastPart) return false; // Stage 1: qualified-name suffix match. The extractor joins the // semantic hierarchy with `::`, so `Session.request` and // `Session::request` both become `Session::request` here. const colonSuffix = parts.join('::'); if (node.qualifiedName.includes(colonSuffix)) return true; // Stage 2: file-path containment. Rust modules and Python packages // are not in `qualifiedName` — they're encoded in the file path. So // `stage_apply::run` matches a `run` in any file whose path // contains a `stage_apply` segment (with or without an extension). // // Filter out Rust path prefixes that have no file-system equivalent. const containerHints = parts.slice(0, -1).filter((p) => !RUST_PATH_PREFIXES.has(p)); if (containerHints.length === 0) return false; const segments = node.filePath.split('/').filter((s) => s.length > 0); return containerHints.every((hint) => segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint) ); } private findSymbol(cg: CodeGraph, symbol: string): { node: Node; note: string } | null { // Use higher limit for qualified lookups (e.g., "Session.request", // "stage_apply::run") since the target may rank lower in FTS when // there are many partial matches across the qualifier parts. const isQualified = /[.\/]|::/.test(symbol); const limit = isQualified ? 50 : 10; let results = cg.searchNodes(symbol, { limit }); // FTS strips colons as a special char, so `stage_apply::run` searches // for the literal `stage_applyrun` and finds nothing. Re-search by // the bare last part and let `matchesSymbol` filter by qualifier. if (isQualified && results.length === 0) { const tail = lastQualifierPart(symbol); if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit }); } if (results.length === 0 || !results[0]) { return null; } const exactMatches = results.filter(r => this.matchesSymbol(r.node, symbol)); if (exactMatches.length === 1) { return { node: exactMatches[0]!.node, note: '' }; } if (exactMatches.length > 1) { // Multiple exact matches - pick first, note the others const picked = exactMatches[0]!.node; const others = exactMatches.slice(1).map(r => `${r.node.name} (${r.node.kind}) at ${r.node.filePath}:${r.node.startLine}` ); const note = `\n\n> **Note:** ${exactMatches.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`; return { node: picked, note }; } // No exact match. For qualified lookups, don't silently fall back // to a fuzzy result — the user typed a specific qualifier, and // resolving `stage_apply::nonexistent_fn` to the unrelated // `stage_apply.rs` file would be actively misleading (#173). if (isQualified) return null; return { node: results[0]!.node, note: '' }; } /** * Find ALL symbols matching a name. Used by callers/callees/impact to aggregate * results across all matching symbols (e.g., multiple classes with an `execute` method). */ private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } { let results = cg.searchNodes(symbol, { limit: 50 }); // Mirror the fallback in `findSymbol` for qualified queries — FTS // strips colons, so a module-qualified lookup needs a second pass // by the bare last part. if (results.length === 0 && /[.\/]|::/.test(symbol)) { const tail = lastQualifierPart(symbol); if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit: 50 }); } if (results.length === 0) { return { nodes: [], note: '' }; } const exactMatches = results.filter(r => this.matchesSymbol(r.node, symbol)); if (exactMatches.length <= 1) { const node = exactMatches[0]?.node ?? results[0]!.node; return { nodes: [node], note: '' }; } const locations = exactMatches.map(r => `${r.node.kind} at ${r.node.filePath}:${r.node.startLine}` ); const note = `\n\n> **Note:** Aggregated results across ${exactMatches.length} symbols named "${symbol}": ${locations.join(', ')}`; return { nodes: exactMatches.map(r => r.node), note }; } /** * Truncate output if it exceeds the maximum length */ private truncateOutput(text: string): string { if (text.length <= MAX_OUTPUT_LENGTH) return text; const truncated = text.slice(0, MAX_OUTPUT_LENGTH); const lastNewline = truncated.lastIndexOf('\n'); const cutPoint = lastNewline > MAX_OUTPUT_LENGTH * 0.8 ? lastNewline : MAX_OUTPUT_LENGTH; return truncated.slice(0, cutPoint) + '\n\n... (output truncated)'; } // ========================================================================= // Formatting helpers (compact by default to reduce context usage) // ========================================================================= private formatSearchResults(results: SearchResult[]): string { const lines: string[] = [`## Search Results (${results.length} found)`, '']; for (const result of results) { const { node } = result; const location = node.startLine ? `:${node.startLine}` : ''; // Compact format: one line per result with key info lines.push(`### ${node.name} (${node.kind})`); lines.push(`${node.filePath}${location}`); if (node.signature) lines.push(`\`${node.signature}\``); lines.push(''); } return lines.join('\n'); } private formatNodeList(nodes: Node[], title: string): string { const lines: string[] = [`## ${title} (${nodes.length} found)`, '']; for (const node of nodes) { const location = node.startLine ? `:${node.startLine}` : ''; // Compact: just name, kind, location lines.push(`- ${node.name} (${node.kind}) - ${node.filePath}${location}`); } return lines.join('\n'); } private formatImpact(symbol: string, impact: Subgraph): string { const nodeCount = impact.nodes.size; // Compact format: just list affected symbols grouped by file const lines: string[] = [ `## Impact: "${symbol}" affects ${nodeCount} symbols`, '', ]; // Group by file const byFile = new Map(); for (const node of impact.nodes.values()) { const existing = byFile.get(node.filePath) || []; existing.push(node); byFile.set(node.filePath, existing); } for (const [file, nodes] of byFile) { lines.push(`**${file}:**`); // Compact: inline list const nodeList = nodes.map(n => `${n.name}:${n.startLine}`).join(', '); lines.push(nodeList); lines.push(''); } return lines.join('\n'); } /** * Build a compact structural outline of a container symbol from its * indexed children (methods, fields, properties, …) — name, kind, * line number, and signature — so the agent gets the shape of a class * without the full source of every method. Returns '' when the container * has no indexed children, so the caller can fall back to full source. */ private buildContainerOutline(cg: CodeGraph, node: Node): string { const children = cg.getChildren(node.id) .filter(c => c.kind !== 'import' && c.kind !== 'export') .sort((a, b) => (a.startLine ?? 0) - (b.startLine ?? 0)); if (children.length === 0) return ''; const lines = [`**Members (${children.length}):**`, '']; for (const c of children) { const loc = c.startLine ? `:${c.startLine}` : ''; const sig = c.signature ? ` — \`${c.signature}\`` : ''; lines.push(`- ${c.name} (${c.kind})${loc}${sig}`); } return lines.join('\n'); } private formatNodeDetails(node: Node, code: string | null, outline?: string | null): string { const location = node.startLine ? `:${node.startLine}` : ''; const lines: string[] = [ `## ${node.name} (${node.kind})`, '', `**Location:** ${node.filePath}${location}`, ]; if (node.signature) { lines.push(`**Signature:** \`${node.signature}\``); } // Only include docstring if it's short and useful if (node.docstring && node.docstring.length < 200) { lines.push('', node.docstring); } if (outline) { lines.push('', outline, '', `> Structural outline only. Read \`${node.filePath}\` or call codegraph_node on a specific member for its body.`); } else if (code) { lines.push('', '```' + node.language, code, '```'); } return lines.join('\n'); } private formatTaskContext(context: TaskContext): string { return context.summary || 'No context found'; } private textResult(text: string): ToolResult { return { content: [{ type: 'text', text }], }; } private errorResult(message: string): ToolResult { return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true, }; } }