tools.ts 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863
  1. /**
  2. * MCP Tool Definitions
  3. *
  4. * Defines the tools exposed by the CodeGraph MCP server.
  5. */
  6. import CodeGraph, { findNearestCodeGraphRoot } from '../index';
  7. import type { Node, Edge, SearchResult, Subgraph, TaskContext, NodeKind } from '../types';
  8. import { createHash } from 'crypto';
  9. import {
  10. constants as fsConstants,
  11. closeSync,
  12. existsSync,
  13. openSync,
  14. readFileSync,
  15. writeSync,
  16. } from 'fs';
  17. import { clamp, validatePathWithinRoot } from '../utils';
  18. import { tmpdir } from 'os';
  19. import { join } from 'path';
  /**
   * Maximum length, in characters, of a tool's text output. Exists to keep a
   * single response from bloating the agent's context.
   */
  const MAX_OUTPUT_LENGTH = 15_000;

  /**
   * Leading Rust path segments that do not correspond to anything on disk:
   * `crate` (the current crate), `super` (the parent module) and `self` (the
   * current module).
   *
   * `matchesSymbol` strips these before matching against file paths, so that
   * `crate::configurator::stage_apply::run` and
   * `configurator::stage_apply::run` resolve identically.
   */
  const RUST_PATH_PREFIXES = new Set<string>([
    'crate',
    'super',
    'self',
  ]);
  /**
   * Kinds of node that hold other symbols.
   *
   * When `codegraph_node` is called with `includeCode=true` on one of these, it
   * answers with a structural outline (member names, signatures and line
   * numbers) rather than the full body — the body of a large class would be a
   * multi-thousand-character block of source in the agent's context.
   */
  const CONTAINER_NODE_KINDS: Set<NodeKind> = new Set([
    'class',
    'struct',
    'interface',
    'trait',
    'protocol',
    'enum',
    'namespace',
    'module',
  ]);
  /**
   * Return the final non-empty segment of a qualified symbol, where segments
   * are separated by `::`, `.` or `/`.
   *
   * @param symbol qualified name such as `crate::a::b` or `pkg.mod.fn`
   * @returns the last non-empty segment, or `symbol` unchanged when every
   *   segment is empty (e.g. `''` or `'///'`)
   */
  function lastQualifierPart(symbol: string): string {
    const segments = symbol.split(/::|[./]/);
    // Scan from the end so trailing separators (which yield empty segments)
    // are skipped.
    for (let idx = segments.length - 1; idx >= 0; idx--) {
      const segment = segments[idx];
      if (segment) {
        return segment;
      }
    }
    return symbol;
  }
  /**
   * Recommended number of `codegraph_explore` calls for a project of the given
   * size. Bigger codebases get more calls to cover their surface area; smaller
   * ones get fewer to avoid needless overhead.
   *
   * @param fileCount number of indexed files
   * @returns 1 below 500 files, 2 below 5,000, 3 below 15,000, 4 below 25,000,
   *   otherwise 5
   */
  export function getExploreBudget(fileCount: number): number {
    // [exclusive upper bound on fileCount, recommended calls], smallest first.
    const tiers: ReadonlyArray<readonly [number, number]> = [
      [500, 1],
      [5000, 2],
      [15000, 3],
      [25000, 4],
    ];
    for (const [upperBound, calls] of tiers) {
      if (fileCount < upperBound) {
        return calls;
      }
    }
    return 5;
  }
  /**
   * Adaptive output budget for `codegraph_explore`, selected by project size.
   *
   * The numbers are tuned per tier and do NOT grow monotonically with size:
   *
   * - The smallest tier (< 500 files) has the fewest default files and the
   *   tightest clustering, but a larger total cap and per-file cap than the
   *   next tier, and it omits the trailing meta-text (additional-files list,
   *   completeness signal, budget note).
   * - The < 5,000 tier has the tightest total and per-file caps.
   * - The two largest tiers keep generous caps; the stated rationale is that
   *   the agent's native discovery cost (grep + find + many Reads) dwarfs a
   *   fat explore call at that scale.
   *
   * The Relationships section is enabled in every tier.
   *
   * The 500 / 5,000 / 15,000 breakpoints match `getExploreBudget`; that
   * function additionally splits at 25,000, which this budget does not.
   */
  export interface ExploreOutputBudget {
    /** Hard ceiling on the total number of output characters. */
    maxOutputChars: number;
    /** Value used for `maxFiles` when the caller does not supply one. */
    defaultMaxFiles: number;
    /** Ceiling on contiguous source returned for one file, summed over its clusters. */
    maxCharsPerFile: number;
    /** Cluster gap threshold, in lines. */
    gapThreshold: number;
    /** Maximum symbols named in a per-file header (`#### path — sym(kind), ...`). */
    maxSymbolsInFileHeader: number;
    /** Maximum edges listed per relationship kind in the Relationships section. */
    maxEdgesPerRelationshipKind: number;
    /** Whether to emit the "Relationships" section. */
    includeRelationships: boolean;
    /** Whether to emit the trailing "Additional relevant files (not shown)" list. */
    includeAdditionalFiles: boolean;
    /** Whether to emit the "Complete source code is included above…" reminder. */
    includeCompletenessSignal: boolean;
    /** Whether to emit the explore-budget reminder at the end. */
    includeBudgetNote: boolean;
  }

  /**
   * Pick the `codegraph_explore` output budget for a project.
   *
   * @param fileCount number of indexed files
   * @returns a freshly created budget object for the first tier whose bound
   *   exceeds `fileCount`, or the largest tier when none does
   */
  export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget {
    // Built per call so every caller receives its own object, as before.
    // Ordered smallest-first; `below` is an exclusive upper bound.
    const boundedTiers: Array<{ below: number; budget: ExploreOutputBudget }> = [
      {
        below: 500,
        budget: {
          maxOutputChars: 18000,
          defaultMaxFiles: 5,
          maxCharsPerFile: 3800,
          gapThreshold: 8,
          maxSymbolsInFileHeader: 6,
          maxEdgesPerRelationshipKind: 6,
          includeRelationships: true,
          includeAdditionalFiles: false,
          includeCompletenessSignal: false,
          includeBudgetNote: false,
        },
      },
      {
        below: 5000,
        budget: {
          maxOutputChars: 13000,
          defaultMaxFiles: 6,
          maxCharsPerFile: 2500,
          gapThreshold: 10,
          maxSymbolsInFileHeader: 8,
          maxEdgesPerRelationshipKind: 8,
          includeRelationships: true,
          includeAdditionalFiles: true,
          includeCompletenessSignal: true,
          includeBudgetNote: true,
        },
      },
      {
        below: 15000,
        budget: {
          maxOutputChars: 35000,
          defaultMaxFiles: 12,
          maxCharsPerFile: 7000,
          gapThreshold: 15,
          maxSymbolsInFileHeader: 15,
          maxEdgesPerRelationshipKind: 15,
          includeRelationships: true,
          includeAdditionalFiles: true,
          includeCompletenessSignal: true,
          includeBudgetNote: true,
        },
      },
    ];

    const matched = boundedTiers.find((tier) => fileCount < tier.below);
    if (matched) {
      return matched.budget;
    }

    // 15,000 files and up (also reached for NaN, where every comparison fails).
    return {
      maxOutputChars: 38000,
      defaultMaxFiles: 14,
      maxCharsPerFile: 7000,
      gapThreshold: 15,
      maxSymbolsInFileHeader: 15,
      maxEdgesPerRelationshipKind: 15,
      includeRelationships: true,
      includeAdditionalFiles: true,
      includeCompletenessSignal: true,
      includeBudgetNote: true,
    };
  }
  /**
   * Report whether `codegraph_explore` should prefix each source line with its
   * line number in `cat -n` form (`<num>\t<code>`).
   *
   * With line numbers in the payload the agent can cite `file:line` directly
   * instead of re-Reading a file only to locate a line — described as the
   * dominant residual cost on precise-tracing questions (#185 follow-up).
   *
   * On by default. Only the exact value `CODEGRAPH_EXPLORE_LINENUMS=0` turns it
   * off; the A/B harness uses that to weigh payload cost against read savings.
   */
  function exploreLineNumbersEnabled(): boolean {
    const flag = process.env.CODEGRAPH_EXPLORE_LINENUMS;
    const explicitlyDisabled = flag === '0';
    return !explicitlyDisabled;
  }
  /**
   * Number every line of a source slice using the Read tool's `cat -n`
   * convention (line number, a tab, then the text) so the agent can treat the
   * result like Read output.
   *
   * The slice is split on `\n`, so a trailing newline produces one final
   * numbered empty line.
   *
   * @param slice contiguous source text already extracted from the file
   * @param firstLineNumber 1-based line number of the slice's first line
   * @returns the slice with each line prefixed, joined with `\n`
   */
  function numberSourceLines(slice: string, firstLineNumber: number): string {
    return slice
      .split('\n')
      .map((text, offset) => `${firstLineNumber + offset}\t${text}`)
      .join('\n');
  }
  /**
   * Record that a Claude session has consulted the MCP tools by writing a
   * timestamped marker file into the OS temp directory. Per the surrounding
   * design, the marker is what unblocks Grep/Glob/Bash commands that would
   * otherwise be refused.
   *
   * The marker is opened with explicit flags rather than written with
   * `writeFileSync` for a security reason (CWE-59): the temp directory is
   * world-writable on Linux (mode 1777), so on a shared machine another local
   * user could pre-create `codegraph-consulted-<hash>` as a symlink to a file
   * the victim owns, and a link-following write would overwrite that file with
   * the timestamp. Hashing the session id makes the name hard to predict, but
   * that is only defense-in-depth — if a session id leaks through logs, argv
   * or telemetry the name becomes guessable — so links are simply never
   * followed.
   *
   * Best-effort: every failure is swallowed so a marker problem never breaks
   * the MCP call.
   *
   * @param sessionId session identifier; only a truncated hash of it reaches
   *   the file name
   */
  function markSessionConsulted(sessionId: string): void {
    try {
      const digest = createHash('md5').update(sessionId).digest('hex');
      const markerPath = join(tmpdir(), `codegraph-consulted-${digest.slice(0, 16)}`);

      // O_NOFOLLOW: openSync fails with ELOOP when markerPath is a symlink.
      // O_CREAT | O_TRUNC: same create-or-overwrite semantics as a plain write.
      // 0o600: other local users cannot read the marker back (the payload is
      // harmless, but the restriction is free).
      const { O_WRONLY, O_CREAT, O_TRUNC, O_NOFOLLOW } = fsConstants;
      const fd = openSync(markerPath, O_WRONLY | O_CREAT | O_TRUNC | O_NOFOLLOW, 0o600);
      try {
        const stamp = new Date().toISOString();
        writeSync(fd, stamp);
      } finally {
        closeSync(fd);
      }
    } catch {
      // Deliberately ignored. A planted symlink (ELOOP) also ends up here,
      // which is the desired outcome: decline to write instead of clobbering
      // a target chosen by an attacker.
    }
  }
  /**
   * Shape of one MCP tool as advertised to clients: its name, a
   * human/agent-readable description, and an object schema for its arguments.
   */
  export interface ToolDefinition {
    /** Tool identifier; `ToolHandler.execute` dispatches on this value. */
    name: string;
    /** Guidance shown to the client describing when and how to use the tool. */
    description: string;
    /** Argument schema; always an object schema. */
    inputSchema: {
      type: 'object';
      /** Per-argument schemas keyed by argument name. */
      properties: { [argumentName: string]: PropertySchema };
      /** Names of arguments the caller must supply, if any. */
      required?: Array<string>;
    };
  }
  /** Schema for a single tool argument. */
  interface PropertySchema {
    /** Schema type name, e.g. `'string'`, `'number'` or `'boolean'`. */
    type: string;
    /** Explanation of the argument shown to the client. */
    description: string;
    /** Permitted values, when the argument is restricted to a fixed set. */
    enum?: Array<string>;
    /** Default value advertised in the schema. */
    default?: unknown;
  }
  /**
   * Outcome of running a tool: one or more text parts, plus an optional flag
   * marking the result as an error.
   */
  export interface ToolResult {
    /** Text parts making up the response. */
    content: { type: 'text'; text: string }[];
    /** Set when the result reports a failure rather than a normal answer. */
    isError?: boolean;
  }
  /**
   * Shared `projectPath` argument schema. The same object is attached to every
   * tool so any of them can be pointed at another project.
   */
  const projectPathProperty: PropertySchema = {
    type: 'string',
    description: 'Path to a different project with .codegraph/ initialized. If omitted, uses current project. Use this to query other codebases.',
  };

  /**
   * Build one argument schema. Keys are emitted in the order
   * type, description, then whatever `extras` carries (enum before default).
   */
  const schemaProp = (
    type: string,
    description: string,
    extras: Pick<PropertySchema, 'enum' | 'default'> = {},
  ): PropertySchema => ({ type, description, ...extras });

  /**
   * Build one tool definition. `required` is only present on the schema when a
   * list is supplied.
   */
  const defineTool = (
    name: string,
    description: string,
    properties: Record<string, PropertySchema>,
    required?: string[],
  ): ToolDefinition => ({
    name,
    description,
    inputSchema: required
      ? { type: 'object', properties, required }
      : { type: 'object', properties },
  });

  /**
   * Every tool the CodeGraph MCP server exposes.
   *
   * The set is designed for minimal context usage: `codegraph_context` is the
   * primary entry point and the remaining tools are for targeted follow-ups.
   * Each tool accepts the optional `projectPath` argument for cross-project
   * queries.
   *
   * NOTE(review): `codegraph_explore` advertises `maxFiles` default 12, while
   * `ExploreOutputBudget.defaultMaxFiles` varies by project size — confirm
   * which default the handler actually applies.
   */
  export const tools: ToolDefinition[] = [
    defineTool(
      'codegraph_search',
      'Quick symbol search by name. Returns locations only (no code). Use codegraph_context instead for comprehensive task context.',
      {
        query: schemaProp('string', 'Symbol name or partial name (e.g., "auth", "signIn", "UserService")'),
        kind: schemaProp('string', 'Filter by node kind', {
          enum: ['function', 'method', 'class', 'interface', 'type', 'variable', 'route', 'component'],
        }),
        limit: schemaProp('number', 'Maximum results (default: 10)', { default: 10 }),
        projectPath: projectPathProperty,
      },
      ['query'],
    ),
    defineTool(
      'codegraph_context',
      'PRIMARY TOOL — call this FIRST for any "how does X work", architecture, feature, or bug-context question. Composes search + node + callers + callees and returns entry points, related symbols, and key code in ONE call — usually enough to answer with no further search/Read/Grep. Prefer this over chaining codegraph_search + codegraph_node, and over codegraph_explore. NOTE: provides CODE context, not product requirements; for new features still clarify UX/edge cases with the user.',
      {
        task: schemaProp('string', 'Description of the task, bug, or feature to build context for'),
        maxNodes: schemaProp('number', 'Maximum symbols to include (default: 20)', { default: 20 }),
        includeCode: schemaProp('boolean', 'Include code snippets for key symbols (default: true)', { default: true }),
        projectPath: projectPathProperty,
      },
      ['task'],
    ),
    defineTool(
      'codegraph_callers',
      'Find all functions/methods that call a specific symbol. Useful for understanding usage patterns and impact of changes.',
      {
        symbol: schemaProp('string', 'Name of the function, method, or class to find callers for'),
        limit: schemaProp('number', 'Maximum number of callers to return (default: 20)', { default: 20 }),
        projectPath: projectPathProperty,
      },
      ['symbol'],
    ),
    defineTool(
      'codegraph_callees',
      'Find all functions/methods that a specific symbol calls. Useful for understanding dependencies and code flow.',
      {
        symbol: schemaProp('string', 'Name of the function, method, or class to find callees for'),
        limit: schemaProp('number', 'Maximum number of callees to return (default: 20)', { default: 20 }),
        projectPath: projectPathProperty,
      },
      ['symbol'],
    ),
    defineTool(
      'codegraph_impact',
      'Analyze the impact radius of changing a symbol. Shows what code could be affected by modifications.',
      {
        symbol: schemaProp('string', 'Name of the symbol to analyze impact for'),
        depth: schemaProp('number', 'How many levels of dependencies to traverse (default: 2)', { default: 2 }),
        projectPath: projectPathProperty,
      },
      ['symbol'],
    ),
    defineTool(
      'codegraph_node',
      'Get detailed info about ONE symbol (location, signature, docstring). Pass includeCode=true for source: a function/method returns its body; a class/interface/struct/enum returns a compact member OUTLINE (fields + method signatures + line numbers), not every method body — Read or codegraph_node a specific member for its body. Keep includeCode=false to minimize context. For SEVERAL related symbols, make ONE codegraph_explore (or codegraph_context) call instead of many node calls — repeated node calls each re-read the whole context and cost far more.',
      {
        symbol: schemaProp('string', 'Name of the symbol to get details for'),
        includeCode: schemaProp('boolean', 'Include full source code (default: false to minimize context)', { default: false }),
        projectPath: projectPathProperty,
      },
      ['symbol'],
    ),
    defineTool(
      'codegraph_explore',
      'Returns source for SEVERAL related symbols grouped by file, plus a relationship map, in ONE capped call. This is the efficient way to inspect many related symbols at once — strongly prefer it over a series of codegraph_node or Read calls (each separate call re-reads the whole context, so 8 node calls cost far more than 1 explore). Use it after codegraph_context when you need to see the actual source of several symbols. Query with specific symbol/file/code terms, NOT natural-language sentences — run codegraph_search first to find names. Bad: "how are agent prompts loaded and passed to the CLI". Good: "renderStaticScene drawElementOnCanvas ShapeCache renderElement.ts".',
      {
        query: schemaProp('string', 'Symbol names, file names, or short code terms to explore (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"). Use codegraph_search first to find relevant names.'),
        maxFiles: schemaProp('number', 'Maximum number of files to include source code from (default: 12)', { default: 12 }),
        projectPath: projectPathProperty,
      },
      ['query'],
    ),
    defineTool(
      'codegraph_status',
      'Get the status of the CodeGraph index, including statistics about indexed files, nodes, and edges.',
      {
        projectPath: projectPathProperty,
      },
    ),
    defineTool(
      'codegraph_files',
      'REQUIRED for file/folder exploration. Get the project file structure from the CodeGraph index. Returns a tree view of all indexed files with metadata (language, symbol count). Much faster than Glob/filesystem scanning. Use this FIRST when exploring project structure, finding files, or understanding codebase organization.',
      {
        path: schemaProp('string', 'Filter to files under this directory path (e.g., "src/components"). Returns all files if not specified.'),
        pattern: schemaProp('string', 'Filter files matching this glob pattern (e.g., "*.tsx", "**/*.test.ts")'),
        format: schemaProp('string', 'Output format: "tree" (hierarchical, default), "flat" (simple list), "grouped" (by language)', {
          enum: ['tree', 'flat', 'grouped'],
          default: 'tree',
        }),
        includeMetadata: schemaProp('boolean', 'Include file metadata like language and symbol count (default: true)', { default: true }),
        maxDepth: schemaProp('number', 'Maximum directory depth to show (default: unlimited)'),
        projectPath: projectPathProperty,
      },
    ),
  ];
  455. /**
  456. * Tool handler that executes tools against a CodeGraph instance
  457. *
  458. * Supports cross-project queries via the projectPath parameter.
  459. * Other projects are opened on-demand and cached for performance.
  460. */
  461. export class ToolHandler {
  462. // Cache of opened CodeGraph instances for cross-project queries
  463. private projectCache: Map<string, CodeGraph> = new Map();
  464. // The directory the server last searched for a default project. Surfaced in
  465. // the "not initialized" error so users can see why detection missed.
  466. private defaultProjectHint: string | null = null;
  467. constructor(private cg: CodeGraph | null) {}
  /**
   * Replace the default CodeGraph instance — the one returned for tool calls
   * that omit `projectPath` — for example once lazy initialization completes.
   *
   * @param cg instance to use as the default from now on
   */
  setDefaultCodeGraph(cg: CodeGraph): void {
    this.cg = cg;
  }
  /**
   * Remember which directory the server searched when looking for the default
   * project. The value is only read back when building the "no project is
   * loaded" error, so that message can say where detection started.
   *
   * @param searchedPath directory the default-project lookup started from
   */
  setDefaultProjectHint(searchedPath: string): void {
    this.defaultProjectHint = searchedPath;
  }
  /**
   * Tell whether a default CodeGraph instance has been set.
   *
   * @returns `true` unless the default instance is `null`
   */
  hasDefaultCodeGraph(): boolean {
    const missing = this.cg === null;
    return !missing;
  }
  /**
   * Return the tool definitions, with the `codegraph_explore` description
   * extended by a call-budget recommendation scaled to the number of indexed
   * files.
   *
   * Falls back to the static `tools` list when there is no default CodeGraph
   * instance or when anything in the stats lookup or formatting throws.
   *
   * @returns the static list, or a new array in which only the
   *   `codegraph_explore` entry is replaced by a copy with the longer
   *   description
   */
  getTools(): ToolDefinition[] {
    const graph = this.cg;
    if (!graph) {
      return tools;
    }

    try {
      const stats = graph.getStats();
      const budget = getExploreBudget(stats.fileCount);
      return tools.map((tool) =>
        tool.name !== 'codegraph_explore'
          ? tool
          : {
              ...tool,
              description: `${tool.description} Budget: make at most ${budget} calls for this project (${stats.fileCount.toLocaleString()} files indexed).`,
            },
      );
    } catch {
      // Stats are advisory only; never let them break tool listing.
      return tools;
    }
  }
  /**
   * Resolve the CodeGraph instance a tool call should run against.
   *
   * Without `projectPath` the default instance is returned. With one, the
   * nearest ancestor directory containing `.codegraph/` is located (much like
   * git locating `.git/`) and that project's instance is opened on demand and
   * cached.
   *
   * @param projectPath optional path inside another initialized project
   * @returns the default instance or the (cached) instance for `projectPath`
   * @throws Error when no default project is loaded and no path was given, or
   *   when no `.codegraph/` directory is found for `projectPath`
   */
  private getCodeGraph(projectPath?: string): CodeGraph {
    if (!projectPath) {
      if (this.cg) {
        return this.cg;
      }
      const searched = this.defaultProjectHint ?? process.cwd();
      throw new Error(
        'No CodeGraph project is loaded for this session.\n' +
        `Searched for a .codegraph/ directory starting from: ${searched}\n` +
        'The index is likely fine — this is a working-directory detection issue: ' +
        "the MCP client launched the server outside your project and didn't report the " +
        'workspace root. Fix it either way:\n' +
        ' • Pass projectPath to the tool call, e.g. projectPath: "/absolute/path/to/your/project"\n' +
        ' • Or add --path to the server\'s MCP config args: ["serve", "--mcp", "--path", "/absolute/path/to/your/project"]'
      );
    }

    // Fast path: this exact path string has been resolved before.
    const cachedForPath = this.projectCache.get(projectPath);
    if (cachedForPath) {
      return cachedForPath;
    }

    // Climb parent directories until a .codegraph/ folder turns up.
    const resolvedRoot = findNearestCodeGraphRoot(projectPath);
    if (!resolvedRoot) {
      throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
    }

    // When the path lands on the default project, hand back the instance that
    // is already open instead of opening a SECOND connection to the same DB.
    // A duplicate connection serializes reads against the watcher's auto-sync
    // writes; on the wasm backend (no WAL) that shows up as intermittent
    // "database is locked" errors on concurrent tool calls (issue #238).
    // It is intentionally left out of projectCache: the server owns and closes
    // the default instance, so closeAll() closing it too would double-close it.
    if (this.cg && this.cg.getProjectRoot() === resolvedRoot) {
      return this.cg;
    }

    // Either reuse the instance already opened for this root (reached through
    // a different path) or open it now and register it under the root.
    const alreadyOpen = this.projectCache.get(resolvedRoot);
    const instance = alreadyOpen ?? CodeGraph.openSync(resolvedRoot);
    if (!alreadyOpen) {
      this.projectCache.set(resolvedRoot, instance);
    }
    // Also index by the caller's original path so the next lookup is a hit.
    if (projectPath !== resolvedRoot) {
      this.projectCache.set(projectPath, instance);
    }
    return instance;
  }
  /**
   * Close every cached cross-project connection and empty the cache.
   *
   * One instance can sit in the cache under several keys (its resolved root
   * plus each caller-supplied path that resolved to it), so instances are
   * de-duplicated first and each is closed exactly once. The cache is emptied
   * before closing so it never retains closed instances, and a failure closing
   * one instance does not prevent the others from being closed.
   *
   * The default instance is not stored in the cache and is not touched here.
   *
   * @throws the first error raised by a `close()` call, after all instances
   *   have been attempted
   */
  closeAll(): void {
    const uniqueInstances = new Set(this.projectCache.values());
    this.projectCache.clear();

    let failed = false;
    let firstError: unknown;
    for (const cg of uniqueInstances) {
      try {
        cg.close();
      } catch (err) {
        // Keep going so remaining connections are still released.
        if (!failed) {
          failed = true;
          firstError = err;
        }
      }
    }
    if (failed) {
      throw firstError;
    }
  }
  /**
   * Check that an argument is a string with at least one character.
   *
   * @param value raw argument value
   * @param name argument name used in the error text
   * @returns `value` itself when valid, otherwise an error result saying
   *   `<name> must be a non-empty string`
   */
  private validateString(value: unknown, name: string): string | ToolResult {
    if (typeof value === 'string' && value.length > 0) {
      return value;
    }
    return this.errorResult(`${name} must be a non-empty string`);
  }
  /**
   * Run the named tool with the given arguments.
   *
   * An unrecognized name yields an `Unknown tool` error result. Anything
   * thrown while dispatching or handling is turned into a
   * `Tool execution failed` error result instead of propagating.
   *
   * @param toolName name of the tool to run
   * @param args raw, unvalidated tool arguments
   * @returns the handler's result, or an error result
   */
  async execute(toolName: string, args: Record<string, unknown>): Promise<ToolResult> {
    // Lazy thunks: only the selected handler is ever invoked.
    const dispatch = new Map<string, () => Promise<ToolResult>>([
      ['codegraph_search', () => this.handleSearch(args)],
      ['codegraph_context', () => this.handleContext(args)],
      ['codegraph_callers', () => this.handleCallers(args)],
      ['codegraph_callees', () => this.handleCallees(args)],
      ['codegraph_impact', () => this.handleImpact(args)],
      ['codegraph_explore', () => this.handleExplore(args)],
      ['codegraph_node', () => this.handleNode(args)],
      ['codegraph_status', () => this.handleStatus(args)],
      ['codegraph_files', () => this.handleFiles(args)],
    ]);

    try {
      const run = dispatch.get(toolName);
      if (!run) {
        return this.errorResult(`Unknown tool: ${toolName}`);
      }
      return await run();
    } catch (err) {
      return this.errorResult(`Tool execution failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
  /**
   * Handle `codegraph_search`: look up symbols by name and return a formatted,
   * length-capped list of matches.
   *
   * `limit` is coerced with `Number`, defaults to 10 when missing, zero or not
   * a number, and is clamped to the range 1–100. `kind`, when given, narrows
   * the search to that single node kind.
   *
   * @param args raw tool arguments (`query`, optional `kind`, `limit`,
   *   `projectPath`)
   * @returns an error result for an invalid `query`, a "No results" message
   *   when nothing matches, otherwise the formatted matches
   */
  private async handleSearch(args: Record<string, unknown>): Promise<ToolResult> {
    const query = this.validateString(args.query, 'query');
    if (typeof query !== 'string') {
      // validateString produced an error result; pass it straight back.
      return query;
    }

    const cg = this.getCodeGraph(args.projectPath as string | undefined);

    const kind = args.kind as string | undefined;
    const kinds = kind ? [kind as NodeKind] : undefined;
    const limit = clamp(Number(args.limit) || 10, 1, 100);

    const results = cg.searchNodes(query, { limit, kinds });
    if (results.length === 0) {
      return this.textResult(`No results found for "${query}"`);
    }

    return this.textResult(this.truncateOutput(this.formatSearchResults(results)));
  }
  /**
   * Handle codegraph_context.
   *
   * Builds markdown context for a task description. Also marks the current
   * session (from `CLAUDE_SESSION_ID`, when set) as having consulted the
   * graph, and appends a reminder to ask the user clarifying questions when
   * the task looks like a feature request.
   *
   * @param args - Raw tool arguments: `task` (required non-empty string),
   *   optional `projectPath`, `maxNodes` (defaults to 20) and `includeCode`
   *   (defaults to true; only an explicit `false` disables it).
   * @returns The context text, or the validation error for a bad `task`.
   */
  private async handleContext(args: Record<string, unknown>): Promise<ToolResult> {
    const task = this.validateString(args.task, 'task');
    if (typeof task !== 'string') return task;

    // Mark session as consulted (enables Grep/Glob/Bash)
    const sessionId = process.env.CLAUDE_SESSION_ID;
    if (sessionId) {
      markSessionConsulted(sessionId);
    }

    const cg = this.getCodeGraph(args.projectPath as string | undefined);

    // `maxNodes` arrives untyped: coerce it and fall back to the default for
    // anything that is not a number >= 1 (non-numeric strings, NaN, zero,
    // negatives) instead of passing it straight through.
    const requestedMaxNodes = Math.trunc(Number(args.maxNodes));
    const maxNodes = requestedMaxNodes >= 1 ? requestedMaxNodes : 20;
    const includeCode = args.includeCode !== false;

    const context = await cg.buildContext(task, {
      maxNodes,
      includeCode,
      format: 'markdown',
    });

    // Detect if this looks like a feature request (vs bug fix or exploration)
    const reminder = this.looksLikeFeatureRequest(task)
      ? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria'
      : '';

    // buildContext returns string when format is 'markdown'; otherwise
    // format the structured TaskContext ourselves.
    const body = typeof context === 'string' ? context : this.formatTaskContext(context);
    return this.textResult(body + reminder);
  }
  /**
   * Heuristic: does this task description read like a feature request?
   *
   * Matching is plain case-insensitive substring search. Any bug-report or
   * exploration phrase vetoes the classification; otherwise the task counts
   * as a feature request when it contains at least one feature phrase.
   *
   * @param task - Free-form task description.
   * @returns `true` when the task looks like a feature request.
   */
  private looksLikeFeatureRequest(task: string): boolean {
    const haystack = task.toLowerCase();
    const mentionsAny = (phrases: readonly string[]): boolean =>
      phrases.some(phrase => haystack.includes(phrase));

    // Phrases that mark the task as a bug report ...
    const bugPhrases = [
      'fix', 'bug', 'error', 'broken', 'crash', 'issue', 'problem',
      'not working', 'fails', 'undefined', 'null',
    ];
    // ... or as read-only exploration. Either kind rules out "feature".
    const explorationPhrases = [
      'how does', 'where is', 'what is', 'find', 'show me',
      'explain', 'understand', 'explore',
    ];
    if (mentionsAny(bugPhrases) || mentionsAny(explorationPhrases)) {
      return false;
    }

    const featurePhrases = [
      'add', 'create', 'implement', 'build', 'enable', 'allow',
      'new feature', 'support for', 'ability to', 'want to',
      'should be able', 'need to add', 'swap', 'edit', 'modify',
    ];
    return mentionsAny(featurePhrases);
  }
  /**
   * Handle codegraph_callers.
   *
   * Resolves every symbol matching `symbol`, collects the callers of each
   * match (de-duplicated by node id) and returns up to `limit` of them.
   *
   * @param args - Raw tool arguments: `symbol` (required non-empty string),
   *   optional `projectPath` and `limit` (defaults to 20, clamped to 1–100).
   * @returns The formatted caller list, a "not found"/"no callers" message,
   *   or the validation error for a bad `symbol`.
   */
  private async handleCallers(args: Record<string, unknown>): Promise<ToolResult> {
    const symbol = this.validateString(args.symbol, 'symbol');
    if (typeof symbol !== 'string') return symbol;

    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    // Coerce with Number() (as handleSearch does) so a non-numeric value
    // falls back to the default instead of reaching slice() as NaN, which
    // would silently yield an empty list.
    const limit = clamp(Number(args.limit) || 20, 1, 100);

    const allMatches = this.findAllSymbols(cg, symbol);
    if (allMatches.nodes.length === 0) {
      return this.textResult(`Symbol "${symbol}" not found in the codebase`);
    }

    // Aggregate callers across all matching symbols
    const seen = new Set<string>();
    const allCallers: Node[] = [];
    for (const node of allMatches.nodes) {
      for (const c of cg.getCallers(node.id)) {
        if (seen.has(c.node.id)) continue;
        seen.add(c.node.id);
        allCallers.push(c.node);
      }
    }

    if (allCallers.length === 0) {
      return this.textResult(`No callers found for "${symbol}"${allMatches.note}`);
    }

    const formatted = this.formatNodeList(allCallers.slice(0, limit), `Callers of ${symbol}`) + allMatches.note;
    return this.textResult(this.truncateOutput(formatted));
  }
  /**
   * Handle codegraph_callees.
   *
   * Resolves every symbol matching `symbol`, collects the callees of each
   * match (de-duplicated by node id) and returns up to `limit` of them.
   *
   * @param args - Raw tool arguments: `symbol` (required non-empty string),
   *   optional `projectPath` and `limit` (defaults to 20, clamped to 1–100).
   * @returns The formatted callee list, a "not found"/"no callees" message,
   *   or the validation error for a bad `symbol`.
   */
  private async handleCallees(args: Record<string, unknown>): Promise<ToolResult> {
    const symbol = this.validateString(args.symbol, 'symbol');
    if (typeof symbol !== 'string') return symbol;

    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    // Coerce with Number() (as handleSearch does) so a non-numeric value
    // falls back to the default instead of reaching slice() as NaN, which
    // would silently yield an empty list.
    const limit = clamp(Number(args.limit) || 20, 1, 100);

    const allMatches = this.findAllSymbols(cg, symbol);
    if (allMatches.nodes.length === 0) {
      return this.textResult(`Symbol "${symbol}" not found in the codebase`);
    }

    // Aggregate callees across all matching symbols
    const seen = new Set<string>();
    const allCallees: Node[] = [];
    for (const node of allMatches.nodes) {
      for (const c of cg.getCallees(node.id)) {
        if (seen.has(c.node.id)) continue;
        seen.add(c.node.id);
        allCallees.push(c.node);
      }
    }

    if (allCallees.length === 0) {
      return this.textResult(`No callees found for "${symbol}"${allMatches.note}`);
    }

    const formatted = this.formatNodeList(allCallees.slice(0, limit), `Callees of ${symbol}`) + allMatches.note;
    return this.textResult(this.truncateOutput(formatted));
  }
  /**
   * Handle codegraph_impact.
   *
   * Resolves every symbol matching `symbol`, computes the impact radius of
   * each match and merges the results: nodes are keyed by id, edges are
   * de-duplicated on `source->target:kind`, and all matches become roots.
   *
   * @param args - Raw tool arguments: `symbol` (required non-empty string),
   *   optional `projectPath` and `depth` (defaults to 2, clamped to 1–10).
   * @returns The formatted impact report, a "not found" message, or the
   *   validation error for a bad `symbol`.
   */
  private async handleImpact(args: Record<string, unknown>): Promise<ToolResult> {
    const symbol = this.validateString(args.symbol, 'symbol');
    if (typeof symbol !== 'string') return symbol;

    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    // `depth` arrives untyped: coerce to a whole number and fall back to the
    // default for anything non-numeric rather than passing NaN (or a string)
    // on to the traversal.
    const depth = clamp(Math.trunc(Number(args.depth)) || 2, 1, 10);

    const allMatches = this.findAllSymbols(cg, symbol);
    if (allMatches.nodes.length === 0) {
      return this.textResult(`Symbol "${symbol}" not found in the codebase`);
    }

    // Aggregate impact across all matching symbols
    const mergedNodes = new Map<string, Node>();
    const mergedEdges: Edge[] = [];
    const seenEdges = new Set<string>();
    for (const node of allMatches.nodes) {
      const impact = cg.getImpactRadius(node.id, depth);
      for (const [id, n] of impact.nodes) {
        mergedNodes.set(id, n);
      }
      for (const e of impact.edges) {
        const key = `${e.source}->${e.target}:${e.kind}`;
        if (seenEdges.has(key)) continue;
        seenEdges.add(key);
        mergedEdges.push(e);
      }
    }

    const mergedImpact = {
      nodes: mergedNodes,
      edges: mergedEdges,
      roots: allMatches.nodes.map(n => n.id),
    };
    const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note;
    return this.textResult(this.truncateOutput(formatted));
  }
  /**
   * Handle codegraph_explore — deep exploration in a single call
   *
   * Strategy: find relevant symbols via graph traversal, group by file,
   * then read contiguous file sections covering all symbols per file.
   * This replaces multiple codegraph_node + Read calls.
   *
   * Output size is adaptive to project file count via
   * `getExploreOutputBudget` — see #185 for why a fixed 35k cap was a
   * tax on small projects while earning its keep on large ones.
   *
   * @param args - Raw tool arguments: `query` (required non-empty string),
   *   optional `projectPath`, and optional `maxFiles` (defaults to the
   *   budget's `defaultMaxFiles`, clamped to 1–20).
   * @returns A markdown report (relationships, source sections, additional
   *   files, notes) capped at the budget's `maxOutputChars`, a "no relevant
   *   code" message, or the validation error for a bad `query`.
   */
  private async handleExplore(args: Record<string, unknown>): Promise<ToolResult> {
    const query = this.validateString(args.query, 'query');
    if (typeof query !== 'string') return query;

    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    const projectRoot = cg.getProjectRoot();

    // Resolve adaptive output budget from project size. Falls back to the
    // largest-tier defaults if stats aren't available, which preserves
    // pre-#185 behavior for callers that hit the rare stats failure.
    let budget: ExploreOutputBudget;
    try {
      budget = getExploreOutputBudget(cg.getStats().fileCount);
    } catch {
      budget = getExploreOutputBudget(Infinity);
    }

    // `maxFiles` arrives untyped: coerce it so a non-numeric value falls back
    // to the budget default instead of becoming NaN (which would disable the
    // `filesIncluded >= maxFiles` stop condition below).
    const maxFiles = clamp(Number(args.maxFiles) || budget.defaultMaxFiles, 1, 20);

    // Step 1: Find relevant context with generous parameters.
    // Use a large maxNodes budget — explore enforces its own adaptive output
    // cap (budget.maxOutputChars) that prevents context bloat, so more nodes
    // just means better coverage across entry points (especially for large
    // files like Svelte components).
    const subgraph = await cg.findRelevantContext(query, {
      searchLimit: 8,
      traversalDepth: 3,
      maxNodes: 200,
      minScore: 0.2,
    });

    if (subgraph.nodes.size === 0) {
      return this.textResult(`No relevant code found for "${query}"`);
    }

    // Step 2: Group nodes by file, score by relevance
    const fileGroups = new Map<string, { nodes: Node[]; score: number }>();
    const entryNodeIds = new Set(subgraph.roots);

    // Build a set of nodes directly connected to entry points (depth 1)
    const connectedToEntry = new Set<string>();
    for (const edge of subgraph.edges) {
      if (entryNodeIds.has(edge.source)) connectedToEntry.add(edge.target);
      if (entryNodeIds.has(edge.target)) connectedToEntry.add(edge.source);
    }

    for (const node of subgraph.nodes.values()) {
      // Skip import/export nodes — they add noise without information
      if (node.kind === 'import' || node.kind === 'export') continue;

      const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
      group.nodes.push(node);
      // Score: entry point nodes worth 10, directly connected worth 3, others worth 1
      if (entryNodeIds.has(node.id)) {
        group.score += 10;
      } else if (connectedToEntry.has(node.id)) {
        group.score += 3;
      } else {
        group.score += 1;
      }
      fileGroups.set(node.filePath, group);
    }

    // Only include files that have entry points or nodes directly connected to entry points
    const relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3);

    // Extract query terms for relevance checking
    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3);

    // Test files, icon files, and i18n files are deprioritized.
    const isLowValue = (p: string): boolean =>
      /\/(tests?|__tests?__|spec)\//i.test(p) ||
      /\bicons?\b/i.test(p) ||
      /\bi18n\b/i.test(p);

    // Compute each file's sort traits once, up front, instead of rebuilding
    // the helper closures and regexes on every comparator invocation.
    const fileTraits = new Map<string, { relevant: boolean; lowValue: boolean }>();
    for (const [filePath, group] of relevantFiles) {
      const lowerPath = filePath.toLowerCase();
      // A file is query-relevant if its path or any node name contains a query term.
      const relevant =
        queryTerms.some(t => lowerPath.includes(t)) ||
        group.nodes.some(n => queryTerms.some(t => n.name.toLowerCase().includes(t)));
      fileTraits.set(filePath, { relevant, lowValue: isLowValue(lowerPath) });
    }

    // Sort files: highest relevance first, deprioritize low-value files
    const sortedFiles = relevantFiles.sort((a, b) => {
      const aTraits = fileTraits.get(a[0])!;
      const bTraits = fileTraits.get(b[0])!;

      if (aTraits.relevant !== bTraits.relevant) return aTraits.relevant ? -1 : 1;
      if (aTraits.lowValue !== bTraits.lowValue) return aTraits.lowValue ? 1 : -1;
      if (a[1].score !== b[1].score) return b[1].score - a[1].score;
      return b[1].nodes.length - a[1].nodes.length;
    });

    // Step 3: Build relationship map
    const lines: string[] = [
      `## Exploration: ${query}`,
      '',
      `Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
      '',
    ];

    // Relationship map — show how symbols connect
    const significantEdges = subgraph.edges.filter(e =>
      e.kind !== 'contains' // skip contains — it's implied by file grouping
    );

    if (budget.includeRelationships && significantEdges.length > 0) {
      lines.push('### Relationships');
      lines.push('');

      // Group edges by kind for readability
      const byKind = new Map<string, Array<{ source: string; target: string }>>();
      for (const edge of significantEdges) {
        const sourceNode = subgraph.nodes.get(edge.source);
        const targetNode = subgraph.nodes.get(edge.target);
        if (!sourceNode || !targetNode) continue;

        const group = byKind.get(edge.kind) || [];
        group.push({ source: sourceNode.name, target: targetNode.name });
        byKind.set(edge.kind, group);
      }

      for (const [kind, edges] of byKind) {
        const cap = budget.maxEdgesPerRelationshipKind;
        const shown = edges.slice(0, cap);
        lines.push(`**${kind}:**`);
        for (const e of shown) {
          lines.push(`- ${e.source} → ${e.target}`);
        }
        if (edges.length > cap) {
          lines.push(`- ... and ${edges.length - cap} more`);
        }
        lines.push('');
      }
    }

    // Step 4: Read contiguous file sections
    lines.push('### Source Code');
    lines.push('');

    let totalChars = lines.join('\n').length;
    let filesIncluded = 0;
    let anyFileTrimmed = false;
    // Paths whose source was actually emitted. The loop below can skip files
    // (missing, unreadable, no usable ranges) without emitting them, so the
    // "not shown" list must be derived from this set rather than from a
    // positional slice of sortedFiles.
    const shownFiles = new Set<string>();

    // Container kinds whose body can span most/all of a file. When such a
    // node covers most of the file we drop it from the ranges: keeping it
    // would merge every method inside it into one giant cluster spanning
    // the whole file, which then tail-trims down to just the container's
    // opening lines (its header/declarations) and buries the methods the
    // query actually asked about (#185 follow-up — Session.swift in
    // Alamofire is the canonical case: the `Session` class spans ~1,400
    // lines). We want the granular symbols inside, not the envelope.
    const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);

    for (const [filePath, group] of sortedFiles) {
      if (filesIncluded >= maxFiles) break;
      if (totalChars > budget.maxOutputChars * 0.9) break;

      const absPath = validatePathWithinRoot(projectRoot, filePath);
      if (!absPath || !existsSync(absPath)) continue;

      let fileContent: string;
      try {
        fileContent = readFileSync(absPath, 'utf-8');
      } catch {
        continue;
      }

      const fileLines = fileContent.split('\n');
      const lang = group.nodes[0]?.language || '';

      // Cluster nearby symbols to avoid reading huge gaps between distant symbols.
      // Sort by start line, then merge overlapping/adjacent ranges (within the
      // adaptive gap threshold). Include both node ranges AND edge source
      // locations so template sections with component usages/calls are
      // covered (not just script block symbols).
      //
      // Each range carries an `importance` score so we can rank clusters
      // when the per-file budget forces us to drop some: entry-point nodes
      // are worth 10, directly-connected nodes 3, peripheral nodes 1, and
      // bare edge-source lines 2 (less than a connected node but more than
      // a peripheral one — they hint at a reference but aren't a definition).
      const ranges: Array<{ start: number; end: number; name: string; kind: string; importance: number }> = group.nodes
        .filter(n => n.startLine > 0 && n.endLine > 0)
        // Drop whole-file envelope nodes (containers covering >50% of the file).
        .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
        .map(n => {
          let importance = 1;
          if (entryNodeIds.has(n.id)) importance = 10;
          else if (connectedToEntry.has(n.id)) importance = 3;
          return { start: n.startLine, end: n.endLine, name: n.name, kind: n.kind, importance };
        });

      // Add edge source locations in this file — captures template references
      // (component usages, event handlers) that aren't nodes themselves.
      // Query edges directly from the DB (not just the subgraph) because BFS
      // traversal may have pruned template reference targets due to node budget.
      const edgeLines = new Set<string>(); // dedup by "line:target"
      for (const node of group.nodes) {
        const outgoing = cg.getOutgoingEdges(node.id);
        for (const edge of outgoing) {
          if (!edge.line || edge.line <= 0 || edge.kind === 'contains') continue;
          const key = `${edge.line}:${edge.target}`;
          if (edgeLines.has(key)) continue;
          edgeLines.add(key);
          // Look up target name from subgraph first, fall back to edge kind
          const targetNode = subgraph.nodes.get(edge.target);
          const targetName = targetNode?.name ?? edge.kind;
          ranges.push({ start: edge.line, end: edge.line, name: targetName, kind: edge.kind, importance: 2 });
        }
      }

      ranges.sort((a, b) => a.start - b.start);
      if (ranges.length === 0) continue;

      const gapThreshold = budget.gapThreshold;
      const clusters: Array<{ start: number; end: number; symbols: string[]; score: number; maxImportance: number }> = [];
      let current = {
        start: ranges[0]!.start,
        end: ranges[0]!.end,
        symbols: [`${ranges[0]!.name}(${ranges[0]!.kind})`],
        score: ranges[0]!.importance,
        maxImportance: ranges[0]!.importance,
      };

      for (let i = 1; i < ranges.length; i++) {
        const r = ranges[i]!;
        if (r.start <= current.end + gapThreshold) {
          current.end = Math.max(current.end, r.end);
          current.symbols.push(`${r.name}(${r.kind})`);
          current.score += r.importance;
          current.maxImportance = Math.max(current.maxImportance, r.importance);
        } else {
          clusters.push(current);
          current = {
            start: r.start,
            end: r.end,
            symbols: [`${r.name}(${r.kind})`],
            score: r.importance,
            maxImportance: r.importance,
          };
        }
      }
      clusters.push(current);

      // Build file section output from clusters, capped by per-file budget.
      // The pathological case (#185): a file like Session.swift where every
      // method is adjacent collapses into one cluster spanning the whole
      // file, and dumping that into the agent's context is most of the
      // token cost on small projects. We pick clusters in priority order
      // until the per-file char cap is hit. Truly enormous single clusters
      // get tail-trimmed with a marker.
      const contextPadding = 3;
      const withLineNumbers = exploreLineNumbersEnabled();
      const buildSection = (c: { start: number; end: number }): string => {
        const startIdx = Math.max(0, c.start - 1 - contextPadding);
        const endIdx = Math.min(fileLines.length, c.end + contextPadding);
        const slice = fileLines.slice(startIdx, endIdx).join('\n');
        // startIdx is 0-based, so the slice's first line is line startIdx + 1.
        return withLineNumbers ? numberSourceLines(slice, startIdx + 1) : slice;
      };
      // Language-neutral separator (no `//` — not a comment in Python, Ruby,
      // etc.). With line numbers on, the line-number jump also signals the gap.
      const GAP_MARKER = '\n\n... (gap) ...\n\n';

      // Rank clusters for inclusion under the per-file cap. Entry-point
      // clusters come first: a cluster containing a query entry point
      // (importance 10) must outrank a dense block of mere declarations,
      // otherwise on a large file like Session.swift the top-of-file class
      // header + property list (many adjacent low-importance nodes, high
      // density) wins the budget and buries the actual methods the query
      // asked about (perform/didCreateURLRequest/task live deep in the
      // file). Within the same importance tier, prefer density (score per
      // line) so we still favor focused clusters over sprawling ones, then
      // smaller span as a cheap-to-include tiebreak.
      const rankedClusters = clusters
        .map((c, i) => ({ idx: i, span: c.end - c.start + 1, c }))
        .sort((a, b) => {
          if (b.c.maxImportance !== a.c.maxImportance) return b.c.maxImportance - a.c.maxImportance;
          const densityA = a.c.score / a.span;
          const densityB = b.c.score / b.span;
          if (densityB !== densityA) return densityB - densityA;
          if (b.c.score !== a.c.score) return b.c.score - a.c.score;
          return a.span - b.span;
        });

      const chosenIndices = new Set<number>();
      let projectedChars = 0;
      for (const rc of rankedClusters) {
        const sectionLen = buildSection(rc.c).length + (chosenIndices.size > 0 ? GAP_MARKER.length : 0);
        // Always take the top-ranked cluster, even if oversize, so we don't
        // return an empty file section (agent would then re-Read the file,
        // negating the savings).
        if (chosenIndices.size === 0) {
          chosenIndices.add(rc.idx);
          projectedChars += sectionLen;
          continue;
        }
        if (projectedChars + sectionLen > budget.maxCharsPerFile) continue;
        chosenIndices.add(rc.idx);
        projectedChars += sectionLen;
      }

      // Emit chosen clusters in source order so the file reads top-to-bottom.
      let fileSection = '';
      const allSymbols: string[] = [];
      let fileTrimmed = false;
      for (let i = 0; i < clusters.length; i++) {
        if (!chosenIndices.has(i)) continue;
        const cluster = clusters[i]!;
        const section = buildSection(cluster);
        if (fileSection.length > 0) fileSection += GAP_MARKER;
        fileSection += section;
        allSymbols.push(...cluster.symbols);
      }

      // If a single chosen cluster is still oversize (long monolithic
      // function), tail-trim it. Better one trimmed view than nothing.
      if (fileSection.length > budget.maxCharsPerFile) {
        fileSection = fileSection.slice(0, budget.maxCharsPerFile) + '\n... (trimmed) ...';
        fileTrimmed = true;
      }
      if (chosenIndices.size < clusters.length || fileTrimmed) {
        anyFileTrimmed = true;
      }

      // Dedupe + cap the symbols list shown in the per-file header. Some
      // files (Session.swift in Alamofire) produced 3.4KB symbol lists
      // from cluster scoring + edge-source lines, dwarfing the per-file
      // body cap. Show top names by frequency, with a "+N more" tail.
      const symbolCounts = new Map<string, number>();
      for (const s of allSymbols) {
        symbolCounts.set(s, (symbolCounts.get(s) ?? 0) + 1);
      }
      const sortedSymbols = [...symbolCounts.entries()]
        .sort((a, b) => b[1] - a[1])
        .map(([name]) => name);
      const headerCap = budget.maxSymbolsInFileHeader;
      const headerSymbols = sortedSymbols.slice(0, headerCap);
      const omittedCount = sortedSymbols.length - headerSymbols.length;
      const headerSuffix = omittedCount > 0
        ? `${headerSymbols.join(', ')}, +${omittedCount} more`
        : headerSymbols.join(', ');
      const fileHeader = `#### ${filePath} — ${headerSuffix}`;

      // Respect the total output cap on a file-by-file basis. The `200` is a
      // flat allowance for the header, fence and blank lines around a section.
      if (totalChars + fileSection.length + 200 > budget.maxOutputChars) {
        const remaining = budget.maxOutputChars - totalChars - 200;
        if (remaining < 500) break;
        const trimmed = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
        lines.push(fileHeader);
        lines.push('');
        lines.push('```' + lang);
        lines.push(trimmed);
        lines.push('```');
        lines.push('');
        totalChars += trimmed.length + 200;
        filesIncluded++;
        shownFiles.add(filePath);
        anyFileTrimmed = true;
        break;
      }

      lines.push(fileHeader);
      lines.push('');
      lines.push('```' + lang);
      lines.push(fileSection);
      lines.push('```');
      lines.push('');
      totalChars += fileSection.length + 200;
      filesIncluded++;
      shownFiles.add(filePath);
    }

    // Add remaining files as references (from both relevant and peripheral files).
    // Small projects (per budget) skip this — the relevant story already fits
    // in the source section, and a trailing pointer list is pure overhead.
    if (budget.includeAdditionalFiles) {
      // Every relevant file whose source was not emitted above, in ranked order.
      const remainingRelevant = sortedFiles.filter(([filePath]) => !shownFiles.has(filePath));
      const peripheralFiles = [...fileGroups.entries()]
        .filter(([, group]) => group.score < 3)
        .sort((a, b) => b[1].score - a[1].score);
      const remainingFiles = [...remainingRelevant, ...peripheralFiles];
      if (remainingFiles.length > 0) {
        lines.push('### Additional relevant files (not shown)');
        lines.push('');
        for (const [filePath, group] of remainingFiles.slice(0, 10)) {
          const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
          lines.push(`- ${filePath}: ${symbols}`);
        }
        if (remainingFiles.length > 10) {
          lines.push(`- ... and ${remainingFiles.length - 10} more files`);
        }
      }
    }

    // Add completeness signal so agents know they don't need to re-read these files.
    // On small projects the budget gates this off — but if we actually had to
    // trim or drop clusters, surface a brief note so the agent knows it can
    // still Read for more detail.
    if (budget.includeCompletenessSignal) {
      lines.push('');
      lines.push('---');
      lines.push(`> **Complete source code is included above for ${filesIncluded} files.** You do NOT need to re-read these files — the relevant sections are already shown in full. Only use Read/Grep for files listed under "Additional relevant files" if you need more detail.`);
    } else if (anyFileTrimmed) {
      lines.push('');
      lines.push(`> Some file sections were trimmed for size. Use \`codegraph_node\` or Read for the full source if needed.`);
    }

    // Add explore budget note based on project size
    if (budget.includeBudgetNote) {
      try {
        const stats = cg.getStats();
        const callBudget = getExploreBudget(stats.fileCount);
        lines.push('');
        lines.push(`> **Explore budget: ${callBudget} calls max for this project (${stats.fileCount.toLocaleString()} files indexed).** Stop exploring and synthesize your answer once you've used ${callBudget} calls — do NOT make additional explore calls beyond this budget.`);
      } catch {
        // Stats unavailable — skip budget note
      }
    }

    // Hard-cap to the adaptive budget. The per-file loop bounds the source
    // sections, but the relationship map, additional-files list, and
    // completeness/budget notes can still push the assembled output past
    // maxOutputChars (observed 30k against a 28k tier cap). A fat explore
    // payload persists in the agent's context and is re-read as cache-input
    // on every subsequent turn, so the overrun is paid many times over.
    const output = lines.join('\n');
    if (output.length > budget.maxOutputChars) {
      const cut = output.slice(0, budget.maxOutputChars);
      // Prefer ending on a line boundary, unless that would discard more
      // than 20% of the budget.
      const lastNewline = cut.lastIndexOf('\n');
      const safe = lastNewline > budget.maxOutputChars * 0.8 ? cut.slice(0, lastNewline) : cut;
      return this.textResult(safe + '\n\n... (explore output truncated to budget — use codegraph_node or Read for more)');
    }
    return this.textResult(output);
  }
  /**
   * Handle codegraph_node.
   *
   * Looks up a single symbol and returns its details. Source is only
   * attached when `includeCode` is exactly `true` (off by default to keep
   * context usage low).
   *
   * @param args - Raw tool arguments: `symbol` (required non-empty string),
   *   optional `projectPath` and `includeCode`.
   * @returns The formatted node details, a "not found" message, or the
   *   validation error for a bad `symbol`.
   */
  private async handleNode(args: Record<string, unknown>): Promise<ToolResult> {
    const symbol = this.validateString(args.symbol, 'symbol');
    if (typeof symbol !== 'string') return symbol;

    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    const wantsCode = args.includeCode === true;

    const found = this.findSymbol(cg, symbol);
    if (!found) {
      return this.textResult(`Symbol "${symbol}" not found in the codebase`);
    }

    let outline: string | null = null;
    let code: string | null = null;

    // Container symbols (class/interface/struct/…) would otherwise return
    // the sum of every member body — a wall of source that is rarely needed
    // in full. They get a structural outline instead; the agent can Read or
    // codegraph_node a specific member for its body.
    if (wantsCode && CONTAINER_NODE_KINDS.has(found.node.kind)) {
      outline = this.buildContainerOutline(cg, found.node);
    }
    // Leaf symbols — and containers for which no outline was produced —
    // return their full source.
    if (wantsCode && !outline) {
      code = await cg.getCode(found.node.id);
    }

    const details = this.formatNodeDetails(found.node, code, outline) + found.note;
    return this.textResult(this.truncateOutput(details));
  }
  /**
   * Handle codegraph_status.
   *
   * Reports index statistics (file/node/edge counts, database size), the
   * SQLite backend, the effective journal mode, and per-kind / per-language
   * counts for every entry with a positive count.
   *
   * @param args - Raw tool arguments; only the optional `projectPath` is read.
   * @returns A markdown status report.
   */
  private async handleStatus(args: Record<string, unknown>): Promise<ToolResult> {
    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    const stats = cg.getStats();
    const dbSizeMb = (stats.dbSizeBytes / 1024 / 1024).toFixed(2);

    const report: string[] = [];
    report.push('## CodeGraph Status');
    report.push('');
    report.push(`**Files indexed:** ${stats.fileCount}`);
    report.push(`**Total nodes:** ${stats.nodeCount}`);
    report.push(`**Total edges:** ${stats.edgeCount}`);
    report.push(`**Database size:** ${dbSizeMb} MB`);

    // Surface the active SQLite backend (node:sqlite, Node's built-in real
    // SQLite — full WAL + FTS5, no native build).
    report.push(`**Backend:** node:sqlite (Node built-in) — full WAL + FTS5`);

    // Effective journal mode. 'wal' ⇒ concurrent reads never block on a
    // writer; anything else ⇒ they can ("database is locked"). node:sqlite
    // supports WAL everywhere, so a non-wal mode means the filesystem can't
    // (network/virtualized mounts, WSL2 /mnt). See issue #238.
    const journalMode = cg.getJournalMode();
    const journalLine = journalMode === 'wal'
      ? `**Journal mode:** wal (concurrent reads safe)`
      : `**Journal mode:** ⚠ ${journalMode || 'unknown'} — WAL not active, so reads ` +
        `can block on a concurrent write (WAL appears unsupported on this filesystem)`;
    report.push(journalLine);

    // Emit a heading followed by one bullet per entry whose count is > 0.
    const appendCounts = (heading: string, entries: Array<[string, unknown]>): void => {
      report.push('', heading);
      for (const [label, count] of entries) {
        if (!((count as number) > 0)) continue;
        report.push(`- ${label}: ${count}`);
      }
    };
    appendCounts('### Nodes by Kind:', Object.entries(stats.nodesByKind));
    appendCounts('### Languages:', Object.entries(stats.filesByLanguage));

    return this.textResult(report.join('\n'));
  }
  /**
   * Handle codegraph_files - get project file structure from the index.
   *
   * @param args - Raw tool arguments, all optional:
   *   - `projectPath`: project to query.
   *   - `path`: path prefix filter; a leading `./` on either the filter or
   *     the indexed path is tolerated.
   *   - `pattern`: glob filter (see `globToRegex`).
   *   - `format`: `'tree'` (default), `'flat'` or `'grouped'`; any other
   *     value is rendered as a tree.
   *   - `includeMetadata`: defaults to true; only an explicit `false`
   *     disables it.
   *   - `maxDepth`: tree depth, clamped to 1–20; omitted or non-numeric
   *     values mean "no limit".
   * @returns The formatted (and truncated) file listing, or a message when
   *   nothing is indexed or nothing matches.
   */
  private async handleFiles(args: Record<string, unknown>): Promise<ToolResult> {
    const cg = this.getCodeGraph(args.projectPath as string | undefined);
    const pathFilter = args.path as string | undefined;
    const pattern = args.pattern as string | undefined;
    const format = (args.format as 'tree' | 'flat' | 'grouped') || 'tree';
    const includeMetadata = args.includeMetadata !== false;

    // `maxDepth` arrives untyped: coerce it, and treat a value that is not a
    // number as "not provided" rather than handing NaN to the tree formatter.
    const requestedDepth = args.maxDepth != null ? Number(args.maxDepth) : NaN;
    const maxDepth = Number.isNaN(requestedDepth) ? undefined : clamp(requestedDepth, 1, 20);

    // Get all files from the index
    const allFiles = cg.getFiles();
    if (allFiles.length === 0) {
      return this.textResult('No files indexed. Run `codegraph index` first.');
    }

    // Filter by path prefix. Indexed paths may or may not carry a leading
    // "./", and so may the filter — accept every combination.
    const bareFilter =
      typeof pathFilter === 'string' && pathFilter.startsWith('./') ? pathFilter.slice(2) : undefined;
    let files = pathFilter
      ? allFiles.filter(f =>
          f.path.startsWith(pathFilter) ||
          f.path.startsWith('./' + pathFilter) ||
          (bareFilter !== undefined && f.path.startsWith(bareFilter)))
      : allFiles;

    // Filter by glob pattern
    if (pattern) {
      const regex = this.globToRegex(pattern);
      files = files.filter(f => regex.test(f.path));
    }

    if (files.length === 0) {
      return this.textResult(`No files found matching the criteria.`);
    }

    // Format output
    let output: string;
    switch (format) {
      case 'flat':
        output = this.formatFilesFlat(files, includeMetadata);
        break;
      case 'grouped':
        output = this.formatFilesGrouped(files, includeMetadata);
        break;
      case 'tree':
      default:
        output = this.formatFilesTree(files, includeMetadata, maxDepth);
        break;
    }
    return this.textResult(this.truncateOutput(output));
  }
  /**
   * Convert a glob pattern to a regular expression.
   *
   * Supported syntax:
   * - `*`   matches any run of characters except `/`
   * - `?`   matches exactly one character except `/`
   * - `**`  matches any run of characters, including `/`
   * - a whole-segment `**` followed by a slash (at the start of the pattern
   *   or right after a `/`) matches zero or more directories, so a pattern
   *   such as `src/` + `**` + `/x.ts` also matches `src/x.ts`
   *
   * Every other regex metacharacter is escaped and matched literally
   * (brace and bracket expressions are not expanded).
   *
   * The resulting expression is NOT anchored: it succeeds if the glob matches
   * any substring of the tested path.
   *
   * @param pattern - Glob pattern to translate.
   * @returns An unanchored `RegExp` equivalent to the glob.
   */
  private globToRegex(pattern: string): RegExp {
    // Single pass over the pattern: longest glob tokens are listed first so
    // `**/` wins over `**`, which wins over `*`. Anything else caught by the
    // final character class is a regex metacharacter that must be escaped.
    const source = pattern.replace(
      /\*\*\/|\*\*|[*?]|[.+^${}()|[\]\\]/g,
      (token: string, offset: number): string => {
        switch (token) {
          case '**/': {
            // Only a `**` that forms a whole path segment may match zero
            // directories; `a**/b` keeps meaning "a, anything, slash, b".
            const atSegmentStart = offset === 0 || pattern.charAt(offset - 1) === '/';
            return atSegmentStart ? '(?:.*/)?' : '.*/';
          }
          case '**':
            return '.*';
          case '*':
            return '[^/]*';
          case '?':
            return '[^/]';
          default:
            return '\\' + token;
        }
      }
    );
    return new RegExp(source);
  }
  /**
   * Format files as a flat bullet list sorted by path.
   *
   * @param files - Files to list. The array is left untouched; a copy is sorted.
   * @param includeMetadata - When true, each bullet also shows the file's
   *   language and symbol count.
   * @returns Markdown text: a `## Files (N)` heading, a blank line, then one
   *   bullet per file.
   */
  private formatFilesFlat(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string {
    const lines: string[] = [`## Files (${files.length})`, ''];

    // Sort a copy. `Array.prototype.sort` works in place, and the caller may
    // hand us the unfiltered list it got from the index; reordering that
    // array here would be a hidden side effect of a formatting helper.
    const sorted = [...files].sort((a, b) => a.path.localeCompare(b.path));

    for (const file of sorted) {
      lines.push(
        includeMetadata
          ? `- ${file.path} (${file.language}, ${file.nodeCount} symbols)`
          : `- ${file.path}`
      );
    }
    return lines.join('\n');
  }
  /**
   * Format files as one section per language.
   *
   * Sections are ordered by file count, largest first; within a section the
   * files are sorted by path. Each section is followed by a blank line.
   *
   * @param files - Files to list.
   * @param includeMetadata - When true, each bullet also shows the symbol count.
   * @returns Markdown text starting with a `## Files by Language (N total)` heading.
   */
  private formatFilesGrouped(files: { path: string; language: string; nodeCount: number }[], includeMetadata: boolean): string {
    // Bucket the files per language; the Map keeps first-seen language order.
    const buckets = new Map<string, typeof files>();
    for (const entry of files) {
      const bucket = buckets.get(entry.language);
      if (bucket) {
        bucket.push(entry);
      } else {
        buckets.set(entry.language, [entry]);
      }
    }

    const out: string[] = [`## Files by Language (${files.length} total)`, ''];

    // Biggest language first.
    const ranked = Array.from(buckets).sort(([, a], [, b]) => b.length - a.length);
    ranked.forEach(([language, members]) => {
      out.push(`### ${language} (${members.length})`);
      members.sort((a, b) => a.path.localeCompare(b.path));
      for (const { path, nodeCount } of members) {
        out.push(includeMetadata ? `- ${path} (${nodeCount} symbols)` : `- ${path}`);
      }
      out.push('');
    });

    return out.join('\n');
  }
  /**
   * Format files as an indented directory tree.
   *
   * Paths are split on `/` and merged into a tree; empty segments are
   * skipped. At every level directories are listed before files, each group
   * alphabetically. Entries are drawn with `├── ` / `└── ` connectors, and
   * each nesting level indents by the same four columns as a connector so
   * the vertical guides line up under their parent.
   *
   * @param files - Files to render.
   * @param includeMetadata - When true, file entries also show language and
   *   symbol count.
   * @param maxDepth - Optional depth limit; top-level entries are depth 1 and
   *   anything deeper than the limit is omitted.
   * @returns Markdown text starting with a `## Project Structure (N files)` heading.
   */
  private formatFilesTree(
    files: { path: string; language: string; nodeCount: number }[],
    includeMetadata: boolean,
    maxDepth?: number
  ): string {
    interface TreeNode {
      name: string;
      children: Map<string, TreeNode>;
      file?: { language: string; nodeCount: number };
    }

    // Build the tree: one node per path segment, shared between files.
    const root: TreeNode = { name: '', children: new Map() };
    for (const file of files) {
      const parts = file.path.split('/');
      let current = root;
      for (let i = 0; i < parts.length; i++) {
        const part = parts[i];
        if (!part) continue; // empty segment (leading or doubled slash)

        let child = current.children.get(part);
        if (!child) {
          child = { name: part, children: new Map() };
          current.children.set(part, child);
        }
        current = child;

        // The final segment of the path is the file itself.
        if (i === parts.length - 1) {
          current.file = { language: file.language, nodeCount: file.nodeCount };
        }
      }
    }

    const lines: string[] = [`## Project Structure (${files.length} files)`, ''];

    const renderNode = (node: TreeNode, prefix: string, isLast: boolean, depth: number): void => {
      if (maxDepth !== undefined && depth > maxDepth) return;

      // The synthetic root has no name and prints nothing.
      if (node.name) {
        let line = prefix + (isLast ? '└── ' : '├── ') + node.name;
        if (node.file && includeMetadata) {
          line += ` (${node.file.language}, ${node.file.nodeCount} symbols)`;
        }
        lines.push(line);
      }

      // Directories first, then files, both alphabetically.
      const children = [...node.children.values()];
      children.sort((a, b) => {
        const aIsDir = a.children.size > 0 && !a.file;
        const bIsDir = b.children.size > 0 && !b.file;
        if (aIsDir !== bIsDir) return aIsDir ? -1 : 1;
        return a.name.localeCompare(b.name);
      });

      // Indent children by exactly one connector width (4 columns); keep the
      // vertical guide only while this node still has siblings below it.
      // The root contributes no indentation of its own.
      const nextPrefix = node.name ? prefix + (isLast ? '    ' : '│   ') : prefix;
      children.forEach((child, index) => {
        renderNode(child, nextPrefix, index === children.length - 1, depth + 1);
      });
    };

    renderNode(root, '', true, 0);
    return lines.join('\n');
  }
  1423. // =========================================================================
  1424. // Symbol resolution helpers
  1425. // =========================================================================
  // The helpers in this section resolve a user-supplied symbol name to
  // indexed nodes. `findSymbol` finds a symbol by name, handling
  // disambiguation when multiple matches exist: it returns the best match
  // and a note about the alternatives, if any.
  /**
   * Decide whether `node` satisfies the symbol query `symbol`.
   *
   * A query is either a bare name (`run`) or a qualified name whose parts
   * are separated by `.`, `::` or `/`:
   *   - dotted      `Session.request`
   *   - colon pair  `stage_apply::run`
   *   - slash       `configurator/stage_apply`
   * Separators can be mixed and nested, e.g.
   * `crate::configurator::stage_apply::run`.
   *
   * Checks, in order:
   *   1. `node.name` equals the whole query.
   *   2. `node` is a file whose name, minus its last extension, equals the
   *      query (`product-card` matches `product-card.liquid`).
   *   3. Qualified queries only. The last part must equal `node.name`, and
   *      then either
   *      a. the parts joined with `::` occur as a substring of
   *         `node.qualifiedName`, or
   *      b. every qualifier part that is not in `RUST_PATH_PREFIXES`
   *         (presumably `crate`, `super`, `self`) names a segment of
   *         `node.filePath`, with or without its extension. At least one
   *         such part must remain after filtering.
   *
   * @returns true when any of the checks above succeeds.
   */
  private matchesSymbol(node: Node, symbol: string): boolean {
    const stripExtension = (name: string): string => name.replace(/\.[^.]+$/, '');

    // Bare-name and file-basename matches need no parsing.
    if (symbol === node.name) return true;
    if (node.kind === 'file' && stripExtension(node.name) === symbol) return true;

    // Everything below only applies to queries containing a separator.
    const hasSeparator = /[.\/]|::/.test(symbol);
    if (!hasSeparator) return false;

    const parts = symbol.split(/::|[./]/).filter((piece) => piece !== '');
    if (parts.length < 2) return false;
    if (parts[parts.length - 1] !== node.name) return false;

    // (a) Qualified-name containment. All separators are normalised to `::`
    // before comparing, so `Session.request` is looked up as `Session::request`.
    if (node.qualifiedName.includes(parts.join('::'))) return true;

    // (b) File-path containment, for qualifiers that name a module/file
    // rather than an enclosing symbol. Path prefixes with no file-system
    // counterpart are dropped first.
    const qualifiers = parts.slice(0, -1).filter((piece) => !RUST_PATH_PREFIXES.has(piece));
    if (qualifiers.length === 0) return false;

    const pathNames = new Set<string>();
    for (const segment of node.filePath.split('/')) {
      if (segment.length === 0) continue;
      pathNames.add(segment);
      pathNames.add(stripExtension(segment));
    }
    return qualifiers.every((qualifier) => pathNames.has(qualifier));
  }
  /**
   * Resolve a symbol query to a single node.
   *
   * Searches the index for `symbol`, then keeps the hits accepted by
   * `matchesSymbol`:
   * - exactly one match: that node, with an empty note;
   * - several matches: the first one, with a note listing the others;
   * - no match: the top search hit for a bare name, but `null` for a
   *   qualified query (one containing `.`, `/` or `::`), because falling
   *   back to a fuzzy hit would ignore the qualifier the user typed (#173).
   *
   * @returns The chosen node and a (possibly empty) markdown note, or `null`
   *   when nothing suitable was found.
   */
  private findSymbol(cg: CodeGraph, symbol: string): { node: Node; note: string } | null {
    // Qualified queries get a larger window: the wanted node can rank low
    // when many nodes partially match the qualifier parts.
    const qualified = /[.\/]|::/.test(symbol);
    const limit = qualified ? 50 : 10;

    let hits = cg.searchNodes(symbol, { limit });
    if (qualified && hits.length === 0) {
      // The full-text search drops colons, so `a::b` can come back empty.
      // Retry with the bare last part; `matchesSymbol` re-applies the qualifier.
      const tail = lastQualifierPart(symbol);
      if (tail && tail !== symbol) hits = cg.searchNodes(tail, { limit });
    }

    const top = hits[0];
    if (!top) return null;

    const matching = hits
      .filter((hit) => this.matchesSymbol(hit.node, symbol))
      .map((hit) => hit.node);
    const [picked, ...rest] = matching;

    if (!picked) {
      // Nothing matched exactly: only bare names may fall back to the top hit.
      return qualified ? null : { node: top.node, note: '' };
    }
    if (rest.length === 0) {
      return { node: picked, note: '' };
    }

    // Ambiguous: report the first match and list the rest.
    const others = rest.map((n) => `${n.name} (${n.kind}) at ${n.filePath}:${n.startLine}`);
    const note = `\n\n> **Note:** ${matching.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`;
    return { node: picked, note };
  }
  /**
   * Find ALL symbols matching a name. Used by callers/callees/impact to
   * aggregate results across every matching symbol (e.g. several classes
   * that each have an `execute` method).
   *
   * Resolution:
   * - several nodes accepted by `matchesSymbol`: all of them, plus a note
   *   listing their locations;
   * - exactly one: that node, empty note;
   * - none: the top search hit for a bare name, but NO nodes for a qualified
   *   query (one containing `.`, `/` or `::`). A qualified query names a
   *   specific container, so substituting an unrelated fuzzy hit would
   *   silently aggregate results for the wrong symbol.
   *
   * @returns The matching nodes (possibly empty) and a (possibly empty)
   *   markdown note.
   */
  private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } {
    const isQualified = /[.\/]|::/.test(symbol);

    let results = cg.searchNodes(symbol, { limit: 50 });
    // The full-text search drops colons, so a module-qualified lookup can
    // come back empty; retry with the bare last part and let
    // `matchesSymbol` re-apply the qualifier.
    if (isQualified && results.length === 0) {
      const tail = lastQualifierPart(symbol);
      if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit: 50 });
    }

    const topHit = results[0];
    if (!topHit) {
      return { nodes: [], note: '' };
    }

    const exactNodes = results
      .filter((r) => this.matchesSymbol(r.node, symbol))
      .map((r) => r.node);

    if (exactNodes.length === 0) {
      // No exact match: only a bare name may fall back to the best fuzzy hit.
      return { nodes: isQualified ? [] : [topHit.node], note: '' };
    }
    if (exactNodes.length === 1) {
      return { nodes: exactNodes, note: '' };
    }

    const locations = exactNodes.map((n) => `${n.kind} at ${n.filePath}:${n.startLine}`);
    const note = `\n\n> **Note:** Aggregated results across ${exactNodes.length} symbols named "${symbol}": ${locations.join(', ')}`;
    return { nodes: exactNodes, note };
  }
  /**
   * Truncate output that exceeds `MAX_OUTPUT_LENGTH`.
   *
   * Text within the limit is returned unchanged. Longer text is cut at the
   * last newline inside the limit when that newline lies in the final 20% of
   * the allowed length, otherwise at the limit itself, and a truncation
   * marker is appended.
   *
   * @param text - The full output.
   * @returns `text`, or its truncated form followed by
   *   `\n\n... (output truncated)`.
   */
  private truncateOutput(text: string): string {
    if (text.length <= MAX_OUTPUT_LENGTH) return text;

    // Last newline within the first MAX_OUTPUT_LENGTH characters.
    const lastNewline = text.lastIndexOf('\n', MAX_OUTPUT_LENGTH - 1);
    let cutPoint = lastNewline > MAX_OUTPUT_LENGTH * 0.8 ? lastNewline : MAX_OUTPUT_LENGTH;

    // Strings are UTF-16: a hard cut can land between the two halves of a
    // surrogate pair (emoji, some CJK) and leave a lone, unrenderable code
    // unit at the end. Back off by one so the character is dropped whole.
    const before = text.charCodeAt(cutPoint - 1);
    const after = text.charCodeAt(cutPoint);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cutPoint -= 1;

    return text.slice(0, cutPoint) + '\n\n... (output truncated)';
  }
  1553. // =========================================================================
  1554. // Formatting helpers (compact by default to reduce context usage)
  1555. // =========================================================================
  /**
   * Render search results compactly: a heading with the result count, then
   * for each result a `### name (kind)` line, its `path[:line]`, its
   * signature in backticks when present, and a blank separator line.
   */
  private formatSearchResults(results: SearchResult[]): string {
    const out: string[] = [`## Search Results (${results.length} found)`, ''];

    results.forEach(({ node }) => {
      // The line suffix is omitted when the node has no (or a zero) start line.
      const where = node.startLine ? `${node.filePath}:${node.startLine}` : `${node.filePath}`;

      out.push(`### ${node.name} (${node.kind})`, where);
      if (node.signature) {
        out.push(`\`${node.signature}\``);
      }
      out.push('');
    });

    return out.join('\n');
  }
  /**
   * Render nodes as a titled bullet list, one `name (kind) - path[:line]`
   * entry per node. The heading carries `title` and the node count.
   */
  private formatNodeList(nodes: Node[], title: string): string {
    const rows = nodes.map((n) => {
      // The line suffix is omitted when the node has no (or a zero) start line.
      const where = n.startLine ? `${n.filePath}:${n.startLine}` : `${n.filePath}`;
      return `- ${n.name} (${n.kind}) - ${where}`;
    });
    return [`## ${title} (${nodes.length} found)`, '', ...rows].join('\n');
  }
  /**
   * Render an impact subgraph compactly: a heading with the number of
   * affected symbols, then one paragraph per file listing the affected
   * symbols in that file as a comma-separated `name[:line]` list.
   *
   * @param symbol - The symbol whose impact was computed; echoed in the heading.
   * @param impact - Subgraph whose `nodes` are the affected symbols.
   * @returns Markdown text; files appear in the order their first node is
   *   encountered in `impact.nodes`.
   */
  private formatImpact(symbol: string, impact: Subgraph): string {
    const lines: string[] = [
      `## Impact: "${symbol}" affects ${impact.nodes.size} symbols`,
      '',
    ];

    // Group the affected nodes by the file that contains them.
    const byFile = new Map<string, Node[]>();
    for (const node of impact.nodes.values()) {
      const bucket = byFile.get(node.filePath);
      if (bucket) {
        bucket.push(node);
      } else {
        byFile.set(node.filePath, [node]);
      }
    }

    for (const [file, nodes] of byFile) {
      lines.push(`**${file}:**`);
      // Like the other formatters, drop the line suffix when a node has no
      // start line instead of printing `name:undefined`.
      lines.push(
        nodes.map((n) => (n.startLine ? `${n.name}:${n.startLine}` : `${n.name}`)).join(', ')
      );
      lines.push('');
    }

    return lines.join('\n');
  }
  /**
   * Build a compact outline of a container symbol from its indexed children.
   *
   * Children of kind `import` or `export` are ignored. The rest are listed
   * in source order (by start line; a missing start line sorts as 0) as
   * `- name (kind)[:line][ — `signature`]` bullets under a
   * `**Members (N):**` header.
   *
   * @returns The outline, or `''` when the container has no listable
   *   children, so the caller can fall back to showing full source.
   */
  private buildContainerOutline(cg: CodeGraph, node: Node): string {
    const members = cg
      .getChildren(node.id)
      .filter((child) => !(child.kind === 'import' || child.kind === 'export'));
    if (members.length === 0) return '';

    members.sort((a, b) => (a.startLine ?? 0) - (b.startLine ?? 0));

    const rows = members.map((m) => {
      let row = `- ${m.name} (${m.kind})`;
      if (m.startLine) row += `:${m.startLine}`;
      if (m.signature) row += ` — \`${m.signature}\``;
      return row;
    });

    return [`**Members (${members.length}):**`, '', ...rows].join('\n');
  }
  /**
   * Render the detail view of a single node: heading, location, optional
   * signature, optional short docstring, and then either a structural
   * outline or the node's source.
   *
   * @param node - The node to describe.
   * @param code - Source text of the node; shown in a fenced block tagged
   *   with `node.language`, but only when no outline is given.
   * @param outline - Pre-rendered member outline; when non-empty it replaces
   *   the source and is followed by a hint on how to get member bodies.
   */
  private formatNodeDetails(node: Node, code: string | null, outline?: string | null): string {
    const where = node.startLine ? `${node.filePath}:${node.startLine}` : `${node.filePath}`;
    const out: string[] = [`## ${node.name} (${node.kind})`, '', `**Location:** ${where}`];

    if (node.signature) {
      out.push(`**Signature:** \`${node.signature}\``);
    }

    // Docstrings of 200 characters or more are left out to keep the output compact.
    const doc = node.docstring;
    if (doc && doc.length < 200) {
      out.push('', doc);
    }

    if (outline) {
      out.push('', outline, '');
      out.push(
        `> Structural outline only. Read \`${node.filePath}\` or call codegraph_node on a specific member for its body.`
      );
    } else if (code) {
      out.push('', '```' + node.language, code, '```');
    }

    return out.join('\n');
  }
  /** Render a task context: its summary, or a fixed placeholder when the summary is empty or missing. */
  private formatTaskContext(context: TaskContext): string {
    const { summary } = context;
    if (summary) return summary;
    return 'No context found';
  }
  /** Wrap plain text in a tool result holding a single text content item. */
  private textResult(text: string): ToolResult {
    const result: ToolResult = { content: [{ type: 'text', text }] };
    return result;
  }
  /** Build an error tool result: the message prefixed with `Error: ` and `isError` set. */
  private errorResult(message: string): ToolResult {
    const text = `Error: ${message}`;
    const result: ToolResult = { content: [{ type: 'text', text }], isError: true };
    return result;
  }
  1657. }