index.ts 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /**
  2. * Context Builder
  3. *
  4. * Builds rich context for tasks by combining semantic search with graph traversal.
  5. * Outputs structured context ready to inject into Claude.
  6. */
  7. import * as fs from 'fs';
  8. import {
  9. Node,
  10. Edge,
  11. NodeKind,
  12. EdgeKind,
  13. Subgraph,
  14. CodeBlock,
  15. TaskContext,
  16. TaskInput,
  17. BuildContextOptions,
  18. FindRelevantContextOptions,
  19. SearchResult,
  20. } from '../types';
  21. import { QueryBuilder } from '../db/queries';
  22. import { GraphTraverser } from '../graph';
  23. import { VectorManager } from '../vectors';
  24. import { formatContextAsMarkdown, formatContextAsJson } from './formatter';
  25. import { logDebug, logWarn } from '../errors';
  26. import { validatePathWithinRoot } from '../utils';
  27. /**
  28. * Extract likely symbol names from a natural language query
  29. *
  30. * Identifies potential code symbols using patterns:
  31. * - CamelCase: UserService, signInWithGoogle
  32. * - snake_case: user_service, sign_in
  33. * - SCREAMING_SNAKE: MAX_RETRIES
  34. * - dot.notation: app.isPackaged (extracts both sides)
  35. * - Single words that look like identifiers (no spaces, not common English words)
  36. *
  37. * @param query - Natural language query
  38. * @returns Array of potential symbol names
  39. */
  40. function extractSymbolsFromQuery(query: string): string[] {
  41. const symbols = new Set<string>();
  42. // Extract CamelCase identifiers (2+ chars, starts with letter)
  43. const camelCasePattern = /\b([A-Z][a-z]+(?:[A-Z][a-z]*)*|[a-z]+(?:[A-Z][a-z]*)+)\b/g;
  44. let match;
  45. while ((match = camelCasePattern.exec(query)) !== null) {
  46. if (match[1] && match[1].length >= 2) {
  47. symbols.add(match[1]);
  48. }
  49. }
  50. // Extract snake_case identifiers
  51. const snakeCasePattern = /\b([a-z][a-z0-9]*(?:_[a-z0-9]+)+)\b/gi;
  52. while ((match = snakeCasePattern.exec(query)) !== null) {
  53. if (match[1] && match[1].length >= 3) {
  54. symbols.add(match[1]);
  55. }
  56. }
  57. // Extract SCREAMING_SNAKE_CASE
  58. const screamingPattern = /\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/g;
  59. while ((match = screamingPattern.exec(query)) !== null) {
  60. if (match[1]) {
  61. symbols.add(match[1]);
  62. }
  63. }
  64. // Extract dot.notation and split into parts (e.g., "app.isPackaged" -> ["app", "isPackaged"])
  65. const dotPattern = /\b([a-zA-Z][a-zA-Z0-9]*(?:\.[a-zA-Z][a-zA-Z0-9]*)+)\b/g;
  66. while ((match = dotPattern.exec(query)) !== null) {
  67. if (match[1]) {
  68. // Add both the full path and individual parts
  69. symbols.add(match[1]);
  70. const parts = match[1].split('.');
  71. for (const part of parts) {
  72. if (part.length >= 2) {
  73. symbols.add(part);
  74. }
  75. }
  76. }
  77. }
  78. // Filter out common English words that might match patterns
  79. const commonWords = new Set([
  80. 'the', 'and', 'for', 'with', 'from', 'this', 'that', 'have', 'been',
  81. 'will', 'would', 'could', 'should', 'does', 'done', 'make', 'made',
  82. 'use', 'used', 'using', 'work', 'works', 'find', 'found', 'show',
  83. 'call', 'called', 'calling', 'get', 'set', 'add', 'all', 'any',
  84. 'how', 'what', 'when', 'where', 'which', 'who', 'why'
  85. ]);
  86. return Array.from(symbols).filter(s => !commonWords.has(s.toLowerCase()));
  87. }
  88. /**
  89. * Default options for context building
  90. *
  91. * Tuned for minimal context usage while still providing useful results:
  92. * - Fewer nodes and code blocks by default
  93. * - Smaller code block size limit
  94. * - Shallower traversal
  95. */
  96. const DEFAULT_BUILD_OPTIONS: Required<BuildContextOptions> = {
  97. maxNodes: 20, // Reduced from 50 - most tasks don't need 50 symbols
  98. maxCodeBlocks: 5, // Reduced from 10 - only show most relevant code
  99. maxCodeBlockSize: 1500, // Reduced from 2000
  100. includeCode: true,
  101. format: 'markdown',
  102. searchLimit: 3, // Reduced from 5 - fewer entry points
  103. traversalDepth: 1, // Reduced from 2 - shallower graph expansion
  104. minScore: 0.3,
  105. };
  106. /**
  107. * Node kinds that provide high information value in context results.
  108. * Imports/exports are excluded because they have near-zero information density -
  109. * they tell you something exists, not how it works.
  110. */
  111. const HIGH_VALUE_NODE_KINDS: NodeKind[] = [
  112. 'function', 'method', 'class', 'interface', 'type_alias', 'struct', 'trait',
  113. 'component', 'route', 'variable', 'constant', 'enum', 'module', 'namespace',
  114. ];
  115. /**
  116. * Default options for finding relevant context
  117. */
  118. const DEFAULT_FIND_OPTIONS: Required<FindRelevantContextOptions> = {
  119. searchLimit: 3, // Reduced from 5
  120. traversalDepth: 1, // Reduced from 2
  121. maxNodes: 20, // Reduced from 50
  122. minScore: 0.3,
  123. edgeKinds: [],
  124. nodeKinds: HIGH_VALUE_NODE_KINDS, // Filter out imports/exports by default
  125. };
  126. /**
  127. * Context Builder
  128. *
  129. * Coordinates semantic search and graph traversal to build
  130. * comprehensive context for tasks.
  131. */
  132. export class ContextBuilder {
  133. private projectRoot: string;
  134. private queries: QueryBuilder;
  135. private traverser: GraphTraverser;
  136. private vectorManager: VectorManager | null;
  137. constructor(
  138. projectRoot: string,
  139. queries: QueryBuilder,
  140. traverser: GraphTraverser,
  141. vectorManager: VectorManager | null
  142. ) {
  143. this.projectRoot = projectRoot;
  144. this.queries = queries;
  145. this.traverser = traverser;
  146. this.vectorManager = vectorManager;
  147. }
  148. /**
  149. * Build context for a task
  150. *
  151. * Pipeline:
  152. * 1. Parse task input (string or {title, description})
  153. * 2. Run semantic search to find entry points
  154. * 3. Expand graph around entry points
  155. * 4. Extract code blocks for key nodes
  156. * 5. Format output for Claude
  157. *
  158. * @param input - Task description or object with title/description
  159. * @param options - Build options
  160. * @returns TaskContext (structured) or formatted string
  161. */
  162. async buildContext(
  163. input: TaskInput,
  164. options: BuildContextOptions = {}
  165. ): Promise<TaskContext | string> {
  166. const opts = { ...DEFAULT_BUILD_OPTIONS, ...options };
  167. // Parse input
  168. const query = typeof input === 'string' ? input : `${input.title}${input.description ? `: ${input.description}` : ''}`;
  169. // Find relevant context (semantic search + graph expansion)
  170. const subgraph = await this.findRelevantContext(query, {
  171. searchLimit: opts.searchLimit,
  172. traversalDepth: opts.traversalDepth,
  173. maxNodes: opts.maxNodes,
  174. minScore: opts.minScore,
  175. });
  176. // Get entry points (nodes from semantic search)
  177. const entryPoints = this.getEntryPoints(subgraph);
  178. // Extract code blocks for key nodes
  179. const codeBlocks = opts.includeCode
  180. ? await this.extractCodeBlocks(subgraph, opts.maxCodeBlocks, opts.maxCodeBlockSize)
  181. : [];
  182. // Get related files
  183. const relatedFiles = this.getRelatedFiles(subgraph);
  184. // Generate summary
  185. const summary = this.generateSummary(query, subgraph, entryPoints);
  186. // Calculate stats
  187. const stats = {
  188. nodeCount: subgraph.nodes.size,
  189. edgeCount: subgraph.edges.length,
  190. fileCount: relatedFiles.length,
  191. codeBlockCount: codeBlocks.length,
  192. totalCodeSize: codeBlocks.reduce((sum, block) => sum + block.content.length, 0),
  193. };
  194. const context: TaskContext = {
  195. query,
  196. subgraph,
  197. entryPoints,
  198. codeBlocks,
  199. relatedFiles,
  200. summary,
  201. stats,
  202. };
  203. // Return formatted output or raw context
  204. if (opts.format === 'markdown') {
  205. return formatContextAsMarkdown(context);
  206. } else if (opts.format === 'json') {
  207. return formatContextAsJson(context);
  208. }
  209. return context;
  210. }
  211. /**
  212. * Find relevant subgraph for a query
  213. *
  214. * Uses hybrid search combining exact symbol lookup with semantic search:
  215. * 1. Extract potential symbol names from query
  216. * 2. Look up exact matches for those symbols (high confidence)
  217. * 3. Use semantic search for concept matching
  218. * 4. Merge results, prioritizing exact matches
  219. * 5. Traverse graph from entry points
  220. *
  221. * @param query - Natural language query
  222. * @param options - Search and traversal options
  223. * @returns Subgraph of relevant nodes and edges
  224. */
  225. async findRelevantContext(
  226. query: string,
  227. options: FindRelevantContextOptions = {}
  228. ): Promise<Subgraph> {
  229. const opts = { ...DEFAULT_FIND_OPTIONS, ...options };
  230. // Start with empty subgraph
  231. const nodes = new Map<string, Node>();
  232. const edges: Edge[] = [];
  233. const roots: string[] = [];
  234. // Handle empty query - return empty subgraph
  235. if (!query || query.trim().length === 0) {
  236. return { nodes, edges, roots };
  237. }
  238. // === HYBRID SEARCH ===
  239. // Step 1: Extract potential symbol names from query
  240. const symbolsFromQuery = extractSymbolsFromQuery(query);
  241. logDebug('Extracted symbols from query', { query, symbols: symbolsFromQuery });
  242. // Step 2: Look up exact matches for extracted symbols
  243. let exactMatches: SearchResult[] = [];
  244. if (symbolsFromQuery.length > 0) {
  245. try {
  246. exactMatches = this.queries.findNodesByExactName(symbolsFromQuery, {
  247. limit: Math.ceil(opts.searchLimit * 2), // Get more since we'll merge
  248. kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  249. });
  250. logDebug('Exact symbol matches', { count: exactMatches.length });
  251. } catch (error) {
  252. logDebug('Exact symbol lookup failed', { error: String(error) });
  253. }
  254. }
  255. // Step 3: Try semantic search if vector manager is available
  256. let semanticResults: SearchResult[] = [];
  257. if (this.vectorManager && this.vectorManager.isInitialized()) {
  258. try {
  259. semanticResults = await this.vectorManager.search(query, {
  260. limit: opts.searchLimit,
  261. kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  262. });
  263. logDebug('Semantic search results', { count: semanticResults.length });
  264. } catch (error) {
  265. logDebug('Semantic search failed, falling back to text search', { query, error: String(error) });
  266. }
  267. }
  268. // Step 4: Fall back to text search if no semantic results
  269. if (semanticResults.length === 0 && exactMatches.length === 0) {
  270. try {
  271. const textResults = this.queries.searchNodes(query, {
  272. limit: opts.searchLimit,
  273. kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  274. });
  275. semanticResults = textResults;
  276. } catch (error) {
  277. logWarn('Text search failed', { query, error: String(error) });
  278. // Return empty results
  279. }
  280. }
  281. // Step 5: Merge results, prioritizing exact matches
  282. const seenIds = new Set<string>();
  283. let searchResults: SearchResult[] = [];
  284. // Add exact matches first (highest priority)
  285. for (const result of exactMatches) {
  286. if (!seenIds.has(result.node.id)) {
  287. seenIds.add(result.node.id);
  288. searchResults.push(result);
  289. }
  290. }
  291. // Add semantic/text results
  292. for (const result of semanticResults) {
  293. if (!seenIds.has(result.node.id)) {
  294. seenIds.add(result.node.id);
  295. searchResults.push(result);
  296. }
  297. }
  298. // Limit total results
  299. searchResults = searchResults.slice(0, opts.searchLimit * 2);
  300. // Filter by minimum score
  301. let filteredResults = searchResults.filter((r) => r.score >= opts.minScore);
  302. // Resolve imports/exports to their actual definitions
  303. // If someone searches "terminal" and finds `import { TerminalPanel }`,
  304. // they want the TerminalPanel class, not the import statement
  305. filteredResults = this.resolveImportsToDefinitions(filteredResults);
  306. // Add entry points to subgraph
  307. for (const result of filteredResults) {
  308. nodes.set(result.node.id, result.node);
  309. roots.push(result.node.id);
  310. }
  311. // Traverse from each entry point
  312. for (const result of filteredResults) {
  313. const traversalResult = this.traverser.traverseBFS(result.node.id, {
  314. maxDepth: opts.traversalDepth,
  315. edgeKinds: opts.edgeKinds && opts.edgeKinds.length > 0 ? opts.edgeKinds : undefined,
  316. nodeKinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  317. direction: 'both',
  318. limit: Math.ceil(opts.maxNodes / Math.max(1, filteredResults.length)),
  319. });
  320. // Merge nodes
  321. for (const [id, node] of traversalResult.nodes) {
  322. if (!nodes.has(id)) {
  323. nodes.set(id, node);
  324. }
  325. }
  326. // Merge edges (avoid duplicates)
  327. for (const edge of traversalResult.edges) {
  328. const exists = edges.some(
  329. (e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind
  330. );
  331. if (!exists) {
  332. edges.push(edge);
  333. }
  334. }
  335. }
  336. // Trim to max nodes if needed
  337. if (nodes.size > opts.maxNodes) {
  338. // Prioritize entry points and their direct neighbors
  339. const priorityIds = new Set(roots);
  340. for (const edge of edges) {
  341. if (priorityIds.has(edge.source)) {
  342. priorityIds.add(edge.target);
  343. }
  344. if (priorityIds.has(edge.target)) {
  345. priorityIds.add(edge.source);
  346. }
  347. }
  348. // Keep priority nodes, then fill remaining slots
  349. const trimmedNodes = new Map<string, Node>();
  350. for (const id of priorityIds) {
  351. const node = nodes.get(id);
  352. if (node && trimmedNodes.size < opts.maxNodes) {
  353. trimmedNodes.set(id, node);
  354. }
  355. }
  356. // Fill remaining from other nodes
  357. for (const [id, node] of nodes) {
  358. if (trimmedNodes.size >= opts.maxNodes) break;
  359. if (!trimmedNodes.has(id)) {
  360. trimmedNodes.set(id, node);
  361. }
  362. }
  363. // Filter edges to only include kept nodes
  364. const trimmedEdges = edges.filter(
  365. (e) => trimmedNodes.has(e.source) && trimmedNodes.has(e.target)
  366. );
  367. return { nodes: trimmedNodes, edges: trimmedEdges, roots };
  368. }
  369. return { nodes, edges, roots };
  370. }
  371. /**
  372. * Get the source code for a node
  373. *
  374. * Reads the file and extracts the code between startLine and endLine.
  375. *
  376. * @param nodeId - ID of the node
  377. * @returns Code string or null if not found
  378. */
  379. async getCode(nodeId: string): Promise<string | null> {
  380. const node = this.queries.getNodeById(nodeId);
  381. if (!node) {
  382. return null;
  383. }
  384. return this.extractNodeCode(node);
  385. }
  386. /**
  387. * Extract code from a node's source file
  388. */
  389. private async extractNodeCode(node: Node): Promise<string | null> {
  390. const filePath = validatePathWithinRoot(this.projectRoot, node.filePath);
  391. if (!filePath || !fs.existsSync(filePath)) {
  392. return null;
  393. }
  394. try {
  395. const content = fs.readFileSync(filePath, 'utf-8');
  396. const lines = content.split('\n');
  397. // Extract lines (1-indexed to 0-indexed)
  398. const startIdx = Math.max(0, node.startLine - 1);
  399. const endIdx = Math.min(lines.length, node.endLine);
  400. return lines.slice(startIdx, endIdx).join('\n');
  401. } catch (error) {
  402. logDebug('Failed to extract code from node', { nodeId: node.id, filePath: node.filePath, error: String(error) });
  403. return null;
  404. }
  405. }
  406. /**
  407. * Get entry points from a subgraph (the root nodes)
  408. */
  409. private getEntryPoints(subgraph: Subgraph): Node[] {
  410. return subgraph.roots
  411. .map((id) => subgraph.nodes.get(id))
  412. .filter((n): n is Node => n !== undefined);
  413. }
  414. /**
  415. * Extract code blocks for key nodes in the subgraph
  416. */
  417. private async extractCodeBlocks(
  418. subgraph: Subgraph,
  419. maxBlocks: number,
  420. maxBlockSize: number
  421. ): Promise<CodeBlock[]> {
  422. const blocks: CodeBlock[] = [];
  423. // Prioritize entry points, then functions/methods
  424. const priorityNodes: Node[] = [];
  425. // First: entry points
  426. for (const id of subgraph.roots) {
  427. const node = subgraph.nodes.get(id);
  428. if (node) {
  429. priorityNodes.push(node);
  430. }
  431. }
  432. // Then: functions and methods
  433. for (const node of subgraph.nodes.values()) {
  434. if (!subgraph.roots.includes(node.id)) {
  435. if (node.kind === 'function' || node.kind === 'method') {
  436. priorityNodes.push(node);
  437. }
  438. }
  439. }
  440. // Then: classes
  441. for (const node of subgraph.nodes.values()) {
  442. if (!subgraph.roots.includes(node.id)) {
  443. if (node.kind === 'class') {
  444. priorityNodes.push(node);
  445. }
  446. }
  447. }
  448. // Extract code for priority nodes
  449. for (const node of priorityNodes) {
  450. if (blocks.length >= maxBlocks) break;
  451. const code = await this.extractNodeCode(node);
  452. if (code) {
  453. // Truncate if too long
  454. const truncated = code.length > maxBlockSize
  455. ? code.slice(0, maxBlockSize) + '\n// ... truncated ...'
  456. : code;
  457. blocks.push({
  458. content: truncated,
  459. filePath: node.filePath,
  460. startLine: node.startLine,
  461. endLine: node.endLine,
  462. language: node.language,
  463. node,
  464. });
  465. }
  466. }
  467. return blocks;
  468. }
  469. /**
  470. * Get unique files from a subgraph
  471. */
  472. private getRelatedFiles(subgraph: Subgraph): string[] {
  473. const files = new Set<string>();
  474. for (const node of subgraph.nodes.values()) {
  475. files.add(node.filePath);
  476. }
  477. return Array.from(files).sort();
  478. }
  479. /**
  480. * Generate a summary of the context
  481. */
  482. private generateSummary(_query: string, subgraph: Subgraph, entryPoints: Node[]): string {
  483. const nodeCount = subgraph.nodes.size;
  484. const edgeCount = subgraph.edges.length;
  485. const files = this.getRelatedFiles(subgraph);
  486. const entryPointNames = entryPoints
  487. .slice(0, 3)
  488. .map((n) => n.name)
  489. .join(', ');
  490. const remaining = entryPoints.length > 3 ? ` and ${entryPoints.length - 3} more` : '';
  491. return `Found ${nodeCount} relevant code symbols across ${files.length} files. ` +
  492. `Key entry points: ${entryPointNames}${remaining}. ` +
  493. `${edgeCount} relationships identified.`;
  494. }
  495. /**
  496. * Resolve import/export nodes to their actual definitions
  497. *
  498. * When search returns `import { TerminalPanel }`, users want the TerminalPanel
  499. * class definition, not the import statement. This follows the `imports` edge
  500. * to find and return the actual definition instead.
  501. *
  502. * @param results - Search results that may include import/export nodes
  503. * @returns Results with imports resolved to definitions where possible
  504. */
  505. private resolveImportsToDefinitions(results: SearchResult[]): SearchResult[] {
  506. const resolved: SearchResult[] = [];
  507. const seenIds = new Set<string>();
  508. for (const result of results) {
  509. const { node, score } = result;
  510. // If it's not an import/export, keep it as-is
  511. if (node.kind !== 'import' && node.kind !== 'export') {
  512. if (!seenIds.has(node.id)) {
  513. seenIds.add(node.id);
  514. resolved.push(result);
  515. }
  516. continue;
  517. }
  518. // For imports/exports, try to find what they reference
  519. // Imports have outgoing 'imports' edges to the definition
  520. // Exports have outgoing 'exports' edges to the definition
  521. const edgeKind = node.kind === 'import' ? 'imports' : 'exports';
  522. const outgoingEdges = this.queries.getOutgoingEdges(node.id, [edgeKind as EdgeKind]);
  523. let foundDefinition = false;
  524. for (const edge of outgoingEdges) {
  525. const targetNode = this.queries.getNodeById(edge.target);
  526. if (targetNode && !seenIds.has(targetNode.id)) {
  527. // Found the definition - use it instead of the import
  528. seenIds.add(targetNode.id);
  529. resolved.push({
  530. node: targetNode,
  531. score: score, // Preserve the original score
  532. });
  533. foundDefinition = true;
  534. logDebug('Resolved import to definition', {
  535. import: node.name,
  536. definition: targetNode.name,
  537. kind: targetNode.kind,
  538. });
  539. }
  540. }
  541. // If we couldn't resolve the import, skip it (it's low-value on its own)
  542. if (!foundDefinition) {
  543. logDebug('Skipping unresolved import', { name: node.name, file: node.filePath });
  544. }
  545. }
  546. return resolved;
  547. }
  548. }
  549. /**
  550. * Create a context builder
  551. */
  552. export function createContextBuilder(
  553. projectRoot: string,
  554. queries: QueryBuilder,
  555. traverser: GraphTraverser,
  556. vectorManager: VectorManager | null
  557. ): ContextBuilder {
  558. return new ContextBuilder(projectRoot, queries, traverser, vectorManager);
  559. }
  560. // Re-export formatter
  561. export { formatContextAsMarkdown, formatContextAsJson } from './formatter';