index.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /**
  2. * Context Builder
  3. *
  4. * Builds rich context for tasks by combining semantic search with graph traversal.
  5. * Outputs structured context ready to inject into Claude.
  6. */
  7. import * as fs from 'fs';
  8. import * as path from 'path';
  9. import {
  10. Node,
  11. Edge,
  12. Subgraph,
  13. CodeBlock,
  14. TaskContext,
  15. TaskInput,
  16. BuildContextOptions,
  17. FindRelevantContextOptions,
  18. SearchResult,
  19. } from '../types';
  20. import { QueryBuilder } from '../db/queries';
  21. import { GraphTraverser } from '../graph';
  22. import { VectorManager } from '../vectors';
  23. import { formatContextAsMarkdown, formatContextAsJson } from './formatter';
  24. import { logDebug, logWarn } from '../errors';
  25. /**
  26. * Default options for context building
  27. */
  28. const DEFAULT_BUILD_OPTIONS: Required<BuildContextOptions> = {
  29. maxNodes: 50,
  30. maxCodeBlocks: 10,
  31. maxCodeBlockSize: 2000,
  32. includeCode: true,
  33. format: 'markdown',
  34. searchLimit: 5,
  35. traversalDepth: 2,
  36. minScore: 0.3,
  37. };
  38. /**
  39. * Default options for finding relevant context
  40. */
  41. const DEFAULT_FIND_OPTIONS: Required<FindRelevantContextOptions> = {
  42. searchLimit: 5,
  43. traversalDepth: 2,
  44. maxNodes: 50,
  45. minScore: 0.3,
  46. edgeKinds: [],
  47. nodeKinds: [],
  48. };
  49. /**
  50. * Context Builder
  51. *
  52. * Coordinates semantic search and graph traversal to build
  53. * comprehensive context for tasks.
  54. */
  55. export class ContextBuilder {
  56. private projectRoot: string;
  57. private queries: QueryBuilder;
  58. private traverser: GraphTraverser;
  59. private vectorManager: VectorManager | null;
  60. constructor(
  61. projectRoot: string,
  62. queries: QueryBuilder,
  63. traverser: GraphTraverser,
  64. vectorManager: VectorManager | null
  65. ) {
  66. this.projectRoot = projectRoot;
  67. this.queries = queries;
  68. this.traverser = traverser;
  69. this.vectorManager = vectorManager;
  70. }
  71. /**
  72. * Build context for a task
  73. *
  74. * Pipeline:
  75. * 1. Parse task input (string or {title, description})
  76. * 2. Run semantic search to find entry points
  77. * 3. Expand graph around entry points
  78. * 4. Extract code blocks for key nodes
  79. * 5. Format output for Claude
  80. *
  81. * @param input - Task description or object with title/description
  82. * @param options - Build options
  83. * @returns TaskContext (structured) or formatted string
  84. */
  85. async buildContext(
  86. input: TaskInput,
  87. options: BuildContextOptions = {}
  88. ): Promise<TaskContext | string> {
  89. const opts = { ...DEFAULT_BUILD_OPTIONS, ...options };
  90. // Parse input
  91. const query = typeof input === 'string' ? input : `${input.title}${input.description ? `: ${input.description}` : ''}`;
  92. // Find relevant context (semantic search + graph expansion)
  93. const subgraph = await this.findRelevantContext(query, {
  94. searchLimit: opts.searchLimit,
  95. traversalDepth: opts.traversalDepth,
  96. maxNodes: opts.maxNodes,
  97. minScore: opts.minScore,
  98. });
  99. // Get entry points (nodes from semantic search)
  100. const entryPoints = this.getEntryPoints(subgraph);
  101. // Extract code blocks for key nodes
  102. const codeBlocks = opts.includeCode
  103. ? await this.extractCodeBlocks(subgraph, opts.maxCodeBlocks, opts.maxCodeBlockSize)
  104. : [];
  105. // Get related files
  106. const relatedFiles = this.getRelatedFiles(subgraph);
  107. // Generate summary
  108. const summary = this.generateSummary(query, subgraph, entryPoints);
  109. // Calculate stats
  110. const stats = {
  111. nodeCount: subgraph.nodes.size,
  112. edgeCount: subgraph.edges.length,
  113. fileCount: relatedFiles.length,
  114. codeBlockCount: codeBlocks.length,
  115. totalCodeSize: codeBlocks.reduce((sum, block) => sum + block.content.length, 0),
  116. };
  117. const context: TaskContext = {
  118. query,
  119. subgraph,
  120. entryPoints,
  121. codeBlocks,
  122. relatedFiles,
  123. summary,
  124. stats,
  125. };
  126. // Return formatted output or raw context
  127. if (opts.format === 'markdown') {
  128. return formatContextAsMarkdown(context);
  129. } else if (opts.format === 'json') {
  130. return formatContextAsJson(context);
  131. }
  132. return context;
  133. }
  134. /**
  135. * Find relevant subgraph for a query
  136. *
  137. * Combines semantic search with graph traversal:
  138. * 1. Use semantic search to find relevant entry points
  139. * 2. Traverse graph from entry points
  140. * 3. Merge results into a unified subgraph
  141. *
  142. * @param query - Natural language query
  143. * @param options - Search and traversal options
  144. * @returns Subgraph of relevant nodes and edges
  145. */
  146. async findRelevantContext(
  147. query: string,
  148. options: FindRelevantContextOptions = {}
  149. ): Promise<Subgraph> {
  150. const opts = { ...DEFAULT_FIND_OPTIONS, ...options };
  151. // Start with empty subgraph
  152. const nodes = new Map<string, Node>();
  153. const edges: Edge[] = [];
  154. const roots: string[] = [];
  155. // Handle empty query - return empty subgraph
  156. if (!query || query.trim().length === 0) {
  157. return { nodes, edges, roots };
  158. }
  159. // Try semantic search if vector manager is available
  160. let searchResults: SearchResult[] = [];
  161. if (this.vectorManager && this.vectorManager.isInitialized()) {
  162. try {
  163. searchResults = await this.vectorManager.search(query, {
  164. limit: opts.searchLimit,
  165. kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  166. });
  167. } catch (error) {
  168. logDebug('Semantic search failed, falling back to text search', { query, error: String(error) });
  169. }
  170. }
  171. // Fall back to text search if no semantic results
  172. if (searchResults.length === 0) {
  173. try {
  174. const textResults = this.queries.searchNodes(query, {
  175. limit: opts.searchLimit,
  176. kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  177. });
  178. searchResults = textResults;
  179. } catch (error) {
  180. logWarn('Text search failed', { query, error: String(error) });
  181. // Return empty results
  182. }
  183. }
  184. // Filter by minimum score
  185. const filteredResults = searchResults.filter((r) => r.score >= opts.minScore);
  186. // Add entry points to subgraph
  187. for (const result of filteredResults) {
  188. nodes.set(result.node.id, result.node);
  189. roots.push(result.node.id);
  190. }
  191. // Traverse from each entry point
  192. for (const result of filteredResults) {
  193. const traversalResult = this.traverser.traverseBFS(result.node.id, {
  194. maxDepth: opts.traversalDepth,
  195. edgeKinds: opts.edgeKinds && opts.edgeKinds.length > 0 ? opts.edgeKinds : undefined,
  196. nodeKinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
  197. direction: 'both',
  198. limit: Math.ceil(opts.maxNodes / Math.max(1, filteredResults.length)),
  199. });
  200. // Merge nodes
  201. for (const [id, node] of traversalResult.nodes) {
  202. if (!nodes.has(id)) {
  203. nodes.set(id, node);
  204. }
  205. }
  206. // Merge edges (avoid duplicates)
  207. for (const edge of traversalResult.edges) {
  208. const exists = edges.some(
  209. (e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind
  210. );
  211. if (!exists) {
  212. edges.push(edge);
  213. }
  214. }
  215. }
  216. // Trim to max nodes if needed
  217. if (nodes.size > opts.maxNodes) {
  218. // Prioritize entry points and their direct neighbors
  219. const priorityIds = new Set(roots);
  220. for (const edge of edges) {
  221. if (priorityIds.has(edge.source)) {
  222. priorityIds.add(edge.target);
  223. }
  224. if (priorityIds.has(edge.target)) {
  225. priorityIds.add(edge.source);
  226. }
  227. }
  228. // Keep priority nodes, then fill remaining slots
  229. const trimmedNodes = new Map<string, Node>();
  230. for (const id of priorityIds) {
  231. const node = nodes.get(id);
  232. if (node && trimmedNodes.size < opts.maxNodes) {
  233. trimmedNodes.set(id, node);
  234. }
  235. }
  236. // Fill remaining from other nodes
  237. for (const [id, node] of nodes) {
  238. if (trimmedNodes.size >= opts.maxNodes) break;
  239. if (!trimmedNodes.has(id)) {
  240. trimmedNodes.set(id, node);
  241. }
  242. }
  243. // Filter edges to only include kept nodes
  244. const trimmedEdges = edges.filter(
  245. (e) => trimmedNodes.has(e.source) && trimmedNodes.has(e.target)
  246. );
  247. return { nodes: trimmedNodes, edges: trimmedEdges, roots };
  248. }
  249. return { nodes, edges, roots };
  250. }
  251. /**
  252. * Get the source code for a node
  253. *
  254. * Reads the file and extracts the code between startLine and endLine.
  255. *
  256. * @param nodeId - ID of the node
  257. * @returns Code string or null if not found
  258. */
  259. async getCode(nodeId: string): Promise<string | null> {
  260. const node = this.queries.getNodeById(nodeId);
  261. if (!node) {
  262. return null;
  263. }
  264. return this.extractNodeCode(node);
  265. }
  266. /**
  267. * Extract code from a node's source file
  268. */
  269. private async extractNodeCode(node: Node): Promise<string | null> {
  270. const filePath = path.join(this.projectRoot, node.filePath);
  271. if (!fs.existsSync(filePath)) {
  272. return null;
  273. }
  274. try {
  275. const content = fs.readFileSync(filePath, 'utf-8');
  276. const lines = content.split('\n');
  277. // Extract lines (1-indexed to 0-indexed)
  278. const startIdx = Math.max(0, node.startLine - 1);
  279. const endIdx = Math.min(lines.length, node.endLine);
  280. return lines.slice(startIdx, endIdx).join('\n');
  281. } catch (error) {
  282. logDebug('Failed to extract code from node', { nodeId: node.id, filePath: node.filePath, error: String(error) });
  283. return null;
  284. }
  285. }
  286. /**
  287. * Get entry points from a subgraph (the root nodes)
  288. */
  289. private getEntryPoints(subgraph: Subgraph): Node[] {
  290. return subgraph.roots
  291. .map((id) => subgraph.nodes.get(id))
  292. .filter((n): n is Node => n !== undefined);
  293. }
  294. /**
  295. * Extract code blocks for key nodes in the subgraph
  296. */
  297. private async extractCodeBlocks(
  298. subgraph: Subgraph,
  299. maxBlocks: number,
  300. maxBlockSize: number
  301. ): Promise<CodeBlock[]> {
  302. const blocks: CodeBlock[] = [];
  303. // Prioritize entry points, then functions/methods
  304. const priorityNodes: Node[] = [];
  305. // First: entry points
  306. for (const id of subgraph.roots) {
  307. const node = subgraph.nodes.get(id);
  308. if (node) {
  309. priorityNodes.push(node);
  310. }
  311. }
  312. // Then: functions and methods
  313. for (const node of subgraph.nodes.values()) {
  314. if (!subgraph.roots.includes(node.id)) {
  315. if (node.kind === 'function' || node.kind === 'method') {
  316. priorityNodes.push(node);
  317. }
  318. }
  319. }
  320. // Then: classes
  321. for (const node of subgraph.nodes.values()) {
  322. if (!subgraph.roots.includes(node.id)) {
  323. if (node.kind === 'class') {
  324. priorityNodes.push(node);
  325. }
  326. }
  327. }
  328. // Extract code for priority nodes
  329. for (const node of priorityNodes) {
  330. if (blocks.length >= maxBlocks) break;
  331. const code = await this.extractNodeCode(node);
  332. if (code) {
  333. // Truncate if too long
  334. const truncated = code.length > maxBlockSize
  335. ? code.slice(0, maxBlockSize) + '\n// ... truncated ...'
  336. : code;
  337. blocks.push({
  338. content: truncated,
  339. filePath: node.filePath,
  340. startLine: node.startLine,
  341. endLine: node.endLine,
  342. language: node.language,
  343. node,
  344. });
  345. }
  346. }
  347. return blocks;
  348. }
  349. /**
  350. * Get unique files from a subgraph
  351. */
  352. private getRelatedFiles(subgraph: Subgraph): string[] {
  353. const files = new Set<string>();
  354. for (const node of subgraph.nodes.values()) {
  355. files.add(node.filePath);
  356. }
  357. return Array.from(files).sort();
  358. }
  359. /**
  360. * Generate a summary of the context
  361. */
  362. private generateSummary(_query: string, subgraph: Subgraph, entryPoints: Node[]): string {
  363. const nodeCount = subgraph.nodes.size;
  364. const edgeCount = subgraph.edges.length;
  365. const files = this.getRelatedFiles(subgraph);
  366. const entryPointNames = entryPoints
  367. .slice(0, 3)
  368. .map((n) => n.name)
  369. .join(', ');
  370. const remaining = entryPoints.length > 3 ? ` and ${entryPoints.length - 3} more` : '';
  371. return `Found ${nodeCount} relevant code symbols across ${files.length} files. ` +
  372. `Key entry points: ${entryPointNames}${remaining}. ` +
  373. `${edgeCount} relationships identified.`;
  374. }
  375. }
  376. /**
  377. * Create a context builder
  378. */
  379. export function createContextBuilder(
  380. projectRoot: string,
  381. queries: QueryBuilder,
  382. traverser: GraphTraverser,
  383. vectorManager: VectorManager | null
  384. ): ContextBuilder {
  385. return new ContextBuilder(projectRoot, queries, traverser, vectorManager);
  386. }
  387. // Re-export formatter
  388. export { formatContextAsMarkdown, formatContextAsJson } from './formatter';