// svelte-extractor.ts
  1. import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference, Language } from '../types';
  2. import { generateNodeId } from './tree-sitter-helpers';
  3. import { TreeSitterExtractor } from './tree-sitter';
  4. import { isLanguageSupported } from './grammars';
  5. /** Svelte 5 rune names — compiler builtins, not real functions */
  6. const SVELTE_RUNES = new Set([
  7. '$props', '$state', '$derived', '$effect', '$bindable',
  8. '$inspect', '$host', '$snippet',
  9. ]);
  10. /**
  11. * SvelteExtractor - Extracts code relationships from Svelte component files
  12. *
  13. * Svelte files are multi-language (script + template + style). Rather than
  14. * parsing the full Svelte grammar, we extract the <script> block content
  15. * and delegate it to the TypeScript/JavaScript TreeSitterExtractor.
  16. *
  17. * Also extracts function calls from template expressions (`{fn(...)}`) so
  18. * cross-file call edges are captured even when calls live in markup.
  19. *
  20. * Every .svelte file produces a component node (Svelte components are always importable).
  21. */
  22. export class SvelteExtractor {
  23. private filePath: string;
  24. private source: string;
  25. private nodes: Node[] = [];
  26. private edges: Edge[] = [];
  27. private unresolvedReferences: UnresolvedReference[] = [];
  28. private errors: ExtractionError[] = [];
  29. constructor(filePath: string, source: string) {
  30. this.filePath = filePath;
  31. this.source = source;
  32. }
  33. /**
  34. * Extract from Svelte source
  35. */
  36. extract(): ExtractionResult {
  37. const startTime = Date.now();
  38. try {
  39. // Create component node for the .svelte file itself
  40. const componentNode = this.createComponentNode();
  41. // Extract and process script blocks
  42. const scriptBlocks = this.extractScriptBlocks();
  43. for (const block of scriptBlocks) {
  44. this.processScriptBlock(block, componentNode.id);
  45. }
  46. // Extract function calls from template expressions ({fn(...)})
  47. this.extractTemplateCalls(componentNode.id, scriptBlocks);
  48. // Extract component usages from template (<ComponentName>)
  49. this.extractTemplateComponents(componentNode.id);
  50. // Filter out Svelte rune calls ($state, $props, $derived, etc.)
  51. this.unresolvedReferences = this.unresolvedReferences.filter(
  52. ref => !SVELTE_RUNES.has(ref.referenceName)
  53. );
  54. } catch (error) {
  55. this.errors.push({
  56. message: `Svelte extraction error: ${error instanceof Error ? error.message : String(error)}`,
  57. severity: 'error',
  58. code: 'parse_error',
  59. });
  60. }
  61. return {
  62. nodes: this.nodes,
  63. edges: this.edges,
  64. unresolvedReferences: this.unresolvedReferences,
  65. errors: this.errors,
  66. durationMs: Date.now() - startTime,
  67. };
  68. }
  69. /**
  70. * Create a component node for the .svelte file
  71. */
  72. private createComponentNode(): Node {
  73. const lines = this.source.split('\n');
  74. const fileName = this.filePath.split(/[/\\]/).pop() || this.filePath;
  75. const componentName = fileName.replace(/\.svelte$/, '');
  76. const id = generateNodeId(this.filePath, 'component', componentName, 1);
  77. const node: Node = {
  78. id,
  79. kind: 'component',
  80. name: componentName,
  81. qualifiedName: `${this.filePath}::${componentName}`,
  82. filePath: this.filePath,
  83. language: 'svelte',
  84. startLine: 1,
  85. endLine: lines.length,
  86. startColumn: 0,
  87. endColumn: lines[lines.length - 1]?.length || 0,
  88. isExported: true, // Svelte components are always importable
  89. updatedAt: Date.now(),
  90. };
  91. this.nodes.push(node);
  92. return node;
  93. }
  94. /**
  95. * Extract <script> blocks from the Svelte source
  96. */
  97. private extractScriptBlocks(): Array<{
  98. content: string;
  99. startLine: number;
  100. isModule: boolean;
  101. isTypeScript: boolean;
  102. }> {
  103. const blocks: Array<{
  104. content: string;
  105. startLine: number;
  106. isModule: boolean;
  107. isTypeScript: boolean;
  108. }> = [];
  109. const scriptRegex = /<script(\s[^>]*)?>(?<content>[\s\S]*?)<\/script>/g;
  110. let match;
  111. while ((match = scriptRegex.exec(this.source)) !== null) {
  112. const attrs = match[1] || '';
  113. const content = match.groups?.content || match[2] || '';
  114. // Detect TypeScript from lang attribute
  115. const isTypeScript = /lang\s*=\s*["'](ts|typescript)["']/.test(attrs);
  116. // Detect module script
  117. const isModule = /context\s*=\s*["']module["']/.test(attrs);
  118. // Calculate start line of the script content (line after <script>)
  119. const beforeScript = this.source.substring(0, match.index);
  120. const scriptTagLine = (beforeScript.match(/\n/g) || []).length;
  121. // The content starts on the line after the opening <script> tag
  122. const openingTag = match[0].substring(0, match[0].indexOf('>') + 1);
  123. const openingTagLines = (openingTag.match(/\n/g) || []).length;
  124. const contentStartLine = scriptTagLine + openingTagLines + 1; // 0-indexed line
  125. blocks.push({
  126. content,
  127. startLine: contentStartLine,
  128. isModule,
  129. isTypeScript,
  130. });
  131. }
  132. return blocks;
  133. }
  134. /**
  135. * Process a script block by delegating to TreeSitterExtractor
  136. */
  137. private processScriptBlock(
  138. block: { content: string; startLine: number; isModule: boolean; isTypeScript: boolean },
  139. componentNodeId: string
  140. ): void {
  141. const scriptLanguage: Language = block.isTypeScript ? 'typescript' : 'javascript';
  142. // Check if the script language parser is available
  143. if (!isLanguageSupported(scriptLanguage)) {
  144. this.errors.push({
  145. message: `Parser for ${scriptLanguage} not available, cannot parse Svelte script block`,
  146. severity: 'warning',
  147. });
  148. return;
  149. }
  150. // Delegate to TreeSitterExtractor
  151. const extractor = new TreeSitterExtractor(this.filePath, block.content, scriptLanguage);
  152. const result = extractor.extract();
  153. // Offset line numbers from script block back to .svelte file positions
  154. for (const node of result.nodes) {
  155. node.startLine += block.startLine;
  156. node.endLine += block.startLine;
  157. node.language = 'svelte'; // Mark as svelte, not TS/JS
  158. this.nodes.push(node);
  159. // Add containment edge from component to this node
  160. this.edges.push({
  161. source: componentNodeId,
  162. target: node.id,
  163. kind: 'contains',
  164. });
  165. }
  166. // Offset edges (they reference line numbers)
  167. for (const edge of result.edges) {
  168. if (edge.line) {
  169. edge.line += block.startLine;
  170. }
  171. this.edges.push(edge);
  172. }
  173. // Offset unresolved references
  174. for (const ref of result.unresolvedReferences) {
  175. ref.line += block.startLine;
  176. ref.filePath = this.filePath;
  177. ref.language = 'svelte';
  178. this.unresolvedReferences.push(ref);
  179. }
  180. // Carry over errors
  181. for (const error of result.errors) {
  182. if (error.line) {
  183. error.line += block.startLine;
  184. }
  185. this.errors.push(error);
  186. }
  187. }
  188. /**
  189. * Extract function calls from Svelte template expressions.
  190. *
  191. * In Svelte, many function calls happen in markup (e.g., `class={cn(...)}`),
  192. * not inside `<script>` blocks. We scan the template portion for `{expression}`
  193. * blocks and extract call patterns from them.
  194. */
  195. private extractTemplateCalls(
  196. componentNodeId: string,
  197. _scriptBlocks: Array<{ content: string; startLine: number }>
  198. ): void {
  199. // Build a set of line ranges covered by <script> and <style> blocks so we skip them
  200. const coveredRanges: Array<[number, number]> = [];
  201. // Find all <script>...</script> and <style>...</style> ranges
  202. const tagRegex = /<(script|style)(\s[^>]*)?>[\s\S]*?<\/\1>/g;
  203. let tagMatch;
  204. while ((tagMatch = tagRegex.exec(this.source)) !== null) {
  205. const startLine = (this.source.substring(0, tagMatch.index).match(/\n/g) || []).length;
  206. const endLine = startLine + (tagMatch[0].match(/\n/g) || []).length;
  207. coveredRanges.push([startLine, endLine]);
  208. }
  209. // Find template expressions: {...} outside of script/style blocks
  210. // Matches curly-brace expressions, excluding Svelte block syntax ({#if}, {:else}, {/if}, {@html}, {@render})
  211. const lines = this.source.split('\n');
  212. const exprRegex = /\{([^}#/:@][^}]*)\}/g;
  213. for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
  214. // Skip lines inside script/style blocks
  215. if (coveredRanges.some(([start, end]) => lineIdx >= start && lineIdx <= end)) continue;
  216. const line = lines[lineIdx]!;
  217. let exprMatch;
  218. while ((exprMatch = exprRegex.exec(line)) !== null) {
  219. const expr = exprMatch[1]!;
  220. // Extract function calls: identifiers followed by (
  221. // Matches: cn(...), buttonVariants(...), obj.method(...)
  222. const callRegex = /\b([a-zA-Z_$][\w$.]*)\s*\(/g;
  223. let callMatch;
  224. while ((callMatch = callRegex.exec(expr)) !== null) {
  225. const calleeName = callMatch[1]!;
  226. // Skip Svelte runes, control flow keywords, and common non-function patterns
  227. if (SVELTE_RUNES.has(calleeName)) continue;
  228. if (calleeName === 'if' || calleeName === 'else' || calleeName === 'each' || calleeName === 'await') continue;
  229. this.unresolvedReferences.push({
  230. fromNodeId: componentNodeId,
  231. referenceName: calleeName,
  232. referenceKind: 'calls',
  233. line: lineIdx + 1, // 1-indexed
  234. column: exprMatch.index + callMatch.index,
  235. filePath: this.filePath,
  236. language: 'svelte',
  237. });
  238. }
  239. }
  240. }
  241. }
  242. /**
  243. * Extract component usages from the Svelte template.
  244. *
  245. * PascalCase tags like <Modal>, <Button />, <DevServerPreview> represent
  246. * component instantiations — analogous to function calls in imperative code.
  247. * Capturing these creates graph edges from parent to child components and
  248. * gives codegraph_explore anchor points in the template markup.
  249. */
  250. private extractTemplateComponents(componentNodeId: string): void {
  251. // Build ranges covered by <script> and <style> blocks to skip them
  252. const coveredRanges: Array<[number, number]> = [];
  253. const tagRegex = /<(script|style)(\s[^>]*)?>[\s\S]*?<\/\1>/g;
  254. let tagMatch;
  255. while ((tagMatch = tagRegex.exec(this.source)) !== null) {
  256. const startLine = (this.source.substring(0, tagMatch.index).match(/\n/g) || []).length;
  257. const endLine = startLine + (tagMatch[0].match(/\n/g) || []).length;
  258. coveredRanges.push([startLine, endLine]);
  259. }
  260. const lines = this.source.split('\n');
  261. // Match PascalCase opening/self-closing tags (closing tags </Foo> start with </ so won't match)
  262. const componentTagRegex = /<([A-Z][a-zA-Z0-9_$]*)\b/g;
  263. for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
  264. if (coveredRanges.some(([start, end]) => lineIdx >= start && lineIdx <= end)) continue;
  265. const line = lines[lineIdx]!;
  266. let match;
  267. while ((match = componentTagRegex.exec(line)) !== null) {
  268. const componentName = match[1]!;
  269. this.unresolvedReferences.push({
  270. fromNodeId: componentNodeId,
  271. referenceName: componentName,
  272. referenceKind: 'references',
  273. line: lineIdx + 1, // 1-indexed
  274. column: match.index + 1,
  275. filePath: this.filePath,
  276. language: 'svelte',
  277. });
  278. }
  279. }
  280. }
  281. }