razor-extractor.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types';
  2. import { generateNodeId } from './tree-sitter-helpers';
  3. import { TreeSitterExtractor } from './tree-sitter';
  4. import { isLanguageSupported } from './grammars';
  5. /**
  6. * RazorExtractor — extracts code relationships from ASP.NET Razor (`.cshtml`)
  7. * and Blazor (`.razor`) markup.
  8. *
  9. * Markup-driven code-behind, view-models, components, and DTOs are referenced
  10. * only from markup the engine otherwise doesn't parse, so they look like nothing
  11. * depends on them. This extractor links the markup → the C# types it names:
  12. *
  13. * - `@model Foo` / `@inherits Bar<Foo>` → the view-model / base type (.cshtml + .razor)
  14. * - `@inject IService svc` → the injected service type
  15. * - `@typeof(MainLayout)` → the referenced type
  16. * - `<MyComponent .../>` (Blazor only) → the component class (.razor or `.cs : ComponentBase`)
  17. * - `<Grid TItem="CatalogItem">` → the generic type argument
  18. *
  19. * Risk mitigations (see docs/design/template-markup-parser.md):
  20. * - Only PascalCase (`[A-Z]`-initial) tags are treated as components — HTML
  21. * elements are lowercase, so they never match. Known Blazor framework
  22. * components are skipped (they aren't in-repo, so a ref would just dangle).
  23. * - Exactly ONE `component` node per file; component tags become `references`
  24. * EDGES, never nodes — no per-tag node explosion.
  25. * - Emitted refs are ordinary by-name `references` resolved by the name-matcher;
  26. * `razor` shares the `dotnet` language family with `csharp` (name-matcher.ts)
  27. * so the cross-family gate doesn't drop them.
  28. * - `.cshtml`/`.razor` are registered in grammars.ts so they're indexed.
  29. *
  30. * Out of scope (data-flow / low-value): `asp-for`/`th:field` property-string bindings.
  31. * The C# inside `@code { }` / `@functions { }` / `@{ }` blocks is not regex-scanned here; it is delegated to the C# tree-sitter extractor.
  32. */
  33. /**
  34. * Blazor framework-provided components — invoked by the runtime, not defined
  35. * in-repo, so a reference to them would never resolve. Skip to avoid dangling refs.
  36. */
  37. const BLAZOR_BUILTIN_COMPONENTS = new Set([
  38. 'Router', 'Found', 'NotFound', 'RouteView', 'AuthorizeRouteView', 'LayoutView',
  39. 'CascadingValue', 'CascadingAuthenticationState', 'AuthorizeView', 'Authorized',
  40. 'NotAuthorized', 'Authorizing', 'EditForm', 'DataAnnotationsValidator',
  41. 'ValidationSummary', 'ValidationMessage', 'InputText', 'InputNumber',
  42. 'InputCheckbox', 'InputSelect', 'InputDate', 'InputTextArea', 'InputRadio',
  43. 'InputRadioGroup', 'InputFile', 'PageTitle', 'HeadContent', 'HeadOutlet',
  44. 'Virtualize', 'DynamicComponent', 'ErrorBoundary', 'SectionContent',
  45. 'SectionOutlet', 'FocusOnNavigate', 'NavLink', 'Microsoft',
  46. ]);
  47. export class RazorExtractor {
  48. private filePath: string;
  49. private source: string;
  50. private nodes: Node[] = [];
  51. private edges: Edge[] = [];
  52. private unresolvedReferences: UnresolvedReference[] = [];
  53. private errors: ExtractionError[] = [];
  54. constructor(filePath: string, source: string) {
  55. this.filePath = filePath;
  56. this.source = source;
  57. }
  58. extract(): ExtractionResult {
  59. const startTime = Date.now();
  60. try {
  61. const componentId = this.createComponentNode().id;
  62. this.extractDirectives(componentId);
  63. // Blazor component tags only — `.cshtml` uses HTML + tag helpers, not
  64. // PascalCase component elements.
  65. if (this.filePath.toLowerCase().endsWith('.razor')) {
  66. this.extractComponentTags(componentId);
  67. }
  68. // Delegate the C# in `@code { }` / `@functions { }` / `@{ }` blocks to the
  69. // C# tree-sitter extractor (the Blazor analog of Svelte's <script> block) —
  70. // this is where component logic uses services/DTOs, so it covers the types
  71. // referenced only from component code.
  72. this.processCodeBlocks(componentId);
  73. } catch (error) {
  74. this.errors.push({
  75. message: `Razor extraction error: ${error instanceof Error ? error.message : String(error)}`,
  76. severity: 'error',
  77. code: 'parse_error',
  78. });
  79. }
  80. return {
  81. nodes: this.nodes,
  82. edges: this.edges,
  83. unresolvedReferences: this.unresolvedReferences,
  84. errors: this.errors,
  85. durationMs: Date.now() - startTime,
  86. };
  87. }
  88. private createComponentNode(): Node {
  89. const lines = this.source.split('\n');
  90. const fileName = this.filePath.split(/[/\\]/).pop() || this.filePath;
  91. const componentName = fileName.replace(/\.(razor|cshtml)$/i, '');
  92. const node: Node = {
  93. id: generateNodeId(this.filePath, 'component', componentName, 1),
  94. kind: 'component',
  95. name: componentName,
  96. qualifiedName: `${this.filePath}::${componentName}`,
  97. filePath: this.filePath,
  98. language: 'razor',
  99. startLine: 1,
  100. endLine: lines.length,
  101. startColumn: 0,
  102. endColumn: lines[lines.length - 1]?.length || 0,
  103. isExported: true,
  104. updatedAt: Date.now(),
  105. };
  106. this.nodes.push(node);
  107. return node;
  108. }
  109. /** Last `.`-segment (`App.ViewModels.RegisterModel` → `RegisterModel`). */
  110. private lastSegment(s: string): string {
  111. const i = s.lastIndexOf('.');
  112. return i >= 0 ? s.slice(i + 1) : s;
  113. }
  114. /**
  115. * Split a type expression into the capitalized type names it contains — base
  116. * type plus any generic arguments (`Bar<Foo, Baz>` → `Bar`, `Foo`, `Baz`),
  117. * each reduced to its last namespace segment. Lowercase/keyword tokens drop out.
  118. */
  119. private typeNames(expr: string): string[] {
  120. const out: string[] = [];
  121. for (const raw of expr.split(/[<>,\s]+/)) {
  122. const seg = this.lastSegment(raw.trim());
  123. if (/^[A-Z][A-Za-z0-9_]*$/.test(seg)) out.push(seg);
  124. }
  125. return out;
  126. }
  127. private pushRef(componentId: string, name: string, line: number, column: number): void {
  128. this.unresolvedReferences.push({
  129. fromNodeId: componentId,
  130. referenceName: name,
  131. referenceKind: 'references',
  132. line,
  133. column,
  134. filePath: this.filePath,
  135. language: 'razor',
  136. });
  137. }
  138. private extractDirectives(componentId: string): void {
  139. const lines = this.source.split('\n');
  140. for (let i = 0; i < lines.length; i++) {
  141. const line = lines[i]!;
  142. // `@model Foo` / `@inherits Bar<Foo>` — directive followed by a type.
  143. const dir = line.match(/^\s*@(?:model|inherits)\s+([A-Za-z_][\w.]*(?:\s*<[^>]+>)?)/);
  144. if (dir) for (const t of this.typeNames(dir[1]!)) this.pushRef(componentId, t, i + 1, 0);
  145. // `@inject IService name` — the type is the first token, a name follows.
  146. const inj = line.match(/^\s*@inject\s+([A-Za-z_][\w.]*(?:\s*<[^>]+>)?)\s+[A-Za-z_]/);
  147. if (inj) for (const t of this.typeNames(inj[1]!)) this.pushRef(componentId, t, i + 1, 0);
  148. // `@typeof(X)` anywhere on the line.
  149. for (const m of line.matchAll(/@typeof\(\s*([A-Za-z_][\w.]*)\s*\)/g)) {
  150. const seg = this.lastSegment(m[1]!);
  151. if (/^[A-Z]/.test(seg)) this.pushRef(componentId, seg, i + 1, m.index ?? 0);
  152. }
  153. }
  154. }
  155. private extractComponentTags(componentId: string): void {
  156. const lines = this.source.split('\n');
  157. // PascalCase opening / self-closing tags. Closing tags (`</Foo>`) start with
  158. // `</` and are skipped. HTML elements are lowercase → never match.
  159. const tagRe = /<([A-Z][A-Za-z0-9_]*)\b([^>]*)>/g;
  160. for (let i = 0; i < lines.length; i++) {
  161. const line = lines[i]!;
  162. let m: RegExpExecArray | null;
  163. while ((m = tagRe.exec(line)) !== null) {
  164. const name = m[1]!;
  165. if (BLAZOR_BUILTIN_COMPONENTS.has(name)) continue;
  166. this.pushRef(componentId, name, i + 1, m.index + 1);
  167. // Generic component type arg: `<Grid TItem="CatalogItem">`.
  168. for (const t of (m[2] || '').matchAll(/\bT[A-Za-z]*\s*=\s*"([A-Za-z_][\w.]*)"/g)) {
  169. const seg = this.lastSegment(t[1]!);
  170. if (/^[A-Z]/.test(seg)) this.pushRef(componentId, seg, i + 1, 0);
  171. }
  172. }
  173. }
  174. }
  175. /**
  176. * Find the matching `}` for the `{` at `openIdx`, skipping string literals and
  177. * comments so a brace inside `"{"` / `// }` doesn't throw off the count.
  178. * Returns the index of the closing brace, or -1 if unbalanced.
  179. */
  180. private matchBrace(src: string, openIdx: number): number {
  181. let depth = 0;
  182. for (let i = openIdx; i < src.length; i++) {
  183. const ch = src[i];
  184. if (ch === '"' || ch === "'") {
  185. const quote = ch;
  186. i++;
  187. while (i < src.length && src[i] !== quote) {
  188. if (src[i] === '\\') i++;
  189. i++;
  190. }
  191. continue;
  192. }
  193. if (ch === '/' && src[i + 1] === '/') {
  194. while (i < src.length && src[i] !== '\n') i++;
  195. continue;
  196. }
  197. if (ch === '/' && src[i + 1] === '*') {
  198. i += 2;
  199. while (i < src.length && !(src[i] === '*' && src[i + 1] === '/')) i++;
  200. i++;
  201. continue;
  202. }
  203. if (ch === '{') depth++;
  204. else if (ch === '}') {
  205. depth--;
  206. if (depth === 0) return i;
  207. }
  208. }
  209. return -1;
  210. }
  211. /** `@code { … }` / `@functions { … }` (Blazor) and `@{ … }` (Razor) C# blocks. */
  212. private extractCodeBlocks(): Array<{ content: string; lineOffset: number }> {
  213. const blocks: Array<{ content: string; lineOffset: number }> = [];
  214. const re = /@(?:code|functions)\b\s*\{|@\{/g;
  215. let m: RegExpExecArray | null;
  216. while ((m = re.exec(this.source)) !== null) {
  217. const openIdx = this.source.indexOf('{', m.index);
  218. if (openIdx < 0) continue;
  219. const close = this.matchBrace(this.source, openIdx);
  220. if (close < 0) continue;
  221. const content = this.source.slice(openIdx + 1, close);
  222. // newlines before the content's first char → 0-indexed line of content start
  223. const lineOffset = (this.source.slice(0, openIdx + 1).match(/\n/g) || []).length;
  224. blocks.push({ content, lineOffset });
  225. re.lastIndex = close;
  226. }
  227. return blocks;
  228. }
  229. /**
  230. * Delegate each `@code`/`@functions`/`@{` block's C# to the tree-sitter C#
  231. * extractor and attribute the block's external references (service/DTO calls,
  232. * `new X()`, type uses) to the component. The block is wrapped in a synthetic
  233. * class so tree-sitter parses the component's fields/methods in a class context
  234. * (a Blazor `@code` body compiles into the component's partial class). We keep
  235. * only the dependency references — coverage just needs the edges to external
  236. * types, not per-member nodes. Degrades gracefully if the C# grammar isn't loaded.
  237. */
  238. private processCodeBlocks(componentId: string): void {
  239. if (!isLanguageSupported('csharp')) return;
  240. for (const block of this.extractCodeBlocks()) {
  241. if (!block.content.trim()) continue;
  242. let result: ExtractionResult;
  243. try {
  244. result = new TreeSitterExtractor(
  245. this.filePath,
  246. `class __RazorCode__ {\n${block.content}\n}`,
  247. 'csharp'
  248. ).extract();
  249. } catch {
  250. continue; // grammar not loaded / parse failure — skip this block
  251. }
  252. // The synthetic wrapper adds one line before the block content; map ref
  253. // lines back to the .razor file (display only — coverage is line-agnostic).
  254. for (const ref of result.unresolvedReferences) {
  255. this.unresolvedReferences.push({
  256. ...ref,
  257. fromNodeId: componentId,
  258. line: ref.line + block.lineOffset - 1,
  259. column: ref.column,
  260. filePath: this.filePath,
  261. language: 'razor',
  262. });
  263. }
  264. }
  265. }
  266. }