tree-sitter.ts 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733
  1. /**
  2. * Tree-sitter Parser Wrapper
  3. *
  4. * Handles parsing source code and extracting structural information.
  5. */
  6. import { Node as SyntaxNode, Tree } from 'web-tree-sitter';
  7. import * as path from 'path';
  8. import {
  9. Language,
  10. Node,
  11. Edge,
  12. NodeKind,
  13. ExtractionResult,
  14. ExtractionError,
  15. UnresolvedReference,
  16. } from '../types';
  17. import { getParser, detectLanguage, isLanguageSupported } from './grammars';
  18. import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers';
  19. import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types';
  20. import { EXTRACTORS } from './languages';
  21. import { LiquidExtractor } from './liquid-extractor';
  22. import { SvelteExtractor } from './svelte-extractor';
  23. import { DfmExtractor } from './dfm-extractor';
  24. import { VueExtractor } from './vue-extractor';
  25. import {
  26. getAllFrameworkResolvers,
  27. getApplicableFrameworks,
  28. } from '../resolution/frameworks';
  29. // Re-export for backward compatibility
  30. export { generateNodeId } from './tree-sitter-helpers';
  /**
   * Resolve the display name of a declaration node.
   *
   * Resolution order:
   *   1. the extractor's optional `resolveName` hook (any truthy result wins),
   *   2. the child stored under `extractor.nameField`, after unwrapping
   *      C/C++ declarators and Lua index expressions,
   *   3. the identifier inside the inner signature of a Dart `method_signature`,
   *   4. the first identifier-like named child of the node.
   *
   * @param node - Syntax node of the declaration being named.
   * @param source - Source text, handed to the hook and to `getNodeText`.
   * @param extractor - Language extractor supplying `nameField` and the hook.
   * @returns The resolved name, or `'<anonymous>'` when nothing matches.
   */
  function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string {
    const fromHook = extractor.resolveName?.(node, source);
    if (fromHook) {
      return fromHook;
    }

    const textOf = (target: SyntaxNode): string => getNodeText(target, source);
    // Pointer and function declarators both keep the wrapped node under the
    // `declarator` field, falling back to their first named child.
    const unwrapDeclarator = (outer: SyntaxNode) =>
      getChildByField(outer, 'declarator') || outer.namedChild(0);

    // Preferred source: the grammar's dedicated name field.
    const declared = getChildByField(node, extractor.nameField);
    if (declared) {
      // Peel pointer_declarator layers (C/C++ pointer return types).
      let target = declared;
      while (target.type === 'pointer_declarator') {
        const next = unwrapDeclarator(target);
        if (!next) break;
        target = next;
      }

      switch (target.type) {
        // Complex declarators (C/C++): the name is one level further in.
        case 'function_declarator':
        case 'declarator': {
          const core = unwrapDeclarator(target);
          return textOf(core ? core : target);
        }
        // Lua `function t.f()`: the simple name is the trailing field; the
        // table receiver is captured separately via getReceiverType.
        case 'dot_index_expression': {
          const field = getChildByField(target, 'field');
          if (field) return textOf(field);
          break;
        }
        // Lua `function t:m()`: the simple name is the trailing method.
        case 'method_index_expression': {
          const method = getChildByField(target, 'method');
          if (method) return textOf(method);
          break;
        }
        default:
          break;
      }
      return textOf(target);
    }

    // Dart method_signature: the identifier lives inside an inner signature node.
    if (node.type === 'method_signature') {
      const innerSignatureKinds = [
        'function_signature',
        'getter_signature',
        'setter_signature',
        'constructor_signature',
        'factory_constructor_signature',
      ];
      for (let i = 0; i < node.namedChildCount; i++) {
        const signature = node.namedChild(i);
        if (!signature || !innerSignatureKinds.includes(signature.type)) continue;
        for (let j = 0; j < signature.namedChildCount; j++) {
          const part = signature.namedChild(j);
          if (part?.type === 'identifier') {
            return textOf(part);
          }
        }
      }
    }

    // Arrow/function expressions are named by their parent variable_declarator,
    // never by identifiers in their body. Bailing out here keeps the fallback
    // below from naming `const fn = () => someIdentifier` as "someIdentifier".
    if (node.type === 'arrow_function' || node.type === 'function_expression') {
      return '<anonymous>';
    }

    // Last resort: the first identifier-like named child.
    const identifierKinds = ['identifier', 'type_identifier', 'simple_identifier', 'constant'];
    for (let i = 0; i < node.namedChildCount; i++) {
      const candidate = node.namedChild(i);
      if (candidate && identifierKinds.includes(candidate.type)) {
        return textOf(candidate);
      }
    }
    return '<anonymous>';
  }
  /**
   * Syntax node types that stand for a constructor invocation (`new Foo()`
   * and friends). extractInstantiation uses this set to decide when to emit
   * an `instantiates` reference targeting the class name.
   */
  const INSTANTIATION_KINDS: ReadonlySet<string> = new Set<string>([
    // typescript / javascript / tsx / jsx
    'new_expression',
    // java / c#
    'object_creation_expression',
    // some grammars
    'instance_creation_expression',
  ]);
  118. /**
  119. * TreeSitterExtractor - Main extraction class
  120. */
  121. export class TreeSitterExtractor {
  // Path of the file being extracted; stamped onto every node and error.
  private filePath: string;
  // Language used to pick the parser and the language extractor.
  private language: Language;
  // Source text; cleared once extract() finishes.
  private source: string;
  // Parsed tree; only held while extract() is running.
  private tree: Tree | null = null;
  private nodes: Node[] = [];
  private edges: Edge[] = [];
  private unresolvedReferences: UnresolvedReference[] = [];
  private errors: ExtractionError[] = [];
  // Language-specific hooks; null when EXTRACTORS has no entry for the language.
  private extractor: LanguageExtractor | null = null;
  // Stack of parent node IDs
  private nodeStack: string[] = [];
  // lookup key → node ID for Pascal defProc lookup
  private methodIndex: Map<string, string> | null = null;

  /**
   * @param filePath - Path of the file being extracted.
   * @param source - Full source text of the file.
   * @param language - Optional explicit language; when falsy it is detected
   *   from the path and source via detectLanguage.
   */
  constructor(filePath: string, source: string, language?: Language) {
    const resolvedLanguage = language || detectLanguage(filePath, source);
    this.filePath = filePath;
    this.source = source;
    this.language = resolvedLanguage;
    this.extractor = EXTRACTORS[resolvedLanguage] || null;
  }
  /**
   * Parse the source and walk the syntax tree, collecting nodes, edges,
   * unresolved references and errors.
   *
   * An unsupported language or a missing parser yields an empty result with a
   * single error entry. Failures during parsing or walking are recorded as a
   * `parse_error`, except WASM memory errors, which are re-thrown.
   *
   * @returns The extraction result, including the elapsed time in milliseconds.
   * @throws Re-throws errors whose message contains
   *   'memory access out of bounds' or 'out of memory'.
   */
  extract(): ExtractionResult {
    const startTime = Date.now();

    // Result for the early exits: no graph data, just one error entry.
    const failure = (message: string, code: ExtractionError['code']): ExtractionResult => ({
      nodes: [],
      edges: [],
      unresolvedReferences: [],
      errors: [
        {
          message,
          filePath: this.filePath,
          severity: 'error',
          code,
        },
      ],
      durationMs: Date.now() - startTime,
    });

    if (!isLanguageSupported(this.language)) {
      return failure(`Unsupported language: ${this.language}`, 'unsupported_language');
    }

    const parser = getParser(this.language);
    if (!parser) {
      return failure(`Failed to get parser for language: ${this.language}`, 'parser_error');
    }

    try {
      const parsed = parser.parse(this.source) ?? null;
      this.tree = parsed;
      if (!parsed) {
        throw new Error('Parser returned null tree');
      }

      // The file itself is the root node of the extracted graph.
      const root: Node = {
        id: `file:${this.filePath}`,
        kind: 'file',
        name: path.basename(this.filePath),
        qualifiedName: this.filePath,
        filePath: this.filePath,
        language: this.language,
        startLine: 1,
        endLine: this.source.split('\n').length,
        startColumn: 0,
        endColumn: 0,
        isExported: false,
        updatedAt: Date.now(),
      };
      this.nodes.push(root);

      // Keep the file node on the stack during the walk so top-level
      // declarations get `contains` edges from it.
      this.nodeStack.push(root.id);
      this.visitNode(parsed.rootNode);
      this.nodeStack.pop();
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      // WASM memory errors leave the module in a corrupted state, so every
      // later parse would fail too. Re-throw so the worker can detect this
      // and crash, forcing a clean restart with a fresh heap.
      const isWasmMemoryFailure = ['memory access out of bounds', 'out of memory'].some(
        (marker) => msg.includes(marker)
      );
      if (isWasmMemoryFailure) {
        throw error;
      }
      this.errors.push({
        message: `Parse error: ${msg}`,
        filePath: this.filePath,
        severity: 'error',
        code: 'parse_error',
      });
    } finally {
      // Free tree-sitter WASM memory right away: trees hold native heap memory
      // that V8's GC cannot see and that accumulates across thousands of files.
      this.tree?.delete();
      this.tree = null;
      // Drop the source string to reduce GC pressure.
      this.source = '';
    }

    return {
      nodes: this.nodes,
      edges: this.edges,
      unresolvedReferences: this.unresolvedReferences,
      errors: this.errors,
      durationMs: Date.now() - startTime,
    };
  }
  /**
   * Dispatch one syntax node to the matching extract* method and, unless that
   * method already walked the subtree, recurse into the node's named children.
   *
   * The first matching category wins; later categories are not consulted.
   * Does nothing when no language extractor is configured.
   *
   * @param node - Syntax node to visit.
   */
  private visitNode(node: SyntaxNode): void {
    const lang = this.extractor;
    if (!lang) return;

    const nodeType = node.type;

    // Language-specific custom visitor hook: a truthy result means the hook
    // fully handled this node.
    if (lang.visitNode) {
      const handled = lang.visitNode(node, this.makeExtractorContext());
      if (handled) return;
    }

    // Pascal-specific AST handling.
    if (this.language === 'pascal' && this.visitPascalNode(node)) {
      return;
    }

    // Returns true when the chosen extractor already visited the children.
    const subtreeHandled = ((): boolean => {
      // Function declarations. For Python/Ruby, a function_definition inside
      // a class is treated as a method. Both extractors visit the body via
      // visitFunctionBody.
      if (lang.functionTypes.includes(nodeType)) {
        const asMethod = this.isInsideClassLikeNode() && lang.methodTypes.includes(nodeType);
        if (asMethod) {
          this.extractMethod(node);
        } else {
          this.extractFunction(node);
        }
        return true;
      }

      // Class declarations. Some languages reuse class_declaration for
      // structs/enums (e.g. Swift), so ask the extractor how to classify it.
      if (lang.classTypes.includes(nodeType)) {
        switch (lang.classifyClassNode?.(node) ?? 'class') {
          case 'struct':
            this.extractStruct(node);
            break;
          case 'enum':
            this.extractEnum(node);
            break;
          case 'interface':
            this.extractInterface(node);
            break;
          case 'trait':
            this.extractClass(node, 'trait');
            break;
          default:
            this.extractClass(node);
            break;
        }
        return true; // the extractors visit body children
      }

      // Extra class node types (e.g. Dart mixin_declaration, extension_declaration).
      if (lang.extraClassNodeTypes?.includes(nodeType)) {
        this.extractClass(node);
        return true;
      }

      // Method declarations not already covered by functionTypes.
      if (lang.methodTypes.includes(nodeType)) {
        this.extractMethod(node);
        return true;
      }

      // Interface/protocol/trait declarations.
      if (lang.interfaceTypes.includes(nodeType)) {
        this.extractInterface(node);
        return true;
      }

      // Struct declarations.
      if (lang.structTypes.includes(nodeType)) {
        this.extractStruct(node);
        return true;
      }

      // Enum declarations.
      if (lang.enumTypes.includes(nodeType)) {
        this.extractEnum(node);
        return true;
      }

      // Type alias declarations (e.g. `type X = ...` in TypeScript). For Go,
      // type_spec wraps struct/interface definitions: resolveTypeAliasKind
      // detects these and extractTypeAlias creates the correct node kind.
      // extractTypeAlias itself reports whether the children were handled.
      if (lang.typeAliasTypes.includes(nodeType)) {
        return this.extractTypeAlias(node);
      }

      // Class properties (e.g. C# property_declaration).
      if (lang.propertyTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
        this.extractProperty(node);
        return true;
      }

      // Class fields (e.g. Java field_declaration, C# field_declaration).
      if (lang.fieldTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
        this.extractField(node);
        return true;
      }

      // Variable declarations (const, let, var, etc.) whose enclosing scope
      // is not class-like.
      if (lang.variableTypes.includes(nodeType) && !this.isInsideClassLikeNode()) {
        this.extractVariable(node);
        return true;
      }

      // `export_statement` itself is not extracted. The walker descends into
      // its children, where the inner declaration (lexical_declaration,
      // function_declaration, class_declaration, etc.) reaches its own
      // extractor. `isExported` walks the parent chain, so the exported flag
      // is preserved automatically.
      //
      // Calling extractExportedVariables here AND descending made every
      // `export const X = ...` produce two nodes for the same symbol: one
      // kind:'variable' from extractExportedVariables and one kind:'constant'
      // from extractVariable. The dedicated dispatch is the correct one (it
      // picks the kind from isConst, captures the initializer signature, and
      // walks type annotations); the export-statement helper was redundant.

      // Imports.
      if (lang.importTypes.includes(nodeType)) {
        this.extractImport(node);
        return false;
      }

      // Function calls.
      if (lang.callTypes.includes(nodeType)) {
        this.extractCall(node);
        return false;
      }

      // `new Foo(...)` / `Foo::new(...)` / object_creation_expression produce
      // an `instantiates` reference. Children are still walked so nested calls
      // in the constructor args (`new Foo(bar())`) get their own `calls` refs.
      if (INSTANTIATION_KINDS.has(nodeType)) {
        this.extractInstantiation(node);
        return false;
      }

      // (Decorator handling lives inside the symbol-creating extractors,
      // extractClass / extractFunction / extractProperty, because the
      // decorator node sits BEFORE the symbol in the AST and the walker would
      // otherwise see the wrong nodeStack head.)

      // Rust: `impl Trait for Type { ... }` creates an implements edge from
      // Type to Trait.
      if (nodeType === 'impl_item') {
        this.extractRustImplItem(node);
        return false;
      }

      return false;
    })();

    if (subtreeHandled) return;

    // Visit children that no extract method has walked yet.
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child) {
        this.visitNode(child);
      }
    }
  }
  /**
   * Create a graph node for a syntax node, record it, and link it to the
   * current parent scope with a `contains` edge.
   *
   * @param kind - Kind of symbol being created.
   * @param name - Symbol name; an empty name produces no node.
   * @param node - Syntax node the symbol was extracted from (supplies positions).
   * @param extra - Optional fields merged over the computed ones.
   * @returns The created node, or `null` when `name` is empty.
   */
  private createNode(
    kind: NodeKind,
    name: string,
    node: SyntaxNode,
    extra?: Partial<Node>
  ): Node | null {
    // Skip nodes with empty/missing names: they are not meaningful symbols
    // and would cause FK violations when edges reference them (see issue #42).
    if (!name) {
      return null;
    }

    const id = generateNodeId(this.filePath, kind, name, node.startPosition.row + 1);

    // Some grammars (e.g. Dart) model a function/method body as a *sibling* of
    // the signature node, so the declaration node's own range is just the
    // signature line. Extend the end position to the resolved body when it sits
    // beyond the node so the node spans its body. This is required for any
    // body-level analysis (callees, the callback synthesizer's body scan,
    // context slices). Guarded to only ever extend: for child-body grammars the
    // body is within range (no-op).
    let endLine = node.endPosition.row + 1;
    let endColumn = node.endPosition.column;
    if (kind === 'function' || kind === 'method') {
      const body = this.extractor?.resolveBody?.(node, this.extractor.bodyField);
      if (body && body.endPosition.row + 1 > endLine) {
        endLine = body.endPosition.row + 1;
        // Move the column together with the line; otherwise the stored end
        // position would pair the body's row with the signature's column.
        endColumn = body.endPosition.column;
      }
    }

    const newNode: Node = {
      id,
      kind,
      name,
      qualifiedName: this.buildQualifiedName(name),
      filePath: this.filePath,
      language: this.language,
      startLine: node.startPosition.row + 1,
      endLine,
      startColumn: node.startPosition.column,
      endColumn,
      updatedAt: Date.now(),
      ...extra,
    };
    this.nodes.push(newNode);

    // Add containment edge from the innermost enclosing scope, if any.
    if (this.nodeStack.length > 0) {
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      if (parentId) {
        this.edges.push({
          source: parentId,
          target: id,
          kind: 'contains',
        });
      }
    }
    return newNode;
  }
  /**
   * Return the first named child of `node` whose type appears in `types`.
   * Used to locate inner type nodes (e.g. enum_specifier inside a typedef).
   *
   * @param node - Parent syntax node whose named children are scanned in order.
   * @param types - Accepted node type names.
   * @returns The first matching child, or `null` when none matches.
   */
  private findChildByTypes(node: SyntaxNode, types: string[]): SyntaxNode | null {
    let index = 0;
    while (index < node.namedChildCount) {
      const candidate = node.namedChild(index);
      if (candidate && types.includes(candidate.type)) {
        return candidate;
      }
      index += 1;
    }
    return null;
  }
  /**
   * Build a `::`-separated qualified name from the current scope stack.
   *
   * Only the semantic hierarchy is used: file nodes are left out because the
   * file path is stored separately in filePath and pollutes FTS if included.
   * Stack IDs with no matching node are skipped.
   *
   * @param name - Simple name of the symbol being qualified.
   * @returns The names of the enclosing scopes followed by `name`.
   */
  private buildQualifiedName(name: string): string {
    const segments = this.nodeStack.reduce<string[]>((names, scopeId) => {
      const owner = this.nodes.find((candidate) => candidate.id === scopeId);
      if (owner && owner.kind !== 'file') {
        names.push(owner.name);
      }
      return names;
    }, []);
    segments.push(name);
    return segments.join('::');
  }
  /**
   * Build the ExtractorContext handed to language-specific visitNode hooks.
   *
   * The context forwards to this extractor's own methods and exposes its
   * state through getters, so every read reflects the current values.
   *
   * @returns A new context object bound to this extractor.
   */
  private makeExtractorContext(): ExtractorContext {
    // The getters below have their own `this`, so the instance is captured
    // explicitly.
    // eslint-disable-next-line @typescript-eslint/no-this-alias
    const owner = this;
    return {
      createNode(kind, name, node, extra) {
        return owner.createNode(kind, name, node, extra);
      },
      visitNode(node) {
        return owner.visitNode(node);
      },
      visitFunctionBody(body, functionId) {
        return owner.visitFunctionBody(body, functionId);
      },
      addUnresolvedReference(ref) {
        return owner.unresolvedReferences.push(ref);
      },
      pushScope(nodeId) {
        return owner.nodeStack.push(nodeId);
      },
      popScope() {
        return owner.nodeStack.pop();
      },
      get filePath() {
        return owner.filePath;
      },
      get source() {
        return owner.source;
      },
      get nodeStack() {
        return owner.nodeStack;
      },
      get nodes() {
        return owner.nodes;
      },
    };
  }
  /**
   * Report whether the innermost scope on the node stack is class-like.
   *
   * Class-like kinds are class, struct, interface, trait, enum and module.
   * File nodes do not count, and neither does an empty stack or a stack head
   * with no matching node.
   *
   * @returns `true` when the current parent scope is class-like.
   */
  private isInsideClassLikeNode(): boolean {
    const depth = this.nodeStack.length;
    if (depth === 0) return false;

    const scopeId = this.nodeStack[depth - 1];
    if (!scopeId) return false;

    const scope = this.nodes.find((candidate) => candidate.id === scopeId);
    if (!scope) return false;

    switch (scope.kind) {
      case 'class':
      case 'struct':
      case 'interface':
      case 'trait':
      case 'enum':
      case 'module':
        return true;
      default:
        return false;
    }
  }
  /**
   * Extract a function declaration as a `function` node and walk its body.
   *
   * Nodes with a receiver are handed to extractMethod instead. Functions that
   * stay anonymous are skipped entirely; misparse artifacts get no node but
   * their body is still walked.
   *
   * @param node - Syntax node of the function.
   * @param nameOverride - Name resolved by the caller for an otherwise
   *   anonymous function; when given, no name lookup is attempted.
   */
  private extractFunction(node: SyntaxNode, nameOverride?: string): void {
    const lang = this.extractor;
    if (!lang) return;

    // If the language provides getReceiverType and this function has a receiver
    // (e.g. Rust function_item inside an impl block), extract as method instead.
    if (lang.getReceiverType?.(node, this.source)) {
      this.extractMethod(node);
      return;
    }

    // nameOverride is supplied only for explicitly-named anonymous functions the
    // caller resolved itself (e.g. arrow values of exported-const object members,
    // such as SvelteKit actions). Inline-object arrows reached by the general
    // walker get no override, so they still hit the <anonymous> skip below.
    let name = nameOverride ?? extractName(node, this.source, lang);

    // Arrow functions and function expressions assigned to variables take
    // their name from the parent variable_declarator. In
    // `export const useAuth = () => { ... }` the arrow_function node has no
    // `name` field; the name lives on the variable_declarator.
    const isFunctionValue = node.type === 'arrow_function' || node.type === 'function_expression';
    if (!nameOverride && name === '<anonymous>' && isFunctionValue) {
      const holder = node.parent;
      const declaredName =
        holder?.type === 'variable_declarator' ? getChildByField(holder, 'name') : null;
      if (declaredName) {
        name = getNodeText(declaredName, this.source);
      }
    }

    // Skip anonymous functions.
    if (name === '<anonymous>') return;

    // Body lookup shared by the misparse path and the normal path.
    const locateBody = () =>
      lang.resolveBody?.(node, lang.bodyField) ?? getChildByField(node, lang.bodyField);

    // Misparse artifacts (e.g. C++ macros causing "namespace detail" functions):
    // skip the node but still visit the body for calls and structural nodes.
    if (lang.isMisparsedFunction?.(name, node)) {
      const orphanBody = locateBody();
      if (orphanBody) {
        this.visitFunctionBody(orphanBody, '');
      }
      return;
    }

    const created = this.createNode('function', name, node, {
      docstring: getPrecedingDocstring(node, this.source),
      signature: lang.getSignature?.(node, this.source),
      visibility: lang.getVisibility?.(node),
      isExported: lang.isExported?.(node, this.source),
      isAsync: lang.isAsync?.(node),
      isStatic: lang.isStatic?.(node),
    });
    if (!created) return;

    // Type annotations (parameter types and return type).
    this.extractTypeAnnotations(node, created.id);

    // Decorators applied to the function (rare in JS/TS but present in Python
    // `@decorator def f():` and Java/Kotlin annotations on free functions).
    this.extractDecoratorsFor(node, created.id);

    // Walk the body with this function as the enclosing scope.
    this.nodeStack.push(created.id);
    const body = locateBody();
    if (body) {
      this.visitFunctionBody(body, created.id);
    }
    this.nodeStack.pop();
  }
  569. /**
  570. * Extract a class
  571. */
  private extractClass(node: SyntaxNode, kind: NodeKind = 'class'): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const classNode = this.createNode(kind, extractName(node, this.source, extractor), node, {
      docstring: getPrecedingDocstring(node, this.source),
      visibility: extractor.getVisibility?.(node),
      isExported: extractor.isExported?.(node, this.source),
    });
    if (!classNode) return;

    // Heritage (extends / implements) and decorators (`@Foo class X {}`)
    // hang off the declaration node itself.
    this.extractInheritance(node, classNode.id);
    this.extractDecoratorsFor(node, classNode.id);

    // Members are visited with the class on top of the node stack.
    this.nodeStack.push(classNode.id);

    // Prefer the language hook, then the configured body field; when neither
    // yields a body, walk the declaration's own named children instead.
    const memberHost =
      extractor.resolveBody?.(node, extractor.bodyField) ??
      getChildByField(node, extractor.bodyField) ??
      node;
    for (const member of memberHost.namedChildren) {
      if (!member) continue;
      this.visitNode(member);
    }

    this.nodeStack.pop();
  }
  602. /**
  603. * Extract a method
  604. */
  private extractMethod(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    // Receiver-based languages (Go, Rust): the receiver is folded into the
    // qualified name further down so that a search for "scrapeLoop.run" can
    // match qualified_name "...::scrapeLoop::run".
    const receiverType = extractor.getReceiverType?.(node, this.source);

    // A node is kept as a method when it sits inside a class-like node, when
    // the language declares methods at top level (methodsAreTopLevel, e.g. Go),
    // or when a receiver was found (e.g. Rust).
    const qualifiesAsMethod =
      this.isInsideClassLikeNode() || extractor.methodsAreTopLevel || receiverType;
    if (!qualifiesAsMethod) {
      // Getters/setters/methods of inline object literals are dropped: they are
      // ephemeral and only add noise (e.g. Svelte context objects:
      // `ctx.set({ get view() { ... } })`).
      const parentType = node.parent?.type;
      if (parentType === 'object' || parentType === 'object_expression') {
        return;
      }
      // Otherwise it is recorded as a plain function.
      this.extractFunction(node);
      return;
    }

    const name = extractName(node, this.source, extractor);

    // Misparse artifacts (e.g. C++ "switch" inside a macro-confused class body)
    // produce no node; only their body is walked, with an empty owner id.
    if (extractor.isMisparsedFunction?.(name, node)) {
      const orphanBody =
        extractor.resolveBody?.(node, extractor.bodyField) ??
        getChildByField(node, extractor.bodyField);
      if (orphanBody) this.visitFunctionBody(orphanBody, '');
      return;
    }

    const methodProps: Partial<Node> = {
      docstring: getPrecedingDocstring(node, this.source),
      signature: extractor.getSignature?.(node, this.source),
      visibility: extractor.getVisibility?.(node),
      isAsync: extractor.isAsync?.(node),
      isStatic: extractor.isStatic?.(node),
    };
    if (receiverType) {
      methodProps.qualifiedName = `${receiverType}::${name}`;
    }

    const methodNode = this.createNode('method', name, node, methodProps);
    if (!methodNode) return;

    // Receiver known but no class-like parent on the stack (e.g. Rust impl
    // blocks): link the method to a same-file struct/class/enum/trait that
    // carries the receiver's name, if one has already been recorded.
    if (receiverType && !this.isInsideClassLikeNode()) {
      const ownerKinds: NodeKind[] = ['struct', 'class', 'enum', 'trait'];
      const owner = this.nodes.find(
        (candidate) =>
          candidate.name === receiverType &&
          candidate.filePath === this.filePath &&
          ownerKinds.includes(candidate.kind)
      );
      if (owner) {
        this.edges.push({ source: owner.id, target: methodNode.id, kind: 'contains' });
      }
    }

    // Parameter / return type annotations, then decorators
    // (`@Get('/list') list() {}`).
    this.extractTypeAnnotations(node, methodNode.id);
    this.extractDecoratorsFor(node, methodNode.id);

    // Walk the body with the method on top of the node stack.
    this.nodeStack.push(methodNode.id);
    const methodBody =
      extractor.resolveBody?.(node, extractor.bodyField) ??
      getChildByField(node, extractor.bodyField);
    if (methodBody) this.visitFunctionBody(methodBody, methodNode.id);
    this.nodeStack.pop();
  }
  681. /**
  682. * Extract an interface/protocol/trait
  683. */
  private extractInterface(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const name = extractName(node, this.source, extractor);
    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = extractor.isExported?.(node, this.source);

    // The extractor may rename the concept (interfaceKind); the default kind
    // is 'interface'.
    const interfaceKind: NodeKind = extractor.interfaceKind ?? 'interface';
    const interfaceNode = this.createNode(interfaceKind, name, node, { docstring, isExported });
    if (!interfaceNode) return;

    // Interface-to-interface inheritance (extends).
    this.extractInheritance(node, interfaceNode.id);

    // Methods and nested types are visited with the interface on the stack.
    // With no resolvable body, the declaration's own named children are walked.
    this.nodeStack.push(interfaceNode.id);
    const memberHost =
      extractor.resolveBody?.(node, extractor.bodyField) ??
      getChildByField(node, extractor.bodyField) ??
      node;
    for (const member of memberHost.namedChildren) {
      if (!member) continue;
      this.visitNode(member);
    }
    this.nodeStack.pop();
  }
  710. /**
  711. * Extract a struct
  712. */
  private extractStruct(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    // A struct node without a body is a forward declaration or a type
    // reference, not a definition, so it is ignored.
    const structBody = getChildByField(node, extractor.bodyField);
    if (!structBody) return;

    const structNode = this.createNode('struct', extractName(node, this.source, extractor), node, {
      docstring: getPrecedingDocstring(node, this.source),
      visibility: extractor.getVisibility?.(node),
      isExported: extractor.isExported?.(node, this.source),
    });
    if (!structNode) return;

    // Conformances / inheritance (e.g. Swift: struct HTTPMethod: RawRepresentable).
    this.extractInheritance(node, structNode.id);

    // Body children are visited with the struct on top of the node stack.
    this.nodeStack.push(structNode.id);
    for (const member of structBody.namedChildren) {
      if (!member) continue;
      this.visitNode(member);
    }
    this.nodeStack.pop();
  }
  740. /**
  741. * Extract an enum
  742. */
  private extractEnum(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    // No body means a forward declaration or type reference, not a definition.
    const enumBody =
      extractor.resolveBody?.(node, extractor.bodyField) ??
      getChildByField(node, extractor.bodyField);
    if (!enumBody) return;

    const enumNode = this.createNode('enum', extractName(node, this.source, extractor), node, {
      docstring: getPrecedingDocstring(node, this.source),
      visibility: extractor.getVisibility?.(node),
      isExported: extractor.isExported?.(node, this.source),
    });
    if (!enumNode) return;

    // Conformances / inheritance (e.g. Swift: enum AFError: Error).
    this.extractInheritance(node, enumNode.id);

    // With the enum on the stack, children whose type is listed in
    // enumMemberTypes become enum members; anything else (nested types,
    // methods) goes through the general visitor.
    this.nodeStack.push(enumNode.id);
    const memberTypes = extractor.enumMemberTypes;
    for (const entry of enumBody.namedChildren) {
      if (!entry) continue;
      const isMember = memberTypes?.includes(entry.type);
      if (isMember) {
        this.extractEnumMembers(entry);
      } else {
        this.visitNode(entry);
      }
    }
    this.nodeStack.pop();
  }
  775. /**
  776. * Extract enum member names from an enum member node.
  777. * Handles multi-case declarations (Swift: `case put, delete`) and single-case patterns.
  778. */
  private extractEnumMembers(node: SyntaxNode): void {
    // 1) A `name` field on the member node (e.g. Rust enum_variant) names it.
    const namedField = getChildByField(node, 'name');
    if (namedField) {
      this.createNode('enum_member', getNodeText(namedField, this.source), node);
      return;
    }

    // 2) Otherwise every identifier-like named child is one member
    //    (Swift: simple_identifier, TS: property_identifier). A multi-case
    //    declaration therefore yields several members.
    const identifierTypes = ['simple_identifier', 'identifier', 'property_identifier'];
    const identifiers = node.namedChildren.filter(
      (child) => child && identifierTypes.includes(child.type)
    );
    for (const identifier of identifiers) {
      this.createNode('enum_member', getNodeText(identifier, this.source), identifier);
    }

    // 3) A node with no named children at all is itself the identifier
    //    (e.g. a TS property_identifier sitting directly in the enum body).
    if (identifiers.length === 0 && node.namedChildCount === 0) {
      this.createNode('enum_member', getNodeText(node, this.source), node);
    }
  }
  800. /**
  801. * Extract a class property declaration (e.g. C# `public string Name { get; set; }`).
  802. * Extracts as 'property' kind node inside the owning class.
  803. */
  private extractProperty(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const docstring = getPrecedingDocstring(node, this.source);
    const visibility = extractor.getVisibility?.(node);
    const isStatic = extractor.isStatic?.(node) ?? false;

    // Name resolution: a non-nullish result from the language hook is used as
    // is; otherwise fall back to the `name` field, then to the first
    // identifier child.
    const hookName = extractor.extractPropertyName?.(node, this.source);
    let name: string | null;
    if (hookName != null) {
      name = hookName;
    } else {
      const nameNode =
        getChildByField(node, 'name') ||
        node.namedChildren.find((child) => child.type === 'identifier');
      name = nameNode ? getNodeText(nameNode, this.source) : null;
    }
    if (!name) return;

    // The property type is taken from the first named child that is not a
    // modifier, an identifier, an accessor list or an initializer clause.
    const nonTypeChildren = new Set([
      'modifier',
      'modifiers',
      'identifier',
      'accessor_list',
      'accessors',
      'equals_value_clause',
    ]);
    const typeNode = node.namedChildren.find((child) => !nonTypeChildren.has(child.type));
    const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined;

    let signature = name;
    if (typeText) {
      signature = `${typeText} ${name}`;
    }

    const propNode = this.createNode('property', name, node, {
      docstring,
      signature,
      visibility,
      isStatic,
    });
    if (!propNode) return;

    // Decorated properties (`@Inject() private svc: Foo` and similar) also get
    // their decorator->target relationship recorded.
    this.extractDecoratorsFor(node, propNode.id);
  }
  835. /**
  836. * Extract a class field declaration (e.g. Java field_declaration, C# field_declaration).
  837. * Extracts each declarator as a 'field' kind node inside the owning class.
  838. */
  private extractField(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const docstring = getPrecedingDocstring(node, this.source);
    const visibility = extractor.getVisibility?.(node);
    const isStatic = extractor.isStatic?.(node) ?? false;

    // Java: `private final String name = value;` keeps its
    // variable_declarator(s) as direct children of field_declaration.
    // C#: they are nested one level down, inside a variable_declaration.
    const declarationWrapper = node.namedChildren.find(
      (child) => child.type === 'variable_declaration'
    );
    let declarators = node.namedChildren.filter((child) => child.type === 'variable_declarator');
    if (declarators.length === 0 && declarationWrapper) {
      declarators = declarationWrapper.namedChildren.filter(
        (child) => child.type === 'variable_declarator'
      );
    }

    if (declarators.length === 0) {
      // PHP property_declaration: property_element → variable_name → name.
      const propertyElements = node.namedChildren.filter(
        (child) => child.type === 'property_element'
      );
      if (propertyElements.length > 0) {
        // Optional type annotation (e.g. "string", "int", "?Foo"): the first
        // child that is neither a modifier nor a property element.
        const phpNonTypeChildren = new Set([
          'visibility_modifier',
          'static_modifier',
          'readonly_modifier',
          'property_element',
          'var_modifier',
        ]);
        const phpTypeNode = node.namedChildren.find(
          (child) => !phpNonTypeChildren.has(child.type)
        );
        const phpTypeText = phpTypeNode ? getNodeText(phpTypeNode, this.source) : undefined;

        for (const element of propertyElements) {
          const variableName = element.namedChildren.find(
            (child) => child.type === 'variable_name'
          );
          const nameNode = variableName?.namedChildren.find((child) => child.type === 'name');
          if (!nameNode) continue;

          const name = getNodeText(nameNode, this.source);
          const signature = phpTypeText ? `${phpTypeText} $${name}` : `$${name}`;
          this.createNode('field', name, element, {
            docstring,
            signature,
            visibility,
            isStatic,
          });
        }
        return;
      }

      // Last resort: a `name` field or a bare identifier child on the
      // declaration itself. No signature is recorded on this path.
      const bareName =
        getChildByField(node, 'name') ||
        node.namedChildren.find((child) => child.type === 'identifier');
      if (bareName) {
        this.createNode('field', getNodeText(bareName, this.source), node, {
          docstring,
          visibility,
          isStatic,
        });
      }
      return;
    }

    // Declarator path. The type sits next to the declarators: directly under
    // field_declaration (Java) or inside the variable_declaration wrapper (C#).
    const typeHost = declarationWrapper ?? node;
    const nonTypeChildren = new Set([
      'modifiers',
      'modifier',
      'variable_declarator',
      'variable_declaration',
      'marker_annotation',
      'annotation',
    ]);
    const typeNode = typeHost.namedChildren.find((child) => !nonTypeChildren.has(child.type));
    const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined;

    for (const declarator of declarators) {
      const nameNode =
        getChildByField(declarator, 'name') ||
        declarator.namedChildren.find((child) => child.type === 'identifier');
      if (!nameNode) continue;

      const name = getNodeText(nameNode, this.source);
      const signature = typeText ? `${typeText} ${name}` : name;
      const fieldNode = this.createNode('field', name, declarator, {
        docstring,
        signature,
        visibility,
        isStatic,
      });

      // Annotations / decorators are attached to the outer field declaration,
      // not to the individual declarator, so they are read from `node`.
      if (fieldNode) {
        this.extractDecoratorsFor(node, fieldNode.id);
      }
    }
  }
  924. /**
  925. * Extract a variable declaration (const, let, var, etc.)
  926. *
  927. * Extracts top-level and module-level variable declarations.
  928. * Captures the variable name and first 100 chars of initializer in signature for searchability.
  929. */
  private extractVariable(node: SyntaxNode): void {
    if (!this.extractor) return;

    // Declaration shapes differ per language:
    //   TypeScript/JavaScript: lexical_declaration → variable_declarator children
    //   Python/Ruby: assignment with left (identifier) and right (value)
    //   Go: var_declaration, short_var_declaration, const_declaration
    //   Lua/Luau: variable_declaration → assignment_statement
    const isConst = this.extractor.isConst?.(node) ?? false;
    const kind: NodeKind = isConst ? 'constant' : 'variable';
    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = this.extractor.isExported?.(node, this.source) ?? false;

    // Builds the searchable `= <initializer>` preview stored in `signature`.
    // At most MAX_INIT_CHARS characters of the initializer are kept, and the
    // "..." marker is appended only when text was actually cut off (an
    // initializer of exactly MAX_INIT_CHARS characters is complete, so it gets
    // no marker). Returns undefined when there is no initializer or its text
    // is empty.
    const MAX_INIT_CHARS = 100;
    const initSignatureOf = (valueNode: SyntaxNode | null | undefined): string | undefined => {
      if (!valueNode) return undefined;
      const fullText = getNodeText(valueNode, this.source);
      if (!fullText) return undefined;
      const ellipsis = fullText.length > MAX_INIT_CHARS ? '...' : '';
      return `= ${fullText.slice(0, MAX_INIT_CHARS)}${ellipsis}`;
    };

    if (this.language === 'typescript' || this.language === 'javascript' ||
        this.language === 'tsx' || this.language === 'jsx') {
      // lexical_declaration / variable_declaration: one or more
      // variable_declarator children.
      for (let i = 0; i < node.namedChildCount; i++) {
        const declarator = node.namedChild(i);
        if (declarator?.type !== 'variable_declarator') continue;

        const nameNode = getChildByField(declarator, 'name');
        const valueNode = getChildByField(declarator, 'value');
        if (!nameNode) continue;

        // Skip destructured patterns (e.g. `let { x, y } = $props()` in Svelte);
        // they would produce ugly multi-line names like "{ class: className }".
        if (nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern') {
          continue;
        }

        const name = getNodeText(nameNode, this.source);

        // Arrow functions / function expressions are recorded as functions,
        // not variables; extractFunction resolves the name from the declarator.
        if (valueNode && (valueNode.type === 'arrow_function' || valueNode.type === 'function_expression')) {
          this.extractFunction(valueNode);
          continue;
        }

        const varNode = this.createNode(kind, name, declarator, {
          docstring,
          signature: initSignatureOf(valueNode),
          isExported,
        });

        // Type annotation references (e.g. `const x: ITextModel = ...`).
        if (varNode) {
          this.extractVariableTypeAnnotation(declarator, varNode.id);
        }

        // Exported const object-of-functions: `export const actions =
        // { default: async () => {} }` (SvelteKit form actions / handler maps
        // / route tables). Each function-valued property becomes a function
        // named by its key (surrounding quote characters stripped) and its body
        // is walked so its calls are captured. Limited to EXPORTED consts so
        // that inline-object noise (`ctx.set({...})`) stays excluded.
        if (isExported && valueNode &&
            (valueNode.type === 'object' || valueNode.type === 'object_expression')) {
          for (let j = 0; j < valueNode.namedChildCount; j++) {
            const pair = valueNode.namedChild(j);
            if (pair?.type !== 'pair') continue;
            const v = getChildByField(pair, 'value');
            const k = getChildByField(pair, 'key');
            if (k && v && (v.type === 'arrow_function' || v.type === 'function_expression')) {
              this.extractFunction(v, getNodeText(k, this.source).replace(/^['"`]|['"`]$/g, ''));
            }
          }
        }
      }
    } else if (this.language === 'python' || this.language === 'ruby') {
      // Python/Ruby assignment: left = right. Only assignments whose target is
      // a plain identifier are recorded; no filtering on name casing is done.
      const left = getChildByField(node, 'left') || node.namedChild(0);
      const right = getChildByField(node, 'right') || node.namedChild(1);
      if (left && left.type === 'identifier') {
        const name = getNodeText(left, this.source);
        this.createNode(kind, name, node, {
          docstring,
          signature: initSignatureOf(right),
        });
      }
    } else if (this.language === 'go') {
      // var_declaration / const_declaration: one node per var_spec/const_spec,
      // named by the spec's first child when that child is an identifier.
      const specs = node.namedChildren.filter(c =>
        c.type === 'var_spec' || c.type === 'const_spec'
      );
      for (const spec of specs) {
        const nameNode = spec.namedChild(0);
        if (nameNode && nameNode.type === 'identifier') {
          const name = getNodeText(nameNode, this.source);
          // The last named child is taken as the value when the spec has more
          // than one named child.
          const valueNode = spec.namedChildCount > 1 ? spec.namedChild(spec.namedChildCount - 1) : null;
          this.createNode(node.type === 'const_declaration' ? 'constant' : 'variable', name, spec, {
            docstring,
            signature: initSignatureOf(valueNode),
          });
        }
      }

      // short_var_declaration (:=): the left side may be an expression_list
      // with several identifiers; every name shares the same right-hand preview.
      if (node.type === 'short_var_declaration') {
        const left = getChildByField(node, 'left');
        const right = getChildByField(node, 'right');
        if (left) {
          const identifiers = left.type === 'expression_list'
            ? left.namedChildren.filter(c => c.type === 'identifier')
            : [left];
          const sharedSignature = initSignatureOf(right);
          for (const id of identifiers) {
            const name = getNodeText(id, this.source);
            this.createNode('variable', name, node, {
              docstring,
              signature: sharedSignature,
            });
          }
        }
      }
    } else if (this.language === 'lua' || this.language === 'luau') {
      // Lua/Luau: variable_declaration → assignment_statement → variable_list
      // (name: identifier...) = expression_list. `local x, y = 1, 2` declares
      // several names; only plain identifiers are recorded, each paired with
      // the value at the same position (if any).
      const assign = node.namedChildren.find((c) => c.type === 'assignment_statement') ?? node;
      const varList = assign.namedChildren.find((c) => c.type === 'variable_list');
      const exprList = assign.namedChildren.find((c) => c.type === 'expression_list');
      const values = exprList ? exprList.namedChildren : [];
      const names = varList ? varList.namedChildren.filter((c) => c.type === 'identifier') : [];
      names.forEach((nameNode, i) => {
        const name = getNodeText(nameNode, this.source);
        if (!name) return;
        this.createNode(kind, name, nameNode, {
          docstring,
          signature: initSignatureOf(values[i]),
          isExported,
        });
      });
    } else {
      // Generic fallback for other languages: record each identifier or
      // variable_declarator child that resolves to a real name.
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child?.type === 'identifier' || child?.type === 'variable_declarator') {
          const name = child.type === 'identifier'
            ? getNodeText(child, this.source)
            : extractName(child, this.source, this.extractor);
          if (name && name !== '<anonymous>') {
            this.createNode(kind, name, child, {
              docstring,
              isExported,
            });
          }
        }
      }
    }
  }
  1085. /**
  1086. * Extract a type alias (e.g. `export type X = ...` in TypeScript).
  1087. * For languages like Go, resolveTypeAliasKind detects when the type_spec
  1088. * wraps a struct or interface definition and creates the correct node kind.
  1089. * Returns true if children should be skipped (struct/interface handled body visiting).
  1090. */
  private extractTypeAlias(node: SyntaxNode): boolean {
    const extractor = this.extractor;
    if (!extractor) return false;

    const name = extractName(node, this.source, extractor);
    if (name === '<anonymous>') return false;

    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = extractor.isExported?.(node, this.source);

    // Some "aliases" are really definitions (e.g. Go: `type Foo struct { ... }`
    // is a type_spec wrapping struct_type); the language hook reports which.
    switch (extractor.resolveTypeAliasKind?.(node, this.source)) {
      case 'struct': {
        const structNode = this.createNode('struct', name, node, { docstring, isExported });
        if (!structNode) return true;

        this.nodeStack.push(structNode.id);
        // Go-style `type` field first, then an inner struct child
        // (C: typedef struct).
        const innerStruct =
          getChildByField(node, 'type') || this.findChildByTypes(node, extractor.structTypes);
        if (innerStruct) {
          // Struct embedding (e.g. Go: `type DB struct { *Head; Queryable }`).
          this.extractInheritance(innerStruct, structNode.id);
          // Fields live in the inner node's body, or directly on the inner
          // node when it has no body field.
          const fieldHost = getChildByField(innerStruct, extractor.bodyField) || innerStruct;
          for (const member of fieldHost.namedChildren) {
            if (member) this.visitNode(member);
          }
        }
        this.nodeStack.pop();
        return true;
      }

      case 'enum': {
        const enumNode = this.createNode('enum', name, node, { docstring, isExported });
        if (!enumNode) return true;

        this.nodeStack.push(enumNode.id);
        // Locate the inner enum type child (e.g. C: typedef enum { ... } name).
        const innerEnum = this.findChildByTypes(node, extractor.enumTypes);
        if (innerEnum) {
          this.extractInheritance(innerEnum, enumNode.id);
          const enumBody =
            extractor.resolveBody?.(innerEnum, extractor.bodyField) ??
            getChildByField(innerEnum, extractor.bodyField);
          if (enumBody) {
            // Children listed in enumMemberTypes become members; everything
            // else goes through the general visitor.
            const memberTypes = extractor.enumMemberTypes;
            for (const entry of enumBody.namedChildren) {
              if (!entry) continue;
              if (memberTypes?.includes(entry.type)) {
                this.extractEnumMembers(entry);
              } else {
                this.visitNode(entry);
              }
            }
          }
        }
        this.nodeStack.pop();
        return true;
      }

      case 'interface': {
        const interfaceKind: NodeKind = extractor.interfaceKind ?? 'interface';
        const interfaceNode = this.createNode(interfaceKind, name, node, { docstring, isExported });
        if (!interfaceNode) return true;

        // Inheritance is read from the inner type node; the interface body is
        // not walked on this path.
        const innerType = getChildByField(node, 'type');
        if (innerType) this.extractInheritance(innerType, interfaceNode.id);
        return true;
      }

      default: {
        // A genuine alias.
        const aliasNode = this.createNode('type_alias', name, node, {
          docstring,
          isExported,
        });

        // For languages in TYPE_ANNOTATION_LANGUAGES, collect type references
        // from the alias value (e.g. `type X = ITextModel | null`), read from
        // the declaration's `value` field.
        if (aliasNode && this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) {
          const aliasedValue = getChildByField(node, 'value');
          if (aliasedValue) {
            this.extractTypeRefsFromSubtree(aliasedValue, aliasNode.id);
          }
        }
        return false;
      }
    }
  }
  1170. // extractExportedVariables removed — the walker now descends into
  1171. // export_statement children and the inner declaration's dedicated
  1172. // extractor (extractVariable, extractFunction, extractClass, etc.)
  1173. // handles the symbol with isExported=true via parent-walk in the
  1174. // language extractor's isExported predicate.
  1175. /**
  1176. * Extract an import
  1177. *
  1178. * Creates an import node with the full import statement stored in signature for searchability.
  1179. * Also creates unresolved references for resolution purposes.
  1180. */
  private extractImport(node: SyntaxNode): void {
    if (!this.extractor) return;
    const importText = getNodeText(node, this.source).trim();

    // Every `imports` reference originates from the innermost node on the
    // stack; with an empty stack there is nothing to attach a reference to.
    const parentId =
      this.nodeStack.length > 0 ? this.nodeStack[this.nodeStack.length - 1] : undefined;
    const pushImportRef = (referenceName: string, at: SyntaxNode): void => {
      if (!parentId || !referenceName) return;
      this.unresolvedReferences.push({
        fromNodeId: parentId,
        referenceName,
        referenceKind: 'imports',
        line: at.startPosition.row + 1,
        column: at.startPosition.column,
      });
    };

    // Try language-specific hook first
    if (this.extractor.extractImport) {
      const info = this.extractor.extractImport(node, this.source);
      if (info) {
        this.createNode('import', info.moduleName, node, {
          signature: info.signature,
        });
        // Create unresolved reference unless the hook handled it
        if (!info.handledRefs) pushImportRef(info.moduleName, node);
        return;
      }
      // Hook returned null — fall through to multi-import inline handlers only
      // (hook returning null means "I didn't handle this" for multi-import cases,
      // NOT "use generic fallback" — the hook already declined)
    }

    // Multi-import cases that create multiple nodes (can't be expressed with single-return hook)

    // Python import_statement: import os, sys (creates one import per module)
    if (this.language === 'python' && node.type === 'import_statement') {
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (!child) continue;
        // `import a.b` → dotted_name; `import a.b as c` → aliased_import > dotted_name
        let moduleNode: SyntaxNode | undefined;
        if (child.type === 'dotted_name') {
          moduleNode = child;
        } else if (child.type === 'aliased_import') {
          moduleNode = child.namedChildren.find((c: SyntaxNode) => c.type === 'dotted_name');
        }
        if (!moduleNode) continue;
        const moduleName = getNodeText(moduleNode, this.source);
        this.createNode('import', moduleName, node, {
          signature: importText,
        });
        // Mirror the hook and Go paths: without a reference the resolver has
        // nothing to turn into an imports edge for this module.
        pushImportRef(moduleName, moduleNode);
      }
      return;
    }

    // Go imports: single or grouped (creates one import per spec)
    if (this.language === 'go') {
      const extractFromSpec = (spec: SyntaxNode): void => {
        // The import path may be written as "fmt" or as a raw string `fmt`.
        const pathLiteral = spec.namedChildren.find(
          (c: SyntaxNode) =>
            c.type === 'interpreted_string_literal' || c.type === 'raw_string_literal'
        );
        if (!pathLiteral) return;
        const importPath = getNodeText(pathLiteral, this.source).replace(/['"`]/g, '');
        if (!importPath) return;
        this.createNode('import', importPath, spec, {
          signature: getNodeText(spec, this.source).trim(),
        });
        // Create unresolved reference so the resolver can create imports edges
        pushImportRef(importPath, spec);
      };

      const importSpecList = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'import_spec_list'
      );
      if (importSpecList) {
        for (const spec of importSpecList.namedChildren) {
          if (spec.type === 'import_spec') extractFromSpec(spec);
        }
      } else {
        const importSpec = node.namedChildren.find((c: SyntaxNode) => c.type === 'import_spec');
        if (importSpec) extractFromSpec(importSpec);
      }
      return;
    }

    // PHP grouped imports: use X\{A, B} (creates one import per item)
    if (this.language === 'php') {
      const namespacePrefix = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'namespace_name'
      );
      const useGroup = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'namespace_use_group'
      );
      if (namespacePrefix && useGroup) {
        const prefix = getNodeText(namespacePrefix, this.source);
        const useClauses = useGroup.namedChildren.filter(
          (c: SyntaxNode) =>
            c.type === 'namespace_use_group_clause' || c.type === 'namespace_use_clause'
        );
        for (const clause of useClauses) {
          const nsName = clause.namedChildren.find(
            (c: SyntaxNode) => c.type === 'namespace_name'
          );
          const name = nsName
            ? nsName.namedChildren.find((c: SyntaxNode) => c.type === 'name')
            : clause.namedChildren.find((c: SyntaxNode) => c.type === 'name');
          if (!name) continue;
          const fullPath = `${prefix}\\${getNodeText(name, this.source)}`;
          this.createNode('import', fullPath, node, {
            signature: importText,
          });
          // One reference per grouped item, same as a standalone import.
          pushImportRef(fullPath, clause);
        }
        return;
      }
    }

    // If a hook exists but returned null, it intentionally declined this node — don't create fallback
    if (this.extractor.extractImport) return;

    // Generic fallback for languages without hooks
    this.createNode('import', importText, node, {
      signature: importText,
    });
  }
  /**
   * Record a `calls` reference for a call-like node.
   *
   * The callee name is derived from the node's shape and, when a simple
   * receiver is available, qualified as `receiver.method`. Nothing is
   * recorded when there is no enclosing node on the stack or when no callee
   * name could be determined.
   */
  private extractCall(node: SyntaxNode): void {
    const callerId = this.nodeStack[this.nodeStack.length - 1];
    if (!callerId) return;

    const text = (n: SyntaxNode): string => getNodeText(n, this.source);
    // `receiver.method`, unless the receiver is one that doesn't aid resolution.
    const qualify = (receiver: string, method: string, ignored: string[]): string =>
      ignored.includes(receiver) ? method : `${receiver}.${method}`;

    // Java/Kotlin method_invocation has 'object' + 'name' fields instead of 'function';
    // PHP member_call_expression has 'object' + 'name', scoped_call_expression has 'scope' + 'name'.
    const RECEIVER_CALL_TYPES = [
      'method_invocation',
      'member_call_expression',
      'scoped_call_expression',
    ];
    // Go uses selector_expression with 'field', JS/TS member_expression with 'property',
    // Kotlin navigation_expression, C/C++ field_expression, Python attribute.
    const MEMBER_ACCESS_TYPES = [
      'member_expression',
      'attribute',
      'selector_expression',
      'navigation_expression',
      'field_expression',
    ];
    const PLAIN_RECEIVER_TYPES = ['identifier', 'simple_identifier', 'field_identifier'];

    const nameField = getChildByField(node, 'name');
    const objectField = getChildByField(node, 'object') || getChildByField(node, 'scope');

    let calleeName = '';

    if (nameField && objectField && RECEIVER_CALL_TYPES.includes(node.type)) {
      // receiver.method() / $receiver->method() / ClassName::method()
      const methodName = text(nameField);
      if (methodName) {
        // Strip PHP $ prefix from variable names before comparing/qualifying.
        const receiverName = text(objectField).replace(/^\$/, '');
        calleeName = qualify(receiverName, methodName, [
          'self',
          'this',
          'cls',
          'super',
          'parent',
          'static',
        ]);
      }
    } else if (node.type === 'message_expression') {
      // ObjC: one `method` field child per selector keyword. Multi-keyword
      // selectors are rebuilt as `a:b:c:` so call sites match the selector
      // names produced for method definitions.
      const keywords: string[] = [];
      for (let idx = 0; idx < node.namedChildCount; idx++) {
        if (node.fieldNameForNamedChild(idx) !== 'method') continue;
        const keywordNode = node.namedChild(idx);
        if (keywordNode) keywords.push(text(keywordNode));
      }
      if (keywords.length > 0) {
        const selector: string =
          keywords.length === 1
            ? (keywords[0] as string)
            : keywords.map((k) => `${k}:`).join('');
        const receiverField = getChildByField(node, 'receiver');
        // Nested message receivers are not used for qualification.
        const receiverName =
          receiverField && receiverField.type !== 'message_expression'
            ? text(receiverField)
            : '';
        calleeName = receiverName ? qualify(receiverName, selector, ['self', 'super']) : selector;
      }
    } else {
      const func = getChildByField(node, 'function') || node.namedChild(0);
      if (func && MEMBER_ACCESS_TYPES.includes(func.type)) {
        // Method call: obj.method() or obj.field.method()
        let property = getChildByField(func, 'property') || getChildByField(func, 'field');
        if (!property) {
          const second = func.namedChild(1);
          // Kotlin: navigation_suffix wraps the method name — unwrap to simple_identifier.
          property =
            second?.type === 'navigation_suffix'
              ? (second.namedChildren.find((c: SyntaxNode) => c.type === 'simple_identifier') ??
                second)
              : second;
        }
        if (property) {
          const methodName = text(property);
          // Keep the receiver for qualified resolution (console.print vs builtin print),
          // but only when it is a plain identifier.
          const receiver =
            getChildByField(func, 'object') ||
            getChildByField(func, 'operand') ||
            getChildByField(func, 'argument') ||
            func.namedChild(0);
          calleeName =
            receiver && PLAIN_RECEIVER_TYPES.includes(receiver.type)
              ? qualify(text(receiver), methodName, ['self', 'this', 'cls', 'super'])
              : methodName;
        }
      } else if (func) {
        // Bare function call or scoped call (Module::function()): use the full text.
        calleeName = text(func);
      }
    }

    if (!calleeName) return;
    this.unresolvedReferences.push({
      fromNodeId: callerId,
      referenceName: calleeName,
      referenceKind: 'calls',
      line: node.startPosition.row + 1,
      column: node.startPosition.column,
    });
  }
  /**
   * `new Foo(...)` / `Foo::new(...)` / object_creation_expression —
   * emit an `instantiates` reference to the class name. The resolver
   * then links it to the class node, producing the `instantiates`
   * edge that powers "what creates instances of X" queries.
   *
   * The emitted name is the bare class identifier: type arguments
   * (`Map<K, V>` → `Map`) and namespace qualifiers written with `.`,
   * `::` or `\` (`ns.Foo`, `ns::Foo`, `\App\Foo` → `Foo`) are removed.
   *
   * Children are still walked so nested calls inside the constructor
   * arguments (`new Foo(bar())`) get their own `calls` references.
   *
   * @param node Instantiation node to inspect.
   */
  private extractInstantiation(node: SyntaxNode): void {
    if (this.nodeStack.length === 0) return;
    const fromId = this.nodeStack[this.nodeStack.length - 1];
    if (!fromId) return;

    // The class name is in the `constructor`/`type`/`name` field or the
    // first named child, depending on grammar.
    const ctor =
      getChildByField(node, 'constructor') ||
      getChildByField(node, 'type') ||
      getChildByField(node, 'name') ||
      node.namedChild(0);
    if (!ctor) return;

    let className = getNodeText(ctor, this.source);

    // Strip type-argument suffix first: `new Map<K, V>()` would
    // otherwise produce className 'Map<K, V>' and resolution would fail
    // because no class is named with the angle-bracket suffix.
    const ltIdx = className.indexOf('<');
    if (ltIdx > 0) className = className.slice(0, ltIdx);

    // For namespaced/qualified constructors keep the trailing identifier —
    // that's what matches a class node in the index. `\` covers PHP
    // namespaces (`new \App\Models\User()`), which previously kept the
    // whole qualified path.
    const lastSep = Math.max(
      className.lastIndexOf('.'),
      className.lastIndexOf('::'),
      className.lastIndexOf('\\')
    );
    // For `::` the index points at the first colon, so one separator
    // character can remain at the front after slicing; drop it.
    if (lastSep >= 0) className = className.slice(lastSep + 1).replace(/^[:.]/, '');

    className = className.trim();
    if (className) {
      this.unresolvedReferences.push({
        fromNodeId: fromId,
        referenceName: className,
        referenceKind: 'instantiates',
        line: node.startPosition.row + 1,
        column: node.startPosition.column,
      });
    }
  }
  /**
   * Scan `declNode` and its preceding siblings (within the parent's
   * named children) for decorator nodes, emitting a `decorates`
   * reference from `decoratedId` to each decorator's function name.
   *
   * Why preceding siblings: in TypeScript, `@Foo class Bar {}` parses
   * as an `export_statement` (or top-level wrapper) with the
   * `decorator` as a child *before* the `class_declaration` — so the
   * decorator isn't a child of the class itself. For methods/
   * properties, the decorator IS a direct child of the declaration,
   * so we also scan declNode.namedChildren.
   *
   * Idempotent across grammars: if neither location yields decorators
   * (most non-decorator-using languages), the function is a no-op.
   *
   * @param declNode    Declaration whose decorators should be collected.
   * @param decoratedId Id of the graph node the references originate from.
   */
  private extractDecoratorsFor(declNode: SyntaxNode, decoratedId: string): void {
    // `marker_annotation` is Java's grammar for arg-less annotations
    // (`@Override`, `@Deprecated`); without including it, every
    // such Java annotation would be silently skipped.
    const isDecoratorNode = (n: SyntaxNode): boolean =>
      n.type === 'decorator' || n.type === 'annotation' || n.type === 'marker_annotation';

    // Node types whose text is the decorator's (possibly qualified) name.
    // `attribute` is the Python grammar's dotted access (`@functools.cache`).
    const NAME_TYPES = [
      'identifier',
      'member_expression',
      'attribute',
      'scoped_identifier',
      'navigation_expression',
    ];

    const consider = (n: SyntaxNode | null): void => {
      if (!n || !isDecoratorNode(n)) return;

      // Find the leading identifier: skip the `@` punct, unwrap a call
      // if the decorator is invoked with args. The Python grammar names
      // that node `call` rather than `call_expression`, so both are
      // accepted — otherwise `@app.route('/')` yields no reference.
      let target: SyntaxNode | null = null;
      for (let i = 0; i < n.namedChildCount; i++) {
        const child = n.namedChild(i);
        if (!child) continue;
        if (child.type === 'call_expression' || child.type === 'call') {
          const fn = getChildByField(child, 'function') ?? child.namedChild(0);
          if (fn) {
            target = fn;
            break;
          }
          continue;
        }
        if (NAME_TYPES.includes(child.type)) {
          target = child;
          break;
        }
      }
      if (!target) return;

      // Keep only the trailing segment of a qualified name (`ns.Foo` → `Foo`).
      let name = getNodeText(target, this.source);
      const lastDot = Math.max(name.lastIndexOf('.'), name.lastIndexOf('::'));
      if (lastDot >= 0) name = name.slice(lastDot + 1).replace(/^[:.]/, '');
      if (!name) return;

      this.unresolvedReferences.push({
        fromNodeId: decoratedId,
        referenceName: name,
        referenceKind: 'decorates',
        line: n.startPosition.row + 1,
        column: n.startPosition.column,
      });
    };

    // 1. Decorators that are direct children of the declaration
    //    (method/property style, also some grammars for class).
    for (let i = 0; i < declNode.namedChildCount; i++) {
      consider(declNode.namedChild(i));
    }

    // 2. Decorators that are PRECEDING siblings of the declaration
    //    inside the parent's children (TypeScript class style).
    //    Walk BACKWARDS from the declaration and stop at the first
    //    non-decorator sibling — without that stop, decorators
    //    belonging to an EARLIER unrelated declaration leak in
    //    (e.g. `@A class Foo {} @B class Bar {}` would otherwise
    //    attribute @A to Bar).
    //
    //    Note on identity: tree-sitter web bindings return fresh JS
    //    wrapper objects from `parent`/`namedChild` navigation, so
    //    `sibling === declNode` is unreliable — `startIndex` does
    //    the matching instead.
    const parent = declNode.parent;
    if (!parent) return;

    const declStart = declNode.startIndex;
    let declIdx = -1;
    for (let i = 0; i < parent.namedChildCount; i++) {
      const sibling = parent.namedChild(i);
      if (sibling && sibling.startIndex === declStart) {
        declIdx = i;
        break;
      }
    }

    for (let j = declIdx - 1; j >= 0; j--) {
      const sibling = parent.namedChild(j);
      if (!sibling) continue;
      if (!isDecoratorNode(sibling)) break; // non-decorator separator → stop consuming
      consider(sibling);
    }
  }
  /**
   * Walk a function body, recording calls and instantiations and handing
   * nested definitions to their dedicated extractors.
   *
   * Besides call expressions, class/struct/enum/interface definitions found
   * inside the body are extracted as well. That covers:
   *   1. Local class/struct/enum definitions (valid in C++, Java, etc.)
   *   2. C++ macro misparsing — macros like NLOHMANN_JSON_NAMESPACE_BEGIN make
   *      tree-sitter read a namespace block as a function_definition, which
   *      hides real class/struct/enum nodes inside the "function body".
   *
   * The second parameter is not read by this method.
   */
  private visitFunctionBody(body: SyntaxNode, _functionId: string): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const walk = (current: SyntaxNode): void => {
      const kind = current.type;

      // --- references produced by this node itself ---
      if (extractor.callTypes.includes(kind)) {
        this.extractCall(current);
      } else if (INSTANTIATION_KINDS.has(kind)) {
        // `new Foo()` inside a body → `instantiates` reference.
        this.extractInstantiation(current);
      } else if (extractor.extractBareCall) {
        const bareCallee = extractor.extractBareCall(current, this.source);
        const callerId = this.nodeStack[this.nodeStack.length - 1];
        if (bareCallee && callerId) {
          this.unresolvedReferences.push({
            fromNodeId: callerId,
            referenceName: bareCallee,
            referenceKind: 'calls',
            line: current.startPosition.row + 1,
            column: current.startPosition.column,
          });
        }
      }

      // --- nested NAMED functions become their own nodes ---
      // Anonymous arrows/expressions are not extracted here; they fall through
      // to the child recursion so their inner calls stay attributed to the
      // enclosing function. extractFunction walks the nested body itself.
      if (extractor.functionTypes.includes(kind)) {
        const nestedName = extractName(current, this.source, extractor);
        if (nestedName && nestedName !== '<anonymous>') {
          this.extractFunction(current);
          return;
        }
      }

      // --- structural nodes: each extractor visits its own children ---
      if (extractor.classTypes.includes(kind)) {
        const classification = extractor.classifyClassNode?.(current) ?? 'class';
        switch (classification) {
          case 'struct':
            this.extractStruct(current);
            break;
          case 'enum':
            this.extractEnum(current);
            break;
          case 'interface':
            this.extractInterface(current);
            break;
          case 'trait':
            this.extractClass(current, 'trait');
            break;
          default:
            this.extractClass(current);
        }
        return;
      }
      if (extractor.structTypes.includes(kind)) {
        this.extractStruct(current);
        return;
      }
      if (extractor.enumTypes.includes(kind)) {
        this.extractEnum(current);
        return;
      }
      if (extractor.interfaceTypes.includes(kind)) {
        this.extractInterface(current);
        return;
      }

      // --- anything else: keep descending ---
      for (let idx = 0; idx < current.namedChildCount; idx++) {
        const next = current.namedChild(idx);
        if (next) walk(next);
      }
    };

    walk(body);
  }
  /**
   * Extract inheritance relationships
   *
   * Walks the named children of a class-like declaration and pushes an
   * `extends` or `implements` unresolved reference from `classId` for each
   * supertype found. Container nodes (`field_declaration_list`,
   * `class_heritage`) are entered recursively with the same `classId`.
   *
   * @param node    Declaration node, or a container reached by recursion.
   * @param classId Id of the graph node the references originate from.
   */
  private extractInheritance(node: SyntaxNode, classId: string): void {
    const textOf = (n: SyntaxNode): string => getNodeText(n, this.source);
    // Single place that shapes an inheritance reference; `at` supplies the position.
    const addRef = (
      referenceKind: 'extends' | 'implements',
      referenceName: string,
      at: SyntaxNode
    ): void => {
      this.unresolvedReferences.push({
        fromNodeId: classId,
        referenceName,
        referenceKind,
        line: at.startPosition.row + 1,
        column: at.startPosition.column,
      });
    };
    const findChild = (parent: SyntaxNode, type: string): SyntaxNode | undefined =>
      parent.namedChildren.find((c: SyntaxNode) => c.type === type);

    // Objective-C @interface MyClass : NSObject <ProtoA, ProtoB>
    if (node.type === 'class_interface') {
      const superclass = getChildByField(node, 'superclass');
      if (superclass) addRef('extends', textOf(superclass), superclass);

      for (let j = 0; j < node.namedChildCount; j++) {
        const argList = node.namedChild(j);
        if (argList?.type !== 'parameterized_arguments') continue;
        for (let k = 0; k < argList.namedChildCount; k++) {
          const typeName = argList.namedChild(k);
          if (!typeName) continue;
          const typeId = typeName.namedChildren.find(
            (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'identifier'
          );
          if (typeId) addRef('implements', textOf(typeId), typeId);
        }
      }
      return;
    }

    // Look for extends/implements clauses
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (!child) continue;

      switch (child.type) {
        case 'extends_clause':
        case 'superclass':
        case 'base_clause': // PHP extends
        case 'extends_interfaces': { // Java interface extends
          // Java uses a type_list wrapper: extends_interfaces -> type_list -> type_identifier.
          const typeList = findChild(child, 'type_list');
          let targets: ReadonlyArray<SyntaxNode | null>;
          if (typeList) {
            targets = typeList.namedChildren;
          } else if (child.type === 'base_clause') {
            // PHP `interface X extends A, B`: every child of base_clause is a
            // parent name, so all of them are emitted (not just the first).
            targets = child.namedChildren;
          } else {
            // Other grammars may place non-type children (e.g. type arguments)
            // after the parent type, so only the first child is the parent.
            targets = [child.namedChild(0)];
          }
          for (const target of targets) {
            if (target) addRef('extends', textOf(target), target);
          }
          break;
        }

        // C++ base classes: `class Derived : public Base, private Other` →
        // base_class_clause holds access specifiers + base type(s). Emit an
        // extends ref per base type (skip the public/private/protected keywords).
        case 'base_class_clause': {
          for (const t of child.namedChildren) {
            if (
              t.type === 'type_identifier' ||
              t.type === 'qualified_identifier' ||
              t.type === 'template_type'
            ) {
              addRef('extends', textOf(t), t);
            }
          }
          break;
        }

        case 'implements_clause':
        case 'class_interface_clause':
        case 'super_interfaces': // Java class implements
        case 'interfaces': { // Dart
          // Java uses a type_list wrapper: super_interfaces -> type_list -> type_identifier
          const typeList = findChild(child, 'type_list');
          const targets = typeList ? typeList.namedChildren : child.namedChildren;
          for (const iface of targets) {
            if (iface) addRef('implements', textOf(iface), iface);
          }
          break;
        }

        // Python superclass list: `class Flask(Scaffold, Mixin):`
        // argument_list contains identifier children for each parent class
        case 'argument_list': {
          if (node.type !== 'class_definition') break;
          for (const arg of child.namedChildren) {
            if (arg.type === 'identifier' || arg.type === 'attribute') {
              addRef('extends', textOf(arg), arg);
            }
          }
          break;
        }

        // Go interface embedding: `type Querier interface { LabelQuerier; ... }`
        // constraint_elem wraps the embedded interface type identifier
        case 'constraint_elem': {
          const typeId = findChild(child, 'type_identifier');
          if (typeId) addRef('extends', textOf(typeId), typeId);
          break;
        }

        // Go struct embedding: field_declaration without field_identifier
        // e.g. `type DB struct { *Head; Queryable }` — no field name means embedded type.
        // Restricted to Go: this method also enters field_declaration_list for
        // other grammars, and in C/C++ a field whose name sits inside a nested
        // declarator (`Foo *p;`, `Foo make();`) has no direct field_identifier
        // either, so its type would be reported as a base type.
        case 'field_declaration': {
          if (this.language !== 'go') break;
          const hasFieldIdentifier = child.namedChildren.some(
            (c: SyntaxNode) => c.type === 'field_identifier'
          );
          if (hasFieldIdentifier) break;
          const typeId = findChild(child, 'type_identifier');
          if (typeId) addRef('extends', textOf(typeId), typeId);
          break;
        }

        // Rust trait supertraits: `trait SubTrait: SuperTrait + Display { ... }`
        // trait_bounds contains type_identifier, generic_type, or higher_ranked_trait_bound children
        case 'trait_bounds': {
          for (const bound of child.namedChildren) {
            let nameNode: SyntaxNode | undefined;
            if (bound.type === 'type_identifier') {
              nameNode = bound;
            } else if (bound.type === 'generic_type') {
              // e.g. `Deserialize<'de>`
              nameNode = findChild(bound, 'type_identifier');
            } else if (bound.type === 'higher_ranked_trait_bound') {
              // e.g. `for<'de> Deserialize<'de>`
              const generic = findChild(bound, 'generic_type');
              nameNode =
                generic?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier') ??
                findChild(bound, 'type_identifier');
            }
            if (!nameNode) continue;
            const typeName = textOf(nameNode);
            if (typeName) addRef('extends', typeName, nameNode);
          }
          break;
        }

        // C#: `class Movie : BaseItem, IPlugin` → base_list with identifier children.
        // base_list combines base class and interfaces in one colon-separated list;
        // all are emitted as 'extends' since the syntax doesn't distinguish them.
        case 'base_list': {
          for (const baseType of child.namedChildren) {
            if (!baseType) continue;
            // For generic base types like `ClientBase<T>`, extract just the type name
            const nameNode =
              baseType.type === 'generic_name'
                ? (findChild(baseType, 'identifier') ?? baseType)
                : baseType;
            addRef('extends', textOf(nameNode), baseType);
          }
          break;
        }

        // Kotlin: `class Foo : Bar, Baz` → delegation_specifier > user_type > type_identifier
        // Also `class Foo : Bar()` → delegation_specifier > constructor_invocation > user_type
        case 'delegation_specifier': {
          const userType = findChild(child, 'user_type');
          const target = userType ?? findChild(child, 'constructor_invocation');
          if (!target) break;
          const innerUserType = userType ?? findChild(target, 'user_type');
          // Prefer the type_identifier, then the user_type, then the matched node itself.
          const typeId =
            innerUserType?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier') ??
            innerUserType ??
            target;
          addRef('extends', textOf(typeId), typeId);
          break;
        }

        // Swift: inheritance_specifier > user_type > type_identifier
        // Used for class inheritance, protocol conformance, and protocol inheritance
        case 'inheritance_specifier': {
          const userType = findChild(child, 'user_type');
          const typeId = userType?.namedChildren.find(
            (c: SyntaxNode) => c.type === 'type_identifier'
          );
          if (typeId) addRef('extends', textOf(typeId), typeId);
          break;
        }

        // JavaScript class_heritage has a bare identifier without extends_clause wrapper
        // e.g. `class Foo extends Bar {}` → class_heritage → identifier("Bar")
        case 'identifier':
        case 'type_identifier': {
          if (node.type === 'class_heritage') addRef('extends', textOf(child), child);
          break;
        }

        // Recurse into container nodes (e.g. field_declaration_list in Go structs,
        // class_heritage in TypeScript which wraps extends_clause/implements_clause)
        case 'field_declaration_list':
        case 'class_heritage': {
          this.extractInheritance(child, classId);
          break;
        }

        default:
          break;
      }
    }
  }
  /**
   * Rust `impl Trait for Type` — creates an implements edge from Type to Trait.
   * For plain `impl Type { ... }` (no trait), no inheritance edge is needed.
   *
   * The reference is only emitted when the implementing type has already
   * been extracted (see `findNodeByName`). Type arguments on the trait are
   * dropped, so `impl From<String> for Foo` references `From`.
   *
   * @param node The impl item node.
   */
  private extractRustImplItem(node: SyntaxNode): void {
    // Check if this is `impl Trait for Type` by looking for a `for` keyword
    const hasFor = node.children.some((c: SyntaxNode) => c.type === 'for' && !c.isNamed);
    if (!hasFor) return;

    // In `impl Trait for Type`, the type nodes are:
    //   first = Trait name, last = implementing Type name
    // Also handles generic types like `impl<T> Trait for MyStruct<T>`
    const typeIdents = node.namedChildren.filter(
      (c: SyntaxNode) =>
        c.type === 'type_identifier' ||
        c.type === 'generic_type' ||
        c.type === 'scoped_type_identifier'
    );
    if (typeIdents.length < 2) return;

    const traitNode = typeIdents[0]!;
    const typeNode = typeIdents[typeIdents.length - 1]!;

    // For a generic trait (`From<String>`), name the trait by its base type:
    // a reference that still carries the `<...>` suffix cannot match a trait
    // name. Falls back to the whole node if no base type child is found.
    const traitNameNode: SyntaxNode =
      traitNode.type === 'generic_type'
        ? (traitNode.namedChildren.find(
            (c: SyntaxNode) =>
              c.type === 'type_identifier' || c.type === 'scoped_type_identifier'
          ) ?? traitNode)
        : traitNode;

    // Get the trait name (scoped paths like std::fmt::Display are kept whole)
    const traitName =
      traitNameNode.type === 'scoped_type_identifier'
        ? this.source.substring(traitNameNode.startIndex, traitNameNode.endIndex)
        : getNodeText(traitNameNode, this.source);

    // Get the implementing type name (extract inner type_identifier for generics)
    let typeName: string;
    if (typeNode.type === 'generic_type') {
      const inner = typeNode.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
      typeName = getNodeText(inner ?? typeNode, this.source);
    } else {
      typeName = getNodeText(typeNode, this.source);
    }

    // Find the struct/type node for the implementing type
    const typeNodeId = this.findNodeByName(typeName);
    if (!typeNodeId) return;

    this.unresolvedReferences.push({
      fromNodeId: typeNodeId,
      referenceName: traitName,
      referenceKind: 'implements',
      line: traitNode.startPosition.row + 1,
      column: traitNode.startPosition.column,
    });
  }
  /**
   * Look up an already-extracted struct, enum, or class node by exact name.
   * Used for back-references such as impl blocks.
   *
   * @param name - name to match against `node.name`
   * @returns the id of the first matching node in `this.nodes`, or undefined
   */
  private findNodeByName(name: string): string | undefined {
    for (const candidate of this.nodes) {
      if (candidate.name !== name) continue;
      switch (candidate.kind) {
        case 'struct':
        case 'enum':
        case 'class':
          return candidate.id;
        default:
          break;
      }
    }
    return undefined;
  }
  /**
   * Languages for which type-annotation references are extracted.
   * The type-annotation extractors return early for any language not listed here.
   */
  private readonly TYPE_ANNOTATION_LANGUAGES = new Set([
    'typescript',
    'tsx',
    'dart',
    'kotlin',
    'swift',
    'rust',
    'go',
    'java',
    'csharp',
  ]);
  /**
   * Built-in/primitive type names that shouldn't create references.
   * Matched exactly (case-sensitive) against the text of a type identifier.
   * A name shared by several languages is listed once, under the first
   * language group that needs it.
   */
  private readonly BUILTIN_TYPES = new Set([
    // TypeScript
    'string', 'number', 'boolean', 'void', 'null', 'undefined', 'never', 'any', 'unknown',
    'object', 'symbol', 'bigint', 'true', 'false',
    // Rust
    'str', 'bool', 'i8', 'i16', 'i32', 'i64', 'i128', 'isize',
    'u8', 'u16', 'u32', 'u64', 'u128', 'usize', 'f32', 'f64', 'char',
    // Java/C# ('char' is already listed with Rust)
    'int', 'long', 'short', 'byte', 'float', 'double',
    // Go ('int', 'byte', 'bool' and 'string' are already listed above)
    'int8', 'int16', 'int32', 'int64', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr',
    'float32', 'float64', 'complex64', 'complex128', 'rune', 'error',
  ]);
  /**
   * Record 'references' for the types named in a function/method/field node.
   *
   * Three places are inspected, in this order: the parameter list, the return
   * type, and a direct `type_annotation` child (class fields such as
   * `model: ITextModel`). Does nothing without an extractor or for languages
   * outside TYPE_ANNOTATION_LANGUAGES.
   *
   * @param node - declaration node to inspect
   * @param nodeId - id of the graph node the references originate from
   */
  private extractTypeAnnotations(node: SyntaxNode, nodeId: string): void {
    const extractor = this.extractor;
    if (!extractor || !this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return;

    // Field names come from the language extractor, with generic defaults.
    const annotatedSubtrees = [
      getChildByField(node, extractor.paramsField || 'parameters'),
      getChildByField(node, extractor.returnField || 'return_type'),
      node.namedChildren.find((c: SyntaxNode) => c.type === 'type_annotation'),
    ];

    for (const subtree of annotatedSubtrees) {
      if (subtree) {
        this.extractTypeRefsFromSubtree(subtree, nodeId);
      }
    }
  }
  /**
   * Record 'references' for the types named in a variable's `type_annotation`
   * child, if it has one. No-op for languages outside TYPE_ANNOTATION_LANGUAGES.
   *
   * @param node - variable declaration node
   * @param nodeId - id of the graph node the references originate from
   */
  private extractVariableTypeAnnotation(node: SyntaxNode, nodeId: string): void {
    if (this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) {
      const annotation = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'type_annotation'
      );
      if (annotation) {
        this.extractTypeRefsFromSubtree(annotation, nodeId);
      }
    }
  }
  /**
   * Depth-first walk over the named nodes of a subtree, pushing a 'references'
   * entry for every `type_identifier` whose text is not in BUILTIN_TYPES.
   * Composite types (unions, intersections, generics, arrays, ...) are covered
   * simply by descending into their named children.
   *
   * @param node - root of the subtree to scan
   * @param fromNodeId - id of the graph node the references originate from
   */
  private extractTypeRefsFromSubtree(node: SyntaxNode, fromNodeId: string): void {
    if (node.type !== 'type_identifier') {
      // Not a type name itself: keep descending.
      for (const child of node.namedChildren) {
        if (child) {
          this.extractTypeRefsFromSubtree(child, fromNodeId);
        }
      }
      return;
    }

    // type_identifier is treated as a leaf: record it (unless built-in) and stop.
    const referenceName = getNodeText(node, this.source);
    if (!referenceName || this.BUILTIN_TYPES.has(referenceName)) return;

    const { row, column } = node.startPosition;
    this.unresolvedReferences.push({
      fromNodeId,
      referenceName,
      referenceKind: 'references',
      line: row + 1,
      column,
    });
  }
  /**
   * Dispatch on Pascal-specific node types.
   *
   * @param node - node currently being visited
   * @returns true when the node was handled here (the caller should not
   *          descend into its children), false for any other node type
   */
  private visitPascalNode(node: SyntaxNode): boolean {
    // Shared traversal for nodes that only act as containers.
    const visitNamedChildren = (): void => {
      for (let idx = 0; idx < node.namedChildCount; idx++) {
        const kid = node.namedChild(idx);
        if (kid) this.visitNode(kid);
      }
    };

    switch (node.type) {
      // Unit/Program/Library → module node, then the contained sections
      case 'unit':
      case 'program':
      case 'library': {
        const declared = node.namedChildren.find(
          (c: SyntaxNode) => c.type === 'moduleName'
        );
        const declaredName = declared ? getNodeText(declared, this.source) : '';
        // An empty module name falls back to the file name without extension.
        const moduleName =
          declaredName || path.basename(this.filePath).replace(/\.[^.]+$/, '');
        this.createNode('module', moduleName, node);
        visitNamedChildren();
        return true;
      }

      // Pure containers: type declarations, visibility sections,
      // interface/implementation sections
      case 'declTypes':
      case 'declSection':
      case 'interface':
      case 'implementation':
        visitNamedChildren();
        return true;

      // declType wraps declClass/declIntf/declEnum/type alias; the name lives
      // on declType while the inner node decides the kind
      case 'declType':
        this.extractPascalDeclType(node);
        return true;

      // uses clause → one import node per unit name
      case 'declUses':
        this.extractPascalUses(node);
        return true;

      // Constant container: only declConst children are extracted
      case 'declConsts':
        for (let idx = 0; idx < node.namedChildCount; idx++) {
          const kid = node.namedChild(idx);
          if (kid?.type === 'declConst') {
            this.extractPascalConst(kid);
          }
        }
        return true;

      // declConst found outside a declConsts container
      case 'declConst':
        this.extractPascalConst(node);
        return true;

      // Variable container: one variable node per named declVar
      case 'declVars':
        for (let idx = 0; idx < node.namedChildCount; idx++) {
          const kid = node.namedChild(idx);
          if (kid?.type !== 'declVar') continue;
          const varName = getChildByField(kid, 'name');
          if (varName) {
            this.createNode('variable', getNodeText(varName, this.source), kid);
          }
        }
        return true;

      // Implementation body: calls are extracted, no new node is created
      case 'defProc':
        this.extractPascalDefProc(node);
        return true;

      // Property / field members carry the visibility reported by the extractor
      case 'declProp':
      case 'declField': {
        const memberKind = node.type === 'declProp' ? 'property' : 'field';
        const memberName = getChildByField(node, 'name');
        if (memberName) {
          const name = getNodeText(memberName, this.source);
          const visibility = this.extractor!.getVisibility?.(node);
          this.createNode(memberKind, name, node, { visibility });
        }
        return true;
      }

      // Call expression → call reference
      case 'exprCall':
        this.extractPascalCall(node);
        return true;

      // begin..end block → scanned for calls
      case 'block':
        this.visitPascalBlock(node);
        return true;

      default:
        return false;
    }
  }
  /**
   * Turn a Pascal declType node into a class, interface, enum, or type_alias node.
   *
   * The name is read from declType's `name` field; the kind is decided by the
   * first match among: a `declClass` child, a `declIntf` child, a `declEnum`
   * inside a `type` child. Anything else becomes a type_alias. Nothing is
   * created when the name field is missing.
   *
   * @param node - the declType node
   */
  private extractPascalDeclType(node: SyntaxNode): void {
    const nameNode = getChildByField(node, 'name');
    if (!nameNode) return;
    const name = getNodeText(nameNode, this.source);

    const childOfType = (parent: SyntaxNode, type: string) =>
      parent.namedChildren.find((c: SyntaxNode) => c.type === type);

    // Class: inheritance first, then the body with the class on the stack.
    const declClass = childOfType(node, 'declClass');
    if (declClass) {
      const classNode = this.createNode('class', name, node);
      if (!classNode) return;
      this.extractPascalInheritance(declClass, classNode.id);
      this.nodeStack.push(classNode.id);
      for (let idx = 0; idx < declClass.namedChildCount; idx++) {
        const member = declClass.namedChild(idx);
        if (member) this.visitNode(member);
      }
      this.nodeStack.pop();
      return;
    }

    // Interface: members are visited with the interface on the stack.
    const declIntf = childOfType(node, 'declIntf');
    if (declIntf) {
      const ifaceNode = this.createNode('interface', name, node);
      if (!ifaceNode) return;
      this.nodeStack.push(ifaceNode.id);
      for (let idx = 0; idx < declIntf.namedChildCount; idx++) {
        const member = declIntf.namedChild(idx);
        if (member) this.visitNode(member);
      }
      this.nodeStack.pop();
      return;
    }

    // Enum: only recognised as a declEnum nested inside a `type` child.
    const typeChild = childOfType(node, 'type');
    const declEnum = typeChild ? childOfType(typeChild, 'declEnum') : undefined;
    if (!declEnum) {
      // Simple alias (type TFoo = Integer), forward declaration, or anything else.
      this.createNode('type_alias', name, node);
      return;
    }

    const enumNode = this.createNode('enum', name, node);
    if (!enumNode) return;
    this.nodeStack.push(enumNode.id);
    for (let idx = 0; idx < declEnum.namedChildCount; idx++) {
      const value = declEnum.namedChild(idx);
      if (value?.type !== 'declEnumValue') continue;
      const valueName = getChildByField(value, 'name');
      if (valueName) {
        this.createNode('enum_member', getNodeText(valueName, this.source), value);
      }
    }
    this.nodeStack.pop();
  }
  /**
   * Split a Pascal uses clause into one import node per `moduleName` child.
   * Every import node carries the trimmed text of the whole clause as its
   * signature. When a node is on top of the node stack, an 'imports' reference
   * from that node to the unit name is recorded as well.
   *
   * @param node - the declUses node
   */
  private extractPascalUses(node: SyntaxNode): void {
    const signature = getNodeText(node, this.source).trim();
    const units = node.namedChildren.filter(
      (c: SyntaxNode) => c.type === 'moduleName'
    );

    for (const unit of units) {
      const unitName = getNodeText(unit, this.source);
      this.createNode('import', unitName, unit, { signature });

      // An empty stack yields undefined here, so no reference is recorded.
      const parentId = this.nodeStack[this.nodeStack.length - 1];
      if (!parentId) continue;

      this.unresolvedReferences.push({
        fromNodeId: parentId,
        referenceName: unitName,
        referenceKind: 'imports',
        line: unit.startPosition.row + 1,
        column: unit.startPosition.column,
      });
    }
  }
  /**
   * Create a constant node for a Pascal declConst.
   * The text of a `defaultValue` child, when present, becomes the signature.
   * Nothing is created when the `name` field is missing.
   *
   * @param node - the declConst node
   */
  private extractPascalConst(node: SyntaxNode): void {
    const nameNode = getChildByField(node, 'name');
    if (nameNode) {
      const constName = getNodeText(nameNode, this.source);
      const valueNode = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'defaultValue'
      );
      this.createNode('constant', constName, node, {
        signature: valueNode ? getNodeText(valueNode, this.source) : undefined,
      });
    }
  }
  /**
   * Record inheritance references from the direct `typeref` children of a
   * declClass: the first typeref is recorded as 'extends', every following
   * one as 'implements'.
   *
   * @param declClass - the declClass node
   * @param classId - id of the class node the references originate from
   */
  private extractPascalInheritance(declClass: SyntaxNode, classId: string): void {
    let baseSeen = false;
    for (const candidate of declClass.namedChildren) {
      if (candidate.type !== 'typeref') continue;
      this.unresolvedReferences.push({
        fromNodeId: classId,
        referenceName: getNodeText(candidate, this.source),
        referenceKind: baseSeen ? 'implements' : 'extends',
        line: candidate.startPosition.row + 1,
        column: candidate.startPosition.column,
      });
      baseSeen = true;
    }
  }
  /**
   * Extract calls from a Pascal defProc (implementation body) and attribute
   * them to the matching, already-extracted method/function node.
   * No new node is created here.
   *
   * The owner is looked up case-insensitively: first by the full name as
   * written (e.g. "TAuthService.Create"), then by the part after the last dot,
   * and finally falls back to the node on top of the node stack.
   *
   * @param node - the defProc node
   */
  private extractPascalDefProc(node: SyntaxNode): void {
    const header = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'declProc'
    );
    const nameNode = header ? getChildByField(header, 'name') : null;
    if (!nameNode) return;

    const qualified = getNodeText(nameNode, this.source).trim();
    // Text after the last '.', or the whole name when there is no dot.
    const unqualified = qualified.slice(qualified.lastIndexOf('.') + 1);

    // The index is built once, on first use, from the nodes extracted so far.
    if (!this.methodIndex) {
      this.methodIndex = new Map();
      const index = this.methodIndex;
      for (const candidate of this.nodes) {
        const isMethod = candidate.kind === 'method';
        if (!isMethod && candidate.kind !== 'function') continue;

        // Bare name: the first node seen wins.
        const bareKey = candidate.name.toLowerCase();
        if (!index.has(bareKey)) {
          index.set(bareKey, candidate.id);
        }

        if (!isMethod) continue;
        // Methods are also indexed under every dotted suffix of their qualified
        // name, so both "Module.Class.Method" and "Class.Method" resolve.
        // For these keys the last node seen wins.
        const parts = candidate.qualifiedName.split('::');
        if (parts.length < 2) continue;
        for (let start = 0; start < parts.length - 1; start++) {
          index.set(parts.slice(start).join('.').toLowerCase(), candidate.id);
        }
      }
    }

    const ownerId =
      this.methodIndex.get(qualified.toLowerCase()) ||
      this.methodIndex.get(unqualified.toLowerCase()) ||
      this.nodeStack[this.nodeStack.length - 1];
    if (!ownerId) return;

    // Scan the body with the owner on the stack so calls are attributed to it.
    const body = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'block'
    );
    if (!body) return;
    this.nodeStack.push(ownerId);
    this.visitPascalBlock(body);
    this.nodeStack.pop();
  }
  /**
   * Record a 'calls' reference for a Pascal exprCall, attributed to the node
   * on top of the node stack, then scan the call's arguments for nested calls.
   * Does nothing when the stack is empty or the call has no named children.
   *
   * @param node - the exprCall node
   */
  private extractPascalCall(node: SyntaxNode): void {
    // Empty stack → undefined → nothing to attribute the call to.
    const callerId = this.nodeStack[this.nodeStack.length - 1];
    if (!callerId) return;

    // The callee is read from the first named child.
    const target = node.namedChild(0);
    if (!target) return;

    let calleeName = '';
    switch (target.type) {
      case 'exprDot':
        // Qualified call Obj.Method(...): join the direct identifier children.
        calleeName = target.namedChildren
          .filter((c: SyntaxNode) => c.type === 'identifier')
          .map((id: SyntaxNode) => getNodeText(id, this.source))
          .join('.');
        break;
      case 'identifier':
        calleeName = getNodeText(target, this.source);
        break;
      default:
        break;
    }

    if (calleeName) {
      this.unresolvedReferences.push({
        fromNodeId: callerId,
        referenceName: calleeName,
        referenceKind: 'calls',
        line: node.startPosition.row + 1,
        column: node.startPosition.column,
      });
    }

    // Arguments may themselves contain calls.
    const callArgs = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'exprArgs'
    );
    if (callArgs) {
      this.visitPascalBlock(callArgs);
    }
  }
  /**
   * Walk the named children of a Pascal block/statement tree looking for calls.
   * - exprCall children are handed to extractPascalCall.
   * - For an exprDot child, only its direct exprCall children are extracted;
   *   the exprDot is not descended into any further.
   * - Every other child is walked recursively.
   *
   * @param node - block, statement, or argument-list node to scan
   */
  private visitPascalBlock(node: SyntaxNode): void {
    for (let idx = 0; idx < node.namedChildCount; idx++) {
      const current = node.namedChild(idx);
      if (!current) continue;

      switch (current.type) {
        case 'exprCall':
          this.extractPascalCall(current);
          break;
        case 'exprDot':
          for (let inner = 0; inner < current.namedChildCount; inner++) {
            const part = current.namedChild(inner);
            if (part?.type === 'exprCall') {
              this.extractPascalCall(part);
            }
          }
          break;
        default:
          this.visitPascalBlock(current);
          break;
      }
    }
  }
  2431. }
  /**
   * Extract nodes and edges from source code.
   *
   * The base extractor is chosen from the language (given, or detected from
   * the file path and source): dedicated extractors for Svelte, Vue, Liquid
   * and Pascal .dfm/.fmx form files, an empty result for yaml/twig, and the
   * tree-sitter extractor for everything else.
   *
   * If `frameworkNames` is provided, framework extractors matching those names
   * and the file's language are run afterwards. Their nodes and references are
   * appended to the result; an extractor that throws is reported as a
   * 'warning' entry in `errors` and does not abort the extraction.
   *
   * @param filePath - path of the file being extracted
   * @param source - file contents
   * @param language - language override; detected when omitted
   * @param frameworkNames - names of the framework extractors to run
   * @returns the combined extraction result
   */
  export function extractFromSource(
    filePath: string,
    source: string,
    language?: Language,
    frameworkNames?: string[]
  ): ExtractionResult {
    const detectedLanguage = language || detectLanguage(filePath, source);
    const fileExtension = path.extname(filePath).toLowerCase();

    // Language-level pass.
    const runBaseExtraction = (): ExtractionResult => {
      switch (detectedLanguage) {
        case 'svelte':
          return new SvelteExtractor(filePath, source).extract();
        case 'vue':
          return new VueExtractor(filePath, source).extract();
        case 'liquid':
          return new LiquidExtractor(filePath, source).extract();
        case 'yaml':
        case 'twig':
          // No symbol extraction — file is tracked at the file-record level only.
          // Framework extractors (e.g. Drupal routing resolver) run below and may
          // add route nodes / references for yaml files such as *.routing.yml.
          return { nodes: [], edges: [], unresolvedReferences: [], errors: [], durationMs: 0 };
        case 'pascal':
          // DFM/FMX form files have their own extractor; other Pascal files
          // go through tree-sitter below.
          if (fileExtension === '.dfm' || fileExtension === '.fmx') {
            return new DfmExtractor(filePath, source).extract();
          }
          break;
        default:
          break;
      }
      return new TreeSitterExtractor(filePath, source, detectedLanguage).extract();
    };

    const result = runBaseExtraction();

    // Framework-specific extraction (routes, middleware, etc.)
    if (!frameworkNames || frameworkNames.length === 0) {
      return result;
    }

    const requested = getAllFrameworkResolvers().filter((r) =>
      frameworkNames.includes(r.name)
    );
    const applicable = getApplicableFrameworks(requested, detectedLanguage);

    for (const fw of applicable) {
      if (!fw.extract) continue;
      try {
        const fwResult = fw.extract(filePath, source);
        result.nodes.push(...fwResult.nodes);
        result.unresolvedReferences.push(...fwResult.references);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        result.errors.push({
          message: `Framework extractor '${fw.name}' failed: ${reason}`,
          filePath,
          severity: 'warning',
        });
      }
    }

    return result;
  }