tree-sitter.ts 143 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567
  1. /**
  2. * Tree-sitter Parser Wrapper
  3. *
  4. * Handles parsing source code and extracting structural information.
  5. */
  6. import { Node as SyntaxNode, Tree } from 'web-tree-sitter';
  7. import * as path from 'path';
  8. import {
  9. Language,
  10. Node,
  11. Edge,
  12. NodeKind,
  13. ExtractionResult,
  14. ExtractionError,
  15. UnresolvedReference,
  16. } from '../types';
  17. import { getParser, detectLanguage, isLanguageSupported, isFileLevelOnlyLanguage } from './grammars';
  18. import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers';
  19. import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types';
  20. import { EXTRACTORS } from './languages';
  21. import { LiquidExtractor } from './liquid-extractor';
  22. import { SvelteExtractor } from './svelte-extractor';
  23. import { DfmExtractor } from './dfm-extractor';
  24. import { VueExtractor } from './vue-extractor';
  25. import { MyBatisExtractor } from './mybatis-extractor';
  26. import {
  27. getAllFrameworkResolvers,
  28. getApplicableFrameworks,
  29. } from '../resolution/frameworks';
  30. // Re-export for backward compatibility
  31. export { generateNodeId } from './tree-sitter-helpers';
  /**
   * Resolve the display name of a declaration node.
   *
   * Resolution order:
   *  1. the extractor's optional `resolveName` hook (used when it yields a truthy value);
   *  2. the child stored under `extractor.nameField`, unwrapped for C/C++
   *     declarators and Lua index expressions;
   *  3. for `method_signature` nodes, the first identifier of an inner
   *     (Dart) signature node;
   *  4. arrow/function expressions are reported as `'<anonymous>'`;
   *  5. the first identifier-like named child.
   *
   * @param node - syntax node of the declaration
   * @param source - full source text the node belongs to
   * @param extractor - language extractor supplying `nameField` and the optional hook
   * @returns the resolved name, or `'<anonymous>'` when nothing matches
   */
  function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string {
    const textOf = (n: SyntaxNode): string => getNodeText(n, source);

    // 1. Language hook wins whenever it produces something.
    const fromHook = extractor.resolveName?.(node, source);
    if (fromHook) return fromHook;

    // 2. Name field declared by the language extractor.
    const declared = getChildByField(node, extractor.nameField);
    if (declared) {
      // C/C++ pointer return types wrap the name in pointer_declarator(s); peel them off.
      let target = declared;
      while (target.type === 'pointer_declarator') {
        const next = getChildByField(target, 'declarator') || target.namedChild(0);
        if (!next) break;
        target = next;
      }

      switch (target.type) {
        // Complex C/C++ declarators: the real name sits one level further in.
        case 'function_declarator':
        case 'declarator': {
          const innermost = getChildByField(target, 'declarator') || target.namedChild(0);
          return textOf(innermost || target);
        }
        // Lua `function t.f()` — the simple name is the trailing field
        // (the table receiver is captured separately via getReceiverType).
        case 'dot_index_expression': {
          const fieldNode = getChildByField(target, 'field');
          if (fieldNode) return textOf(fieldNode);
          break;
        }
        // Lua `function t:m()` — the simple name is the trailing method.
        case 'method_index_expression': {
          const methodNode = getChildByField(target, 'method');
          if (methodNode) return textOf(methodNode);
          break;
        }
      }
      return textOf(target);
    }

    // 3. Dart method_signature: the identifier lives inside an inner signature node.
    if (node.type === 'method_signature') {
      for (let s = 0; s < node.namedChildCount; s++) {
        const signature = node.namedChild(s);
        if (!signature) continue;
        const isInnerSignature =
          signature.type === 'function_signature' ||
          signature.type === 'getter_signature' ||
          signature.type === 'setter_signature' ||
          signature.type === 'constructor_signature' ||
          signature.type === 'factory_constructor_signature';
        if (!isInnerSignature) continue;

        for (let k = 0; k < signature.namedChildCount; k++) {
          const candidate = signature.namedChild(k);
          if (candidate?.type === 'identifier') {
            return textOf(candidate);
          }
        }
      }
    }

    // 4. Arrow/function expressions are named by their parent variable_declarator,
    // never by identifiers in their body. Without this guard a single-expression
    // arrow such as `const fn = () => someIdentifier` would be named
    // "someIdentifier" by the fallback scan below instead of "fn".
    if (node.type === 'arrow_function' || node.type === 'function_expression') {
      return '<anonymous>';
    }

    // 5. Last resort: first identifier-like named child.
    for (let c = 0; c < node.namedChildCount; c++) {
      const candidate = node.namedChild(c);
      if (!candidate) continue;
      switch (candidate.type) {
        case 'identifier':
        case 'type_identifier':
        case 'simple_identifier':
        case 'constant':
          return textOf(candidate);
      }
    }

    return '<anonymous>';
  }
  /**
   * Syntax-node types that denote a constructor invocation (`new Foo()` and
   * its equivalents in other grammars). Nodes of these types are handed to
   * extractInstantiation, which emits an `instantiates` reference targeting
   * the class name.
   */
  const INSTANTIATION_KINDS: ReadonlySet<string> = new Set<string>([
    // typescript / javascript / tsx / jsx
    'new_expression',
    // java / c#
    'object_creation_expression',
    // some grammars
    'instance_creation_expression',
    // go — `Widget{...}` / `pkga.Widget{...}`
    'composite_literal',
    // rust — `Widget { n: 1 }` / `m::Widget { .. }`
    'struct_expression',
  ]);
  121. /**
  122. * TreeSitterExtractor - Main extraction class
  123. */
  124. export class TreeSitterExtractor {
  /** Path of the file being extracted; used for node IDs and error reports. */
  private filePath: string;
  /** Language of the file: the caller's value, or the result of detectLanguage. */
  private language: Language;
  /** Source text; cleared at the end of extract(). */
  private source: string;
  /** Parsed tree; only set while extract() runs and deleted in its finally block. */
  private tree: Tree | null = null;
  /** Nodes collected so far (returned by extract()). */
  private nodes: Node[] = [];
  /** Edges collected so far (returned by extract()). */
  private edges: Edge[] = [];
  /** References collected for later resolution (returned by extract()). */
  private unresolvedReferences: UnresolvedReference[] = [];
  /** Errors recorded during extraction (returned by extract()). */
  private errors: ExtractionError[] = [];
  /** Language-specific extractor from EXTRACTORS, or null when none is registered. */
  private extractor: LanguageExtractor | null = null;
  /** Stack of parent node IDs; the top entry is the source of new `contains` edges. */
  private nodeStack: string[] = [];
  /** lookup key → node ID for Pascal defProc lookup. */
  private methodIndex: Map<string, string> | null = null;

  /**
   * @param filePath - path of the file to extract from
   * @param source - full source text of that file
   * @param language - optional explicit language; when falsy it is detected
   *   from the path and the source
   */
  constructor(filePath: string, source: string, language?: Language) {
    const effectiveLanguage = language || detectLanguage(filePath, source);

    this.filePath = filePath;
    this.source = source;
    this.language = effectiveLanguage;
    this.extractor = EXTRACTORS[effectiveLanguage] || null;
  }
  /**
   * Parse the source and walk the resulting tree, collecting nodes, edges,
   * unresolved references and errors.
   *
   * Returns early with a single error entry when the language is unsupported
   * or no parser is available. Parse/walk failures are recorded as a
   * `parse_error`, except WASM memory failures, which are re-thrown.
   * The tree is always deleted and the source string released before returning.
   *
   * @returns the extraction result, including the elapsed time in milliseconds
   */
  extract(): ExtractionResult {
    const startedAt = Date.now();

    // Shared shape for the two "cannot even start" outcomes.
    const failure = (message: string, code: ExtractionError['code']): ExtractionResult => ({
      nodes: [],
      edges: [],
      unresolvedReferences: [],
      errors: [
        {
          message,
          filePath: this.filePath,
          severity: 'error',
          code,
        },
      ],
      durationMs: Date.now() - startedAt,
    });

    if (!isLanguageSupported(this.language)) {
      return failure(`Unsupported language: ${this.language}`, 'unsupported_language');
    }

    const parser = getParser(this.language);
    if (!parser) {
      return failure(`Failed to get parser for language: ${this.language}`, 'parser_error');
    }

    try {
      const tree = parser.parse(this.source) ?? null;
      this.tree = tree;
      if (!tree) {
        throw new Error('Parser returned null tree');
      }

      // File node representing the source file itself.
      const fileNode: Node = {
        id: `file:${this.filePath}`,
        kind: 'file',
        name: path.basename(this.filePath),
        qualifiedName: this.filePath,
        filePath: this.filePath,
        language: this.language,
        startLine: 1,
        endLine: this.source.split('\n').length,
        startColumn: 0,
        endColumn: 0,
        isExported: false,
        updatedAt: Date.now(),
      };
      this.nodes.push(fileNode);

      // The file node sits at the bottom of the stack so top-level
      // declarations receive `contains` edges from it.
      this.nodeStack.push(fileNode.id);

      // File-level package declaration (Kotlin/Java): an implicit `namespace`
      // node wraps every top-level declaration so each qualifiedName carries
      // the FQN — required for cross-file import resolution on JVM languages,
      // where filename ≠ class name.
      const packageNodeId = this.extractFilePackage(tree.rootNode);
      if (packageNodeId) this.nodeStack.push(packageNodeId);

      this.visitNode(tree.rootNode);

      if (packageNodeId) this.nodeStack.pop();
      this.nodeStack.pop();
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);

      // WASM memory errors leave the module corrupted, so every later parse
      // would fail as well. Re-throw so the worker can detect it and crash,
      // forcing a clean restart with a fresh heap.
      const isWasmMemoryFailure =
        msg.includes('memory access out of bounds') || msg.includes('out of memory');
      if (isWasmMemoryFailure) {
        throw error;
      }

      this.errors.push({
        message: `Parse error: ${msg}`,
        filePath: this.filePath,
        severity: 'error',
        code: 'parse_error',
      });
    } finally {
      // Trees hold native WASM heap memory that V8's GC cannot see; free it
      // right away or it accumulates across thousands of files.
      if (this.tree) {
        this.tree.delete();
        this.tree = null;
      }
      // Drop the source string to reduce GC pressure.
      this.source = '';
    }

    return {
      nodes: this.nodes,
      edges: this.edges,
      unresolvedReferences: this.unresolvedReferences,
      errors: this.errors,
      durationMs: Date.now() - startedAt,
    };
  }
  /**
   * Dispatch a syntax node to the matching extract* routine and, unless that
   * routine already walked the node's children, recurse into its named children.
   *
   * The checks form a single first-match-wins chain, so their order matters.
   * Does nothing when no language extractor is registered.
   */
  private visitNode(node: SyntaxNode): void {
    const lang = this.extractor;
    if (!lang) return;

    const type = node.type;

    // Language-specific custom visitor hook: a truthy result means "fully handled".
    if (lang.visitNode && lang.visitNode(node, this.makeExtractorContext())) return;

    // Pascal-specific AST handling; a true result means the children were dealt with.
    if (this.language === 'pascal' && this.visitPascalNode(node)) return;

    // Cleared by every branch whose extractor walks the children itself.
    let descend = true;

    if (lang.functionTypes.includes(type)) {
      // Function declarations. For Python/Ruby a function_definition inside a
      // class is treated as a method. Both extractors visit the children via
      // visitFunctionBody.
      if (this.isInsideClassLikeNode() && lang.methodTypes.includes(type)) {
        this.extractMethod(node);
      } else {
        this.extractFunction(node);
      }
      descend = false;
    } else if (lang.classTypes.includes(type)) {
      // Class declarations. Some languages reuse class_declaration for
      // structs/enums (e.g. Swift), hence the optional classification hook.
      switch (lang.classifyClassNode?.(node) ?? 'class') {
        case 'struct':
          this.extractStruct(node);
          break;
        case 'enum':
          this.extractEnum(node);
          break;
        case 'interface':
          this.extractInterface(node);
          break;
        case 'trait':
          this.extractClass(node, 'trait');
          break;
        default:
          this.extractClass(node);
      }
      descend = false; // the class-like extractors visit body children
    } else if (lang.extraClassNodeTypes?.includes(type)) {
      // Extra class node types (e.g. Dart mixin_declaration, extension_declaration).
      this.extractClass(node);
      descend = false;
    } else if (lang.methodTypes.includes(type)) {
      // Method declarations not already claimed by functionTypes.
      this.extractMethod(node);
      descend = false; // children are visited via visitFunctionBody
    } else if (lang.interfaceTypes.includes(type)) {
      // Interface / protocol / trait declarations.
      this.extractInterface(node);
      descend = false; // extractInterface visits body children
    } else if (lang.structTypes.includes(type)) {
      // Struct declarations.
      this.extractStruct(node);
      descend = false; // extractStruct visits body children
    } else if (lang.enumTypes.includes(type)) {
      // Enum declarations.
      this.extractEnum(node);
      descend = false; // extractEnum visits body children
    } else if (lang.typeAliasTypes.includes(type)) {
      // Type alias declarations (e.g. `type X = ...` in TypeScript). For Go,
      // type_spec wraps struct/interface definitions — resolveTypeAliasKind
      // detects these and extractTypeAlias creates the correct node kind.
      // Its return value says whether the children were already handled.
      descend = !this.extractTypeAlias(node);
    } else if (lang.propertyTypes?.includes(type) && this.isInsideClassLikeNode()) {
      // Class properties (e.g. C# property_declaration).
      this.extractProperty(node);
      descend = false;
    } else if (lang.fieldTypes?.includes(type) && this.isInsideClassLikeNode()) {
      // Class fields (e.g. Java field_declaration, C# field_declaration).
      this.extractField(node);
      descend = false;
    } else if (lang.variableTypes.includes(type) && !this.isInsideClassLikeNode()) {
      // Variable declarations (const, let, var, etc.) outside class-like nodes.
      this.extractVariable(node);
      descend = false; // extractVariable handles children
    }
    // `export_statement` itself is not extracted — the walker descends into
    // its children, where the inner declaration (lexical_declaration,
    // function_declaration, class_declaration, etc.) is dispatched to its own
    // extractor. `isExported` walks the parent chain, so the exported flag is
    // preserved automatically.
    //
    // Calling extractExportedVariables here AND descending made every
    // `export const X = ...` produce two nodes for the same symbol — one
    // kind:'variable' from extractExportedVariables and one kind:'constant'
    // from extractVariable. The dedicated dispatch is the correct one (it
    // picks kind from isConst, captures the initializer signature, and walks
    // type annotations); the export-statement helper was redundant.
    else if (lang.importTypes.includes(type)) {
      // Imports.
      this.extractImport(node);
    } else if (
      type === 'export_statement' &&
      (this.language === 'typescript' ||
        this.language === 'tsx' ||
        this.language === 'javascript' ||
        this.language === 'jsx') &&
      getChildByField(node, 'source')
    ) {
      // Re-export from another module — `export { X } from './y'` (TS/JS).
      // A re-export depends on the source module just like an import does, but
      // the export_statement is otherwise only descended into (there is no
      // declaration to extract), so a barrel that ONLY re-exports produced
      // zero edges and showed 0 dependents. Link each re-exported name to its
      // definition. Children are still visited (a non-re-export
      // `export const X = …` has no `source` and falls through to its normal
      // declaration extraction).
      const ownerId = this.nodeStack[this.nodeStack.length - 1];
      if (ownerId) this.emitReExportRefs(node, ownerId);
    } else if (lang.callTypes.includes(type)) {
      // Function calls.
      this.extractCall(node);
    } else if (INSTANTIATION_KINDS.has(type)) {
      // `new Foo(...)` / `Foo::new(...)` / object_creation_expression —
      // produce an `instantiates` reference. Children are still walked so
      // nested calls inside the constructor args (`new Foo(bar())`) get their
      // own `calls` refs.
      this.extractInstantiation(node);

      // Java/C# `new T(...) { ... }` — anonymous class with a body. Unless it
      // is extracted as a class node plus its methods, the interface→impl
      // synthesizer (Phase 5.5) can't bridge T's abstract methods to the
      // anonymous overrides, and an agent investigating a call through T
      // (`strategy.iterator(...)` where strategy is a Strategy lambda body)
      // has to Read the file to find the actual implementation.
      const anonymousBody = this.findAnonymousClassBody(node);
      if (anonymousBody) {
        this.extractAnonymousClass(node, anonymousBody);
        descend = false;
      }
    }
    // (Decorator handling lives inside the symbol-creating extractors —
    // extractClass / extractFunction / extractProperty — because the decorator
    // node sits BEFORE the symbol in the AST and the walker would otherwise
    // see the wrong nodeStack head.)
    else if (type === 'impl_item') {
      // Rust: `impl Trait for Type { ... }` — creates an implements edge from
      // Type to Trait.
      this.extractRustImplItem(node);
    } else if (
      (type === 'property_signature' || type === 'method_signature') &&
      this.isInsideClassLikeNode() &&
      this.TYPE_ANNOTATION_LANGUAGES.has(this.language)
    ) {
      // TypeScript interface members: property_signature (`foo: T`, `foo?: T`)
      // and method_signature (`foo(arg: A): R`) both carry type annotations
      // the interface walker would otherwise drop. Extract them as
      // `references` edges from the interface so resolvers can wire
      // callers/impact for types that only appear in interface members.
      const ownerId = this.nodeStack[this.nodeStack.length - 1];
      if (ownerId) {
        this.extractTypeAnnotations(node, ownerId);
      }
      // `descend` stays true — nested signatures still need traversal.
    }

    // Recurse only when no extract routine has already walked the children.
    if (!descend) return;
    for (let idx = 0; idx < node.namedChildCount; idx++) {
      const next = node.namedChild(idx);
      if (next) {
        this.visitNode(next);
      }
    }
  }
  /**
   * Build a graph node for a syntax node, record it, and link it to the
   * current parent with a `contains` edge.
   *
   * @param kind - node kind to create
   * @param name - symbol name; an empty/missing name yields no node
   * @param node - syntax node providing the source positions
   * @param extra - optional fields merged last, so they override the defaults
   * @returns the created node, or `null` when `name` is empty
   */
  private createNode(
    kind: NodeKind,
    name: string,
    node: SyntaxNode,
    extra?: Partial<Node>
  ): Node | null {
    // Nameless nodes are not meaningful symbols and would cause FK violations
    // when edges reference them (see issue #42).
    if (!name) return null;

    const firstLine = node.startPosition.row + 1;
    const id = generateNodeId(this.filePath, kind, name, firstLine);

    // Some grammars (e.g. Dart) model a function/method body as a *sibling* of
    // the signature node, so the declaration's own range is just the signature
    // line. When the resolved body ends beyond the node, stretch endLine to
    // cover it — required for any body-level analysis (callees, the callback
    // synthesizer's body scan, context slices). This only ever extends: for
    // child-body grammars the body already lies within range (no-op).
    let lastLine = node.endPosition.row + 1;
    if (kind === 'function' || kind === 'method') {
      const lang = this.extractor;
      const body = lang?.resolveBody?.(node, lang.bodyField);
      if (body) {
        lastLine = Math.max(lastLine, body.endPosition.row + 1);
      }
    }

    const created: Node = {
      id,
      kind,
      name,
      qualifiedName: this.buildQualifiedName(name),
      filePath: this.filePath,
      language: this.language,
      startLine: firstLine,
      endLine: lastLine,
      startColumn: node.startPosition.column,
      endColumn: node.endPosition.column,
      updatedAt: Date.now(),
      ...extra,
    };
    this.nodes.push(created);

    // Containment edge from whatever currently sits on top of the parent
    // stack (an empty stack yields undefined, so no edge is added).
    const parentId = this.nodeStack[this.nodeStack.length - 1];
    if (parentId) {
      this.edges.push({
        source: parentId,
        target: id,
        kind: 'contains',
      });
    }

    return created;
  }
  /**
   * Return the first named child of `node` whose type appears in `types`.
   * Used to locate inner type nodes (e.g. enum_specifier inside a typedef).
   *
   * @param node  Parent whose named children are scanned in order.
   * @param types Accepted node type names.
   * @returns The first matching child, or `null` when none matches.
   */
  private findChildByTypes(node: SyntaxNode, types: string[]): SyntaxNode | null {
    const total = node.namedChildCount;
    for (let idx = 0; idx < total; idx++) {
      const candidate = node.namedChild(idx);
      if (!candidate) continue;
      if (types.includes(candidate.type)) return candidate;
    }
    return null;
  }
  /**
   * Locate the first root-level child whose type is one of the extractor's
   * `packageTypes`, create a `namespace` node for it, and hand back that
   * node's id so the caller can scope top-level declarations underneath.
   *
   * @param rootNode Root syntax node of the file.
   * @returns The namespace node id, or `null` when the extractor declares no
   *          package support, no package header is present (script files,
   *          .kts without a package), or no package name could be extracted.
   */
  private extractFilePackage(rootNode: SyntaxNode): string | null {
    const packageTypes = this.extractor?.packageTypes;
    if (!packageTypes || packageTypes.length === 0 || !this.extractor?.extractPackage) {
      return null;
    }

    // Only direct children of the root are considered; the first match wins.
    let header: SyntaxNode | null = null;
    for (let idx = 0; idx < rootNode.namedChildCount; idx++) {
      const candidate = rootNode.namedChild(idx);
      if (candidate && packageTypes.includes(candidate.type)) {
        header = candidate;
        break;
      }
    }
    if (!header) return null;

    const packageName = this.extractor.extractPackage(header, this.source);
    if (!packageName) return null;

    const namespaceNode = this.createNode('namespace', packageName, header);
    return namespaceNode?.id ?? null;
  }
  /**
   * Compose a `::`-separated qualified name from the names of the scopes
   * currently on the node stack, followed by `name`.
   *
   * Only the semantic hierarchy is used: `file` nodes are left out because the
   * path is stored separately in `filePath` and would pollute FTS if included.
   * Stack ids with no matching entry in `this.nodes` are skipped.
   *
   * @param name Simple name of the symbol being qualified.
   * @returns The qualified name.
   */
  private buildQualifiedName(name: string): string {
    const segments: string[] = [];
    for (const scopeId of this.nodeStack) {
      const scope = this.nodes.find((candidate) => candidate.id === scopeId);
      if (!scope || scope.kind === 'file') continue;
      segments.push(scope.name);
    }
    return [...segments, name].join('::');
  }
  /**
   * Create the ExtractorContext handed to language-specific visitNode hooks.
   *
   * The callbacks forward to this walker's own methods, and the getters read
   * the walker's live fields on every access rather than a snapshot.
   *
   * @returns A context object bound to this walker.
   */
  private makeExtractorContext(): ExtractorContext {
    // The getters below have their own `this`, so the walker is captured
    // explicitly.
    // eslint-disable-next-line @typescript-eslint/no-this-alias
    const walker = this;
    return {
      createNode: (kind, name, node, extra) => walker.createNode(kind, name, node, extra),
      visitNode: (node) => walker.visitNode(node),
      visitFunctionBody: (body, functionId) => walker.visitFunctionBody(body, functionId),
      addUnresolvedReference: (ref) => walker.unresolvedReferences.push(ref),
      pushScope: (nodeId) => walker.nodeStack.push(nodeId),
      popScope: () => walker.nodeStack.pop(),
      get filePath() {
        return walker.filePath;
      },
      get source() {
        return walker.source;
      },
      get nodeStack() {
        return walker.nodeStack;
      },
      get nodes() {
        return walker.nodes;
      },
    };
  }
  /**
   * Report whether the innermost scope on the node stack is class-like.
   *
   * The kinds accepted are `class`, `struct`, `interface`, `trait`, `enum`
   * and `module`. Any other kind (including `file`), an empty stack, or a
   * stack id with no matching node yields `false`.
   *
   * @returns `true` when the top of the stack is one of the kinds above.
   */
  private isInsideClassLikeNode(): boolean {
    // An empty stack yields `undefined` here, which is rejected below.
    const scopeId = this.nodeStack[this.nodeStack.length - 1];
    if (!scopeId) return false;

    const scope = this.nodes.find((candidate) => candidate.id === scopeId);
    switch (scope?.kind) {
      case 'class':
      case 'struct':
      case 'interface':
      case 'trait':
      case 'enum':
      case 'module':
        return true;
      default:
        return false;
    }
  }
  /**
   * Extract a function declaration or function-valued expression as a
   * `function` node, then walk its body with that node as the current scope.
   *
   * Functions for which the extractor reports a receiver type are delegated to
   * `extractMethod`. Functions that stay `<anonymous>` and functions flagged by
   * `isMisparsedFunction` produce no node, but their bodies are still visited.
   *
   * @param node         The function's syntax node.
   * @param nameOverride Name resolved by the caller; when given it replaces the
   *                     extracted name and disables the variable_declarator
   *                     lookup.
   */
  private extractFunction(node: SyntaxNode, nameOverride?: string): void {
    if (!this.extractor) return;

    // A function with a receiver (e.g. a Rust function_item inside an impl
    // block) is a method, not a free function.
    if (this.extractor.getReceiverType?.(node, this.source)) {
      this.extractMethod(node);
      return;
    }

    // nameOverride is supplied only for explicitly-named anonymous functions
    // the caller resolved itself (e.g. arrow values of exported-const object
    // members — SvelteKit actions). Inline-object arrows reached by the general
    // walker get no override and still end up in the <anonymous> branch below.
    let name = nameOverride ?? extractName(node, this.source, this.extractor);

    // Arrow functions and function expressions carry no `name` field of their
    // own; when one is assigned to a variable the name lives on the parent
    // variable_declarator, e.g. `export const useAuth = () => { ... }`.
    const isFunctionValue =
      node.type === 'arrow_function' || node.type === 'function_expression';
    if (!nameOverride && name === '<anonymous>' && isFunctionValue) {
      const declarator = node.parent;
      if (declarator?.type === 'variable_declarator') {
        const identifier = getChildByField(declarator, 'name');
        if (identifier) name = getNodeText(identifier, this.source);
      }
    }

    // Two cases emit no node but must still descend into the body:
    //  - anonymous wrappers: AMD/RequireJS and CommonJS module wrappers
    //    (`define([], function(){…})`, `(function(){…})()`) hold named inner
    //    functions and calls that would otherwise be lost, because the
    //    dispatcher set skipChildren and nothing else walks this subtree (#528);
    //  - misparse artifacts (e.g. C++ macros causing "namespace detail"
    //    functions), whose bodies still contain calls and structural nodes.
    if (name === '<anonymous>' || this.extractor.isMisparsedFunction?.(name, node)) {
      const orphanBody =
        this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
        getChildByField(node, this.extractor.bodyField);
      if (orphanBody) this.visitFunctionBody(orphanBody, '');
      return;
    }

    const created = this.createNode('function', name, node, {
      docstring: getPrecedingDocstring(node, this.source),
      signature: this.extractor.getSignature?.(node, this.source),
      visibility: this.extractor.getVisibility?.(node),
      isExported: this.extractor.isExported?.(node, this.source),
      isAsync: this.extractor.isAsync?.(node),
      isStatic: this.extractor.isStatic?.(node),
    });
    if (!created) return;

    // Parameter types and return type.
    this.extractTypeAnnotations(node, created.id);

    // Decorators on the function itself (rare in JS/TS, but present in Python
    // `@decorator def f():` and Java/Kotlin annotations on free functions).
    this.extractDecoratorsFor(node, created.id);

    // Walk the body with the new function as the enclosing scope.
    this.nodeStack.push(created.id);
    const functionBody =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField);
    if (functionBody) this.visitFunctionBody(functionBody, created.id);
    this.nodeStack.pop();
  }
  /**
   * Extract a class-like declaration and visit its members with the new node
   * as the enclosing scope.
   *
   * @param node The declaration's syntax node.
   * @param kind Node kind to record; defaults to `'class'`.
   */
  private extractClass(node: SyntaxNode, kind: NodeKind = 'class'): void {
    if (!this.extractor) return;

    const created = this.createNode(kind, extractName(node, this.source, this.extractor), node, {
      docstring: getPrecedingDocstring(node, this.source),
      visibility: this.extractor.getVisibility?.(node),
      isExported: this.extractor.isExported?.(node, this.source),
    });
    if (!created) return;

    // extends / implements
    this.extractInheritance(node, created.id);
    // Decorators applied to the class (`@Foo class X {}`).
    this.extractDecoratorsFor(node, created.id);

    this.nodeStack.push(created.id);
    // When no body can be resolved, the declaration's own children are visited
    // instead.
    const memberContainer =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField) ??
      node;
    // Methods and properties are picked up by the general visitor.
    for (let idx = 0; idx < memberContainer.namedChildCount; idx++) {
      const member = memberContainer.namedChild(idx);
      if (member) this.visitNode(member);
    }
    this.nodeStack.pop();
  }
  /**
   * Extract a method as a `method` node and walk its body with that node as
   * the enclosing scope.
   *
   * A node is treated as a method when the current scope is class-like, when
   * the extractor sets `methodsAreTopLevel`, or when the extractor reports a
   * receiver type for it. Otherwise it is handed to `extractFunction`, except
   * for methods of object literals, which produce no node (their bodies are
   * still visited).
   *
   * @param node The method's syntax node.
   */
  private extractMethod(node: SyntaxNode): void {
    if (!this.extractor) return;

    // For languages with receiver types (Go, Rust) the receiver goes into the
    // qualified name so FTS can match "scrapeLoop.run" against
    // qualified_name "...::scrapeLoop::run".
    const receiverType = this.extractor.getReceiverType?.(node, this.source);
    // The node stack does not change before the second use below, so the
    // answer is computed once.
    const insideClassLike = this.isInsideClassLikeNode();

    if (!(insideClassLike || this.extractor.methodsAreTopLevel || receiverType)) {
      // method_definition nodes inside object literals (getters/setters/methods
      // of inline objects) are ephemeral and only add noise — e.g. Svelte
      // context objects: `ctx.set({ get view() { ... } })`. Emit nothing for
      // them, but keep walking the body.
      const parentType = node.parent?.type;
      if (parentType === 'object' || parentType === 'object_expression') {
        const literalBody =
          this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
          getChildByField(node, this.extractor.bodyField);
        if (literalBody) this.visitFunctionBody(literalBody, '');
        return;
      }
      // No class-like scope and no receiver: this is really a function.
      this.extractFunction(node);
      return;
    }

    const name = extractName(node, this.source, this.extractor);

    // Misparse artifacts (e.g. C++ "switch" inside a macro-confused class body)
    // get no node, but their bodies are still visited.
    if (this.extractor.isMisparsedFunction?.(name, node)) {
      const artifactBody =
        this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
        getChildByField(node, this.extractor.bodyField);
      if (artifactBody) this.visitFunctionBody(artifactBody, '');
      return;
    }

    const details: Partial<Node> = {
      docstring: getPrecedingDocstring(node, this.source),
      signature: this.extractor.getSignature?.(node, this.source),
      visibility: this.extractor.getVisibility?.(node),
      isAsync: this.extractor.isAsync?.(node),
      isStatic: this.extractor.isStatic?.(node),
    };
    if (receiverType) {
      details.qualifiedName = `${receiverType}::${name}`;
    }

    const created = this.createNode('method', name, node, details);
    if (!created) return;

    // A method with a receiver but no class-like parent on the stack (e.g. a
    // Rust impl block) is additionally linked to the same-file struct / class /
    // enum / trait that carries the receiver's name, when one has been recorded.
    if (receiverType && !insideClassLike) {
      const ownerKinds = ['struct', 'class', 'enum', 'trait'];
      const owner = this.nodes.find(
        (candidate) =>
          candidate.name === receiverType &&
          candidate.filePath === this.filePath &&
          ownerKinds.includes(candidate.kind)
      );
      if (owner) {
        this.edges.push({ source: owner.id, target: created.id, kind: 'contains' });
      }
    }

    // Parameter types and return type.
    this.extractTypeAnnotations(node, created.id);
    // Decorators (`@Get('/list') list() {}`).
    this.extractDecoratorsFor(node, created.id);

    // Walk the body with the new method as the enclosing scope.
    this.nodeStack.push(created.id);
    const methodBody =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField);
    if (methodBody) this.visitFunctionBody(methodBody, created.id);
    this.nodeStack.pop();
  }
  /**
   * Extract an interface/protocol/trait declaration and visit its members with
   * the new node as the enclosing scope.
   *
   * The recorded kind is the extractor's `interfaceKind` when it is set, and
   * `'interface'` otherwise.
   *
   * @param node The declaration's syntax node.
   */
  private extractInterface(node: SyntaxNode): void {
    if (!this.extractor) return;

    const name = extractName(node, this.source, this.extractor);
    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = this.extractor.isExported?.(node, this.source);
    const recordedKind: NodeKind = this.extractor.interfaceKind ?? 'interface';

    const created = this.createNode(recordedKind, name, node, { docstring, isExported });
    if (!created) return;

    // Interface inheritance (`extends`).
    this.extractInheritance(node, created.id);

    // Interface methods and nested types are handled by the general visitor.
    // When no body can be resolved, the declaration's own children are visited.
    this.nodeStack.push(created.id);
    const memberContainer =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField) ??
      node;
    for (let idx = 0; idx < memberContainer.namedChildCount; idx++) {
      const member = memberContainer.namedChild(idx);
      if (member) this.visitNode(member);
    }
    this.nodeStack.pop();
  }
  /**
   * Extract a struct definition as a `struct` node and visit its body children
   * with the new node as the enclosing scope.
   *
   * Nodes without a resolvable body are ignored: those are forward
   * declarations or type references, not definitions.
   *
   * The body is resolved the same way the class, interface, enum, function and
   * method extractors resolve theirs — the extractor's `resolveBody` hook
   * first, then the plain `bodyField` child — so a struct whose body is only
   * reachable through the hook is no longer mistaken for a forward
   * declaration.
   *
   * @param node The struct's syntax node.
   */
  private extractStruct(node: SyntaxNode): void {
    if (!this.extractor) return;

    // Skip forward declarations and type references (no body = not a definition).
    const body =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField);
    if (!body) return;

    const name = extractName(node, this.source, this.extractor);
    const docstring = getPrecedingDocstring(node, this.source);
    const visibility = this.extractor.getVisibility?.(node);
    const isExported = this.extractor.isExported?.(node, this.source);

    const structNode = this.createNode('struct', name, node, {
      docstring,
      visibility,
      isExported,
    });
    if (!structNode) return;

    // Inheritance (e.g. Swift: struct HTTPMethod: RawRepresentable).
    this.extractInheritance(node, structNode.id);

    // Fields and other members are handled by the general visitor.
    this.nodeStack.push(structNode.id);
    for (let i = 0; i < body.namedChildCount; i++) {
      const child = body.namedChild(i);
      if (child) {
        this.visitNode(child);
      }
    }
    this.nodeStack.pop();
  }
  /**
   * Extract an enum definition as an `enum` node and process its body with the
   * new node as the enclosing scope.
   *
   * Body children whose type is listed in the extractor's `enumMemberTypes`
   * become enum members; every other child goes through the general visitor
   * (nested types, methods). Enums without a resolvable body are ignored as
   * forward declarations or type references.
   *
   * @param node The enum's syntax node.
   */
  private extractEnum(node: SyntaxNode): void {
    if (!this.extractor) return;

    // No body means this is not a definition.
    const enumBody =
      this.extractor.resolveBody?.(node, this.extractor.bodyField) ??
      getChildByField(node, this.extractor.bodyField);
    if (!enumBody) return;

    const created = this.createNode('enum', extractName(node, this.source, this.extractor), node, {
      docstring: getPrecedingDocstring(node, this.source),
      visibility: this.extractor.getVisibility?.(node),
      isExported: this.extractor.isExported?.(node, this.source),
    });
    if (!created) return;

    // Inheritance (e.g. Swift: enum AFError: Error).
    this.extractInheritance(node, created.id);

    this.nodeStack.push(created.id);
    const memberTypes = this.extractor.enumMemberTypes;
    for (let idx = 0; idx < enumBody.namedChildCount; idx++) {
      const entry = enumBody.namedChild(idx);
      if (!entry) continue;
      const isMember = memberTypes?.includes(entry.type);
      if (isMember) {
        this.extractEnumMembers(entry);
      } else {
        this.visitNode(entry);
      }
    }
    this.nodeStack.pop();
  }
  /**
   * Create `enum_member` nodes for the names declared by an enum member node.
   *
   * Three shapes are handled, in this order:
   *  1. the node has a `name` field (e.g. Rust enum_variant) — one member;
   *  2. the node has identifier-like named children — one member per child,
   *     which covers multi-case declarations (Swift: `case put, delete`);
   *  3. the node has no named children at all — the node's own text is the
   *     member name (e.g. a TS property_identifier directly in the enum body).
   *
   * @param node The enum member syntax node.
   */
  private extractEnumMembers(node: SyntaxNode): void {
    const namedField = getChildByField(node, 'name');
    if (namedField) {
      this.createNode('enum_member', getNodeText(namedField, this.source), node);
      return;
    }

    // Swift: simple_identifier, TS: property_identifier.
    const identifierTypes = ['simple_identifier', 'identifier', 'property_identifier'];
    let identifierCount = 0;
    for (let idx = 0; idx < node.namedChildCount; idx++) {
      const part = node.namedChild(idx);
      if (!part || !identifierTypes.includes(part.type)) continue;
      this.createNode('enum_member', getNodeText(part, this.source), part);
      identifierCount += 1;
    }
    if (identifierCount > 0) return;

    // The node itself is the identifier.
    if (node.namedChildCount === 0) {
      this.createNode('enum_member', getNodeText(node, this.source), node);
    }
  }
  /**
   * Extract a class property declaration (e.g. C# `public string Name { get; set; }`)
   * as a `property` node inside the current scope.
   *
   * The name comes from the extractor's `extractPropertyName` hook when it
   * yields a non-empty string; otherwise from the node's `name` field or its
   * first `identifier` child. Nothing is emitted when no name can be found.
   * The recorded signature is `"<type text> <name>"` when a type child is
   * found, and just the name otherwise.
   *
   * @param node The property declaration's syntax node.
   */
  private extractProperty(node: SyntaxNode): void {
    if (!this.extractor) return;

    const docstring = getPrecedingDocstring(node, this.source);
    const visibility = this.extractor.getVisibility?.(node);
    const isStatic = this.extractor.isStatic?.(node) ?? false;

    const hookName = this.extractor.extractPropertyName?.(node, this.source);
    const nameNode = hookName
      ? null
      : getChildByField(node, 'name') || node.namedChildren.find(c => c.type === 'identifier');
    // `||`, not `??`: the fallback lookup above runs for every falsy hook
    // result, so an empty-string hook result must fall through to it as well
    // instead of discarding the property.
    const name = hookName || (nameNode ? getNodeText(nameNode, this.source) : null);
    if (!name) return;

    // Property type: the first named child that is not a modifier, an
    // identifier, an accessor list, or an initializer clause.
    const typeNode = node.namedChildren.find(
      c => c.type !== 'modifier' && c.type !== 'modifiers'
        && c.type !== 'identifier' && c.type !== 'accessor_list'
        && c.type !== 'accessors' && c.type !== 'equals_value_clause'
    );
    const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined;
    const signature = typeText ? `${typeText} ${name}` : name;

    const propNode = this.createNode('property', name, node, {
      docstring,
      signature,
      visibility,
      isStatic,
    });

    if (propNode) {
      // `@Inject() private svc: Foo` and similar — capture the
      // decorator->target relationship for class properties too.
      this.extractDecoratorsFor(node, propNode.id);
      // Emit `references` edges from the property to types named in its
      // type annotation (#381). The generic walker handles TS-style
      // `type_annotation` children; the C# branch walks the `type` field.
      this.extractTypeAnnotations(node, propNode.id);
    }
  }
  /**
   * Extract a class field declaration (e.g. Java field_declaration, C#
   * field_declaration, PHP property_declaration) as one `field` node per
   * declared name inside the current scope.
   *
   * Shapes are tried in this order:
   *  1. `variable_declarator` children directly under the node (Java) or
   *     under a `variable_declaration` wrapper (C#);
   *  2. `property_element` children (PHP);
   *  3. a `name` field or `identifier` child directly on the node.
   *
   * Decorator and type-annotation extraction runs only for shape 1.
   *
   * @param node The field declaration's syntax node.
   */
  private extractField(node: SyntaxNode): void {
    if (!this.extractor) return;

    const docstring = getPrecedingDocstring(node, this.source);
    const visibility = this.extractor.getVisibility?.(node);
    const isStatic = this.extractor.isStatic?.(node) ?? false;

    // Java field_declaration: "private final String name = value;" keeps its
    // variable_declarator(s) as direct children. C# nests them inside a
    // variable_declaration wrapper instead.
    const wrapper = node.namedChildren.find((c) => c.type === 'variable_declaration');
    let declarators = node.namedChildren.filter((c) => c.type === 'variable_declarator');
    if (declarators.length === 0 && wrapper) {
      declarators = wrapper.namedChildren.filter((c) => c.type === 'variable_declarator');
    }

    // PHP property_declaration: property_element → variable_name → name
    if (declarators.length === 0) {
      const propertyElements = node.namedChildren.filter((c) => c.type === 'property_element');
      if (propertyElements.length > 0) {
        // Type annotation if present (e.g. "string", "int", "?Foo"): the first
        // child that is neither a modifier nor a property element.
        const nonTypeKinds = [
          'visibility_modifier',
          'static_modifier',
          'readonly_modifier',
          'property_element',
          'var_modifier',
        ];
        const phpType = node.namedChildren.find((c) => !nonTypeKinds.includes(c.type));
        const phpTypeText = phpType ? getNodeText(phpType, this.source) : undefined;

        for (const element of propertyElements) {
          const variable = element.namedChildren.find((c) => c.type === 'variable_name');
          const identifier = variable?.namedChildren.find((c) => c.type === 'name');
          if (!identifier) continue;
          const name = getNodeText(identifier, this.source);
          const signature = phpTypeText ? `${phpTypeText} $${name}` : `$${name}`;
          this.createNode('field', name, element, {
            docstring,
            signature,
            visibility,
            isStatic,
          });
        }
        return;
      }
    }

    if (declarators.length === 0) {
      // Fallback: look for a name directly on the declaration.
      const directName =
        getChildByField(node, 'name') || node.namedChildren.find((c) => c.type === 'identifier');
      if (directName) {
        this.createNode('field', getNodeText(directName, this.source), node, {
          docstring,
          visibility,
          isStatic,
        });
      }
      return;
    }

    // Field type. Java keeps it as a direct child of field_declaration; C#
    // keeps it inside the variable_declaration wrapper, so search there when
    // the wrapper exists.
    const typeScope = wrapper ?? node;
    const skippedKinds = [
      'modifiers',
      'modifier',
      'variable_declarator',
      'variable_declaration',
      'marker_annotation',
      'annotation',
    ];
    const fieldType = typeScope.namedChildren.find((c) => !skippedKinds.includes(c.type));
    const fieldTypeText = fieldType ? getNodeText(fieldType, this.source) : undefined;

    for (const declarator of declarators) {
      const identifier =
        getChildByField(declarator, 'name') ||
        declarator.namedChildren.find((c) => c.type === 'identifier');
      if (!identifier) continue;

      const name = getNodeText(identifier, this.source);
      const signature = fieldTypeText ? `${fieldTypeText} ${name}` : name;
      const created = this.createNode('field', name, declarator, {
        docstring,
        signature,
        visibility,
        isStatic,
      });
      if (!created) continue;

      // Java/Kotlin annotations / TS field decorators sit on the outer
      // field_declaration, not on the individual declarator.
      this.extractDecoratorsFor(node, created.id);
      // Same as properties: emit `references` to the field's annotated type.
      // The outer `field_declaration` is the right scope to search from — C#
      // carries the `type` inside `variable_declaration`, and the
      // language-aware path in `extractTypeAnnotations` descends into that
      // wrapper (#381).
      this.extractTypeAnnotations(node, created.id);
    }
  }
  /**
   * Turn the function-valued members of an object literal into function
   * nodes, each named by its property key.
   *
   * Two member shapes are recognised: `pair` members whose value is an arrow
   * function or function expression (`key: () => {}` / `key: function() {}`),
   * and `method_definition` members (method shorthand `key() {}`). Every other
   * member is ignored.
   *
   * @param obj The object literal's syntax node.
   */
  private extractObjectLiteralFunctions(obj: SyntaxNode): void {
    for (let idx = 0; idx < obj.namedChildCount; idx++) {
      const member = obj.namedChild(idx);
      if (!member) continue;

      switch (member.type) {
        case 'pair': {
          const key = getChildByField(member, 'key');
          const value = getChildByField(member, 'value');
          if (!key || !value) break;
          if (value.type === 'arrow_function' || value.type === 'function_expression') {
            this.extractFunction(value, this.objectKeyName(key));
          }
          break;
        }
        case 'method_definition': {
          // Method shorthand: `{ fetchUser() {...} }`. extractMethod deliberately
          // skips object-literal methods, so go through extractFunction with an
          // explicit name (method_definition exposes a `body` field, so
          // resolveBody falls through to it and the node spans the full method).
          const shorthandName = getChildByField(member, 'name');
          if (shorthandName) {
            this.extractFunction(member, this.objectKeyName(shorthandName));
          }
          break;
        }
        default:
          break;
      }
    }
  }
  /**
   * Text of a property key with at most one leading and one trailing quote
   * character (`'`, `"` or a backtick) removed, e.g. `'foo'` → `foo`.
   */
  private objectKeyName(key: SyntaxNode): string {
    const rawKey = getNodeText(key, this.source);
    return rawKey.replace(/^['"`]|['"`]$/g, '');
  }
  /**
   * For a `call_expression` initializer such as `create((set, get) => ({...}))`,
   * locate the object literal RETURNED by one of its function arguments.
   *
   * Arguments that are themselves call expressions are searched recursively,
   * which unwraps middleware wrappers
   * (`create(persist((set, get) => ({...}), {...}))`, devtools, immer,
   * subscribeWithSelector). Matching is purely on AST shape; no library names
   * are consulted.
   *
   * @param callNode The call expression to inspect.
   * @param depth    Current nesting level; the search stops once it exceeds 4.
   * @returns The first object literal found in argument order, or `null` when
   *          there is none — the common case for ordinary call initializers.
   */
  private findInitializerReturnedObject(callNode: SyntaxNode, depth = 0): SyntaxNode | null {
    if (depth > 4) return null;

    const argumentList = getChildByField(callNode, 'arguments');
    if (!argumentList) return null;

    for (let idx = 0; idx < argumentList.namedChildCount; idx++) {
      const argument = argumentList.namedChild(idx);
      if (!argument) continue;

      let found: SyntaxNode | null = null;
      if (argument.type === 'arrow_function' || argument.type === 'function_expression') {
        found = this.functionReturnedObject(argument);
      } else if (argument.type === 'call_expression') {
        found = this.findInitializerReturnedObject(argument, depth + 1);
      }
      if (found) return found;
    }
    return null;
  }
  /**
   * Find the object literal that a function expression returns.
   *
   * Two body shapes are recognised: the `=> ({...})` arrow form, where the
   * body is an object possibly wrapped in parenthesized_expression nodes, and
   * a `statement_block` with a top-level `return {...}` statement.
   *
   * @param fnNode The arrow function / function expression node.
   * @returns The object literal node, or `null` for any other body shape.
   */
  private functionReturnedObject(fnNode: SyntaxNode): SyntaxNode | null {
    const fnBody = getChildByField(fnNode, 'body');
    if (!fnBody) return null;

    // Resolve a node to an object literal, looking through any number of
    // wrapping parentheses; anything else resolves to null.
    const unwrapObject = (candidate: SyntaxNode | null): SyntaxNode | null => {
      if (!candidate) return null;
      if (candidate.type === 'object' || candidate.type === 'object_expression') {
        return candidate;
      }
      if (candidate.type !== 'parenthesized_expression') return null;
      for (let idx = 0; idx < candidate.namedChildCount; idx++) {
        const nested = unwrapObject(candidate.namedChild(idx));
        if (nested) return nested;
      }
      return null;
    };

    // `(set, get) => ({...})` — the body is the (parenthesized) object itself.
    const expressionResult = unwrapObject(fnBody);
    if (expressionResult) return expressionResult;

    // `(set, get) => { return {...} }` — only return statements directly inside
    // the block are examined.
    if (fnBody.type !== 'statement_block') return null;
    for (let s = 0; s < fnBody.namedChildCount; s++) {
      const statement = fnBody.namedChild(s);
      if (statement?.type !== 'return_statement') continue;
      for (let c = 0; c < statement.namedChildCount; c++) {
        const returned = unwrapObject(statement.namedChild(c));
        if (returned) return returned;
      }
    }
    return null;
  }
  /**
   * Extract a variable declaration (const, let, var, etc.)
   *
   * Creates one `constant` / `variable` node per declared name. The
   * initializer text, truncated to 100 characters, is stored in `signature`
   * (as `= <initializer>`) for searchability. The declaration shape differs
   * per language, so the work is dispatched on `this.language`:
   *   - TypeScript/JavaScript: `variable_declarator` children (name/value fields)
   *   - Python/Ruby: assignment with `left` / `right`
   *   - Go: `var_spec` / `const_spec` children, plus `short_var_declaration`
   *   - Lua/Luau: `variable_list` = `expression_list`
   *   - anything else: identifier / `variable_declarator` children
   *
   * @param node The declaration node to extract from.
   */
  private extractVariable(node: SyntaxNode): void {
    if (!this.extractor) return;

    // `isConst` / `isExported` are optional extractor hooks; an absent hook counts as `false`.
    const isConst = this.extractor.isConst?.(node) ?? false;
    const kind: NodeKind = isConst ? 'constant' : 'variable';
    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = this.extractor.isExported?.(node, this.source) ?? false;

    if (
      this.language === 'typescript' || this.language === 'javascript' ||
      this.language === 'tsx' || this.language === 'jsx'
    ) {
      // lexical_declaration / variable_declaration: one or more variable_declarator children.
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child?.type !== 'variable_declarator') continue;

        const nameNode = getChildByField(child, 'name');
        const valueNode = getChildByField(child, 'value');
        if (!nameNode) continue;

        // Skip destructured patterns (e.g., `let { x, y } = $props()` in Svelte).
        // These produce ugly multi-line names like "{ class: className }".
        if (nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern') {
          continue;
        }

        const name = getNodeText(nameNode, this.source);

        // Arrow functions / function expressions: extract as function instead of variable.
        if (valueNode && (valueNode.type === 'arrow_function' || valueNode.type === 'function_expression')) {
          this.extractFunction(valueNode);
          continue;
        }

        const varNode = this.createNode(kind, name, child, {
          docstring,
          signature: this.formatInitializerSignature(valueNode),
          isExported,
        });

        // Extract type annotation references (e.g., const x: ITextModel = ...)
        if (varNode) {
          this.extractVariableTypeAnnotation(child, varNode.id);
        }

        // Exported const object-of-functions — extract each function-valued
        // property as a function named by its key + walk its body so its
        // calls are captured. Two shapes, both keyed on AST shape (not on any
        // library name):
        //   `export const actions = { default: async () => {} }` — object is
        //     the DIRECT value (SvelteKit form actions / handler maps / route
        //     tables).
        //   `export const useStore = create((set, get) => ({ fetchUser:
        //     async () => {} }))` — object is RETURNED by an initializer call,
        //     possibly through middleware wrappers (persist/devtools/immer).
        //     Covers Zustand/Redux/Pinia/MobX stores generically. Without
        //     this, store actions exist only as object-literal properties —
        //     never nodes — so `node`/`callers` on `fetchUser` return "not
        //     found" and the agent Reads the store to reconstruct the flow.
        // Scoped to EXPORTED consts to exclude inline-object noise
        // (`ctx.set({...})`) the object-method skip deliberately avoids.
        const objectOfFns =
          valueNode && (valueNode.type === 'object' || valueNode.type === 'object_expression')
            ? valueNode
            : valueNode?.type === 'call_expression'
              ? this.findInitializerReturnedObject(valueNode)
              : null;
        const extractObjectMethods = isExported && !!objectOfFns;

        // Visit the initializer body for calls — EXCEPT object literals (their
        // function-valued properties are extracted below) and the store-factory
        // call whose returned object we extract method-by-method below (walking
        // the whole call would re-visit those method arrows and mis-attribute
        // their inner calls to the file/module scope).
        if (
          valueNode &&
          valueNode.type !== 'object' &&
          valueNode.type !== 'object_expression' &&
          !(extractObjectMethods && valueNode.type === 'call_expression')
        ) {
          this.visitFunctionBody(valueNode, '');
        }
        if (extractObjectMethods && objectOfFns) {
          this.extractObjectLiteralFunctions(objectOfFns);
        }
      }
    } else if (this.language === 'python' || this.language === 'ruby') {
      // Python/Ruby assignment: left = right. Only a plain identifier target
      // becomes a node; every such assignment is recorded (no name-case filter).
      const left = getChildByField(node, 'left') || node.namedChild(0);
      const right = getChildByField(node, 'right') || node.namedChild(1);
      if (left && left.type === 'identifier') {
        const name = getNodeText(left, this.source);
        this.createNode(kind, name, node, {
          docstring,
          signature: this.formatInitializerSignature(right),
        });
      }
    } else if (this.language === 'go') {
      // Go: var_declaration, short_var_declaration, const_declaration
      // These can have multiple identifiers on the left
      const specs = node.namedChildren.filter(c =>
        c.type === 'var_spec' || c.type === 'const_spec'
      );
      for (const spec of specs) {
        const nameNode = spec.namedChild(0);
        // Read the initializer from the `value` field rather than "the last
        // named child": for a spec without initializer (`var x int`) the last
        // child is the TYPE, which must not be reported as `= int`.
        const valueField = getChildByField(spec, 'value');
        if (nameNode && nameNode.type === 'identifier') {
          const name = getNodeText(nameNode, this.source);
          this.createNode(node.type === 'const_declaration' ? 'constant' : 'variable', name, spec, {
            docstring,
            signature: this.formatInitializerSignature(valueField),
          });
        }
        // Walk the initializer so composite literals and calls in a
        // package-level `var Query Binding = queryBinding{}` (a registry of
        // implementations) or `var c = pkg.New()` are extracted as
        // instantiates/calls dependencies — the body walker only covers
        // initializers inside functions, not these top-level declarations.
        if (valueField) this.visitFunctionBody(valueField, '');
      }

      // Handle short_var_declaration (:=)
      if (node.type === 'short_var_declaration') {
        const left = getChildByField(node, 'left');
        const right = getChildByField(node, 'right');
        if (left) {
          // Can be expression_list with multiple identifiers
          const identifiers = left.type === 'expression_list'
            ? left.namedChildren.filter(c => c.type === 'identifier')
            : [left];
          // Every name on the left shares the full right-hand side text.
          const signature = this.formatInitializerSignature(right);
          for (const id of identifiers) {
            const name = getNodeText(id, this.source);
            this.createNode('variable', name, node, {
              docstring,
              signature,
            });
          }
        }
      }
    } else if (this.language === 'lua' || this.language === 'luau') {
      // Lua/Luau: variable_declaration → assignment_statement → variable_list
      // (name: identifier...) = expression_list. `local x, y = 1, 2`
      // declares multiple names; only plain identifiers are locals.
      const assign = node.namedChildren.find((c) => c.type === 'assignment_statement') ?? node;
      const varList = assign.namedChildren.find((c) => c.type === 'variable_list');
      const exprList = assign.namedChildren.find((c) => c.type === 'expression_list');
      const values = exprList ? exprList.namedChildren : [];
      const names = varList ? varList.namedChildren.filter((c) => c.type === 'identifier') : [];
      names.forEach((nameNode, i) => {
        const name = getNodeText(nameNode, this.source);
        if (!name) return;
        // Names and values are paired positionally; surplus names get no signature.
        const signature = this.formatInitializerSignature(values[i]);
        this.createNode(kind, name, nameNode, { docstring, signature, isExported });
      });
    } else {
      // Generic fallback for other languages
      // Try to find identifier children
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child?.type === 'identifier' || child?.type === 'variable_declarator') {
          const name = child.type === 'identifier'
            ? getNodeText(child, this.source)
            : extractName(child, this.source, this.extractor);
          if (name && name !== '<anonymous>') {
            this.createNode(kind, name, child, {
              docstring,
              isExported,
            });
          }
        }
      }
    }
  }

  /**
   * Build the `= <initializer>` signature text stored on variable/constant nodes.
   *
   * Keeps at most the first 100 characters of the initializer's source text and
   * appends `...` only when text was actually cut off.
   *
   * @param valueNode Initializer node, or null/undefined when the declaration has none.
   * @returns The signature string, or `undefined` when there is no initializer text.
   */
  private formatInitializerSignature(valueNode: SyntaxNode | null | undefined): string | undefined {
    if (!valueNode) return undefined;
    const text = getNodeText(valueNode, this.source);
    if (!text) return undefined;
    const maxLength = 100;
    return text.length > maxLength ? `= ${text.slice(0, maxLength)}...` : `= ${text}`;
  }
  /**
   * Extract a type alias (e.g. `export type X = ...` in TypeScript).
   *
   * The extractor's optional `resolveTypeAliasKind` hook may report that the
   * alias actually wraps a struct, enum or interface definition (e.g. Go's
   * `type Foo struct { ... }`); in that case a node of that kind is created
   * instead of a `type_alias` node.
   *
   * @param node The type alias declaration node.
   * @returns `true` when the alias resolved to a struct/enum/interface (the
   *   caller should skip the children — this method dealt with the body),
   *   `false` otherwise, including when no extractor is set or the alias is
   *   anonymous.
   */
  private extractTypeAlias(node: SyntaxNode): boolean {
    const extractor = this.extractor;
    if (!extractor) return false;

    const name = extractName(node, this.source, extractor);
    if (name === '<anonymous>') return false;

    const docstring = getPrecedingDocstring(node, this.source);
    const isExported = extractor.isExported?.(node, this.source);

    // Ask the language extractor whether this alias is really a struct,
    // enum or interface definition in disguise.
    const resolvedKind = extractor.resolveTypeAliasKind?.(node, this.source);

    switch (resolvedKind) {
      case 'struct': {
        const structNode = this.createNode('struct', name, node, { docstring, isExported });
        if (!structNode) return true;

        // Members visited below are parented to the struct.
        this.nodeStack.push(structNode.id);
        // Go-style `type` field first; otherwise look for an inner struct
        // child (C `typedef struct`).
        const structType = getChildByField(node, 'type')
          || this.findChildByTypes(node, extractor.structTypes);
        if (structType) {
          // Struct embedding (e.g. Go: `type DB struct { *Head; Queryable }`).
          this.extractInheritance(structType, structNode.id);
          const structBody = getChildByField(structType, extractor.bodyField) || structType;
          for (const member of structBody.namedChildren) {
            if (member) this.visitNode(member);
          }
        }
        this.nodeStack.pop();
        return true;
      }

      case 'enum': {
        const enumNode = this.createNode('enum', name, node, { docstring, isExported });
        if (!enumNode) return true;

        this.nodeStack.push(enumNode.id);
        // Inner enum type child (e.g. C: `typedef enum { ... } name`).
        const enumType = this.findChildByTypes(node, extractor.enumTypes);
        if (enumType) {
          this.extractInheritance(enumType, enumNode.id);
          const enumBody = extractor.resolveBody?.(enumType, extractor.bodyField)
            ?? getChildByField(enumType, extractor.bodyField);
          if (enumBody) {
            const memberTypes = extractor.enumMemberTypes;
            for (const member of enumBody.namedChildren) {
              if (!member) continue;
              // Recognised enum members get dedicated extraction; anything
              // else goes through the regular visitor.
              if (memberTypes?.includes(member.type)) {
                this.extractEnumMembers(member);
              } else {
                this.visitNode(member);
              }
            }
          }
        }
        this.nodeStack.pop();
        return true;
      }

      case 'interface': {
        const interfaceKind: NodeKind = extractor.interfaceKind ?? 'interface';
        const interfaceNode = this.createNode(interfaceKind, name, node, { docstring, isExported });
        if (!interfaceNode) return true;

        // Interface inheritance lives on the inner type node.
        const interfaceType = getChildByField(node, 'type');
        if (interfaceType) {
          this.extractInheritance(interfaceType, interfaceNode.id);
          // Go: extract the interface's method specs as `method` nodes so implicit
          // interface satisfaction (a struct's method set ⊇ the interface's) and
          // impl-navigation can see the contract. Go has no `implements` keyword, so
          // without the interface's method set there's nothing to match against.
          if (this.language === 'go') {
            this.extractGoInterfaceMethods(interfaceType, interfaceNode.id);
          }
        }
        return true;
      }

      default:
        break;
    }

    // Plain alias.
    const aliasNode = this.createNode('type_alias', name, node, {
      docstring,
      isExported,
    });
    if (!aliasNode || !this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return false;

    // Type references come from the alias value (e.g. `type X = ITextModel | null`);
    // in tree-sitter TS, type_alias_declaration exposes it as the `value` field.
    const aliasValue = getChildByField(node, 'value');
    if (!aliasValue) return false;

    this.extractTypeRefsFromSubtree(aliasValue, aliasNode.id);
    // `type X = { foo: T; bar(): T }` — make the members first-class
    // property/method nodes under the type alias so `recorder.stop()`
    // can attach the call edge to `RecorderHandle.stop` instead of
    // an unrelated class method picked by path-proximity (#359).
    if (this.language === 'typescript' || this.language === 'tsx') {
      this.extractTsTypeAliasMembers(aliasValue, aliasNode);
    }
    return false;
  }
  /**
   * Create a `method` node for each method spec found directly under a Go
   * `interface_type` node, parented to the interface (e.g. `Marshal`,
   * `Unmarshal` of a `Core` interface).
   *
   * tree-sitter-go names these children `method_elem` (newer) or
   * `method_spec` (older); both are accepted. Any other child — such as the
   * `type_identifier` of an embedded interface (`Reader` inside
   * `ReadWriter`) — is ignored here and left to inheritance extraction.
   *
   * @param interfaceType The `interface_type` node whose named children are scanned.
   * @param ifaceId Id of the interface node that will contain the methods.
   */
  private extractGoInterfaceMethods(interfaceType: SyntaxNode, ifaceId: string): void {
    // While the interface id is on top of the stack, created nodes belong to it.
    this.nodeStack.push(ifaceId);

    for (const member of interfaceType.namedChildren) {
      if (!member) continue;
      const isMethodSpec = member.type === 'method_elem' || member.type === 'method_spec';
      if (!isMethodSpec) continue;

      // Prefer the `name` field; fall back to the first named child.
      const nameNode = getChildByField(member, 'name') ?? member.namedChild(0);
      if (!nameNode) continue;

      const methodName = getNodeText(nameNode, this.source);
      if (!methodName) continue;

      this.createNode('method', methodName, member, {
        signature: this.extractor?.getSignature?.(member, this.source),
      });
    }

    this.nodeStack.pop();
  }
  /**
   * Turn the members of a TypeScript `type X = { ... }` — or of the
   * object-type operands of an intersection — into `property` / `method`
   * nodes parented to the type-alias node.
   *
   * Only the alias value itself (an `object_type`) or the direct
   * `object_type` operands of an `intersection_type` are inspected, so
   * anonymous object types nested inside generic arguments
   * (`Promise<{ ok: true }>`) never yield phantom members. Any other value
   * shape is a no-op.
   *
   * @param value The alias's value node.
   * @param typeAliasNode The already-created type-alias node that owns the members.
   */
  private extractTsTypeAliasMembers(value: SyntaxNode, typeAliasNode: Node): void {
    const objectTypes: SyntaxNode[] = [];
    switch (value.type) {
      case 'object_type':
        objectTypes.push(value);
        break;
      case 'intersection_type':
        for (const operand of value.namedChildren) {
          if (operand && operand.type === 'object_type') objectTypes.push(operand);
        }
        break;
      default:
        return;
    }

    this.nodeStack.push(typeAliasNode.id);
    for (const objectType of objectTypes) {
      for (const member of objectType.namedChildren) {
        if (!member) continue;

        const isMethodSignature = member.type === 'method_signature';
        if (!isMethodSignature && member.type !== 'property_signature') continue;

        const nameNode = getChildByField(member, 'name');
        if (!nameNode) continue;
        const memberName = getNodeText(nameNode, this.source);
        if (!memberName) continue;

        // `foo: () => T` and `foo(): T` are functionally a method on the
        // type contract, so a property_signature with a function-typed
        // annotation is recorded as a method too and call sites can resolve to it.
        let memberKind: NodeKind = 'property';
        if (isMethodSignature || this.isTsFunctionTypedProperty(member)) {
          memberKind = 'method';
        }

        this.createNode(memberKind, memberName, member, {
          docstring: getPrecedingDocstring(member, this.source),
          signature: getNodeText(member, this.source),
          qualifiedName: `${typeAliasNode.name}::${memberName}`,
        });

        // Emit `references` edges from the type alias to types named in the
        // member's signature, matching the interface-member behavior added in
        // #432. Refs are attached to the type-alias parent (consistent with
        // interface property_signature treatment).
        this.extractTypeAnnotations(member, typeAliasNode.id);
      }
    }
    this.nodeStack.pop();
  }
  /**
   * Report whether a `property_signature` is function-typed, i.e. its `type`
   * field has a `function_type` node as a direct named child (`foo: () => T`).
   * Such a member is treated as method-shaped, since the call site
   * `obj.foo()` has identical semantics to `bar(): T`.
   *
   * @param propertySignature The `property_signature` node to inspect.
   * @returns `true` when a direct `function_type` child is found; `false`
   *   otherwise, including when there is no `type` field.
   */
  private isTsFunctionTypedProperty(propertySignature: SyntaxNode): boolean {
    const annotation = getChildByField(propertySignature, 'type');
    if (!annotation) return false;
    // Only direct children count — nested function types are not inspected.
    return annotation.namedChildren.some((inner) => inner?.type === 'function_type');
  }
  1494. // extractExportedVariables removed — the walker now descends into
  1495. // export_statement children and the inner declaration's dedicated
  1496. // extractor (extractVariable, extractFunction, extractClass, etc.)
  1497. // handles the symbol with isExported=true via parent-walk in the
  1498. // language extractor's isExported predicate.
  /**
   * Extract an import
   *
   * Creates `import` node(s) with the import statement text stored in
   * `signature` for searchability, and queues unresolved `imports`
   * references for later resolution.
   *
   * Order of attempts:
   *   1. The extractor's optional `extractImport` hook. A non-null result
   *      yields a single import node and ends processing.
   *   2. Inline multi-import handlers (Python `import a, b`, Go import specs,
   *      PHP grouped `use`) that create one node per imported module.
   *   3. A generic whole-statement fallback — only for extractors that
   *      define no hook at all.
   *
   * @param node The import statement node.
   */
  private extractImport(node: SyntaxNode): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const importText = getNodeText(node, this.source).trim();
    // Id currently on top of the node stack (the enclosing container), if any.
    const topOfStack = (): string | undefined =>
      this.nodeStack.length > 0 ? this.nodeStack[this.nodeStack.length - 1] : undefined;

    // 1. Language-specific hook.
    if (extractor.extractImport) {
      const info = extractor.extractImport(node, this.source);
      if (info) {
        this.createNode('import', info.moduleName, node, {
          signature: info.signature,
        });

        const parentId = topOfStack();
        if (parentId) {
          // Create unresolved reference unless the hook handled it
          if (!info.handledRefs && info.moduleName) {
            this.unresolvedReferences.push({
              fromNodeId: parentId,
              referenceName: info.moduleName,
              referenceKind: 'imports',
              line: node.startPosition.row + 1,
              column: node.startPosition.column,
            });
          }

          switch (this.language) {
            case 'typescript':
            case 'tsx':
            case 'javascript':
            case 'jsx':
              // Link each imported binding to its definition so imported-but-not-
              // called/typed symbols still record a cross-file dependency (TS/JS only).
              this.emitImportBindingRefs(node, parentId);
              break;
            case 'python':
              // Python `from module import X, Y` — link each imported name to its
              // definition (covers `__init__.py` re-export barrels, which are just
              // `from .sub import X`). Same recall gap as TS: a name imported and
              // used in a non-call position created no dependency edge.
              if (node.type === 'import_from_statement') {
                this.emitPyFromImportRefs(node, parentId);
              }
              break;
            case 'rust':
              // Rust `use crate::m::Item;` / `pub use self::sub::Item;` — link each
              // imported leaf to its definition. Covers `pub use` re-export hubs
              // (a `mod.rs` re-exporting submodule items, e.g. tokio's `fs/mod.rs`)
              // and items imported but used in non-call/non-type positions.
              if (node.type === 'use_declaration') {
                this.emitRustUseBindingRefs(node, parentId);
              }
              break;
            default:
              break;
          }
        }
        return;
      }
      // Hook returned null — fall through to multi-import inline handlers only
      // (hook returning null means "I didn't handle this" for multi-import cases,
      // NOT "use generic fallback" — the hook already declined)
    }

    // 2. Multi-import cases that create multiple nodes (can't be expressed with
    //    a single-return hook).

    // Python import_statement: import os, sys (creates one import per module)
    if (this.language === 'python' && node.type === 'import_statement') {
      for (const child of node.namedChildren) {
        let moduleNode: SyntaxNode | null | undefined = null;
        if (child?.type === 'dotted_name') {
          moduleNode = child;
        } else if (child?.type === 'aliased_import') {
          // `import a.b as c` — record the module path, not the alias.
          moduleNode = child.namedChildren.find(c => c.type === 'dotted_name');
        }
        if (moduleNode) {
          this.createNode('import', getNodeText(moduleNode, this.source), node, {
            signature: importText,
          });
        }
      }
      return;
    }

    // Go imports: single or grouped (creates one import per spec)
    if (this.language === 'go') {
      const parentId = topOfStack();

      const specs: SyntaxNode[] = [];
      const specList = node.namedChildren.find(c => c.type === 'import_spec_list');
      if (specList) {
        specs.push(...specList.namedChildren.filter(c => c.type === 'import_spec'));
      } else {
        const singleSpec = node.namedChildren.find(c => c.type === 'import_spec');
        if (singleSpec) specs.push(singleSpec);
      }

      for (const spec of specs) {
        const pathLiteral = spec.namedChildren.find(c => c.type === 'interpreted_string_literal');
        if (!pathLiteral) continue;
        // Strip the quotes around the import path.
        const importPath = getNodeText(pathLiteral, this.source).replace(/['"]/g, '');
        if (!importPath) continue;

        this.createNode('import', importPath, spec, {
          signature: getNodeText(spec, this.source).trim(),
        });
        // Create unresolved reference so the resolver can create imports edges
        if (parentId) {
          this.unresolvedReferences.push({
            fromNodeId: parentId,
            referenceName: importPath,
            referenceKind: 'imports',
            line: spec.startPosition.row + 1,
            column: spec.startPosition.column,
          });
        }
      }
      return;
    }

    // PHP grouped imports: use X\{A, B} (creates one import per item)
    if (this.language === 'php') {
      const prefixNode = node.namedChildren.find(c => c.type === 'namespace_name');
      const groupNode = node.namedChildren.find(c => c.type === 'namespace_use_group');
      if (prefixNode && groupNode) {
        const prefix = getNodeText(prefixNode, this.source);
        const clauses = groupNode.namedChildren.filter((c: SyntaxNode) =>
          c.type === 'namespace_use_group_clause' || c.type === 'namespace_use_clause'
        );
        for (const clause of clauses) {
          // The item's `name` sits inside a namespace_name when there is one,
          // otherwise directly under the clause.
          const nested = clause.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_name');
          const nameHost = nested ?? clause;
          const itemName = nameHost.namedChildren.find((c: SyntaxNode) => c.type === 'name');
          if (itemName) {
            const fullPath = `${prefix}\\${getNodeText(itemName, this.source)}`;
            this.createNode('import', fullPath, node, {
              signature: importText,
            });
          }
        }
        return;
      }
    }

    // 3. If a hook exists but returned null, it intentionally declined this
    //    node — don't create a fallback.
    if (extractor.extractImport) return;

    // Generic fallback for languages without hooks
    this.createNode('import', importText, node, {
      signature: importText,
    });
  }
  1647. /**
  1648. * Emit one `imports` reference per named/default import binding (TS/JS family),
  1649. * attributed to the file node — so the resolver links each imported symbol to
  1650. * the file that DEFINES it.
  1651. *
  1652. * Importing a symbol IS a dependency, but extraction only emits references for
  1653. * calls, instantiations, type annotations, and inheritance. A symbol that's
  1654. * imported and then only re-exported (`export { X } from './x'`), placed in a
  1655. * registry array (`[expressResolver, …]`), passed as an argument, or used in
  1656. * JSX produced NO cross-file edge at all — so the providing file showed a
  1657. * false "0 dependents" and was invisible to blast-radius / `affected`. The
  1658. * resolver maps the local name (alias-aware) to the provider's definition and
  1659. * creates a cross-file `imports` edge; `getFileDependents` picks it up, while
  1660. * `getImpactRadius` keeps it as a bounded leaf (the importing file node).
  1661. *
  1662. * Namespace imports (`import * as NS`) bind a whole module: `NS.member` calls
  1663. * resolve on their own, but a namespace used ONLY via a value-member read
  1664. * (`NS.SOME_CONST`) would leave no edge — so we also emit the namespace local
  1665. * name, which the resolver links to the module FILE as a dependency backstop.
  1666. */
  private emitImportBindingRefs(node: SyntaxNode, fromNodeId: string): void {
    const importClause = node.namedChildren.find((c) => c.type === 'import_clause');
    // Side-effect import (`import './x'`) — nothing is bound, nothing to emit.
    if (!importClause) return;

    // First gather the node carrying each binding's local name, in source order.
    const bindingNodes: (SyntaxNode | null | undefined)[] = [];
    for (const part of importClause.namedChildren) {
      switch (part.type) {
        case 'identifier':
          // default import: `import Foo from './x'`
          bindingNodes.push(part);
          break;
        case 'named_imports':
          // `import { A, B as C } from './x'` — link the LOCAL name (alias if any)
          for (const specifier of part.namedChildren) {
            if (specifier.type !== 'import_specifier') continue;
            bindingNodes.push(
              getChildByField(specifier, 'alias') ??
                getChildByField(specifier, 'name') ??
                specifier.namedChild(0)
            );
          }
          break;
        case 'namespace_import':
          // `import * as NS from './x'` — emit NS so the module-import backstop can
          // record the file dependency even if NS is only used by value-member read.
          bindingNodes.push(
            part.namedChildren.find((c) => c.type === 'identifier') ?? part.namedChild(0)
          );
          break;
        default:
          break;
      }
    }

    // Then queue one `imports` reference per binding that has a non-empty name.
    for (const bindingNode of bindingNodes) {
      if (!bindingNode) continue;
      const localName = getNodeText(bindingNode, this.source);
      if (!localName) continue;
      this.unresolvedReferences.push({
        fromNodeId,
        referenceName: localName,
        referenceKind: 'imports',
        line: bindingNode.startPosition.row + 1,
        column: bindingNode.startPosition.column,
      });
    }
  }
  1699. /**
  1700. * Emit one `imports` reference per re-exported binding of a
  1701. * `export { A, B as C } from './y'` statement, attributed to the file node —
  1702. * so a barrel that re-exports from another module records a dependency on it.
  1703. *
  1704. * Links the SOURCE-side name (`A`, the `name` field — not the local alias
  1705. * `C`), since that is what the source module defines. `export * from './y'`
  1706. * has no named bindings to attribute and `export { default as X }` can't be
  1707. * name-matched, so both are skipped.
  1708. */
  private emitReExportRefs(node: SyntaxNode, fromNodeId: string): void {
    const exportClause = node.namedChildren.find((c) => c.type === 'export_clause');
    // `export * from './y'` — no named bindings to attribute.
    if (!exportClause) return;

    const specifiers = exportClause.namedChildren.filter((s) => s.type === 'export_specifier');
    for (const specifier of specifiers) {
      // The `name` field is the source-side name; fall back to the first named child.
      const sourceNameNode = getChildByField(specifier, 'name') ?? specifier.namedChild(0);
      const sourceName = sourceNameNode ? getNodeText(sourceNameNode, this.source) : '';
      // `default` cannot be matched by name, so it is skipped along with empty names.
      if (!sourceNameNode || !sourceName || sourceName === 'default') continue;

      this.unresolvedReferences.push({
        fromNodeId,
        referenceName: sourceName,
        referenceKind: 'imports',
        line: sourceNameNode.startPosition.row + 1,
        column: sourceNameNode.startPosition.column,
      });
    }
  }
  1727. /**
  1728. * Emit one `imports` reference per binding of a Rust `use` declaration —
  1729. * `use crate::m::Item`, `use crate::m::{A, B as C}`, `pub use self::sub::Item`.
  1730. * Emits the FULL path (e.g. `self::sub::Item`, not just `Item`) so the resolver
  1731. * can resolve the module prefix to a file and find the leaf symbol there —
  1732. * disambiguating common-name re-exports (`pub use self::read::read`, where the
  1733. * leaf `read` collides with many same-named symbols). Falls back to name-match
  1734. * on the leaf when the path can't be resolved. `use ...::*` has no leaf binding.
  1735. */
  private emitRustUseBindingRefs(node: SyntaxNode, fromNodeId: string): void {
    const bindings: { text: string; node: SyntaxNode }[] = [];

    // Prepend `prefix::` to a segment when there is an enclosing group prefix.
    const qualify = (prefix: string, segment: string): string =>
      prefix ? `${prefix}::${segment}` : segment;

    // Recursively flatten the use tree into full `a::b::C` paths.
    const walk = (current: SyntaxNode, prefix: string): void => {
      const nodeType = current.type;

      if (nodeType === 'identifier') {
        bindings.push({ text: qualify(prefix, getNodeText(current, this.source)), node: current });
        return;
      }

      if (nodeType === 'scoped_identifier') {
        // Full scoped path (`a::b::C`); combine with any outer group prefix.
        const scopedText = getNodeText(current, this.source).trim();
        bindings.push({ text: qualify(prefix, scopedText), node: current });
        return;
      }

      if (nodeType === 'scoped_use_list') {
        // `path::{ ... }` — the group's path becomes the prefix for each item.
        const groupPath = getChildByField(current, 'path');
        const groupText = groupPath ? getNodeText(groupPath, this.source).trim() : '';
        const nestedPrefix = groupText ? qualify(prefix, groupText) : prefix;
        const items = getChildByField(current, 'list')
          ?? current.namedChildren.find((c) => c.type === 'use_list');
        if (items) walk(items, nestedPrefix);
        return;
      }

      if (nodeType === 'use_list') {
        for (const item of current.namedChildren) {
          if (item) walk(item, prefix);
        }
        return;
      }

      if (nodeType === 'use_as_clause') {
        // `Path as Alias` → link the source path (the definition), not the alias.
        const aliasedPath = getChildByField(current, 'path') ?? current.namedChild(0);
        if (aliasedPath) walk(aliasedPath, prefix);
      }
      // Any other node type (e.g. use_wildcard) → no specific binding to link.
    };

    for (const topLevel of node.namedChildren) {
      if (topLevel) walk(topLevel, '');
    }

    // The leaf must be a real name (skip a path that is only `self`/`super`/`crate`).
    const nonBindingLeaves = new Set(['self', 'super', 'crate', '*']);
    for (const { text, node: pathNode } of bindings) {
      const leaf = text.split('::').pop();
      if (!leaf || nonBindingLeaves.has(leaf)) continue;
      this.unresolvedReferences.push({
        fromNodeId,
        referenceName: text,
        referenceKind: 'imports',
        line: pathNode.startPosition.row + 1,
        column: pathNode.startPosition.column,
      });
    }
  }
  /**
   * For a Python `from module import A, B as C` statement, record one
   * `imports` reference per imported name, owned by `fromNodeId`.
   *
   * Rationale: references were otherwise only produced for calls,
   * instantiations and inheritance, so a name that is imported and then used
   * in a non-call position (list/dict literal, default argument, decorator
   * target, or a plain re-export through an `__init__.py` barrel) created no
   * cross-file edge and the providing module looked like it had no dependents.
   *
   * The name recorded is the LOCAL one: the alias when the import is aliased
   * (that is what the resolver's import mapping keys on), otherwise the
   * imported name itself. `from module import *` contributes nothing.
   *
   * @param node       The `from ... import ...` statement node.
   * @param fromNodeId Id of the node the references are attributed to.
   */
  private emitPyFromImportRefs(node: SyntaxNode, fromNodeId: string): void {
    const moduleNode = getChildByField(node, 'module_name');

    for (const entry of node.namedChildren) {
      // The `from <module>` part is itself a named child; identify it by span
      // and leave it out.
      const isModulePart =
        !!moduleNode &&
        entry.startIndex === moduleNode.startIndex &&
        entry.endIndex === moduleNode.endIndex;
      if (isModulePart) continue;

      let localNode: SyntaxNode | null | undefined = null;
      switch (entry.type) {
        case 'aliased_import':
          // Prefer the alias; fall back to the imported name, then to the
          // first named child.
          localNode =
            getChildByField(entry, 'alias') ??
            getChildByField(entry, 'name') ??
            entry.namedChild(0);
          break;
        case 'dotted_name':
          localNode = entry;
          break;
        default:
          // `wildcard_import` and any other child kind carry no name to link.
          continue;
      }
      if (!localNode) continue;

      // Imported names are normally plain identifiers; keep only the text
      // after the last dot as a defensive measure.
      const text = getNodeText(localNode, this.source);
      const localName = text.slice(text.lastIndexOf('.') + 1);
      if (!localName) continue;

      this.unresolvedReferences.push({
        fromNodeId,
        referenceName: localName,
        referenceKind: 'imports',
        line: localNode.startPosition.row + 1,
        column: localNode.startPosition.column,
      });
    }
  }
  /**
   * Record a `calls` reference for a call-like node, attributed to the node
   * currently on top of `nodeStack`.
   *
   * The callee name is derived in one of three ways, depending on node shape:
   *  1. `method_invocation` / `member_call_expression` /
   *     `scoped_call_expression` with both a `name` and an `object`/`scope`
   *     field (Java/Kotlin/PHP style).
   *  2. `message_expression` (Objective-C): the `method` keyword children are
   *     joined into a full selector.
   *  3. Everything else: the `function` field (or first named child), with
   *     member-access callees split into receiver and method.
   *
   * When the receiver is a plain name that is not a self-like keyword the
   * result is `receiver.method`; otherwise just `method`. Nothing is emitted
   * when no callee name could be determined or the stack is empty.
   *
   * @param node The call-like syntax node.
   */
  private extractCall(node: SyntaxNode): void {
    const callerId = this.nodeStack[this.nodeStack.length - 1];
    if (!callerId) return;

    const textOf = (n: SyntaxNode): string => getNodeText(n, this.source);

    let callee = '';

    // Java/Kotlin method_invocation has 'object' + 'name' fields instead of
    // 'function'; PHP member_call_expression has 'object' + 'name' and
    // scoped_call_expression has 'scope' + 'name'.
    const nameNode = getChildByField(node, 'name');
    const receiverNode = getChildByField(node, 'object') || getChildByField(node, 'scope');
    const hasExplicitReceiverShape =
      node.type === 'method_invocation' ||
      node.type === 'member_call_expression' ||
      node.type === 'scoped_call_expression';

    if (nameNode && receiverNode && hasExplicitReceiverShape) {
      // receiver.method() / $receiver->method() / ClassName::method()
      const method = textOf(nameNode);

      // Java `this.userbo.toLogin2()` parses as
      // method_invocation(object=field_access(this, userbo)). Using the whole
      // `this.userbo` text would defeat the name-matcher's single-dot receiver
      // pattern, so take just the field after `this.`; the resolver can then
      // look that field up in the enclosing class.
      let receiverSource = receiverNode;
      if (receiverNode.type === 'field_access') {
        const owner = getChildByField(receiverNode, 'object');
        const field = getChildByField(receiverNode, 'field');
        if (owner && field && (owner.type === 'this' || owner.type === 'this_expression')) {
          receiverSource = field;
        }
      }
      // Drop the PHP `$` sigil from variable receivers.
      const receiver = textOf(receiverSource).replace(/^\$/, '');

      if (method) {
        // self/this/parent/static-style receivers do not help resolution.
        const ignorable = new Set(['self', 'this', 'cls', 'super', 'parent', 'static']);
        callee = ignorable.has(receiver) ? method : `${receiver}.${method}`;
      }
    } else if (node.type === 'message_expression') {
      // ObjC emits one `method` field child per selector keyword:
      // `[obj a:1 b:2 c:3]` has three. Rebuilding `a:b:c:` lets multi-keyword
      // call sites match the multi-part selector names produced for method
      // definitions (`extractObjcMethodName` in languages/objc.ts); emitting
      // only the first keyword left such methods with zero callers.
      const selectorParts: string[] = [];
      for (let idx = 0; idx < node.namedChildCount; idx++) {
        if (node.fieldNameForNamedChild(idx) !== 'method') continue;
        const part = node.namedChild(idx);
        if (part) selectorParts.push(textOf(part));
      }

      if (selectorParts.length > 0) {
        // A lone keyword is kept bare; several keywords each get a trailing ':'.
        const selector: string =
          selectorParts.length === 1
            ? (selectorParts[0] as string)
            : `${selectorParts.join(':')}:`;

        // A nested message expression as receiver is not used for qualification.
        const target = getChildByField(node, 'receiver');
        const targetText = target && target.type !== 'message_expression' ? textOf(target) : '';
        const qualifies = targetText !== '' && targetText !== 'self' && targetText !== 'super';
        callee = qualifies ? `${targetText}.${selector}` : selector;
      }
    } else {
      const fn = getChildByField(node, 'function') || node.namedChild(0);
      if (fn) {
        // Member-access callee kinds: JS/TS member_expression ('property'),
        // Python attribute, Go selector_expression ('field'), Kotlin
        // navigation_expression, C/C++ field_expression (`.` and `->`).
        const isMemberAccess =
          fn.type === 'member_expression' ||
          fn.type === 'attribute' ||
          fn.type === 'selector_expression' ||
          fn.type === 'navigation_expression' ||
          fn.type === 'field_expression';

        if (isMemberAccess) {
          let member = getChildByField(fn, 'property') || getChildByField(fn, 'field');
          if (!member) {
            // Kotlin: navigation_suffix wraps the method name, so dig out the
            // simple_identifier inside it when present.
            const second = fn.namedChild(1);
            member =
              second?.type === 'navigation_suffix'
                ? (second.namedChildren.find((c: SyntaxNode) => c.type === 'simple_identifier') ?? second)
                : second;
          }

          if (member) {
            const method = textOf(member);
            // Keep the receiver for qualified resolution (e.g. `console.print`
            // vs the builtin `print`), but only when it is a plain name.
            const owner =
              getChildByField(fn, 'object') ||
              getChildByField(fn, 'operand') ||
              getChildByField(fn, 'argument') ||
              fn.namedChild(0);
            const ownerIsPlainName =
              !!owner &&
              (owner.type === 'identifier' ||
                owner.type === 'simple_identifier' ||
                owner.type === 'field_identifier');
            const ownerText = owner && ownerIsPlainName ? textOf(owner) : null;
            const ignorable = new Set(['self', 'this', 'cls', 'super']);
            callee =
              ownerText !== null && !ignorable.has(ownerText)
                ? `${ownerText}.${method}`
                : method;
          }
        } else {
          // Scoped calls (`Module::function()`) and plain callees alike: use
          // the callee text verbatim.
          callee = textOf(fn);
        }
      }
    }

    // Parenthesized type conversions such as Go `(*T)(x)` / `(T)(x)` parse as
    // a call whose callee text is the literal `(*T)`. Reduce that to the inner
    // name so the reference can resolve to `T`.
    if (callee) {
      const inner = /^\(\s*\*?\s*([A-Za-z_][\w.]*)\s*\)$/.exec(callee)?.[1];
      if (inner) callee = inner;
    }

    if (!callee) return;
    this.unresolvedReferences.push({
      fromNodeId: callerId,
      referenceName: callee,
      referenceKind: 'calls',
      line: node.startPosition.row + 1,
      column: node.startPosition.column,
    });
  }
  /**
   * Record an `instantiates` reference for a construction-like node
   * (`new Foo(...)`, object_creation_expression, Go composite literal, ...),
   * attributed to the node on top of `nodeStack`. The resolver later links
   * the reference to the class node, which is what answers "what creates
   * instances of X".
   *
   * This method only emits the reference; it does not walk the node's
   * children.
   *
   * @param node The construction-like syntax node.
   */
  private extractInstantiation(node: SyntaxNode): void {
    const ownerId = this.nodeStack[this.nodeStack.length - 1];
    if (!ownerId) return;

    // Where the constructed type lives depends on the grammar: try the
    // `constructor`, `type` and `name` fields, then the first named child.
    const typeNode =
      getChildByField(node, 'constructor') ||
      getChildByField(node, 'type') ||
      getChildByField(node, 'name') ||
      node.namedChild(0);
    if (!typeNode) return;

    // Emits the reference unless the computed name is empty.
    const emit = (referenceName: string): void => {
      if (!referenceName) return;
      this.unresolvedReferences.push({
        fromNodeId: ownerId,
        referenceName,
        referenceKind: 'instantiates',
        line: node.startPosition.row + 1,
        column: node.startPosition.column,
      });
    };

    if (node.type === 'composite_literal') {
      // Go composite literals: only a directly named type (`Widget{...}`,
      // `pkga.Widget{...}`) is a meaningful target. Slice/map/array literals
      // (`[]T{}`, `map[K]V{}`) have a composite `type` and are skipped.
      if (typeNode.type !== 'type_identifier' && typeNode.type !== 'qualified_type') return;

      // The package qualifier is deliberately KEPT here (unlike `new ns.Foo()`)
      // so the Go cross-package resolver can pick the right package's type.
      const literalType = getNodeText(typeNode, this.source).trim();
      // Drop Go generic arguments: `Box[T]{}` -> `Box`.
      const bracketAt = literalType.indexOf('[');
      emit(bracketAt > 0 ? literalType.slice(0, bracketAt).trim() : literalType);
      return;
    }

    let name = getNodeText(typeNode, this.source);

    // Remove type arguments first: for `new Map<K, V>()` the text is
    // `Map<K, V>`, and no class is indexed under a name with that suffix.
    const angleAt = name.indexOf('<');
    if (angleAt > 0) name = name.slice(0, angleAt);

    // For qualified constructors (`new ns.Foo()`, `new ns::Foo()`) only the
    // trailing identifier matches a class node in the index.
    const separatorAt = Math.max(name.lastIndexOf('.'), name.lastIndexOf('::'));
    if (separatorAt >= 0) name = name.slice(separatorAt + 1).replace(/^[:.]/, '');

    emit(name.trim());
  }
  /**
   * Look through the named children of a creation expression for a class
   * body, which marks an anonymous class (`new T() { ... }`).
   *
   * Both `class_body` and `declaration_list` children are accepted; the first
   * one found (in child order) wins.
   * NOTE(review): the earlier comment attributed `class_body` to Java and said
   * C# uses the same kind; which grammar produces `declaration_list` here is
   * not visible from this code — confirm.
   *
   * @param node The creation expression to inspect.
   * @returns The body node, so the caller can walk it as the anonymous
   *          class's members, or `null` when there is none.
   */
  private findAnonymousClassBody(node: SyntaxNode): SyntaxNode | null {
    const bodyKinds = ['class_body', 'declaration_list'];
    for (let idx = 0; idx < node.namedChildCount; idx += 1) {
      const candidate = node.namedChild(idx);
      if (!candidate) continue;
      if (bodyKinds.includes(candidate.type)) return candidate;
    }
    return null;
  }
  /**
   * Extract a Java/C# anonymous class — `new T() { ...members }`. Emits a
   * `class` node named `<T$anon@line>`, an `extends` reference to T (so
   * Phase 5.5 interface-impl can bridge), and walks the body so its
   * `method_declaration` members become method nodes under the anon class.
   *
   * Why this matters: without anon-class extraction, the overrides inside
   * a lambda-returned `new T() { @Override int foo(){...} }` are not nodes,
   * so a call through T.foo (the abstract parent method) has no static
   * target — the agent has to Read the file to find the implementation.
   *
   * @param node The creation expression that carries the anonymous body.
   * @param body The body node whose named children are visited as members.
   */
  private extractAnonymousClass(node: SyntaxNode, body: SyntaxNode): void {
    if (!this.extractor) return;

    // The instantiated type sits in the same field/position that
    // extractInstantiation reads from. Use the same lookup so the anon
    // class's `extends` target matches the `instantiates` edge.
    const typeNode =
      getChildByField(node, 'constructor') ||
      getChildByField(node, 'type') ||
      getChildByField(node, 'name') ||
      node.namedChild(0);

    // Normalize the type text the same way: drop type arguments, keep only
    // the trailing identifier of a qualified name, default to `Object`.
    let typeName = typeNode ? getNodeText(typeNode, this.source) : 'Object';
    const ltIdx = typeName.indexOf('<');
    if (ltIdx > 0) typeName = typeName.slice(0, ltIdx);
    const lastDot = Math.max(typeName.lastIndexOf('.'), typeName.lastIndexOf('::'));
    if (lastDot >= 0) typeName = typeName.slice(lastDot + 1).replace(/^[:.]/, '');
    typeName = typeName.trim() || 'Object';

    const anonName = `<${typeName}$anon@${node.startPosition.row + 1}>`;
    const classNode = this.createNode('class', anonName, node, {});
    if (!classNode) return;

    // The anonymous class implicitly extends/implements the named type.
    // We can't tell at extraction time whether T is a class or an interface,
    // so emit `extends`. Resolution will still bind T to whatever it is, and
    // Phase 5.5 (which already handles both `extends` and `implements`) will
    // bridge T's methods to the override names found in the anon body.
    //
    // Reference lines are 1-based everywhere else (`row + 1`), so convert the
    // 0-based tree-sitter row here too; previously the raw row was stored and
    // this reference pointed one line above the actual type.
    const anchor = typeNode ?? node;
    this.unresolvedReferences.push({
      fromNodeId: classNode.id,
      referenceName: typeName,
      referenceKind: 'extends',
      line: anchor.startPosition.row + 1,
      column: anchor.startPosition.column,
    });

    // Walk the body's children so method_declaration nodes inside become
    // method nodes scoped to the anon class.
    this.nodeStack.push(classNode.id);
    for (let i = 0; i < body.namedChildCount; i++) {
      const child = body.namedChild(i);
      if (child) this.visitNode(child);
    }
    this.nodeStack.pop();
  }
  /**
   * Emit a `decorates` reference from `decoratedId` to the name of every
   * decorator/annotation attached to `declNode`.
   *
   * Two places are searched, in this order:
   *  1. `declNode`'s own named children — methods/properties (and some
   *     grammars for classes) keep the decorator inside the declaration.
   *  2. The named siblings immediately BEFORE `declNode` in its parent — in
   *     TypeScript `@Foo class Bar {}` can parse with the `decorator` as a
   *     child of the wrapper (e.g. `export_statement`) placed ahead of the
   *     `class_declaration`.
   *
   * When neither place holds a decorator the method does nothing.
   *
   * @param declNode    The declaration being decorated.
   * @param decoratedId Id of the graph node the references originate from.
   */
  private extractDecoratorsFor(declNode: SyntaxNode, decoratedId: string): void {
    // `marker_annotation` is Java's kind for argument-less annotations
    // (`@Override`, `@Deprecated`); leaving it out would drop all of them.
    const decoratorKinds = new Set(['decorator', 'annotation', 'marker_annotation']);
    const nameKinds = new Set([
      'identifier',
      'member_expression',
      'scoped_identifier',
      'navigation_expression',
    ]);

    // Locate the node naming the decorator: the callee of a call_expression
    // when the decorator is invoked with arguments, else the first name-like
    // child. The `@` punctuation is not a named child, so it never shows up.
    const findTarget = (decorator: SyntaxNode): SyntaxNode | null => {
      for (let i = 0; i < decorator.namedChildCount; i++) {
        const part = decorator.namedChild(i);
        if (!part) continue;
        if (part.type === 'call_expression') {
          const callee = getChildByField(part, 'function') ?? part.namedChild(0);
          if (callee) return callee;
          continue;
        }
        if (nameKinds.has(part.type)) return part;
      }
      return null;
    };

    // Emit one reference for `candidate` if it is a decorator-like node with
    // a resolvable, non-empty name.
    const emitFor = (candidate: SyntaxNode | null): void => {
      if (!candidate || !decoratorKinds.has(candidate.type)) return;

      const target = findTarget(candidate);
      if (!target) return;

      // Keep only the trailing identifier of a qualified name.
      const qualified = getNodeText(target, this.source);
      const cut = Math.max(qualified.lastIndexOf('.'), qualified.lastIndexOf('::'));
      const simpleName = cut >= 0 ? qualified.slice(cut + 1).replace(/^[:.]/, '') : qualified;
      if (!simpleName) return;

      this.unresolvedReferences.push({
        fromNodeId: decoratedId,
        referenceName: simpleName,
        referenceKind: 'decorates',
        line: candidate.startPosition.row + 1,
        column: candidate.startPosition.column,
      });
    };

    // 1. Decorators nested directly inside the declaration.
    for (let i = 0; i < declNode.namedChildCount; i++) {
      emitFor(declNode.namedChild(i));
    }

    // 2. Decorators that precede the declaration among the parent's children.
    const parent = declNode.parent;
    if (!parent) return;

    // tree-sitter web bindings hand back fresh JS wrapper objects from
    // `parent`/`namedChild`, so object identity (`sibling === declNode`) is
    // unreliable; locate the declaration by its start offset instead.
    let declPosition = -1;
    for (let i = 0; i < parent.namedChildCount; i++) {
      if (parent.namedChild(i)?.startIndex === declNode.startIndex) {
        declPosition = i;
        break;
      }
    }

    // Walk backwards and stop at the first sibling that is not a decorator.
    // Otherwise decorators of an EARLIER, unrelated declaration would leak in
    // (`@A class Foo {} @B class Bar {}` would attribute @A to Bar). When the
    // declaration was not found, or is the first child, the loop is empty.
    for (let j = declPosition - 1; j >= 0; j--) {
      const previous = parent.namedChild(j);
      if (!previous) continue;
      if (!decoratorKinds.has(previous.type)) break;
      emitFor(previous);
    }
  }
  /**
   * Walk a function body, recording references and extracting nested
   * structural nodes.
   *
   * Besides calls and instantiations, class/struct/enum/interface definitions
   * found inside the body are extracted too. That covers:
   *  1. Genuinely local definitions (valid in C++, Java, etc.).
   *  2. C++ macro misparses — a macro such as NLOHMANN_JSON_NAMESPACE_BEGIN
   *     can make tree-sitter treat a namespace block as a function_definition,
   *     burying real class/struct/enum nodes inside the "function body".
   *
   * @param body        Root node of the body to walk.
   * @param _functionId Id of the owning function; not used by this method.
   */
  private visitFunctionBody(body: SyntaxNode, _functionId: string): void {
    const extractor = this.extractor;
    if (!extractor) return;

    const walk = (current: SyntaxNode): void => {
      const kind = current.type;

      if (extractor.callTypes.includes(kind)) {
        this.extractCall(current);
      } else if (INSTANTIATION_KINDS.has(kind)) {
        // `new Foo()` inside a body: without this branch constructor
        // invocations would yield no graph edge at all.
        this.extractInstantiation(current);
        // `new T() { ... }`: extract the anonymous class so interface-impl
        // synthesis (Phase 5.5) can bridge T's methods to the overrides —
        // same rationale as in visitNode. Its body is walked by
        // extractAnonymousClass, so stop here.
        const anonymousBody = this.findAnonymousClassBody(current);
        if (anonymousBody) {
          this.extractAnonymousClass(current, anonymousBody);
          return;
        }
      } else if (extractor.extractBareCall) {
        const bareCallee = extractor.extractBareCall(current, this.source);
        const enclosingId = this.nodeStack[this.nodeStack.length - 1];
        if (bareCallee && enclosingId) {
          this.unresolvedReferences.push({
            fromNodeId: enclosingId,
            referenceName: bareCallee,
            referenceKind: 'calls',
            line: current.startPosition.row + 1,
            column: current.startPosition.column,
          });
        }
      }

      // Type annotations on locals (`const items: Foo[] = []`). No node is
      // created for the local itself — that would explode the graph — but the
      // annotated TYPE is a real dependency of the enclosing function, so it is
      // attributed to the current owner. A function that uses a type ONLY in
      // its body would otherwise have no edge to it, and impact / `affected`
      // would miss the dependency. No return here: the initializer's calls and
      // any nested declarators are still reached by the recursion at the end.
      if (kind === 'variable_declarator' && this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) {
        const annotationOwner = this.nodeStack[this.nodeStack.length - 1];
        if (annotationOwner) this.extractVariableTypeAnnotation(current, annotationOwner);
      }

      // Nested NAMED functions (declarations, or named function expressions
      // like `.on('mount', function onmount(){})`) become their own nodes so
      // the graph can link to them. Anonymous ones fall through to the
      // recursion at the end, so their inner calls stay attributed to the
      // enclosing function. extractFunction walks the nested body itself.
      if (extractor.functionTypes.includes(kind)) {
        const innerName = extractName(current, this.source, extractor);
        if (innerName && innerName !== '<anonymous>') {
          this.extractFunction(current);
          return;
        }
      }

      // Structural nodes inside a body. Each extract method visits its own
      // children, hence the return after every extraction.
      if (extractor.classTypes.includes(kind)) {
        switch (extractor.classifyClassNode?.(current) ?? 'class') {
          case 'struct':
            this.extractStruct(current);
            break;
          case 'enum':
            this.extractEnum(current);
            break;
          case 'interface':
            this.extractInterface(current);
            break;
          case 'trait':
            this.extractClass(current, 'trait');
            break;
          default:
            this.extractClass(current);
        }
        return;
      }
      if (extractor.structTypes.includes(kind)) {
        this.extractStruct(current);
        return;
      }
      if (extractor.enumTypes.includes(kind)) {
        this.extractEnum(current);
        return;
      }
      if (extractor.interfaceTypes.includes(kind)) {
        this.extractInterface(current);
        return;
      }

      // Default: keep descending through the named children.
      for (let idx = 0; idx < current.namedChildCount; idx++) {
        const next = current.namedChild(idx);
        if (next) walk(next);
      }
    };

    walk(body);
  }
  2323. /**
  2324. * Extract inheritance relationships
  2325. */
  2326. private extractInheritance(node: SyntaxNode, classId: string): void {
  2327. // Objective-C @interface MyClass : NSObject <ProtoA, ProtoB>
  2328. if (node.type === 'class_interface') {
  2329. const superclass = getChildByField(node, 'superclass');
  2330. if (superclass) {
  2331. const name = getNodeText(superclass, this.source);
  2332. this.unresolvedReferences.push({
  2333. fromNodeId: classId,
  2334. referenceName: name,
  2335. referenceKind: 'extends',
  2336. line: superclass.startPosition.row + 1,
  2337. column: superclass.startPosition.column,
  2338. });
  2339. }
  2340. for (let j = 0; j < node.namedChildCount; j++) {
  2341. const argList = node.namedChild(j);
  2342. if (argList?.type !== 'parameterized_arguments') continue;
  2343. for (let k = 0; k < argList.namedChildCount; k++) {
  2344. const typeName = argList.namedChild(k);
  2345. if (!typeName) continue;
  2346. const typeId = typeName.namedChildren.find(
  2347. (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'identifier'
  2348. );
  2349. if (!typeId) continue;
  2350. const protocolName = getNodeText(typeId, this.source);
  2351. this.unresolvedReferences.push({
  2352. fromNodeId: classId,
  2353. referenceName: protocolName,
  2354. referenceKind: 'implements',
  2355. line: typeId.startPosition.row + 1,
  2356. column: typeId.startPosition.column,
  2357. });
  2358. }
  2359. }
  2360. return;
  2361. }
  2362. // Look for extends/implements clauses
  2363. for (let i = 0; i < node.namedChildCount; i++) {
  2364. const child = node.namedChild(i);
  2365. if (!child) continue;
  2366. if (
  2367. child.type === 'extends_clause' ||
  2368. child.type === 'superclass' ||
  2369. child.type === 'base_clause' || // PHP class extends
  2370. child.type === 'extends_interfaces' // Java interface extends
  2371. ) {
  2372. // Extract parent class/interface names
  2373. // Java uses type_list wrapper: superclass -> type_identifier, extends_interfaces -> type_list -> type_identifier
  2374. const typeList = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_list');
  2375. const targets = typeList ? typeList.namedChildren : [child.namedChild(0)];
  2376. for (const target of targets) {
  2377. if (target) {
  2378. const name = getNodeText(target, this.source);
  2379. this.unresolvedReferences.push({
  2380. fromNodeId: classId,
  2381. referenceName: name,
  2382. referenceKind: 'extends',
  2383. line: target.startPosition.row + 1,
  2384. column: target.startPosition.column,
  2385. });
  2386. }
  2387. }
  2388. }
  2389. // C++ base classes: `class Derived : public Base, private Other` →
  2390. // base_class_clause holds access specifiers + base type(s). Emit an extends
  2391. // ref per base type (skip the public/private/protected keywords).
  2392. if (child.type === 'base_class_clause') {
  2393. for (const t of child.namedChildren) {
  2394. if (
  2395. t.type === 'type_identifier' ||
  2396. t.type === 'qualified_identifier' ||
  2397. t.type === 'template_type'
  2398. ) {
  2399. this.unresolvedReferences.push({
  2400. fromNodeId: classId,
  2401. referenceName: getNodeText(t, this.source),
  2402. referenceKind: 'extends',
  2403. line: t.startPosition.row + 1,
  2404. column: t.startPosition.column,
  2405. });
  2406. }
  2407. }
  2408. }
  2409. if (
  2410. child.type === 'implements_clause' ||
  2411. child.type === 'class_interface_clause' ||
  2412. child.type === 'super_interfaces' || // Java class implements
  2413. child.type === 'interfaces' // Dart
  2414. ) {
  2415. // Extract implemented interfaces
  2416. // Java uses type_list wrapper: super_interfaces -> type_list -> type_identifier
  2417. const typeList = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_list');
  2418. const targets = typeList ? typeList.namedChildren : child.namedChildren;
  2419. for (const iface of targets) {
  2420. if (iface) {
  2421. const name = getNodeText(iface, this.source);
  2422. this.unresolvedReferences.push({
  2423. fromNodeId: classId,
  2424. referenceName: name,
  2425. referenceKind: 'implements',
  2426. line: iface.startPosition.row + 1,
  2427. column: iface.startPosition.column,
  2428. });
  2429. }
  2430. }
  2431. }
  2432. // Python superclass list: `class Flask(Scaffold, Mixin):`
  2433. // argument_list contains identifier children for each parent class
  2434. if (child.type === 'argument_list' && node.type === 'class_definition') {
  2435. for (const arg of child.namedChildren) {
  2436. if (arg.type === 'identifier' || arg.type === 'attribute') {
  2437. const name = getNodeText(arg, this.source);
  2438. this.unresolvedReferences.push({
  2439. fromNodeId: classId,
  2440. referenceName: name,
  2441. referenceKind: 'extends',
  2442. line: arg.startPosition.row + 1,
  2443. column: arg.startPosition.column,
  2444. });
  2445. }
  2446. }
  2447. }
  2448. // Go interface embedding: `type Querier interface { LabelQuerier; ... }`
  2449. // constraint_elem wraps the embedded interface type identifier
  2450. if (child.type === 'constraint_elem') {
  2451. const typeId = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  2452. if (typeId) {
  2453. const name = getNodeText(typeId, this.source);
  2454. this.unresolvedReferences.push({
  2455. fromNodeId: classId,
  2456. referenceName: name,
  2457. referenceKind: 'extends',
  2458. line: typeId.startPosition.row + 1,
  2459. column: typeId.startPosition.column,
  2460. });
  2461. }
  2462. }
  2463. // Go struct embedding: field_declaration without field_identifier
  2464. // e.g. `type DB struct { *Head; Queryable }` — no field name means embedded type
  2465. if (child.type === 'field_declaration') {
  2466. const hasFieldIdentifier = child.namedChildren.some((c: SyntaxNode) => c.type === 'field_identifier');
  2467. if (!hasFieldIdentifier) {
  2468. const typeId = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  2469. if (typeId) {
  2470. const name = getNodeText(typeId, this.source);
  2471. this.unresolvedReferences.push({
  2472. fromNodeId: classId,
  2473. referenceName: name,
  2474. referenceKind: 'extends',
  2475. line: typeId.startPosition.row + 1,
  2476. column: typeId.startPosition.column,
  2477. });
  2478. }
  2479. }
  2480. }
  2481. // Rust trait supertraits: `trait SubTrait: SuperTrait + Display { ... }`
  2482. // trait_bounds contains type_identifier, generic_type, or higher_ranked_trait_bound children
  2483. if (child.type === 'trait_bounds') {
  2484. for (const bound of child.namedChildren) {
  2485. let typeName: string | undefined;
  2486. let posNode: SyntaxNode | undefined;
  2487. if (bound.type === 'type_identifier') {
  2488. typeName = getNodeText(bound, this.source);
  2489. posNode = bound;
  2490. } else if (bound.type === 'generic_type') {
  2491. // e.g. `Deserialize<'de>`
  2492. const inner = bound.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  2493. if (inner) { typeName = getNodeText(inner, this.source); posNode = inner; }
  2494. } else if (bound.type === 'higher_ranked_trait_bound') {
  2495. // e.g. `for<'de> Deserialize<'de>`
  2496. const generic = bound.namedChildren.find((c: SyntaxNode) => c.type === 'generic_type');
  2497. const typeId = generic?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier')
  2498. ?? bound.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  2499. if (typeId) { typeName = getNodeText(typeId, this.source); posNode = typeId; }
  2500. }
  2501. if (typeName && posNode) {
  2502. this.unresolvedReferences.push({
  2503. fromNodeId: classId,
  2504. referenceName: typeName,
  2505. referenceKind: 'extends',
  2506. line: posNode.startPosition.row + 1,
  2507. column: posNode.startPosition.column,
  2508. });
  2509. }
  2510. }
  2511. }
  2512. // C#: `class Movie : BaseItem, IPlugin` → base_list with identifier children
  2513. // base_list combines both base class and interfaces in a single colon-separated list.
  2514. // We emit all as 'extends' since the syntax doesn't distinguish them.
  2515. if (child.type === 'base_list') {
  2516. for (const baseType of child.namedChildren) {
  2517. if (baseType) {
  2518. // For generic base types like `ClientBase<T>`, extract just the type name
  2519. const name = baseType.type === 'generic_name'
  2520. ? getNodeText(baseType.namedChildren.find((c: SyntaxNode) => c.type === 'identifier') ?? baseType, this.source)
  2521. : getNodeText(baseType, this.source);
  2522. this.unresolvedReferences.push({
  2523. fromNodeId: classId,
  2524. referenceName: name,
  2525. referenceKind: 'extends',
  2526. line: baseType.startPosition.row + 1,
  2527. column: baseType.startPosition.column,
  2528. });
  2529. }
  2530. }
  2531. }
  2532. // Kotlin: `class Foo : Bar, Baz` → delegation_specifier > user_type > type_identifier
  2533. // Also handles `class Foo : Bar()` → delegation_specifier > constructor_invocation > user_type
  2534. if (child.type === 'delegation_specifier') {
  2535. const userType = child.namedChildren.find((c: SyntaxNode) => c.type === 'user_type');
  2536. const constructorInvocation = child.namedChildren.find((c: SyntaxNode) => c.type === 'constructor_invocation');
  2537. const target = userType ?? constructorInvocation;
  2538. if (target) {
  2539. const typeId = target.type === 'user_type'
  2540. ? target.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier') ?? target
  2541. : target.namedChildren.find((c: SyntaxNode) => c.type === 'user_type')?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier')
  2542. ?? target.namedChildren.find((c: SyntaxNode) => c.type === 'user_type') ?? target;
  2543. const name = getNodeText(typeId, this.source);
  2544. this.unresolvedReferences.push({
  2545. fromNodeId: classId,
  2546. referenceName: name,
  2547. referenceKind: 'extends',
  2548. line: typeId.startPosition.row + 1,
  2549. column: typeId.startPosition.column,
  2550. });
  2551. }
  2552. }
  2553. // Swift: inheritance_specifier > user_type > type_identifier
  2554. // Used for class inheritance, protocol conformance, and protocol inheritance
  2555. if (child.type === 'inheritance_specifier') {
  2556. const userType = child.namedChildren.find((c: SyntaxNode) => c.type === 'user_type');
  2557. const typeId = userType?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
  2558. if (typeId) {
  2559. const name = getNodeText(typeId, this.source);
  2560. this.unresolvedReferences.push({
  2561. fromNodeId: classId,
  2562. referenceName: name,
  2563. referenceKind: 'extends',
  2564. line: typeId.startPosition.row + 1,
  2565. column: typeId.startPosition.column,
  2566. });
  2567. }
  2568. }
  2569. // JavaScript class_heritage has bare identifier without extends_clause wrapper
  2570. // e.g. `class Foo extends Bar {}` → class_heritage → identifier("Bar")
  2571. if (
  2572. (child.type === 'identifier' || child.type === 'type_identifier') &&
  2573. node.type === 'class_heritage'
  2574. ) {
  2575. const name = getNodeText(child, this.source);
  2576. this.unresolvedReferences.push({
  2577. fromNodeId: classId,
  2578. referenceName: name,
  2579. referenceKind: 'extends',
  2580. line: child.startPosition.row + 1,
  2581. column: child.startPosition.column,
  2582. });
  2583. }
  2584. // Recurse into container nodes (e.g. field_declaration_list in Go structs,
  2585. // class_heritage in TypeScript which wraps extends_clause/implements_clause)
  2586. if (child.type === 'field_declaration_list' || child.type === 'class_heritage') {
  2587. this.extractInheritance(child, classId);
  2588. }
  2589. }
  2590. }
  /**
   * Rust `impl Trait for Type` — records an `implements` reference from Type to Trait.
   * For plain `impl Type { ... }` (no `for` keyword), nothing is recorded.
   *
   * Generic arguments are dropped from both names, so `impl From<Foo> for Bar<T>`
   * is recorded as `Bar` implements `From`. This matches how generic supertrait
   * bounds are reduced to their bare type name in the inheritance extraction of
   * this class.
   *
   * The reference is only emitted when the implementing type has already been
   * extracted (see `findNodeByName`); it is positioned at the trait node.
   */
  private extractRustImplItem(node: SyntaxNode): void {
    // `impl Trait for Type` is recognised by an anonymous `for` token among the direct children.
    const hasFor = node.children.some(
      (c: SyntaxNode) => c.type === 'for' && !c.isNamed
    );
    if (!hasFor) return;

    // Of the type-like direct children, the first is the trait and the last is the
    // implementing type (also covers `impl<T> Trait for MyStruct<T>`).
    const typeIdents = node.namedChildren.filter(
      (c: SyntaxNode) =>
        c.type === 'type_identifier' || c.type === 'generic_type' || c.type === 'scoped_type_identifier'
    );
    if (typeIdents.length < 2) return;

    const traitNode = typeIdents[0]!;
    const typeNode = typeIdents[typeIdents.length - 1]!;

    // Scoped paths such as `std::fmt::Display` are taken verbatim from the source.
    const textOf = (n: SyntaxNode): string =>
      n.type === 'scoped_type_identifier'
        ? this.source.substring(n.startIndex, n.endIndex)
        : getNodeText(n, this.source);

    // Trait name: for a generic trait like `From<Foo>`, keep only the head (`From`)
    // so the reference name does not carry type arguments.
    let traitName: string;
    if (traitNode.type === 'generic_type') {
      const head = traitNode.namedChildren.find(
        (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'scoped_type_identifier'
      );
      traitName = textOf(head ?? traitNode);
    } else {
      traitName = textOf(traitNode);
    }

    // Implementing type name (extract the inner type_identifier for generics).
    let typeName: string;
    if (typeNode.type === 'generic_type') {
      const inner = typeNode.namedChildren.find(
        (c: SyntaxNode) => c.type === 'type_identifier'
      );
      typeName = inner ? getNodeText(inner, this.source) : getNodeText(typeNode, this.source);
    } else {
      typeName = getNodeText(typeNode, this.source);
    }

    // Attach the edge to the previously-extracted struct/enum/class, if any.
    const typeNodeId = this.findNodeByName(typeName);
    if (typeNodeId) {
      this.unresolvedReferences.push({
        fromNodeId: typeNodeId,
        referenceName: traitName,
        referenceKind: 'implements',
        line: traitNode.startPosition.row + 1,
        column: traitNode.startPosition.column,
      });
    }
  }
  /**
   * Look up a previously-extracted struct, enum, or class by its simple name
   * (used for back-references such as Rust impl blocks).
   *
   * @param name Simple name to match against `node.name`.
   * @returns The id of the first matching entry in `this.nodes`, or `undefined`
   *          when no struct/enum/class with that name has been extracted.
   */
  private findNodeByName(name: string): string | undefined {
    for (const candidate of this.nodes) {
      if (candidate.name !== name) continue;
      switch (candidate.kind) {
        case 'struct':
        case 'enum':
        case 'class':
          return candidate.id;
        default:
          break;
      }
    }
    return undefined;
  }
  /**
   * Languages for which type-annotation references are extracted.
   * `extractTypeAnnotations` and `extractVariableTypeAnnotation` return
   * immediately for any language not listed here.
   */
  private readonly TYPE_ANNOTATION_LANGUAGES = new Set([
    'typescript',
    'tsx',
    'dart',
    'kotlin',
    'swift',
    'rust',
    'go',
    'java',
    'csharp',
  ]);
  /**
   * Built-in/primitive type names that shouldn't create references.
   * Type names found in this set are skipped by the type-reference walkers
   * instead of being pushed as unresolved `references` edges.
   *
   * The groups below are cumulative: a name listed under one language is not
   * repeated under another (e.g. `char` for Java/C# and `int`/`byte`/`bool`/
   * `string` for Go are already present in earlier groups).
   */
  private readonly BUILTIN_TYPES = new Set([
    'string', 'number', 'boolean', 'void', 'null', 'undefined', 'never', 'any', 'unknown',
    'object', 'symbol', 'bigint', 'true', 'false',
    // Rust
    'str', 'bool', 'i8', 'i16', 'i32', 'i64', 'i128', 'isize',
    'u8', 'u16', 'u32', 'u64', 'u128', 'usize', 'f32', 'f64', 'char',
    // Java/C# (`char` is already listed with the Rust names)
    'int', 'long', 'short', 'byte', 'float', 'double',
    // Go (`uint` and `uintptr` complete the predeclared integer types)
    'int8', 'int16', 'int32', 'int64', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr',
    'float32', 'float64', 'complex64', 'complex128', 'rune', 'error',
  ]);
  /**
   * Collect type references from the annotations attached to a
   * function/method/field node and record them as 'references' edges
   * originating from `nodeId`.
   *
   * Three places are inspected, in this order: the parameters field, the
   * return-type field, and a direct `type_annotation` child (class fields
   * such as `model: ITextModel`). Does nothing when no extractor is set or
   * the current language is not in `TYPE_ANNOTATION_LANGUAGES`.
   */
  private extractTypeAnnotations(node: SyntaxNode, nodeId: string): void {
    const extractor = this.extractor;
    if (!extractor || !this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return;

    // C# tree-sitter doesn't produce `type_identifier` leaves — it uses
    // `identifier`, `predefined_type`, `qualified_name`, `generic_name`,
    // etc. — so the generic walker below would emit zero references for it.
    // Hand off to the C#-aware path, which only walks type-position
    // subtrees so parameter NAMES never surface as type refs (#381).
    if (this.language === 'csharp') {
      this.extractCsharpTypeRefs(node, nodeId);
      return;
    }

    // Parameter types first, then the return type; each field name falls
    // back to a default when the extractor does not specify one.
    const annotatedFields = [
      extractor.paramsField || 'parameters',
      extractor.returnField || 'return_type',
    ];
    for (const fieldName of annotatedFields) {
      const subtree = getChildByField(node, fieldName);
      if (subtree) {
        this.extractTypeRefsFromSubtree(subtree, nodeId);
      }
    }

    // Finally a direct type annotation on the node itself.
    const ownAnnotation = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'type_annotation'
    );
    if (ownAnnotation) {
      this.extractTypeRefsFromSubtree(ownAnnotation, nodeId);
    }
  }
  /**
   * Extract C# type references from a node that owns a type position —
   * a method/constructor declaration, a property declaration, or a
   * field declaration (which wraps `variable_declaration → type`).
   *
   * Only known `type` fields are entered, so parameter names such as
   * `request` in `Build(UserDto request)` are never emitted as type
   * references. The actual emission is done by `walkCsharpTypePosition`,
   * which understands C#'s type-leaf node kinds. Closes #381.
   */
  private extractCsharpTypeRefs(node: SyntaxNode, nodeId: string): void {
    // Walk the `type` field of `owner`, if it has one.
    const walkTypeFieldOf = (owner: SyntaxNode): void => {
      const typeNode = getChildByField(owner, 'type');
      if (typeNode) this.walkCsharpTypePosition(typeNode, nodeId);
    };

    // Return type / property type live directly on the node.
    walkTypeFieldOf(node);

    // Field declarations carry their type one level down, on the wrapped
    // `variable_declaration`; the call above finds nothing for them.
    const declaration = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'variable_declaration'
    );
    if (declaration) walkTypeFieldOf(declaration);

    // Method / constructor parameters: visit each `parameter` entry of the
    // `parameters` field and walk only its `type` field, skipping the name.
    const parameterList = getChildByField(node, 'parameters');
    if (!parameterList) return;
    for (const parameter of parameterList.namedChildren) {
      if (parameter?.type !== 'parameter') continue;
      walkTypeFieldOf(parameter);
    }
  }
  /**
   * Walk a C# subtree that is KNOWN to be in a type position
   * (return type, parameter type, property type, field type, generic
   * argument) and push a 'references' edge from `fromNodeId` for every
   * non-builtin type name found. Identifiers here are type names, not
   * parameter names.
   *
   * @param node       Root of the type-position subtree.
   * @param fromNodeId Id of the node that owns the type position.
   */
  private walkCsharpTypePosition(node: SyntaxNode, fromNodeId: string): void {
    // `predefined_type` is int/string/bool/etc. — never a project ref.
    if (node.type === 'predefined_type') return;

    // Bare type name: `Foo` in `Foo bar`, or the `Foo` inside `List<Foo>`.
    if (node.type === 'identifier') {
      const name = getNodeText(node, this.source);
      if (name && !this.BUILTIN_TYPES.has(name)) {
        this.unresolvedReferences.push({
          fromNodeId,
          referenceName: name,
          referenceKind: 'references',
          line: node.startPosition.row + 1,
          column: node.startPosition.column,
        });
      }
      return;
    }

    // `Namespace.Foo` → the rightmost segment is the type. Only that
    // trailing simple name is emitted as the reference.
    if (node.type === 'qualified_name') {
      // When the trailing segment is not a plain identifier (e.g. the
      // `List<Foo>` in `System.Collections.Generic.List<Foo>`), splitting
      // the raw text on '.' would yield names like `List<Foo>` or `D>`.
      // Walk that segment structurally instead so both the head name and
      // its type arguments are emitted.
      // NOTE(review): relies on the grammar exposing the trailing segment
      // as the `name` field of `qualified_name` — confirm against the
      // tree-sitter-c-sharp version in use. If the field is absent the
      // text-based path below is used, as before.
      const tail = getChildByField(node, 'name');
      if (tail && tail.type !== 'identifier') {
        this.walkCsharpTypePosition(tail, fromNodeId);
        return;
      }
      const text = getNodeText(node, this.source);
      const last = text.split('.').pop() ?? text;
      if (last && !this.BUILTIN_TYPES.has(last)) {
        this.unresolvedReferences.push({
          fromNodeId,
          referenceName: last,
          referenceKind: 'references',
          // Positioned at the start of the whole qualified name.
          line: node.startPosition.row + 1,
          column: node.startPosition.column,
        });
      }
      return;
    }

    // `(int Code, Foo Payload)` — tuple element has BOTH a `type` and a
    // `name` field; descending into all named children would mis-emit
    // the element name (`Code`, `Payload`) as a type ref. Walk only the
    // type field.
    if (node.type === 'tuple_element') {
      const t = getChildByField(node, 'type');
      if (t) this.walkCsharpTypePosition(t, fromNodeId);
      return;
    }

    // Composite type nodes — recurse into named children. Covers
    // `generic_name` (head identifier + `type_argument_list`),
    // `nullable_type`, `array_type`, `pointer_type`, `tuple_type`,
    // `ref_type`, and any newer wrapping shapes the grammar adds.
    // Identifiers reached here are all type-positional (parameter/field
    // names are gated out before we descend).
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child) this.walkCsharpTypePosition(child, fromNodeId);
    }
  }
  /**
   * Record type references found in a variable's type annotation.
   *
   * Only the first direct `type_annotation` child of `node` is examined;
   * languages outside `TYPE_ANNOTATION_LANGUAGES` are ignored.
   */
  private extractVariableTypeAnnotation(node: SyntaxNode, nodeId: string): void {
    if (!this.TYPE_ANNOTATION_LANGUAGES.has(this.language)) return;

    for (const child of node.namedChildren) {
      if (child.type === 'type_annotation') {
        this.extractTypeRefsFromSubtree(child, nodeId);
        return; // only the first annotation child is used
      }
    }
  }
  /**
   * Walk a subtree and push a 'references' edge from `fromNodeId` for every
   * `type_identifier` whose text is not in `BUILTIN_TYPES`.
   *
   * Nodes are visited depth-first, left to right, so references are pushed
   * in source order. A `type_identifier` is treated as a leaf; every other
   * node (union_type, intersection_type, generic_type, etc.) is descended
   * into through its named children.
   */
  private extractTypeRefsFromSubtree(node: SyntaxNode, fromNodeId: string): void {
    // Explicit stack instead of recursion; children are pushed in reverse
    // so they are popped in their original order.
    const pending: SyntaxNode[] = [node];

    while (pending.length > 0) {
      const current = pending.pop()!;

      if (current.type !== 'type_identifier') {
        for (let i = current.namedChildCount - 1; i >= 0; i--) {
          const child = current.namedChild(i);
          if (child) pending.push(child);
        }
        continue;
      }

      const typeName = getNodeText(current, this.source);
      if (!typeName || this.BUILTIN_TYPES.has(typeName)) continue;

      this.unresolvedReferences.push({
        fromNodeId,
        referenceName: typeName,
        referenceKind: 'references',
        line: current.startPosition.row + 1,
        column: current.startPosition.column,
      });
    }
  }
  /**
   * Dispatch on Pascal-specific AST node types.
   *
   * @returns `true` when the node type is one handled here (the caller should
   *          then skip its children), `false` for any other node type.
   */
  private visitPascalNode(node: SyntaxNode): boolean {
    // Visit every named child of `node` through the generic visitor.
    const visitNamedChildren = (): void => {
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child) this.visitNode(child);
      }
    };

    // Create a property/field node named by the `name` field, with visibility.
    const createMember = (kind: 'property' | 'field'): void => {
      const nameNode = getChildByField(node, 'name');
      if (!nameNode) return;
      const name = getNodeText(nameNode, this.source);
      const visibility = this.extractor!.getVisibility?.(node);
      this.createNode(kind, name, node, { visibility });
    };

    switch (node.type) {
      // Unit/Program/Library → module node, then descend into its sections.
      case 'unit':
      case 'program':
      case 'library': {
        const moduleNameNode = node.namedChildren.find(
          (c: SyntaxNode) => c.type === 'moduleName'
        );
        const declaredName = moduleNameNode ? getNodeText(moduleNameNode, this.source) : '';
        // Fall back to the file name without extension when no module name is present.
        const moduleName = declaredName || path.basename(this.filePath).replace(/\.[^.]+$/, '');
        this.createNode('module', moduleName, node);
        visitNamedChildren();
        return true;
      }

      // declType wraps declClass/declIntf/declEnum/type-alias; the name lives
      // on declType while the inner node determines the kind.
      case 'declType':
        this.extractPascalDeclType(node);
        return true;

      // declUses → one import node per unit name.
      case 'declUses':
        this.extractPascalUses(node);
        return true;

      // declConsts → container; only its declConst children are extracted.
      case 'declConsts':
        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          if (child?.type === 'declConst') {
            this.extractPascalConst(child);
          }
        }
        return true;

      // declConst encountered outside a declConsts container.
      case 'declConst':
        this.extractPascalConst(node);
        return true;

      // Pure containers: type declarations, visibility sections, and the
      // interface/implementation sections just forward to their children.
      case 'declTypes':
      case 'declSection':
      case 'interface':
      case 'implementation':
        visitNamedChildren();
        return true;

      // declVars → a variable node for each declVar child that has a name.
      case 'declVars':
        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          if (child?.type !== 'declVar') continue;
          const nameNode = getChildByField(child, 'name');
          if (nameNode) {
            this.createNode('variable', getNodeText(nameNode, this.source), child);
          }
        }
        return true;

      // defProc in the implementation section → calls only, no new node.
      case 'defProc':
        this.extractPascalDefProc(node);
        return true;

      case 'declProp':
        createMember('property');
        return true;

      case 'declField':
        createMember('field');
        return true;

      // exprCall → function call reference.
      case 'exprCall':
        this.extractPascalCall(node);
        return true;

      // block (begin..end) → scanned for calls.
      case 'block':
        this.visitPascalBlock(node);
        return true;

      default:
        return false;
    }
  }
  /**
   * Turn a Pascal declType node into a class, interface, enum, or type_alias
   * node. The declared name comes from the `name` field of declType; the kind
   * is decided by which inner node is present (declClass, declIntf, or a
   * `type` child containing declEnum). Anything else becomes a type_alias.
   */
  private extractPascalDeclType(node: SyntaxNode): void {
    const nameNode = getChildByField(node, 'name');
    if (!nameNode) return;
    const name = getNodeText(nameNode, this.source);

    const innerOfType = (type: string): SyntaxNode | undefined =>
      node.namedChildren.find((c: SyntaxNode) => c.type === type);

    // Visit all named children of `body` with `ownerId` on top of the node stack.
    const visitMembers = (ownerId: string, body: SyntaxNode): void => {
      this.nodeStack.push(ownerId);
      for (let i = 0; i < body.namedChildCount; i++) {
        const member = body.namedChild(i);
        if (member) this.visitNode(member);
      }
      this.nodeStack.pop();
    };

    const declClass = innerOfType('declClass');
    if (declClass) {
      const classNode = this.createNode('class', name, node);
      if (!classNode) return;
      // Inheritance comes from the typeref children of declClass.
      this.extractPascalInheritance(declClass, classNode.id);
      visitMembers(classNode.id, declClass);
      return;
    }

    const declIntf = innerOfType('declIntf');
    if (declIntf) {
      const ifaceNode = this.createNode('interface', name, node);
      if (ifaceNode) visitMembers(ifaceNode.id, declIntf);
      return;
    }

    // Enums are nested one level deeper: type → declEnum.
    const declEnum = innerOfType('type')?.namedChildren.find(
      (c: SyntaxNode) => c.type === 'declEnum'
    );
    if (!declEnum) {
      // Simple alias (type TFoo = string / type TFoo = Integer), or a
      // declaration with no recognised inner node such as a forward declaration.
      this.createNode('type_alias', name, node);
      return;
    }

    const enumNode = this.createNode('enum', name, node);
    if (!enumNode) return;

    this.nodeStack.push(enumNode.id);
    for (let i = 0; i < declEnum.namedChildCount; i++) {
      const value = declEnum.namedChild(i);
      if (value?.type !== 'declEnumValue') continue;
      const memberName = getChildByField(value, 'name');
      if (memberName) {
        this.createNode('enum_member', getNodeText(memberName, this.source), value);
      }
    }
    this.nodeStack.pop();
  }
  /**
   * Split a Pascal uses clause into one import node per `moduleName` child.
   * Each import node carries the trimmed text of the whole clause as its
   * signature. When a node is on the stack, an unresolved 'imports'
   * reference from that node to the unit name is recorded as well.
   */
  private extractPascalUses(node: SyntaxNode): void {
    const signature = getNodeText(node, this.source).trim();

    for (const unit of node.namedChildren) {
      if (unit?.type !== 'moduleName') continue;

      const unitName = getNodeText(unit, this.source);
      this.createNode('import', unitName, unit, { signature });

      // The reference needs an owner: the node currently on top of the stack.
      const ownerId = this.nodeStack[this.nodeStack.length - 1];
      if (!ownerId) continue;

      this.unresolvedReferences.push({
        fromNodeId: ownerId,
        referenceName: unitName,
        referenceKind: 'imports',
        line: unit.startPosition.row + 1,
        column: unit.startPosition.column,
      });
    }
  }
  /**
   * Create a constant node for a Pascal declConst. The text of its
   * `defaultValue` child, when present, is stored as the signature;
   * declarations without a `name` field are ignored.
   */
  private extractPascalConst(node: SyntaxNode): void {
    const nameNode = getChildByField(node, 'name');
    if (!nameNode) return;

    const valueNode = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'defaultValue'
    );
    let signature: string | undefined;
    if (valueNode) {
      signature = getNodeText(valueNode, this.source);
    }

    this.createNode('constant', getNodeText(nameNode, this.source), node, { signature });
  }
  /**
   * Record inheritance references for a Pascal class from the direct
   * `typeref` children of its declClass node: the first typeref becomes an
   * 'extends' reference, every following one an 'implements' reference.
   */
  private extractPascalInheritance(declClass: SyntaxNode, classId: string): void {
    let seen = 0;
    for (const child of declClass.namedChildren) {
      if (child.type !== 'typeref') continue;

      // Position among the typerefs (not among all children) decides the kind.
      const referenceKind = seen === 0 ? 'extends' : 'implements';
      seen += 1;

      this.unresolvedReferences.push({
        fromNodeId: classId,
        referenceName: getNodeText(child, this.source),
        referenceKind,
        line: child.startPosition.row + 1,
        column: child.startPosition.column,
      });
    }
  }
  /**
   * Extract calls from the body of a Pascal defProc (implementation section).
   *
   * No node is created here. The calls found in the body's `block` are
   * attributed to a previously-extracted method/function whose name matches
   * (case-insensitively) the defProc's name — first by the full dotted name
   * such as "TAuthService.Create", then by the last segment alone — and
   * otherwise to whatever node is on top of the node stack.
   */
  private extractPascalDefProc(node: SyntaxNode): void {
    const declProc = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'declProc'
    );
    const nameNode = declProc ? getChildByField(declProc, 'name') : null;
    if (!nameNode) return;

    // Lookup keys are lower-cased; the short key is the part after the last dot.
    const fullName = getNodeText(nameNode, this.source).trim();
    const shortName = fullName.slice(fullName.lastIndexOf('.') + 1);
    const fullNameKey = fullName.toLowerCase();
    const shortNameKey = shortName.toLowerCase();

    // The index is built once, on first use, and reused for later lookups.
    if (!this.methodIndex) {
      this.methodIndex = new Map();
      const index = this.methodIndex;

      for (const candidate of this.nodes) {
        const isMethod = candidate.kind === 'method';
        if (!isMethod && candidate.kind !== 'function') continue;

        // Short names: the first node seen keeps the key.
        const shortKey = candidate.name.toLowerCase();
        if (!index.has(shortKey)) {
          index.set(shortKey, candidate.id);
        }

        if (!isMethod) continue;

        // Methods are also indexed under every dotted suffix of their
        // qualified name that still has a scope, so both
        // "Module.Class.Method" and "Class.Method" resolve. A name with a
        // single segment produces no such key. Later nodes overwrite here.
        const scopes = candidate.qualifiedName.split('::');
        for (let start = 0; start < scopes.length - 1; start++) {
          index.set(scopes.slice(start).join('.').toLowerCase(), candidate.id);
        }
      }
    }

    const parentId =
      this.methodIndex.get(fullNameKey) ||
      this.methodIndex.get(shortNameKey) ||
      this.nodeStack[this.nodeStack.length - 1];
    if (!parentId) return;

    // Scan the body with the resolved owner on top of the stack.
    const body = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'block'
    );
    if (!body) return;

    this.nodeStack.push(parentId);
    this.visitPascalBlock(body);
    this.nodeStack.pop();
  }
  /**
   * Record a 'calls' reference for a Pascal exprCall node, attributed to the
   * node on top of the node stack, and then scan the call's arguments for
   * nested calls. Nothing happens when the stack is empty or the call has
   * no named children.
   */
  private extractPascalCall(node: SyntaxNode): void {
    const callerId = this.nodeStack[this.nodeStack.length - 1];
    if (!callerId) return;

    // The first named child carries the callee: an identifier or an exprDot.
    const callee = node.namedChild(0);
    if (!callee) return;

    let calleeName = '';
    switch (callee.type) {
      case 'exprDot':
        // Qualified call Obj.Method(...): join the direct identifier children with dots.
        calleeName = callee.namedChildren
          .filter((c: SyntaxNode) => c.type === 'identifier')
          .map((id: SyntaxNode) => getNodeText(id, this.source))
          .join('.');
        break;
      case 'identifier':
        calleeName = getNodeText(callee, this.source);
        break;
      default:
        break;
    }

    if (calleeName) {
      this.unresolvedReferences.push({
        fromNodeId: callerId,
        referenceName: calleeName,
        referenceKind: 'calls',
        line: node.startPosition.row + 1,
        column: node.startPosition.column,
      });
    }

    // Arguments may themselves contain calls.
    const callArgs = node.namedChildren.find(
      (c: SyntaxNode) => c.type === 'exprArgs'
    );
    if (callArgs) {
      this.visitPascalBlock(callArgs);
    }
  }
  /**
   * Scan a Pascal block/statement tree for call expressions.
   *
   * Each named child is handled by type: an exprCall is extracted directly;
   * for an exprDot only its direct exprCall children are extracted (it is
   * not descended into any further); every other node is scanned recursively.
   */
  private visitPascalBlock(node: SyntaxNode): void {
    for (const child of node.namedChildren) {
      if (!child) continue;

      switch (child.type) {
        case 'exprCall':
          this.extractPascalCall(child);
          break;

        case 'exprDot':
          for (const part of child.namedChildren) {
            if (part?.type === 'exprCall') {
              this.extractPascalCall(part);
            }
          }
          break;

        default:
          this.visitPascalBlock(child);
          break;
      }
    }
  }
  3212. }
  /**
   * Extract nodes and edges from a single source file.
   *
   * The base pass is chosen from the detected language (and, for Pascal, the
   * file extension). When `frameworkNames` is non-empty, the framework
   * resolvers whose names are listed and which apply to the detected language
   * are run afterwards: their nodes and references are appended to the result,
   * and an extractor that throws is recorded as a `warning`-severity error
   * instead of aborting the extraction.
   *
   * @param filePath       Path of the file being processed.
   * @param source         Full text of the file.
   * @param language       Optional language override; when falsy the language
   *                       is detected from `filePath` and `source`.
   * @param frameworkNames Optional names of framework extractors to run.
   * @returns The extraction result, including any framework additions.
   */
  export function extractFromSource(
    filePath: string,
    source: string,
    language?: Language,
    frameworkNames?: string[]
  ): ExtractionResult {
    const detectedLanguage = language || detectLanguage(filePath, source);
    const fileExtension = path.extname(filePath).toLowerCase();

    // Base pass: pick exactly one extractor for this file.
    const runBaseExtraction = (): ExtractionResult => {
      switch (detectedLanguage) {
        case 'svelte':
          return new SvelteExtractor(filePath, source).extract();
        case 'vue':
          return new VueExtractor(filePath, source).extract();
        case 'liquid':
          return new LiquidExtractor(filePath, source).extract();
        case 'xml':
          // Custom extractor for MyBatis mapper XML. Non-mapper XML returns just
          // a file node so the watcher tracks it without emitting symbols.
          return new MyBatisExtractor(filePath, source).extract();
      }

      if (isFileLevelOnlyLanguage(detectedLanguage)) {
        // No symbol extraction at this stage — files are tracked at the
        // file-record level only. Framework extractors (Drupal routing yml,
        // Spring `@Value` resolution against application.yml /
        // application.properties) run later and add per-file nodes/references
        // when they apply.
        return { nodes: [], edges: [], unresolvedReferences: [], errors: [], durationMs: 0 };
      }

      // DFM/FMX form files are reported as Pascal but need their own extractor.
      const isFormFile = fileExtension === '.dfm' || fileExtension === '.fmx';
      if (detectedLanguage === 'pascal' && isFormFile) {
        return new DfmExtractor(filePath, source).extract();
      }

      return new TreeSitterExtractor(filePath, source, detectedLanguage).extract();
    };

    const result = runBaseExtraction();

    // Framework-specific extraction (routes, middleware, etc.)
    if (!frameworkNames || frameworkNames.length === 0) {
      return result;
    }

    const requestedNames = frameworkNames;
    const requestedResolvers = getAllFrameworkResolvers().filter((resolver) =>
      requestedNames.includes(resolver.name)
    );

    for (const fw of getApplicableFrameworks(requestedResolvers, detectedLanguage)) {
      if (!fw.extract) continue;

      try {
        const { nodes, references } = fw.extract(filePath, source);
        result.nodes.push(...nodes);
        result.unresolvedReferences.push(...references);
      } catch (err) {
        // A failing framework extractor must not discard the base result.
        const reason = err instanceof Error ? err.message : String(err);
        result.errors.push({
          message: `Framework extractor '${fw.name}' failed: ${reason}`,
          filePath,
          severity: 'warning',
        });
      }
    }

    return result;
  }