name-matcher.ts 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. /**
  2. * Name Matcher
  3. *
  4. * Handles symbol name matching for reference resolution.
  5. */
  6. import { Node } from '../types';
  7. import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
  8. /**
  9. * Try to resolve a path-like reference (e.g., "snippets/drawer-menu.liquid")
  10. * by matching the filename against file nodes.
  11. */
  12. export function matchByFilePath(
  13. ref: UnresolvedRef,
  14. context: ResolutionContext
  15. ): ResolvedRef | null {
  16. // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  17. // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
  18. // basename). A bare ref WITHOUT an extension is a symbol name, not a file, so
  19. // leave it to the symbol-matching strategies.
  20. if (!ref.referenceName.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(ref.referenceName)) {
  21. return null;
  22. }
  23. // Extract the filename from the path
  24. const fileName = ref.referenceName.split('/').pop();
  25. if (!fileName) return null;
  26. // Search for file nodes with this name
  27. const candidates = context.getNodesByName(fileName);
  28. const fileNodes = candidates.filter(n => n.kind === 'file');
  29. if (fileNodes.length === 0) return null;
  30. // Prefer exact path match on qualified_name
  31. const exactMatch = fileNodes.find(n => n.qualifiedName === ref.referenceName || n.filePath === ref.referenceName);
  32. if (exactMatch) {
  33. return {
  34. original: ref,
  35. targetNodeId: exactMatch.id,
  36. confidence: 0.95,
  37. resolvedBy: 'file-path',
  38. };
  39. }
  40. // Fall back to suffix match (e.g., ref="snippets/foo.liquid" matches
  41. // "src/snippets/foo.liquid"). When several files share the basename — a
  42. // `#include "RNCAsyncStorage.h"` with a same-named header on another platform
  43. // (windows/code/ vs apple/) — prefer the one in the includer's own directory,
  44. // then by directory proximity / same language family. A C/C++ include (and any
  45. // bare-filename import) resolves relative to the including file, not to an
  46. // arbitrary same-named header elsewhere in the tree.
  47. const suffixMatches = fileNodes.filter(
  48. n => n.qualifiedName.endsWith(ref.referenceName) || n.filePath.endsWith(ref.referenceName)
  49. );
  50. if (suffixMatches.length > 0) {
  51. return {
  52. original: ref,
  53. targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
  54. confidence: 0.85,
  55. resolvedBy: 'file-path',
  56. };
  57. }
  58. // If only one file node with this name, use it with lower confidence
  59. if (fileNodes.length === 1) {
  60. return {
  61. original: ref,
  62. targetNodeId: fileNodes[0]!.id,
  63. confidence: 0.7,
  64. resolvedBy: 'file-path',
  65. };
  66. }
  67. return null;
  68. }
  69. /**
  70. * Among several file nodes that all match a bare include/import by basename,
  71. * pick the one closest to the referencing file: same directory first, then by
  72. * directory-tree proximity, with the same language family as a tiebreak. A
  73. * C/C++ `#include "X.h"` (and any bare-filename import) resolves relative to the
  74. * including file — not to an arbitrary same-named header on another platform.
  75. */
  76. function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  77. const dirOf = (p: string): string => {
  78. const i = p.lastIndexOf('/');
  79. return i >= 0 ? p.slice(0, i) : '';
  80. };
  81. const refDir = dirOf(ref.filePath);
  82. const sameDir = candidates.filter((c) => dirOf(c.filePath) === refDir);
  83. const pool = sameDir.length > 0 ? sameDir : candidates;
  84. let best = pool[0]!;
  85. let bestScore = -Infinity;
  86. for (const c of pool) {
  87. const score =
  88. computePathProximity(ref.filePath, c.filePath) +
  89. (sameLanguageFamily(c.language, ref.language) ? 5 : 0);
  90. if (score > bestScore) {
  91. bestScore = score;
  92. best = c;
  93. }
  94. }
  95. return best;
  96. }
  97. /**
  98. * Language families that share a type system / runtime, so a same-language-only
  99. * reference may still resolve across them (a Kotlin `Foo.BAR` can name a Java
  100. * `Foo`). Anything not listed forms its own singleton family.
  101. */
  102. const LANGUAGE_FAMILY: Record<string, string> = {
  103. java: 'jvm', kotlin: 'jvm', scala: 'jvm',
  104. swift: 'apple', objc: 'apple',
  105. typescript: 'web', tsx: 'web', javascript: 'web', jsx: 'web',
  106. c: 'c', cpp: 'c',
  107. // Razor/Blazor markup names C# types — same family so `@model Foo` /
  108. // `<MyComponent/>` resolve to their `.cs` class through the cross-family gate.
  109. csharp: 'dotnet', razor: 'dotnet',
  110. };
  111. export function sameLanguageFamily(a: string, b: string): boolean {
  112. if (a === b) return true;
  113. const fa = LANGUAGE_FAMILY[a];
  114. return fa !== undefined && fa === LANGUAGE_FAMILY[b];
  115. }
  116. /**
  117. * True when `lang` belongs to a known multi-language family (jvm/apple/web/c).
  118. * Languages not listed (php, python, go, ruby, rust, dart, …) and config
  119. * formats (yaml/xml/blade) form their own singleton families and return
  120. * `false` — used to leave config↔code framework bridges (whose config side is
  121. * never a known programming-language family) out of the cross-family gate.
  122. */
  123. export function isKnownLanguageFamily(lang: string): boolean {
  124. return LANGUAGE_FAMILY[lang] !== undefined;
  125. }
  126. /**
  127. * True when `a` and `b` are two DIFFERENT *known* language families — the
  128. * signature of a coincidental cross-language name collision (a TS `import
  129. * React` matching a Swift `import React`, a C++ `#include "X.h"` matching a
  130. * same-named ObjC header on another platform). The both-*known* test is
  131. * deliberately weaker than {@link sameLanguageFamily}'s negation: a
  132. * single-file-component language that carries its own tag (`vue`/`svelte`)
  133. * importing a `.ts` module, or any singleton-family language (php/go/ruby/…),
  134. * returns `false` here and is left alone.
  135. */
  136. export function crossesKnownFamily(a: string, b: string): boolean {
  137. return isKnownLanguageFamily(a) && isKnownLanguageFamily(b) && !sameLanguageFamily(a, b);
  138. }
  139. /**
  140. * Drop cross-language candidates from a name lookup. Two regimes:
  141. * - `references` (type-usage): a type named in language X resolves to a
  142. * SAME-family type, never a coincidentally same-named symbol in another
  143. * language (the Android `BatteryManager` system class vs a JS one). Strict
  144. * same-family filter — cross-language communication is `calls`, not refs.
  145. * - `imports` (import binding): an `import`/`#include` never crosses two
  146. * KNOWN families (TS `import React` ↮ Swift `import React`). Weaker
  147. * both-known filter so `.vue`/`.svelte` (own tag) importing `.ts` survives.
  148. */
  149. function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  150. if (ref.referenceKind === 'references' || ref.referenceKind === 'function_ref') {
  151. return candidates.filter((c) => sameLanguageFamily(c.language, ref.language));
  152. }
  153. if (ref.referenceKind === 'imports') {
  154. return candidates.filter((c) => !crossesKnownFamily(c.language, ref.language));
  155. }
  156. return candidates;
  157. }
  158. /**
  159. * Resolve a function-as-value reference (#756) — a function name used as a
  160. * callback/function-pointer value (`register(handler)`, `o->cb = handler`,
  161. * `{ .cb = handler }`, `signal(SIGINT, handler)`). The ONLY strategy allowed
  162. * for `function_ref` refs: exact name, function/method targets only, same
  163. * language family, same-file first, and cross-file only when the match is
  164. * UNIQUE. No fuzzy fallback, no qualified-name walking — a wrong callback
  165. * edge is worse than none.
  166. */
  167. export function matchFunctionRef(
  168. ref: UnresolvedRef,
  169. context: ResolutionContext
  170. ): ResolvedRef | null {
  171. // `this.<member>` refs are resolved ONLY by the class-scoped resolver in
  172. // resolveOne (resolveThisMemberFnRef) — never by name matching here.
  173. if (ref.referenceName.startsWith('this.')) return null;
  174. // In JS/TS/Python a bare identifier can never be a method value (methods
  175. // are only reachable through a receiver — `this.m` / `self.m` /
  176. // `Cls.m`), so bare fn-refs match FUNCTIONS only. This also sidesteps the
  177. // pre-existing TS quirk of class fields extracting as method-kind nodes,
  178. // which otherwise soaked up local names passed as arguments (excalidraw
  179. // A/B finding; same pattern in vendored docopt.py). Python's `self.m`
  180. // form keeps method targets via its own capture shape. C++ likewise: a
  181. // bare identifier can only be a FREE function (member values need
  182. // `&Cls::method`). PHP string callables name global FUNCTIONS (methods
  183. // need the `[$obj, 'm']` array form, which carries its own shape). Other
  184. // languages keep method targets: C# method groups, Swift/Dart
  185. // implicit-self, Java/Kotlin method references.
  186. const bareFnOnly =
  187. ref.language === 'typescript' || ref.language === 'tsx' ||
  188. ref.language === 'javascript' || ref.language === 'jsx' ||
  189. ref.language === 'cpp' || ref.language === 'python' ||
  190. ref.language === 'php';
  191. // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
  192. // resolve the member ON THAT SCOPE — exempt from bareFnOnly (the `&Cls::m`
  193. // shape is an explicit member reference). Unique-or-drop like everything else.
  194. if (ref.referenceName.includes('::')) {
  195. const memberName = ref.referenceName.slice(ref.referenceName.lastIndexOf('::') + 2);
  196. const scoped = context
  197. .getNodesByName(memberName)
  198. .filter(
  199. (n) =>
  200. (n.kind === 'function' || n.kind === 'method') &&
  201. sameLanguageFamily(n.language, ref.language) &&
  202. n.id !== ref.fromNodeId &&
  203. (n.qualifiedName === ref.referenceName ||
  204. n.qualifiedName.endsWith(`::${ref.referenceName}`))
  205. );
  206. if (scoped.length === 0) return null;
  207. const sameFileScoped = scoped.filter((n) => n.filePath === ref.filePath);
  208. const pool = sameFileScoped.length > 0 ? sameFileScoped : scoped;
  209. if (sameFileScoped.length === 0 && scoped.length > 1) return null;
  210. const target = pool.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
  211. return {
  212. original: ref,
  213. targetNodeId: target.id,
  214. confidence: 0.9,
  215. resolvedBy: 'function-ref',
  216. };
  217. }
  218. let candidates = context
  219. .getNodesByName(ref.referenceName)
  220. .filter(
  221. (n) =>
  222. (n.kind === 'function' || (!bareFnOnly && n.kind === 'method')) &&
  223. sameLanguageFamily(n.language, ref.language) &&
  224. n.id !== ref.fromNodeId // a function registering itself is not a dependency edge
  225. );
  226. if (candidates.length === 0) return null;
  227. // Swift implicit-self: a bare identifier can name a METHOD only of the
  228. // ENCLOSING type (`Button(action: handleTap)` written inside that type) —
  229. // a same-named method on any OTHER class is a parameter collision
  230. // (Alamofire: a `request` parameter resolving to EventMonitor::request).
  231. // Scope method candidates to the from-symbol's type; top-level code has no
  232. // implicit self, so method targets are excluded there entirely. Free
  233. // functions are unaffected.
  234. if (ref.language === 'swift' && candidates.some((n) => n.kind === 'method')) {
  235. const fromNode = context.getNodeById?.(ref.fromNodeId);
  236. const sep = fromNode ? fromNode.qualifiedName.lastIndexOf('::') : -1;
  237. const classPrefix = fromNode && sep > 0 ? fromNode.qualifiedName.slice(0, sep) : null;
  238. candidates = candidates.filter((n) => {
  239. if (n.kind !== 'method') return true;
  240. if (!classPrefix) return false;
  241. const mSep = n.qualifiedName.lastIndexOf('::');
  242. if (mSep <= 0) return false;
  243. const methodPrefix = n.qualifiedName.slice(0, mSep);
  244. // Accept exact-scope matches plus suffix relationships either way, so
  245. // extension-declared members (`Holder::m`) still match a nested
  246. // from-scope (`Module::Holder::wire`) and vice versa.
  247. return (
  248. methodPrefix === classPrefix ||
  249. methodPrefix.endsWith(`::${classPrefix}`) ||
  250. classPrefix.endsWith(`::${methodPrefix}`)
  251. );
  252. });
  253. if (candidates.length === 0) return null;
  254. }
  255. // Same-file definition wins — the extraction gate guarantees most survivors
  256. // have one, and it's the dominant C pattern (static callback registered in
  257. // a same-file ops struct).
  258. const sameFile = candidates.filter((n) => n.filePath === ref.filePath);
  259. if (sameFile.length > 0) {
  260. // Swift: several same-named METHODS in one file is an API overload family
  261. // (`Session.request(...)` × N), and a bare identifier hitting it is almost
  262. // always a same-named parameter, not a method value (Alamofire A/B
  263. // finding) — refuse rather than guess. A single method (SwiftUI's
  264. // `action: handleTap`) still resolves.
  265. if (
  266. ref.language === 'swift' &&
  267. sameFile.length > 1 &&
  268. sameFile.every((n) => n.kind === 'method')
  269. ) {
  270. return null;
  271. }
  272. // Same-name overloads in one file are the same conceptual symbol; pick
  273. // the first by position for determinism.
  274. const target = sameFile.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
  275. return {
  276. original: ref,
  277. targetNodeId: target.id,
  278. confidence: sameFile.length === 1 ? 0.95 : 0.9,
  279. resolvedBy: 'function-ref',
  280. };
  281. }
  282. // Cross-file (imported names the import resolver didn't already claim):
  283. // only an unambiguous match resolves.
  284. if (candidates.length === 1) {
  285. return {
  286. original: ref,
  287. targetNodeId: candidates[0]!.id,
  288. confidence: 0.8,
  289. resolvedBy: 'function-ref',
  290. };
  291. }
  292. return null;
  293. }
  294. /**
  295. * Try to resolve a reference by exact name match
  296. */
  297. export function matchByExactName(
  298. ref: UnresolvedRef,
  299. context: ResolutionContext
  300. ): ResolvedRef | null {
  301. const candidates = applyLanguageGate(context.getNodesByName(ref.referenceName), ref);
  302. if (candidates.length === 0) {
  303. return null;
  304. }
  305. // If only one match, use it — but penalize cross-language matches
  306. if (candidates.length === 1) {
  307. const isCrossLanguage = candidates[0]!.language !== ref.language;
  308. return {
  309. original: ref,
  310. targetNodeId: candidates[0]!.id,
  311. confidence: isCrossLanguage ? 0.5 : 0.9,
  312. resolvedBy: 'exact-match',
  313. };
  314. }
  315. // Multiple matches - try to narrow down
  316. const bestMatch = findBestMatch(ref, candidates, context);
  317. if (bestMatch) {
  318. // Lower confidence when the match is from a distant/unrelated module
  319. const proximity = computePathProximity(ref.filePath, bestMatch.filePath);
  320. const confidence = proximity >= 30 ? 0.7 : 0.4;
  321. return {
  322. original: ref,
  323. targetNodeId: bestMatch.id,
  324. confidence,
  325. resolvedBy: 'exact-match',
  326. };
  327. }
  328. return null;
  329. }
  330. /**
  331. * Try to resolve by qualified name
  332. */
  333. export function matchByQualifiedName(
  334. ref: UnresolvedRef,
  335. context: ResolutionContext
  336. ): ResolvedRef | null {
  337. // Check if the reference name looks qualified (contains :: or .)
  338. if (!ref.referenceName.includes('::') && !ref.referenceName.includes('.')) {
  339. return null;
  340. }
  341. const candidates = context.getNodesByQualifiedName(ref.referenceName);
  342. if (candidates.length === 1) {
  343. return {
  344. original: ref,
  345. targetNodeId: candidates[0]!.id,
  346. confidence: 0.95,
  347. resolvedBy: 'qualified-name',
  348. };
  349. }
  350. // Try partial qualified name match
  351. const parts = ref.referenceName.split(/[:.]/);
  352. const lastName = parts[parts.length - 1];
  353. if (lastName) {
  354. const partialCandidates = context.getNodesByName(lastName);
  355. for (const candidate of partialCandidates) {
  356. if (candidate.qualifiedName.endsWith(ref.referenceName)) {
  357. return {
  358. original: ref,
  359. targetNodeId: candidate.id,
  360. confidence: 0.85,
  361. resolvedBy: 'qualified-name',
  362. };
  363. }
  364. }
  365. }
  366. return null;
  367. }
  368. function resolveMethodOnType(
  369. typeName: string,
  370. methodName: string,
  371. ref: UnresolvedRef,
  372. context: ResolutionContext,
  373. confidence: number,
  374. resolvedBy: ResolvedRef['resolvedBy'],
  375. /**
  376. * Optional FQN that identifies WHICH class declaration `typeName`
  377. * refers to in the caller's file. When multiple candidates share
  378. * the same qualifiedName (`FooConverter::convert` in both
  379. * `dao/converter/` and `service/converter/`), the FQN's
  380. * file-path-suffix picks the right one — the disambiguation
  381. * signal Java imports carry but the call site doesn't (#314).
  382. */
  383. preferredFqn?: string,
  384. /** Recursion guard for the supertype/conformance walk. */
  385. depth = 0,
  386. ): ResolvedRef | null {
  387. // Look up methods by name and match by qualifiedName ending in
  388. // `<typeName>::<methodName>`. This works whether the method is defined
  389. // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  390. // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  391. // The previous same-file approach missed the latter — the typical C++ layout.
  392. const methodCandidates = context.getNodesByName(methodName);
  393. const want = `${typeName}::${methodName}`;
  394. const matches: Node[] = [];
  395. for (const m of methodCandidates) {
  396. if (m.kind !== 'method') continue;
  397. if (m.language !== ref.language) continue;
  398. const qn = m.qualifiedName;
  399. if (qn === want || qn.endsWith(`::${want}`)) {
  400. matches.push(m);
  401. }
  402. }
  403. if (matches.length === 0) {
  404. // Conformance fallback: the method may be defined on a supertype `typeName`
  405. // extends, or on a protocol / trait it conforms to (e.g. a Swift protocol-
  406. // extension method, a C# default-interface or extension method, a Kotlin
  407. // extension on a supertype). Walk supertypes transitively (depth-capped) via
  408. // the resolved implements/extends edges — empty in the first resolution pass,
  409. // populated in the conformance pass. Still VALIDATED (the method must exist on
  410. // a supertype), so a wrong inference produces no edge.
  411. if (depth < 4 && context.getSupertypes) {
  412. for (const supertype of context.getSupertypes(typeName, ref.language)) {
  413. const via = resolveMethodOnType(
  414. supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
  415. );
  416. if (via) return via;
  417. }
  418. }
  419. return null;
  420. }
  421. if (matches.length > 1 && preferredFqn) {
  422. const ext = ref.language === 'kotlin' ? '.kt' : '.java';
  423. const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
  424. const chosen = matches.find((m) => {
  425. const fp = m.filePath.replace(/\\/g, '/');
  426. return fp.endsWith(fqnPath) || fp.endsWith('/' + fqnPath);
  427. });
  428. if (chosen) {
  429. return {
  430. original: ref,
  431. targetNodeId: chosen.id,
  432. confidence,
  433. resolvedBy,
  434. };
  435. }
  436. }
  437. return {
  438. original: ref,
  439. targetNodeId: matches[0]!.id,
  440. confidence,
  441. resolvedBy,
  442. };
  443. }
  444. // C++ keywords/control-flow tokens that can appear right before a receiver
  445. // (e.g. `return ptr->m()`) and must NOT be treated as a type.
  446. const CPP_NON_TYPE_TOKENS = new Set([
  447. 'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  448. 'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await', 'co_yield',
  449. 'co_return', 'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
  450. 'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
  451. ]);
  452. function normalizeCppTypeName(typeName: string): string | null {
  453. const normalized = typeName
  454. .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
  455. .replace(/[&*]+/g, ' ')
  456. .replace(/<[^>]*>/g, ' ')
  457. .replace(/\s+/g, ' ')
  458. .trim();
  459. if (!normalized) return null;
  460. const parts = normalized.split(/::/).filter(Boolean);
  461. const last = parts[parts.length - 1];
  462. if (!last) return null;
  463. if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  464. return last;
  465. }
  466. // Declarator regex: matches `Type receiver`, `Type* receiver`, `Type *receiver`,
  467. // `Type*receiver`, `Type<X> receiver`, etc., REQUIRING a declarator terminator
  468. // (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end-of-line) after the receiver. The
  469. // terminator rules out uses like `return receiver->m()` where the preceding
  470. // token is a keyword, not a type.
  471. function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  472. return new RegExp(
  473. `([A-Za-z_][\\w:]*(?:\\s*<[^;=(){}]+>)?(?:\\s*[*&]+)?)\\s*\\b${escapedReceiver}\\b\\s*(?=[;=,)\\[{(]|$)`,
  474. );
  475. }
  476. function inferCppReceiverType(
  477. receiverName: string,
  478. ref: UnresolvedRef,
  479. context: ResolutionContext,
  480. depth = 0,
  481. ): string | null {
  482. const source = context.readFile(ref.filePath);
  483. if (!source) return null;
  484. const lines = source.split(/\r?\n/);
  485. const callLineIndex = Math.max(0, Math.min(lines.length - 1, ref.line - 1));
  486. const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  487. const receiverPattern = new RegExp(`\\b${escapedReceiver}\\b`);
  488. const declaratorRegex = buildDeclaratorRegex(escapedReceiver);
  489. for (let i = callLineIndex; i >= 0; i--) {
  490. const line = lines[i];
  491. if (!line || !receiverPattern.test(line)) continue;
  492. const declaratorMatch = line.match(declaratorRegex);
  493. if (declaratorMatch) {
  494. const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
  495. if (normalized === 'auto') {
  496. // `auto x = Foo::instance();` — the declared type is deduced; recover it
  497. // from the initializer (call return type / construction) (#645).
  498. const initType = inferCppAutoInitializerType(line, receiverName, ref, context, depth);
  499. if (initType) return initType;
  500. // No usable initializer on this line — keep scanning earlier ones.
  501. } else if (normalized) {
  502. return normalized;
  503. }
  504. }
  505. }
  506. const headerCandidates = [
  507. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.h'),
  508. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.hpp'),
  509. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.hxx'),
  510. ].filter((candidate, index, arr) => arr.indexOf(candidate) === index && candidate !== ref.filePath);
  511. for (const headerPath of headerCandidates) {
  512. if (!context.fileExists(headerPath)) continue;
  513. const headerSource = context.readFile(headerPath);
  514. if (!headerSource) continue;
  515. for (const line of headerSource.split(/\r?\n/)) {
  516. if (!receiverPattern.test(line)) continue;
  517. const declaratorMatch = line.match(declaratorRegex);
  518. if (!declaratorMatch) continue;
  519. const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
  520. if (normalized && normalized !== 'auto') return normalized;
  521. }
  522. }
  523. return null;
  524. }
  525. /**
  526. * Last `::`-separated segment of a (possibly namespace-qualified) C++ name.
  527. */
  528. function cppLastSegment(name: string): string {
  529. const parts = name.split('::').filter(Boolean);
  530. return parts[parts.length - 1] ?? name;
  531. }
  532. /**
  533. * Return type captured at extraction for `Class::method` (or a free function),
  534. * read off the indexed node's `returnType` — used by the C++ (#645) and PHP
  535. * (#608) chained-call resolvers. Language-filtered. Null when not indexed or no
  536. * return type was recorded (a `void`/primitive return).
  537. */
  538. function lookupCalleeReturnType(
  539. callee: string,
  540. ref: UnresolvedRef,
  541. context: ResolutionContext,
  542. ): string | null {
  543. let method = callee;
  544. let cls: string | null = null;
  545. if (callee.includes('::')) {
  546. const parts = callee.split('::').filter(Boolean);
  547. method = parts[parts.length - 1] ?? callee;
  548. cls = parts.slice(0, -1).join('::');
  549. }
  550. const candidates = context.getNodesByName(method).filter(
  551. (n) =>
  552. (n.kind === 'method' || n.kind === 'function') &&
  553. n.language === ref.language &&
  554. !!n.returnType,
  555. );
  556. if (cls) {
  557. const want = `${cls}::${method}`;
  558. // The call site may name the class with MORE namespace qualification than
  559. // the stored node (`details::registry::instance` at the call vs
  560. // `registry::instance` on the node — the receiver type only carries the
  561. // immediate class), or LESS. Accept an exact match or either being a
  562. // namespace-suffix of the other; the shared `::<class>::<method>` tail keeps
  563. // it specific.
  564. const m = candidates.find(
  565. (n) =>
  566. n.qualifiedName === want ||
  567. n.qualifiedName.endsWith(`::${want}`) ||
  568. want.endsWith(`::${n.qualifiedName}`),
  569. );
  570. return m?.returnType ?? null;
  571. }
  572. return candidates.find((n) => n.kind === 'function')?.returnType ?? null;
  573. }
  574. /** Does the graph contain a class/struct named `name`'s last segment? */
  575. function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  576. const last = cppLastSegment(name);
  577. return context
  578. .getNodesByName(last)
  579. .some((n) => (n.kind === 'class' || n.kind === 'struct') && n.language === ref.language);
  580. }
  581. /**
  582. * Infer the class produced by a C++ call/construction expression, using return
  583. * types captured at extraction (#645). Handles, in order:
  584. * - `make_unique<T>()` / `make_shared<T>()` → T
  585. * - single-level member call `recv.method()` → recv's type, then method's return
  586. * - `Class::method()` / free `func()` → the callee's recorded return type
  587. * - direct construction `Type()` / `ns::Type()` → Type
  588. * Returns null when undeterminable. Callers MUST still validate the outer method
  589. * exists on the result before creating an edge, so a wrong guess stays silent.
  590. */
  591. function resolveCppCallResultType(
  592. inner: string,
  593. ref: UnresolvedRef,
  594. context: ResolutionContext,
  595. depth = 0,
  596. ): string | null {
  597. if (depth > 3) return null; // guard against pathological mutual recursion
  598. const expr = inner.trim();
  599. const make = expr.match(/(?:^|::)(?:make_unique|make_shared)\s*<\s*([A-Za-z_]\w*)/);
  600. if (make) return make[1] ?? null;
  601. // Single-level member call `recv.method` (the `manager.view().render()` shape).
  602. const dotIdx = expr.lastIndexOf('.');
  603. if (dotIdx > 0) {
  604. const recv = expr.slice(0, dotIdx);
  605. const method = expr.slice(dotIdx + 1);
  606. if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
  607. const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
  608. if (!recvType) return null;
  609. return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  610. }
  611. const ret = lookupCalleeReturnType(expr, ref, context);
  612. if (ret) return ret;
  613. // Direct construction — the callee itself names a class/struct.
  614. if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  615. return null;
  616. }
  617. /**
  618. * Recover the type of an `auto`-declared local from its initializer on the
  619. * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
  620. * `auto p = new W();`, `auto w = Widget();` (#645).
  621. */
  622. function inferCppAutoInitializerType(
  623. line: string,
  624. receiverName: string,
  625. ref: UnresolvedRef,
  626. context: ResolutionContext,
  627. depth: number,
  628. ): string | null {
  629. const escaped = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  630. const m = line.match(new RegExp(`\\b${escaped}\\b\\s*=\\s*([^;]+)`));
  631. if (!m || !m[1]) return null;
  632. const init = m[1].trim();
  633. const neu = init.match(/^new\s+([A-Za-z_][\w:]*)/);
  634. if (neu && neu[1]) return cppLastSegment(neu[1]);
  635. // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  636. const call = init.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/);
  637. if (call && call[1]) return resolveCppCallResultType(call[1].replace(/\s+/g, ''), ref, context, depth + 1);
  638. return null;
  639. }
  640. /**
  641. * Resolve a C++ chained call whose receiver is itself a call — encoded by the
  642. * extractor as `<innerCallee>().<method>` (#645). The receiver's type is what
  643. * the inner call returns; the outer method is then resolved and VALIDATED on it
  644. * (resolveMethodOnType requires `cls::method` to exist), so a wrong inference
  645. * produces no edge rather than a wrong one.
  646. */
  647. export function matchCppCallChain(
  648. ref: UnresolvedRef,
  649. context: ResolutionContext,
  650. ): ResolvedRef | null {
  651. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  652. if (!m || !m[1] || !m[2]) return null;
  653. const cls = resolveCppCallResultType(m[1], ref, context);
  654. if (!cls) return null;
  655. return resolveMethodOnType(cls, m[2], ref, context, 0.85, 'instance-method');
  656. }
  657. /**
  658. * Resolve a `::`-scoped factory chain whose receiver is a scoped/static call —
  659. * PHP `Cls::for($x)->method()` (#608, the per-credential Laravel client idiom) or
  660. * Rust `Foo::new().bar()` (an associated-function call) — both encoded by the
  661. * extractor as `Cls::factory().method`. The receiver's type is what `Cls::factory`
  662. * returns: a `self` marker (PHP `: self`/`: static`, Rust `-> Self`) resolves to
  663. * the factory's own type, a concrete return type to that type. The outer method is
  664. * then resolved and VALIDATED on it (resolveMethodOnType requires the method to
  665. * exist on the type or a supertype it conforms to), so a wrong inference yields no
  666. * edge rather than a wrong one. Shared by the `::`-receiver languages (PHP, Rust).
  667. */
  668. export function matchScopedCallChain(
  669. ref: UnresolvedRef,
  670. context: ResolutionContext,
  671. ): ResolvedRef | null {
  672. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  673. if (!m || !m[1] || !m[2]) return null;
  674. const inner = m[1];
  675. const method = m[2];
  676. if (!inner.includes('::')) return null; // only static-factory (`Cls::method`) chains
  677. const factoryClass = inner.slice(0, inner.lastIndexOf('::'));
  678. const ret = lookupCalleeReturnType(inner, ref, context);
  679. if (!ret) return null;
  680. // `self` (the extractor's marker for self/static/$this) → the factory's class.
  681. const resolvedClass = ret === 'self' ? factoryClass : ret;
  682. return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
  683. }
  684. /**
  685. * Languages where an unprefixed capitalized call `Foo(args)` constructs the
  686. * class (so a `Foo(args).method()` receiver's type is `Foo`). Java/C# need `new`,
  687. * so a bare `Foo()` there is a method call, not construction — excluded. Scala's
  688. * `Foo(args)` is a case-class / companion `apply`, which conventionally returns
  689. * `Foo` — and resolveMethodOnType validates, so a non-conventional `apply` that
  690. * returns another type simply yields no edge rather than a wrong one. Pascal/Delphi:
  691. * a `TFoo(x)` is a TYPECAST whose result is a `TFoo`, so `TFoo(x).method()` resolves
  692. * the method on `TFoo` — same shape, same validation.
  693. */
  694. const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala', 'dart', 'pascal']);
  695. /**
  696. * Resolve a dotted chained call whose receiver is a static factory / fluent call —
  697. * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
  698. * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
  699. * (its declared return type); the outer method is then resolved and VALIDATED on
  700. * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
  701. * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
  702. * unrelated class is never matched). Shared by the dot-notation languages
  703. * (Java, Kotlin, C#, Swift) — same receiver shape, same `Class::method` qualified names.
  704. */
  705. export function matchDottedCallChain(
  706. ref: UnresolvedRef,
  707. context: ResolutionContext,
  708. ): ResolvedRef | null {
  709. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  710. if (!m || !m[1] || !m[2]) return null;
  711. const inner = m[1]; // `Foo.getInstance`
  712. const method = m[2]; // `bar`
  713. const lastDot = inner.lastIndexOf('.');
  714. if (lastDot <= 0) {
  715. // Go: bare package-level factory FUNCTION `New().method()` — the receiver's
  716. // type is what `New` returns; resolve the method on that.
  717. if (ref.language === 'go') {
  718. const ret = lookupCalleeReturnType(inner, ref, context);
  719. if (ret) {
  720. return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
  721. }
  722. // `inner` isn't a function with a captured return type — typically a
  723. // package-level VARIABLE holding a function value (e.g. gin's `engine()`),
  724. // whose type we can't recover. Fall back to bare-name resolution of the
  725. // method so we don't DROP an edge the un-re-encoded bare path would have
  726. // found. (When `inner` IS a real factory function but the method doesn't
  727. // exist on its return type, `ret` is truthy and we returned no edge above —
  728. // the absent-method safety guarantee is preserved.)
  729. //
  730. // CRITICAL: resolve the TARGET via a synthetic bare-name ref, but return the
  731. // match tied to the ORIGINAL `ref` (referenceName `inner().method`). The
  732. // batched resolver (resolveAndPersistBatched) reads unresolved rows from
  733. // offset 0 every pass and relies on deleteSpecificResolvedReferences —
  734. // keyed on referenceName — to clear each resolved row so the batch empties.
  735. // If we propagated the synthetic ref's bare `method` as `.original`, the
  736. // delete would never match the stored `inner().method` row, the batch would
  737. // never drain, and the loop would re-resolve + re-insert forever (a runaway
  738. // that grew gin's graph to 5M edges / 1.4 GB before this fix).
  739. const bareRef = { ...ref, referenceName: method };
  740. const bareMatch = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
  741. return bareMatch ? { ...bareMatch, original: ref } : null;
  742. }
  743. // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
  744. // capitalized inner is a class construction, so the receiver's type is the
  745. // class itself — resolve the method on it. Only in languages where an
  746. // unprefixed capitalized call constructs the class (Kotlin, Swift); in Java/C#
  747. // a bare `Foo()` is a method call (constructors need `new`), so we must not
  748. // assume construction. A lowercase bare inner is a top-level `factory().method()`
  749. // whose type we can't recover — bail.
  750. if (!CONSTRUCTS_VIA_BARE_CALL.has(ref.language) || !/^[A-Z]/.test(inner)) return null;
  751. return resolveMethodOnType(inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context));
  752. }
  753. // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
  754. // type is what `Receiver.factory` returns (its declared return type).
  755. const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
  756. const factoryMethod = inner.slice(lastDot + 1);
  757. if (!factoryClass || !factoryMethod) return null;
  758. const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
  759. if (!ret) {
  760. // Objective-C: a class-message factory — `[X alloc]`, `[X new]`,
  761. // `[X sharedFoo]` — returns an instance of the RECEIVER class `X` by
  762. // convention (`instancetype`). So when the factory's own return type isn't
  763. // recoverable (its selector returns `instancetype`, or `alloc`/`new` aren't
  764. // user-defined nodes at all), the receiver's type is the class `X` itself.
  765. // This resolves the ubiquitous `[[X alloc] init]` and singleton chains.
  766. // resolveMethodOnType validates against X (and its supertypes), so a class
  767. // whose method actually lives elsewhere yields NO edge, not a wrong one — and
  768. // crucially this does NOT fire when a concrete return type WAS captured but
  769. // simply lacks the method (that already returned null above: absent-method
  770. // safety, so a same-named decoy is still never matched).
  771. if (ref.language === 'objc' && /^[A-Z]/.test(factoryClass)) {
  772. return resolveMethodOnType(factoryClass, method, ref, context, 0.8, 'instance-method', importedFqnOf(factoryClass, ref, context));
  773. }
  774. // Pascal/Delphi: the extractor only re-encodes a `TFoo`/`IFoo`-prefixed chain
  775. // (the type-naming convention), so `factoryClass` is always a real class here.
  776. // A factory whose return type wasn't captured is a CONSTRUCTOR
  777. // (`TFileMem.Create().SetCachePerformance` — `constructor Create` has no `:
  778. // TBar` annotation but returns its own class) or an unannotated function. In
  779. // both cases the receiver's type is the class itself, so resolve the method on
  780. // `factoryClass`. resolveMethodOnType validates against it (and its
  781. // supertypes), so a wrong inference yields no edge — and this never fires when
  782. // a return type WAS captured but lacks the method (absent-method safety above).
  783. if (ref.language === 'pascal' && /^[TI]/.test(factoryClass)) {
  784. return resolveMethodOnType(factoryClass, method, ref, context, 0.8, 'instance-method', importedFqnOf(factoryClass, ref, context));
  785. }
  786. return null;
  787. }
  788. return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
  789. }
  790. /**
  791. * When several classes share a simple type name, the caller file's import of
  792. * that type is the only signal that names WHICH one (#314). Returns the imported
  793. * FQN for `typeName` in the ref's file, or undefined.
  794. */
  795. function importedFqnOf(
  796. typeName: string,
  797. ref: UnresolvedRef,
  798. context: ResolutionContext,
  799. ): string | undefined {
  800. const imports = context.getImportMappings(ref.filePath, ref.language);
  801. return imports.find((i) => i.localName === typeName)?.source;
  802. }
  803. /**
  804. * Java/Kotlin: infer a receiver's declared type by walking field declarations
  805. * in the class enclosing the call site. The field's `signature` is already in
  806. * the form "<TypeName> <fieldName>" (set by tree-sitter.ts extractField), so we
  807. * pull the type from there. Handles Spring `@Resource UserBO userbo;` /
  808. * `@Autowired private UserService userService;` where the receiver field name
  809. * doesn't match the class name by Java naming convention.
  810. *
  811. * Returns the bare type name (generics stripped, dotted package stripped) or
  812. * null when no matching field is in the enclosing class.
  813. */
  814. function inferJavaFieldReceiverType(
  815. receiverName: string,
  816. ref: UnresolvedRef,
  817. context: ResolutionContext,
  818. ): string | null {
  819. const inFile = context.getNodesInFile(ref.filePath);
  820. if (inFile.length === 0) return null;
  821. // Find the class enclosing the call line (tightest match by latest start).
  822. let enclosing: Node | null = null;
  823. for (const n of inFile) {
  824. if (n.kind !== 'class' && n.kind !== 'interface') continue;
  825. if (n.language !== ref.language) continue;
  826. const end = n.endLine ?? n.startLine;
  827. if (n.startLine <= ref.line && end >= ref.line) {
  828. if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
  829. }
  830. }
  831. if (!enclosing) return null;
  832. const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
  833. const field = inFile.find(
  834. (n) =>
  835. n.kind === 'field' &&
  836. n.name === receiverName &&
  837. n.language === ref.language &&
  838. n.startLine >= enclosing.startLine &&
  839. (n.endLine ?? n.startLine) <= enclosingEnd,
  840. );
  841. if (!field || !field.signature) return null;
  842. // Signature shape: "<TypeName> <fieldName>" (extractField). Pull the type,
  843. // strip generics + dotted package, drop array/varargs markers.
  844. const beforeName = field.signature.slice(
  845. 0,
  846. field.signature.lastIndexOf(field.name),
  847. );
  848. const typeRaw = beforeName.trim();
  849. if (!typeRaw) return null;
  850. const typeNoGenerics = typeRaw.replace(/<[^>]*>/g, '').trim();
  851. const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
  852. const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  853. const lastPart = parts[parts.length - 1];
  854. if (!lastPart) return null;
  855. if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  856. return lastPart;
  857. }
  858. /**
  859. * Try to resolve by method name on a class/object
  860. */
  861. export function matchMethodCall(
  862. ref: UnresolvedRef,
  863. context: ResolutionContext
  864. ): ResolvedRef | null {
  865. // Parse method call patterns like "obj.method" or "Class::method". The method
  866. // part allows trailing `:` keywords so Objective-C selectors resolve
  867. // (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in other
  868. // languages' method refs, so this is a no-op for them.
  869. // The receiver allows dots (`builder.Services.AddCoreServices`) so a CHAINED
  870. // call resolves by its last segment — Strategy 3 below name-matches the method
  871. // (with its existing single-candidate / receiver-overlap guards). Without this
  872. // a multi-dot extension-method call (C# DI `builder.Services.AddCoreServices()`,
  873. // `Guard.Against.X()`) matched no pattern and never resolved.
  874. const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  875. const colonMatch = ref.referenceName.match(/^(\w+)::(\w+)$/);
  876. const match = dotMatch || colonMatch;
  877. if (!match) {
  878. return null;
  879. }
  880. const [, objectOrClass, methodName] = match;
  881. if (ref.language === 'cpp' && dotMatch) {
  882. const inferredType = inferCppReceiverType(objectOrClass!, ref, context);
  883. if (inferredType) {
  884. const typedMatch = resolveMethodOnType(
  885. inferredType,
  886. methodName!,
  887. ref,
  888. context,
  889. 0.9,
  890. 'instance-method',
  891. );
  892. if (typedMatch) {
  893. return typedMatch;
  894. }
  895. }
  896. }
  897. // Java/Kotlin: receiver may be a field whose name doesn't match the type by
  898. // Java naming convention (`userbo` → class `UserBO`, abbreviated). Look up
  899. // the field in the enclosing class to get its declared type, then resolve
  900. // the method on that type. Covers Spring `@Resource`/`@Autowired` field
  901. // injection where the field type is the concrete bean class.
  902. if ((ref.language === 'java' || ref.language === 'kotlin') && dotMatch) {
  903. const inferredType = inferJavaFieldReceiverType(objectOrClass!, ref, context);
  904. if (inferredType) {
  905. // When two classes share the same simple name, the caller file's
  906. // import is the only signal that names WHICH one — pass the
  907. // imported FQN so resolveMethodOnType can disambiguate (#314).
  908. const imports = context.getImportMappings(ref.filePath, ref.language);
  909. const importedFqn = imports.find((i) => i.localName === inferredType)?.source;
  910. const typedMatch = resolveMethodOnType(
  911. inferredType,
  912. methodName!,
  913. ref,
  914. context,
  915. 0.9,
  916. 'instance-method',
  917. importedFqn,
  918. );
  919. if (typedMatch) {
  920. return typedMatch;
  921. }
  922. }
  923. }
  924. // Strategy 1: Direct class name match (existing logic)
  925. const classCandidates = context.getNodesByName(objectOrClass!);
  926. for (const classNode of classCandidates) {
  927. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  928. // Skip cross-language class matches
  929. if (classNode.language !== ref.language) continue;
  930. const nodesInFile = context.getNodesInFile(classNode.filePath);
  931. const methodNode = nodesInFile.find(
  932. (n) =>
  933. n.kind === 'method' &&
  934. n.name === methodName &&
  935. n.qualifiedName.includes(classNode.name)
  936. );
  937. if (methodNode) {
  938. return {
  939. original: ref,
  940. targetNodeId: methodNode.id,
  941. confidence: 0.85,
  942. resolvedBy: 'qualified-name',
  943. };
  944. }
  945. }
  946. }
  947. // Strategy 2: Instance variable receiver - try capitalized form to find class
  948. // e.g., "permissionEngine" → look for classes containing "PermissionEngine"
  949. const capitalizedReceiver = objectOrClass!.charAt(0).toUpperCase() + objectOrClass!.slice(1);
  950. if (capitalizedReceiver !== objectOrClass) {
  951. const fuzzyClassCandidates = context.getNodesByName(capitalizedReceiver);
  952. for (const classNode of fuzzyClassCandidates) {
  953. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  954. // Skip cross-language class matches
  955. if (classNode.language !== ref.language) continue;
  956. const nodesInFile = context.getNodesInFile(classNode.filePath);
  957. const methodNode = nodesInFile.find(
  958. (n) =>
  959. n.kind === 'method' &&
  960. n.name === methodName &&
  961. n.qualifiedName.includes(classNode.name)
  962. );
  963. if (methodNode) {
  964. return {
  965. original: ref,
  966. targetNodeId: methodNode.id,
  967. confidence: 0.8,
  968. resolvedBy: 'instance-method',
  969. };
  970. }
  971. }
  972. }
  973. }
  974. // Strategy 3: Find methods by name across the codebase, match by receiver
  975. // name similarity with the containing class. Handles abbreviated variable
  976. // names like permissionEngine → PermissionRuleEngine.
  977. if (methodName) {
  978. const methodCandidates = context.getNodesByName(methodName!);
  979. const methods = methodCandidates.filter(
  980. (n) => n.kind === 'method' && n.name === methodName
  981. );
  982. // Filter to same-language candidates first
  983. const sameLanguageMethods = methods.filter(m => m.language === ref.language);
  984. const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;
  985. // If only one same-language method with this name exists, use it
  986. if (targetMethods.length === 1 && targetMethods[0]!.language === ref.language) {
  987. return {
  988. original: ref,
  989. targetNodeId: targetMethods[0]!.id,
  990. confidence: 0.7,
  991. resolvedBy: 'instance-method',
  992. };
  993. }
  994. // Multiple methods: score by receiver name word overlap with class name
  995. if (targetMethods.length > 1) {
  996. const receiverWords = splitCamelCase(objectOrClass!);
  997. let bestMatch: typeof targetMethods[0] | undefined;
  998. let bestScore = 0;
  999. for (const method of targetMethods) {
  1000. const classWords = splitCamelCase(method.qualifiedName);
  1001. let score = receiverWords.filter(w =>
  1002. classWords.some(cw => cw.toLowerCase() === w.toLowerCase())
  1003. ).length;
  1004. // Bonus for same language
  1005. if (method.language === ref.language) score += 1;
  1006. if (score > bestScore) {
  1007. bestScore = score;
  1008. bestMatch = method;
  1009. }
  1010. }
  1011. if (bestMatch && bestScore >= 2) {
  1012. return {
  1013. original: ref,
  1014. targetNodeId: bestMatch.id,
  1015. confidence: 0.65,
  1016. resolvedBy: 'instance-method',
  1017. };
  1018. }
  1019. }
  1020. }
  1021. return null;
  1022. }
  1023. /**
  1024. * Split a camelCase or PascalCase string into words.
  1025. */
  1026. function splitCamelCase(str: string): string[] {
  1027. return str.replace(/([a-z])([A-Z])/g, '$1 $2')
  1028. .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
  1029. .split(/[\s._:\/\\]+/)
  1030. .filter(w => w.length > 1);
  1031. }
  1032. /**
  1033. * Compute directory proximity between two file paths.
  1034. * Returns a score based on the number of shared directory segments.
  1035. * Higher score = closer in directory tree.
  1036. */
  1037. function computePathProximity(filePath1: string, filePath2: string): number {
  1038. const dir1 = filePath1.split('/').slice(0, -1);
  1039. const dir2 = filePath2.split('/').slice(0, -1);
  1040. let shared = 0;
  1041. for (let i = 0; i < Math.min(dir1.length, dir2.length); i++) {
  1042. if (dir1[i] === dir2[i]) {
  1043. shared++;
  1044. } else {
  1045. break;
  1046. }
  1047. }
  1048. // Each shared directory segment contributes 15 points, capped at 80
  1049. return Math.min(shared * 15, 80);
  1050. }
  1051. /**
  1052. * Find the best matching node when there are multiple candidates
  1053. */
  1054. function findBestMatch(
  1055. ref: UnresolvedRef,
  1056. candidates: Node[],
  1057. _context: ResolutionContext
  1058. ): Node | null {
  1059. // Prioritization rules:
  1060. // 1. Same file > different file
  1061. // 2. Directory proximity (same module/package > different module)
  1062. // 3. Same language > different language
  1063. // 4. Functions/methods > classes/types (for call references)
  1064. // 5. Exported > non-exported
  1065. let bestScore = -1;
  1066. let bestNode: Node | null = null;
  1067. for (const candidate of candidates) {
  1068. let score = 0;
  1069. // Same file bonus
  1070. if (candidate.filePath === ref.filePath) {
  1071. score += 100;
  1072. }
  1073. // Directory proximity bonus — strongly prefer same module/package
  1074. score += computePathProximity(ref.filePath, candidate.filePath);
  1075. // Language matching: strongly prefer same language, penalize cross-language
  1076. if (candidate.language === ref.language) {
  1077. score += 50;
  1078. } else {
  1079. score -= 80;
  1080. }
  1081. // For call references, prefer functions/methods
  1082. if (ref.referenceKind === 'calls') {
  1083. if (candidate.kind === 'function' || candidate.kind === 'method') {
  1084. score += 25;
  1085. }
  1086. }
  1087. // For instantiation references (`new Foo()`), prefer class-like
  1088. // targets — without this, a function named `Foo` in another module
  1089. // could outscore the actual class.
  1090. if (ref.referenceKind === 'instantiates') {
  1091. if (
  1092. candidate.kind === 'class' ||
  1093. candidate.kind === 'struct' ||
  1094. candidate.kind === 'interface'
  1095. ) {
  1096. score += 25;
  1097. }
  1098. }
  1099. // For decorator references (`@Foo`), prefer functions. Class
  1100. // decorators (Python `@SomeClass`, Java annotation interfaces)
  1101. // also resolve here, hence the smaller class bonus.
  1102. if (ref.referenceKind === 'decorates') {
  1103. if (candidate.kind === 'function' || candidate.kind === 'method') {
  1104. score += 25;
  1105. } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
  1106. score += 15;
  1107. }
  1108. }
  1109. // Exported bonus
  1110. if (candidate.isExported) {
  1111. score += 10;
  1112. }
  1113. // Closer line number (within same file)
  1114. if (candidate.filePath === ref.filePath && candidate.startLine) {
  1115. const distance = Math.abs(candidate.startLine - ref.line);
  1116. score += Math.max(0, 20 - distance / 10);
  1117. }
  1118. if (score > bestScore) {
  1119. bestScore = score;
  1120. bestNode = candidate;
  1121. }
  1122. }
  1123. return bestNode;
  1124. }
  1125. /**
  1126. * Fuzzy match - last resort with lower confidence
  1127. */
  1128. export function matchFuzzy(
  1129. ref: UnresolvedRef,
  1130. context: ResolutionContext
  1131. ): ResolvedRef | null {
  1132. const lowerName = ref.referenceName.toLowerCase();
  1133. // Use pre-built lowercase index for O(1) lookup instead of scanning all nodes
  1134. const candidates = context.getNodesByLowerName(lowerName);
  1135. // Filter to callable kinds only (function, method, class)
  1136. const callableKinds = new Set(['function', 'method', 'class']);
  1137. const callableCandidates = applyLanguageGate(candidates.filter((n) => callableKinds.has(n.kind)), ref);
  1138. // Prefer same-language matches
  1139. const sameLanguageCandidates = callableCandidates.filter(n => n.language === ref.language);
  1140. const finalCandidates = sameLanguageCandidates.length > 0 ? sameLanguageCandidates : callableCandidates;
  1141. if (finalCandidates.length === 1) {
  1142. const isCrossLanguage = finalCandidates[0]!.language !== ref.language;
  1143. return {
  1144. original: ref,
  1145. targetNodeId: finalCandidates[0]!.id,
  1146. confidence: isCrossLanguage ? 0.3 : 0.5,
  1147. resolvedBy: 'fuzzy',
  1148. };
  1149. }
  1150. return null;
  1151. }
  1152. /**
  1153. * Match all strategies in order of confidence
  1154. */
  1155. export function matchReference(
  1156. ref: UnresolvedRef,
  1157. context: ResolutionContext
  1158. ): ResolvedRef | null {
  1159. // Function-as-value refs (#756) resolve ONLY through the dedicated matcher —
  1160. // never the fuzzy/qualified fallthrough below (a wrong callback edge is
  1161. // worse than none).
  1162. if (ref.referenceKind === 'function_ref') {
  1163. return matchFunctionRef(ref, context);
  1164. }
  1165. // Try strategies in order of confidence
  1166. let result: ResolvedRef | null;
  1167. // 0. File path match (e.g., "snippets/drawer-menu.liquid" → file node)
  1168. result = matchByFilePath(ref, context);
  1169. if (result) return result;
  1170. // 1. Qualified name match (highest confidence)
  1171. result = matchByQualifiedName(ref, context);
  1172. if (result) return result;
  1173. // 1b. C++ chained call whose receiver is another call — `Foo::instance().bar()`
  1174. // encoded as `Foo::instance().bar` by the extractor (#645). Resolve the
  1175. // receiver's type from what the inner call returns, then the method on it.
  1176. if (ref.language === 'cpp' || ref.language === 'c') {
  1177. result = matchCppCallChain(ref, context);
  1178. if (result) return result;
  1179. }
  1180. // 1c. `::`-scoped factory chain — PHP `Cls::for($x)->method()` (#608) or Rust
  1181. // `Foo::new().bar()`, both encoded as `Cls::factory().method`. The receiver's
  1182. // type is the factory's `self` (PHP `: self`/`: static`, Rust `-> Self`) or
  1183. // concrete return type.
  1184. if (ref.language === 'php' || ref.language === 'rust') {
  1185. result = matchScopedCallChain(ref, context);
  1186. if (result) return result;
  1187. }
  1188. // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift /
  1189. // Go / Scala / Dart / Objective-C) — `Foo.getInstance().bar()` encoded as
  1190. // `Foo.getInstance().bar`, Go's bare-factory `New().Method()` as `New().Method`,
  1191. // Scala's companion factory, Dart's static factory / factory-constructor, or
  1192. // ObjC's chained message send `[[Foo create] doIt]` encoded as `Foo.create().doIt`
  1193. // (#645/#608 mechanism). Resolve the method's class from the inner call's
  1194. // declared return type, then validate it.
  1195. if (
  1196. ref.language === 'java' ||
  1197. ref.language === 'kotlin' ||
  1198. ref.language === 'csharp' ||
  1199. ref.language === 'swift' ||
  1200. ref.language === 'go' ||
  1201. ref.language === 'scala' ||
  1202. ref.language === 'dart' ||
  1203. ref.language === 'objc' ||
  1204. ref.language === 'pascal'
  1205. ) {
  1206. result = matchDottedCallChain(ref, context);
  1207. if (result) return result;
  1208. }
  1209. // 2. Method call pattern
  1210. result = matchMethodCall(ref, context);
  1211. if (result) return result;
  1212. // 3. Exact name match
  1213. result = matchByExactName(ref, context);
  1214. if (result) return result;
  1215. // 4. Fuzzy match (lowest confidence)
  1216. result = matchFuzzy(ref, context);
  1217. if (result) return result;
  1218. return null;
  1219. }