1
0

name-matcher.ts 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666
  1. /**
  2. * Name Matcher
  3. *
  4. * Handles symbol name matching for reference resolution.
  5. */
  6. import { Language, Node } from '../types';
  7. import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
  /**
   * Upper bound on how many same-named definitions a FUZZY name-match strategy
   * is willing to score.
   *
   * A name defined more often than this is treated as "ubiquitous": a
   * method/symbol re-declared across a vendored theme or SDK (for example
   * `init`/`update`/`render` on every widget of a committed Metronic theme,
   * #999). Directory proximity or receiver-word overlap cannot reliably single
   * out the one true target among thousands, so the fuzzy strategies
   * (matchByExactName's findBestMatch and matchMethodCall Strategy 3) decline
   * above the ceiling rather than emit a low-confidence, almost certainly wrong
   * edge.
   *
   * The ceiling also bounds the per-ref cost at O(ceiling). Without it, K
   * same-named refs each scored K candidates, the O(K²) blow-up that pinned a
   * core for 15-28 min at "Resolving refs … 94%" on a repo vendoring a large
   * JS/TS theme (#999).
   *
   * The PRECISE strategies (qualified-name, import-based, class-name
   * Strategy 1/2) are not gated by this value and still resolve a ubiquitous
   * name when the context names its exact target. Real repos top out near ~40
   * same-named methods, so only bulk-vendored code reaches the ceiling.
   *
   * Override with the `CODEGRAPH_AMBIGUOUS_NAME_CEILING` environment variable.
   */
  const DEFAULT_AMBIGUOUS_NAME_CEILING = 500;

  /**
   * Read the ceiling from `CODEGRAPH_AMBIGUOUS_NAME_CEILING`.
   *
   * @returns The parsed value when the variable holds a positive base-10
   *   integer (as understood by `Number.parseInt`); otherwise the default.
   */
  function resolveAmbiguousNameCeiling(): number {
    const override = process.env.CODEGRAPH_AMBIGUOUS_NAME_CEILING;
    if (override) {
      const ceiling = Number.parseInt(override, 10);
      if (Number.isFinite(ceiling) && ceiling > 0) {
        return ceiling;
      }
    }
    return DEFAULT_AMBIGUOUS_NAME_CEILING;
  }

  // Resolved once at module load; later changes to the environment are not seen.
  const AMBIGUOUS_NAME_CEILING = resolveAmbiguousNameCeiling();
  /**
   * Try to resolve a path-like reference (e.g. "snippets/drawer-menu.liquid")
   * by matching its filename against file nodes.
   *
   * Resolution order, first hit wins:
   *   1. exact match on `qualifiedName` or `filePath`            (confidence 0.95)
   *   2. suffix match on a path-segment boundary, closest file   (confidence 0.85)
   *   3. the only file node carrying that basename               (confidence 0.7)
   *
   * @param ref - The unresolved reference; `referenceName` is the path text.
   * @param context - Lookup context; only `getNodesByName` is used here.
   * @returns The resolved reference, or `null` when the name is not path-like
   *   or no file node can be chosen.
   */
  export function matchByFilePath(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    const target = ref.referenceName;

    // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
    // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
    // basename). A bare ref WITHOUT an extension is a symbol name, not a file,
    // so it is left to the symbol-matching strategies.
    if (!target.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(target)) {
      return null;
    }

    // The basename is what file nodes are indexed under.
    const fileName = target.split('/').pop();
    if (!fileName) return null;

    const fileNodes = context.getNodesByName(fileName).filter((n) => n.kind === 'file');
    if (fileNodes.length === 0) return null;

    // 1. Exact path match.
    const exactMatch = fileNodes.find(
      (n) => n.qualifiedName === target || n.filePath === target
    );
    if (exactMatch) {
      return {
        original: ref,
        targetNodeId: exactMatch.id,
        confidence: 0.95,
        resolvedBy: 'file-path',
      };
    }

    // 2. Suffix match (ref="snippets/foo.liquid" matches
    // "src/snippets/foo.liquid"). The suffix must begin on a path-segment
    // boundary: a plain `endsWith` would also accept "my-snippets/foo.liquid",
    // a different directory that merely shares trailing characters.
    const endsOnSegment = (path: string): boolean => {
      if (!path.endsWith(target)) return false;
      if (path.length === target.length || target.startsWith('/')) return true;
      const before = path.charAt(path.length - target.length - 1);
      return before === '/' || before === '\\';
    };
    const suffixMatches = fileNodes.filter(
      (n) => endsOnSegment(n.qualifiedName) || endsOnSegment(n.filePath)
    );
    if (suffixMatches.length > 0) {
      // When several files share the basename — a `#include "RNCAsyncStorage.h"`
      // with a same-named header on another platform (windows/code/ vs apple/) —
      // the choice among them is delegated to pickClosestFileNode.
      return {
        original: ref,
        targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
        confidence: 0.85,
        resolvedBy: 'file-path',
      };
    }

    // 3. A lone file with this basename is accepted at lower confidence.
    if (fileNodes.length === 1) {
      return {
        original: ref,
        targetNodeId: fileNodes[0]!.id,
        confidence: 0.7,
        resolvedBy: 'file-path',
      };
    }

    return null;
  }
  /**
   * Choose, among file nodes that all match a bare include/import by basename,
   * the one nearest to the referencing file.
   *
   * Candidates living in the referencing file's own directory form the pool
   * when there are any; otherwise every candidate competes. Within the pool the
   * highest `computePathProximity` score wins, with a +5 bonus for candidates
   * in the same language family as the reference. Ties keep the earliest
   * candidate. A C/C++ `#include "X.h"` (and any bare-filename import) thus
   * resolves relative to the including file rather than to an arbitrary
   * same-named header on another platform.
   *
   * @param candidates - File nodes to choose from; callers pass a non-empty list.
   * @param ref - The reference whose `filePath`/`language` anchor the scoring.
   */
  function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
    const parentDir = (path: string): string => {
      const slash = path.lastIndexOf('/');
      return slash < 0 ? '' : path.slice(0, slash);
    };

    const originDir = parentDir(ref.filePath);
    const siblings = candidates.filter((node) => parentDir(node.filePath) === originDir);
    const contenders = siblings.length > 0 ? siblings : candidates;

    let winner = contenders[0]!;
    let topScore = -Infinity;
    contenders.forEach((node) => {
      const proximity = computePathProximity(ref.filePath, node.filePath);
      const familyBonus = sameLanguageFamily(node.language, ref.language) ? 5 : 0;
      const score = proximity + familyBonus;
      // Strictly greater: the first of several equal scores is kept.
      if (score > topScore) {
        topScore = score;
        winner = node;
      }
    });
    return winner;
  }
  /**
   * Maps a language tag to the family it shares a type system / runtime with,
   * so that a same-language-only reference may still resolve across members of
   * one family (a Kotlin `Foo.BAR` can name a Java `Foo`). A language that is
   * not listed forms its own singleton family.
   */
  const LANGUAGE_FAMILY: Record<string, string> = {
    java: 'jvm',
    kotlin: 'jvm',
    scala: 'jvm',

    swift: 'apple',
    objc: 'apple',

    typescript: 'web',
    tsx: 'web',
    javascript: 'web',
    jsx: 'web',

    c: 'c',
    cpp: 'c',

    // Razor/Blazor markup names C# types — same family so `@model Foo` /
    // `<MyComponent/>` resolve to their `.cs` class through the cross-family gate.
    csharp: 'dotnet',
    razor: 'dotnet',
  };

  /**
   * True when `a` and `b` are the same language or belong to the same listed
   * family. Two different unlisted languages are never in the same family.
   */
  export function sameLanguageFamily(a: string, b: string): boolean {
    if (a === b) return true;
    const family = LANGUAGE_FAMILY[a];
    if (family === undefined) return false;
    return LANGUAGE_FAMILY[b] === family;
  }
  /**
   * True when `lang` has an entry in `LANGUAGE_FAMILY`, i.e. it belongs to one
   * of the known multi-language families (jvm/apple/web/c/dotnet).
   *
   * Languages not listed (php, python, go, ruby, rust, dart, …) and config
   * formats (yaml/xml/blade) form their own singleton families and return
   * `false` — used to leave config↔code framework bridges (whose config side is
   * never a known programming-language family) out of the cross-family gate.
   *
   * The check is an own-property test so that a tag which happens to collide
   * with an inherited `Object.prototype` member (`constructor`, `toString`, …)
   * is not mistaken for a known family.
   *
   * @param lang - Language tag to look up.
   */
  export function isKnownLanguageFamily(lang: string): boolean {
    return Object.prototype.hasOwnProperty.call(LANGUAGE_FAMILY, lang);
  }
  /**
   * True when `a` and `b` belong to two DIFFERENT *known* language families —
   * the signature of a coincidental cross-language name collision (a TS
   * `import React` matching a Swift `import React`, a C++ `#include "X.h"`
   * matching a same-named ObjC header on another platform).
   *
   * Requiring BOTH sides to be known makes this deliberately weaker than the
   * negation of {@link sameLanguageFamily}: a single-file-component language
   * carrying its own tag (`vue`/`svelte`) importing a `.ts` module, or any
   * singleton-family language (php/go/ruby/…), yields `false` and is left alone.
   */
  export function crossesKnownFamily(a: string, b: string): boolean {
    if (!isKnownLanguageFamily(a) || !isKnownLanguageFamily(b)) {
      return false;
    }
    return !sameLanguageFamily(a, b);
  }
  /**
   * Drop cross-language candidates from a name lookup. The regime depends on
   * the reference kind:
   *
   * - `references` (type usage) and `function_ref`: strict same-family filter.
   *   A type named in language X resolves to a SAME-family type, never to a
   *   coincidentally same-named symbol in another language (the Android
   *   `BatteryManager` system class vs a JS one); cross-language communication
   *   is `calls`, not refs.
   * - `imports` (import binding): an `import`/`#include` never crosses two
   *   KNOWN families (TS `import React` ↮ Swift `import React`). This weaker
   *   both-known filter lets `.vue`/`.svelte` (own tag) importing `.ts` survive.
   * - any other kind: candidates are returned unchanged.
   */
  function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
    switch (ref.referenceKind) {
      case 'references':
      case 'function_ref':
        return candidates.filter((node) => sameLanguageFamily(node.language, ref.language));
      case 'imports':
        return candidates.filter((node) => !crossesKnownFamily(node.language, ref.language));
      default:
        return candidates;
    }
  }
  /**
   * Resolve a function-as-value reference (#756): a function name used as a
   * callback / function-pointer value (`register(handler)`, `o->cb = handler`,
   * `{ .cb = handler }`, `signal(SIGINT, handler)`).
   *
   * This is the ONLY strategy allowed for `function_ref` refs. It matches by
   * exact name, targets functions/methods only, stays within one language
   * family, prefers a same-file definition, and accepts a cross-file target
   * only when it is UNIQUE. There is no fuzzy fallback and no qualified-name
   * walking — a wrong callback edge is worse than none.
   *
   * @returns The resolved reference (`resolvedBy: 'function-ref'`) or `null`.
   */
  export function matchFunctionRef(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    const refName = ref.referenceName;

    // `this.<member>` refs are resolved ONLY by the class-scoped resolver in
    // resolveOne (resolveThisMemberFnRef) — never by name matching here.
    if (refName.startsWith('this.')) return null;

    // Earliest definition by start line; on equal lines the earlier array
    // element is kept, which makes the pick deterministic.
    const earliest = (nodes: Node[]): Node =>
      nodes.reduce((first, next) => (first.startLine <= next.startLine ? first : next));

    const resolved = (target: Node, confidence: number): ResolvedRef => ({
      original: ref,
      targetNodeId: target.id,
      confidence,
      resolvedBy: 'function-ref',
    });

    // Scope part of a `A::B::name` qualified name, or null when there is none.
    const scopeOf = (qualifiedName: string): string | null => {
      const cut = qualifiedName.lastIndexOf('::');
      return cut > 0 ? qualifiedName.slice(0, cut) : null;
    };

    // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
    // resolve the member ON THAT SCOPE. The `&Cls::m` shape is an explicit
    // member reference, so method targets are allowed for every language here.
    // Unique-or-drop like everything else.
    if (refName.includes('::')) {
      const memberName = refName.slice(refName.lastIndexOf('::') + 2);
      const nestedSuffix = `::${refName}`;
      const scoped = context.getNodesByName(memberName).filter((n) => {
        if (n.kind !== 'function' && n.kind !== 'method') return false;
        if (!sameLanguageFamily(n.language, ref.language)) return false;
        if (n.id === ref.fromNodeId) return false;
        return n.qualifiedName === refName || n.qualifiedName.endsWith(nestedSuffix);
      });
      if (scoped.length === 0) return null;

      const scopedInFile = scoped.filter((n) => n.filePath === ref.filePath);
      if (scopedInFile.length > 0) return resolved(earliest(scopedInFile), 0.9);
      return scoped.length === 1 ? resolved(scoped[0]!, 0.9) : null;
    }

    // In JS/TS/Python a bare identifier can never be a method value (methods
    // are only reachable through a receiver — `this.m` / `self.m` / `Cls.m`),
    // so bare fn-refs match FUNCTIONS only. This also sidesteps the
    // pre-existing TS quirk of class fields extracting as method-kind nodes,
    // which otherwise soaked up local names passed as arguments (excalidraw
    // A/B finding; same pattern in vendored docopt.py). Python's `self.m` form
    // keeps method targets via its own capture shape. C++ likewise: a bare
    // identifier can only be a FREE function (member values need
    // `&Cls::method`). PHP string callables name global FUNCTIONS (methods
    // need the `[$obj, 'm']` array form, which carries its own shape). Other
    // languages keep method targets: C# method groups, Swift/Dart
    // implicit-self, Java/Kotlin method references.
    const functionOnlyLanguages: string[] = [
      'typescript', 'tsx', 'javascript', 'jsx', 'cpp', 'python', 'php',
    ];
    const bareFnOnly = functionOnlyLanguages.includes(ref.language);

    let candidates = context.getNodesByName(refName).filter((n) => {
      const kindAllowed = n.kind === 'function' || (!bareFnOnly && n.kind === 'method');
      return (
        kindAllowed &&
        sameLanguageFamily(n.language, ref.language) &&
        n.id !== ref.fromNodeId // a function registering itself is not a dependency edge
      );
    });
    if (candidates.length === 0) return null;

    // Swift implicit-self: a bare identifier can name a METHOD only of the
    // ENCLOSING type (`Button(action: handleTap)` written inside that type) —
    // a same-named method on any OTHER class is a parameter collision
    // (Alamofire: a `request` parameter resolving to EventMonitor::request).
    // Method candidates are scoped to the from-symbol's type; top-level code
    // has no implicit self, so method targets are excluded there entirely.
    // Free functions are unaffected.
    if (ref.language === 'swift' && candidates.some((n) => n.kind === 'method')) {
      const fromNode = context.getNodeById?.(ref.fromNodeId);
      const enclosingScope = fromNode ? scopeOf(fromNode.qualifiedName) : null;
      candidates = candidates.filter((n) => {
        if (n.kind !== 'method') return true;
        if (enclosingScope === null) return false;
        const methodScope = scopeOf(n.qualifiedName);
        if (methodScope === null) return false;
        // Accept exact-scope matches plus suffix relationships either way, so
        // extension-declared members (`Holder::m`) still match a nested
        // from-scope (`Module::Holder::wire`) and vice versa.
        return (
          methodScope === enclosingScope ||
          methodScope.endsWith(`::${enclosingScope}`) ||
          enclosingScope.endsWith(`::${methodScope}`)
        );
      });
      if (candidates.length === 0) return null;
    }

    // Same-file definition wins — the extraction gate guarantees most
    // survivors have one, and it's the dominant C pattern (static callback
    // registered in a same-file ops struct).
    const sameFile = candidates.filter((n) => n.filePath === ref.filePath);
    if (sameFile.length > 0) {
      // Swift: several same-named METHODS in one file is an API overload
      // family (`Session.request(...)` × N), and a bare identifier hitting it
      // is almost always a same-named parameter, not a method value (Alamofire
      // A/B finding) — refuse rather than guess. A single method (SwiftUI's
      // `action: handleTap`) still resolves.
      const swiftOverloadFamily =
        ref.language === 'swift' &&
        sameFile.length > 1 &&
        sameFile.every((n) => n.kind === 'method');
      if (swiftOverloadFamily) return null;

      // Same-name overloads in one file are the same conceptual symbol; the
      // first by position is taken.
      return resolved(earliest(sameFile), sameFile.length === 1 ? 0.95 : 0.9);
    }

    // Cross-file (imported names the import resolver didn't already claim):
    // only an unambiguous match resolves.
    return candidates.length === 1 ? resolved(candidates[0]!, 0.8) : null;
  }
  /**
   * Try to resolve a reference by exact (unqualified) name.
   *
   * Confidence assigned:
   *   - single candidate, same language as the ref:   0.9
   *   - single candidate, different language:         0.5
   *   - best of several, path proximity >= 30:        0.7
   *   - best of several, more distant:                0.4
   *
   * @returns The resolved reference (`resolvedBy: 'exact-match'`) or `null`
   *   when nothing matches, the name is above the ambiguity ceiling, or no
   *   best match can be picked.
   */
  export function matchByExactName(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    const exactMatch = (target: Node, confidence: number): ResolvedRef => ({
      original: ref,
      targetNodeId: target.id,
      confidence,
      resolvedBy: 'exact-match',
    });

    // `import`-kind nodes are import STATEMENTS, not definitions, so a
    // reference resolving to a sibling file's `import` is a meaningless edge —
    // the real import→definition resolution is the import resolver's job
    // (resolveViaImport), never name-matching here. Excluding them also removes
    // a quadratic blow-up: a ubiquitous package (`react`, `@superset-ui/core`,
    // Python `logging`/`typing`) is re-declared as an `import` node in every
    // file that imports it, so K unresolved import refs each scored K
    // same-named import candidates through findBestMatch — O(K²) per package,
    // the dominant cost of "Resolving refs" on large import-heavy (front-end +
    // back-end) repos (#915).
    const definitions = applyLanguageGate(context.getNodesByName(ref.referenceName), ref)
      .filter((n) => n.kind !== 'import');

    if (definitions.length === 0) return null;

    // A lone candidate is accepted, but a cross-language one is penalized.
    if (definitions.length === 1) {
      const only = definitions[0]!;
      return exactMatch(only, only.language === ref.language ? 0.9 : 0.5);
    }

    // Ubiquitous-name ceiling (#999): above it, picking one target among K
    // same-named defs by directory proximity is unreliable AND O(K) per ref —
    // the quadratic behind the "Resolving refs" wedge on theme/SDK-vendoring
    // repos. Decline here; the precise strategies (qualified-name, import,
    // class-name) are not subject to this ceiling.
    if (definitions.length > AMBIGUOUS_NAME_CEILING) return null;

    // Several candidates: let findBestMatch narrow them down.
    const best = findBestMatch(ref, definitions, context);
    if (!best) return null;

    // A target in a distant/unrelated module earns less confidence.
    const isNearby = computePathProximity(ref.filePath, best.filePath) >= 30;
    return exactMatch(best, isNearby ? 0.7 : 0.4);
  }
  /**
   * Try to resolve a reference whose name looks qualified (contains `::` or
   * `.`).
   *
   * Resolution order, first hit wins:
   *   1. exactly one symbol with this qualified name                (0.95)
   *   2. several with this qualified name, one in the ref's file    (0.95)
   *   3. partial match: a symbol named like the last segment whose
   *      qualified name ends with the reference on an identifier
   *      boundary, preferring the ref's own file                    (0.85)
   *
   * @param ref - The unresolved reference.
   * @param context - Uses `getNodesByQualifiedName` and `getNodesByName`.
   * @returns The resolved reference (`resolvedBy: 'qualified-name'`) or `null`.
   */
  export function matchByQualifiedName(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    const qualified = ref.referenceName;

    // Unqualified names belong to the other strategies.
    if (!qualified.includes('::') && !qualified.includes('.')) {
      return null;
    }

    const hit = (target: Node, confidence: number): ResolvedRef => ({
      original: ref,
      targetNodeId: target.id,
      confidence,
      resolvedBy: 'qualified-name',
    });

    const candidates = context.getNodesByQualifiedName(qualified);
    if (candidates.length === 1) {
      return hit(candidates[0]!, 0.95);
    }

    // Several symbols share this exact qualified name (e.g. `Logger::log`
    // declared in two files — an ODR clash or separate translation units):
    // prefer the one in the call site's own file before the partial-match
    // fallback below, else the first-indexed def wins and a call in `b/svc`
    // targets `a/svc` (#1079).
    if (candidates.length > 1) {
      const ordered = preferCallSiteFile(candidates, ref.filePath);
      if (ordered[0]!.filePath === ref.filePath) {
        return hit(ordered[0]!, 0.95);
      }
    }

    // Partial qualified-name match, keyed on the last segment.
    const parts = qualified.split(/[:.]/);
    const lastName = parts[parts.length - 1];
    if (!lastName) return null;

    // The reference must line up with a whole trailing portion of the
    // candidate's qualified name: `Foo::bar` matches `ns::Foo::bar` but not
    // `XFoo::bar`, which a bare `endsWith` would accept. This mirrors the
    // `=== want || endsWith('::' + want)` checks used by matchFunctionRef and
    // resolveMethodOnType, while staying agnostic about the separator.
    const identifierChar = /[\w$]/;
    const endsOnBoundary = (qualifiedName: string): boolean => {
      if (!qualifiedName.endsWith(qualified)) return false;
      if (qualifiedName.length === qualified.length) return true;
      // A reference that itself starts with a separator carries its own boundary.
      if (!identifierChar.test(qualified.charAt(0))) return true;
      const before = qualifiedName.charAt(qualifiedName.length - qualified.length - 1);
      return !identifierChar.test(before);
    };

    const partialCandidates = context
      .getNodesByName(lastName)
      .filter((candidate) => endsOnBoundary(candidate.qualifiedName));
    if (partialCandidates.length === 0) return null;

    // Again prefer the call site's own file when more than one symbol fits.
    const chosen = preferCallSiteFile(partialCandidates, ref.filePath)[0];
    return chosen ? hit(chosen, 0.85) : null;
  }
  /**
   * Reorder ambiguous same-named candidates so that those declared in the call
   * site's own file come first; the remaining ones keep their relative order
   * (#1079).
   *
   * A same-file definition is the strongest language-agnostic signal for which
   * of several same-named symbols a call means; without it, resolution
   * collapses onto whichever was indexed first, so a call in `b/svc` wrongly
   * targets `a/svc`.
   *
   * @param nodes - Candidate nodes in index order.
   * @param callSiteFile - `filePath` of the referencing file.
   * @returns The input array itself when it has fewer than two entries or none
   *   is in `callSiteFile`; otherwise a new array with same-file entries first.
   */
  export function preferCallSiteFile(nodes: Node[], callSiteFile: string): Node[] {
    if (nodes.length < 2) return nodes;

    const local = nodes.filter((node) => node.filePath === callSiteFile);
    if (local.length === 0) return nodes;

    const elsewhere = nodes.filter((node) => node.filePath !== callSiteFile);
    return [...local, ...elsewhere];
  }
  /**
   * Resolve `methodName` as a method of `typeName`.
   *
   * Methods are looked up by name and kept when their qualified name is
   * `<typeName>::<methodName>` or ends in `::<typeName>::<methodName>`, so the
   * match works whether the method is defined in-class
   * (`class Foo { int bar() { ... } }`) or out-of-line in a separate file
   * (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
   *
   * When several methods match, the pick order is:
   * `preferredFqn` → call site's own file → first match.
   *
   * Exported for the precedence unit tests (#1079), which assert that ordering
   * directly.
   *
   * @param typeName - Receiver type to look the method up on.
   * @param methodName - Method name.
   * @param ref - The unresolved reference; its `language` and `filePath` are used.
   * @param context - Uses `getNodesByName` and, when present, `getSupertypes`.
   * @param confidence - Confidence copied into the result.
   * @param resolvedBy - Strategy tag copied into the result.
   * @returns The resolved reference, or `null` when neither the type nor any
   *   of its supertypes (walked while `depth` is below 4) declares the method.
   */
  export function resolveMethodOnType(
    typeName: string,
    methodName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
    confidence: number,
    resolvedBy: ResolvedRef['resolvedBy'],
    /**
     * Optional FQN that identifies WHICH class declaration `typeName`
     * refers to in the caller's file. When multiple candidates share
     * the same qualifiedName (`FooConverter::convert` in both
     * `dao/converter/` and `service/converter/`), the FQN's
     * file-path-suffix picks the right one — the disambiguation
     * signal Java imports carry but the call site doesn't (#314).
     */
    preferredFqn?: string,
    /** Recursion guard for the supertype/conformance walk. */
    depth = 0,
  ): ResolvedRef | null {
    const want = `${typeName}::${methodName}`;
    const nestedWant = `::${want}`;
    const matches = context.getNodesByName(methodName).filter(
      (m) =>
        m.kind === 'method' &&
        m.language === ref.language &&
        (m.qualifiedName === want || m.qualifiedName.endsWith(nestedWant)),
    );

    if (matches.length === 0) {
      // Conformance fallback: the method may be defined on a supertype
      // `typeName` extends, or on a protocol / trait it conforms to (e.g. a
      // Swift protocol-extension method, a C# default-interface or extension
      // method, a Kotlin extension on a supertype). Supertypes are walked
      // transitively (depth-capped) via the resolved implements/extends edges —
      // empty in the first resolution pass, populated in the conformance pass.
      // The result is still VALIDATED (the method must exist on a supertype),
      // so a wrong inference produces no edge.
      if (depth < 4 && context.getSupertypes) {
        for (const supertype of context.getSupertypes(typeName, ref.language)) {
          const via = resolveMethodOnType(
            supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
          );
          if (via) return via;
        }
      }
      return null;
    }

    if (matches.length > 1 && preferredFqn) {
      const ext = ref.language === 'kotlin' ? '.kt' : '.java';
      const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
      const chosen = matches.find((m) => {
        const fp = m.filePath.replace(/\\/g, '/');
        // The FQN path must cover whole directory segments: either the entire
        // path, or everything after a `/`. A bare `endsWith(fqnPath)` would
        // let `src/xcom/foo/Bar.java` satisfy the FQN `com.foo.Bar`.
        return fp === fqnPath || fp.endsWith(`/${fqnPath}`);
      });
      if (chosen) {
        return {
          original: ref,
          targetNodeId: chosen.id,
          confidence,
          resolvedBy,
        };
      }
    }

    // Language-agnostic disambiguation: when several same-named methods survive
    // (e.g. two files each declaring `class Logger { void log(); }` — an ODR
    // clash, an anonymous-namespace type, or separate translation units),
    // prefer the definition in the CALL SITE's own file. Without this, every
    // ambiguous call collapses onto the first-indexed definition, so a call in
    // `b/svc.cpp` wrongly points at `a/svc.cpp` (#1079). This runs AFTER the
    // `preferredFqn` block, so Java/Kotlin import disambiguation — whose target
    // is intentionally in ANOTHER file (#314) — is unaffected: that block
    // returns early whenever an import FQN pins the class.
    const ordered = preferCallSiteFile(matches, ref.filePath);
    return {
      original: ref,
      targetNodeId: ordered[0]!.id,
      confidence,
      resolvedBy,
    };
  }
  // C++ keywords/control-flow tokens that can appear right before a receiver
  // (e.g. `return ptr->m()`) and must NOT be treated as a type.
  const CPP_NON_TYPE_TOKENS = new Set([
    'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
    'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await', 'co_yield',
    'co_return', 'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
    'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
  ]);

  /**
   * Reduce a C++ declarator type spelling to its bare, unqualified type name.
   *
   * Strips cv/elaboration keywords (`const`, `volatile`, `mutable`,
   * `typename`, `class`, `struct`), pointer/reference markers, and template
   * argument lists, then returns the last `::`-separated segment.
   *
   * @param typeName - Raw type text, e.g. `const ns::Widget<T>*`.
   * @returns The bare name (`Widget`), or `null` when nothing is left or the
   *   remaining token is a keyword from `CPP_NON_TYPE_TOKENS`.
   */
  function normalizeCppTypeName(typeName: string): string | null {
    let stripped = typeName
      .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
      .replace(/[&*]+/g, ' ');

    // Remove template argument lists innermost-first until none remain. A
    // single `<[^>]*>` pass stops at the FIRST `>`, so a nested list such as
    // `std::map<int, std::vector<int>>` would leave a stray `>` behind and
    // yield `map >` instead of `map`.
    let previous: string;
    do {
      previous = stripped;
      stripped = stripped.replace(/<[^<>]*>/g, ' ');
    } while (stripped !== previous);

    const normalized = stripped.replace(/\s+/g, ' ').trim();
    if (!normalized) return null;

    const parts = normalized.split(/::/).filter(Boolean);
    const last = parts[parts.length - 1];
    if (!last) return null;
    if (CPP_NON_TYPE_TOKENS.has(last)) return null;
    return last;
  }
  /**
   * Build the pattern that recognises a C++ declaration of a given receiver.
   *
   * Accepts `Type receiver`, `Type* receiver`, `Type *receiver`, `Type*receiver`,
   * `Type<X> receiver` and similar, and captures the type spelling (including any
   * template arguments and pointer/reference markers) in group 1. A declarator
   * terminator (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end of input) MUST follow
   * the receiver; that is what rejects uses such as `return receiver->m()`,
   * where the preceding token is a keyword rather than a type.
   *
   * @param escapedReceiver Receiver name, already escaped for use inside a RegExp.
   */
  function buildDeclaratorRegex(escapedReceiver: string): RegExp {
    const typeName = String.raw`[A-Za-z_][\w:]*`;
    const templateArgs = String.raw`(?:\s*<[^;=(){}]+>)?`;
    const pointerOrRef = String.raw`(?:\s*[*&]+)?`;
    const receiver = String.raw`\s*\b` + escapedReceiver + String.raw`\b\s*`;
    const terminator = String.raw`(?=[;=,)\[{(]|$)`;
    return new RegExp('(' + typeName + templateArgs + pointerOrRef + ')' + receiver + terminator);
  }
  /**
   * Infer the declared type of a C++ receiver variable from source text.
   *
   * Walks backward from the call line to the top of the call site's file and
   * returns the type of the nearest line that declares `receiverName`. An `auto`
   * declaration is resolved through its initializer; if that fails, scanning
   * continues on earlier lines. When the file itself yields nothing, sibling
   * headers (`.h`, `.hpp`, `.hxx` in place of a `.c/.cc/.cpp/.cxx` extension) are
   * scanned top to bottom, skipping `auto` declarations.
   *
   * @param receiverName Identifier the method is invoked on.
   * @param ref          The unresolved reference (supplies file path and line).
   * @param context      Resolution context used to read files.
   * @param depth        Recursion depth, forwarded to the `auto` initializer inferrer.
   * @returns The simple type name, or null when no declaration is found.
   */
  function inferCppReceiverType(
    receiverName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
    depth = 0,
  ): string | null {
    const fileText = context.readFile(ref.filePath);
    if (!fileText) return null;

    const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
    const declarator = buildDeclaratorRegex(escapedReceiver);

    // Pass 1: nearest declaration at or above the call line in the same file.
    const fileLines = fileText.split(/\r?\n/);
    let idx = Math.max(0, Math.min(fileLines.length - 1, ref.line - 1));
    for (; idx >= 0; idx -= 1) {
      const text = fileLines[idx];
      if (!text || !mentionsReceiver.test(text)) continue;
      const hit = text.match(declarator);
      if (!hit) continue;
      const typeName = normalizeCppTypeName(hit[1] ?? '');
      if (typeName === 'auto') {
        // `auto x = Foo::instance();` — the declared type is deduced, so recover
        // it from the initializer (#645). Without a usable initializer on this
        // line, keep scanning earlier lines.
        const deduced = inferCppAutoInitializerType(text, receiverName, ref, context, depth);
        if (deduced) return deduced;
        continue;
      }
      if (typeName) return typeName;
    }

    // Pass 2: sibling headers, de-duplicated and never the call-site file itself
    // (which is what the replace yields when the path has no source extension).
    const sourceExtension = /\.(?:c|cc|cpp|cxx)$/i;
    const headerPaths: string[] = [];
    for (const headerExt of ['.h', '.hpp', '.hxx']) {
      const candidate = ref.filePath.replace(sourceExtension, headerExt);
      if (candidate !== ref.filePath && !headerPaths.includes(candidate)) {
        headerPaths.push(candidate);
      }
    }

    for (const headerPath of headerPaths) {
      if (!context.fileExists(headerPath)) continue;
      const headerText = context.readFile(headerPath);
      if (!headerText) continue;
      for (const text of headerText.split(/\r?\n/)) {
        if (!mentionsReceiver.test(text)) continue;
        const hit = text.match(declarator);
        if (!hit) continue;
        const typeName = normalizeCppTypeName(hit[1] ?? '');
        if (typeName && typeName !== 'auto') return typeName;
      }
    }

    return null;
  }
  /**
   * Return the final non-empty `::`-separated segment of a C++ name, i.e. the
   * simple name of a possibly namespace-qualified identifier. When the input has
   * no non-empty segment at all, the input is returned unchanged.
   */
  function cppLastSegment(name: string): string {
    const segments = name.split('::').filter((segment) => segment.length > 0);
    return segments.pop() ?? name;
  }
  /**
   * Look up the return type recorded at extraction time for a callee.
   *
   * `callee` is either a bare function name or a `::`-qualified `Class::method`.
   * Only method/function nodes in the reference's own language that carry a
   * `returnType` are considered. Used by the C++ (#645) and PHP (#608)
   * chained-call resolvers.
   *
   * @returns The recorded return type, or null when no matching node with a
   *          return type is indexed.
   */
  function lookupCalleeReturnType(
    callee: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): string | null {
    // Split a qualified callee into its owner path and trailing method name.
    const isQualified = callee.includes('::');
    const segments = isQualified ? callee.split('::').filter(Boolean) : [];
    const methodName = segments[segments.length - 1] ?? callee;
    const ownerName = isQualified ? segments.slice(0, -1).join('::') : null;

    const withReturnType = context.getNodesByName(methodName).filter((node) => {
      const isCallable = node.kind === 'method' || node.kind === 'function';
      return isCallable && node.language === ref.language && !!node.returnType;
    });

    if (!ownerName) {
      // Unqualified callee: only a free function qualifies.
      const freeFunction = withReturnType.find((node) => node.kind === 'function');
      return freeFunction?.returnType ?? null;
    }

    // The call site may spell the owner with MORE namespace qualification than
    // the stored node (`details::registry::instance` vs `registry::instance`) or
    // with LESS. Accept an exact match, or either name being a `::`-suffix of the
    // other; the shared `::<class>::<method>` tail keeps the match specific.
    const wanted = `${ownerName}::${methodName}`;
    const owned = withReturnType.find((node) => {
      const stored = node.qualifiedName;
      if (stored === wanted) return true;
      return stored.endsWith(`::${wanted}`) || wanted.endsWith(`::${stored}`);
    });
    return owned?.returnType ?? null;
  }
  /**
   * Report whether the graph holds a class or struct, in the reference's
   * language, whose name equals the last `::` segment of `name`.
   */
  function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
    const simpleName = cppLastSegment(name);
    for (const node of context.getNodesByName(simpleName)) {
      if (node.language !== ref.language) continue;
      if (node.kind === 'class' || node.kind === 'struct') return true;
    }
    return false;
  }
  /**
   * Infer the class produced by a C++ call/construction expression, using return
   * types captured at extraction (#645). Handles, in order:
   *  - `make_unique<T>()` / `make_shared<T>()` → T (simple name, even when `T` is
   *    namespace-qualified, e.g. `make_unique<ns::Widget>` → `Widget`)
   *  - single-level member call `recv.method()` → recv's type, then the method's
   *    recorded return type
   *  - `Class::method()` / free `func()` → the callee's recorded return type
   *  - direct construction `Type()` / `ns::Type()` → Type
   *
   * Callers MUST still validate that the outer method exists on the result
   * before creating an edge, so a wrong guess stays silent.
   *
   * @param inner   The callee expression without its trailing `()`.
   * @param ref     The unresolved reference being resolved.
   * @param context Resolution context (graph lookups, file reads).
   * @param depth   Recursion depth; inference gives up beyond 3.
   * @returns The simple class name, or null when it cannot be determined.
   */
  function resolveCppCallResultType(
    inner: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
    depth = 0,
  ): string | null {
    if (depth > 3) return null; // guard against pathological mutual recursion
    const expr = inner.trim();

    // Capture the WHOLE (possibly `::`-qualified) first template argument and
    // reduce it to its last segment. Capturing only the first identifier turned
    // `make_unique<ns::Widget>` into `ns`, which can never own the outer method.
    const make = expr.match(
      /(?:^|::)(?:make_unique|make_shared)\s*<\s*((?:::)?[A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/,
    );
    if (make && make[1]) return cppLastSegment(make[1]);

    // Single-level member call `recv.method` (the `manager.view().render()` shape).
    const dotIdx = expr.lastIndexOf('.');
    if (dotIdx > 0) {
      const recv = expr.slice(0, dotIdx);
      const method = expr.slice(dotIdx + 1);
      // Single level only: a receiver that is itself a chain, call or scoped
      // name is out of reach here.
      if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null;
      const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
      if (!recvType) return null;
      return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
    }

    const ret = lookupCalleeReturnType(expr, ref, context);
    if (ret) return ret;

    // Direct construction — the callee itself names a class/struct.
    if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
    return null;
  }
  /**
   * Recover the type of an `auto`-declared local from the initializer on its
   * declaration line (#645). Recognised initializer shapes:
   *  - `auto p = new W();`                  → `W` (last `::` segment)
   *  - `auto x = Foo::instance();`,
   *    `auto w = make_unique<W>();`,
   *    `auto w = Widget();`                 → delegated to the call-result resolver
   *
   * @param line         The source line holding the declaration.
   * @param receiverName The declared variable's name.
   * @param depth        Current recursion depth; the delegate is called with `depth + 1`.
   * @returns The inferred type, or null when the line has no `name = …`
   *          initializer of a recognised shape.
   */
  function inferCppAutoInitializerType(
    line: string,
    receiverName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
    depth: number,
  ): string | null {
    const escapedName = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const rawInitializer = new RegExp(`\\b${escapedName}\\b\\s*=\\s*([^;]+)`).exec(line)?.[1];
    if (!rawInitializer) return null;
    const initializer = rawInitializer.trim();

    const allocatedType = /^new\s+([A-Za-z_][\w:]*)/.exec(initializer)?.[1];
    if (allocatedType) return cppLastSegment(allocatedType);

    // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
    const callee = /^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/.exec(initializer)?.[1];
    if (!callee) return null;
    return resolveCppCallResultType(callee.replace(/\s+/g, ''), ref, context, depth + 1);
  }
  /**
   * Resolve a C++ chained call whose receiver is itself a call. The reference
   * name is expected in the shape `<innerCallee>().<method>` (#645). The
   * receiver's type is whatever the inner call produces; the outer method is
   * then looked up on that type via resolveMethodOnType with confidence 0.85.
   *
   * @returns The resolved reference, or null when the name has a different
   *          shape, the inner call's result type is unknown, or the method
   *          lookup yields nothing.
   */
  export function matchCppCallChain(
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): ResolvedRef | null {
    const parsed = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
    if (!parsed) return null;
    const [, innerCallee, outerMethod] = parsed;
    if (!innerCallee || !outerMethod) return null;

    const receiverType = resolveCppCallResultType(innerCallee, ref, context);
    return receiverType
      ? resolveMethodOnType(receiverType, outerMethod, ref, context, 0.85, 'instance-method')
      : null;
  }
  /**
   * Resolve a `::`-scoped factory chain, i.e. a reference named
   * `Cls::factory().method` — PHP `Cls::for($x)->method()` (#608) or Rust
   * `Foo::new().bar()`. The receiver's type is the recorded return type of
   * `Cls::factory`; the marker `self` stands for the factory's own class (the
   * text before the last `::`). The outer method is then looked up on that type
   * via resolveMethodOnType with confidence 0.85.
   *
   * @returns The resolved reference, or null when the name has a different
   *          shape, the inner callee is not `::`-scoped, no return type is
   *          recorded for it, or the method lookup yields nothing.
   */
  export function matchScopedCallChain(
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): ResolvedRef | null {
    const parsed = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
    if (!parsed || !parsed[1] || !parsed[2]) return null;
    const factoryCall = parsed[1];
    const outerMethod = parsed[2];

    // Only static-factory (`Cls::method`) chains are handled here.
    const scopeAt = factoryCall.lastIndexOf('::');
    if (scopeAt < 0) return null;

    const declaredReturn = lookupCalleeReturnType(factoryCall, ref, context);
    if (!declaredReturn) return null;

    // `self` (the extractor's marker for self/static/$this) → the factory's class.
    const receiverType = declaredReturn === 'self' ? factoryCall.slice(0, scopeAt) : declaredReturn;
    return resolveMethodOnType(receiverType, outerMethod, ref, context, 0.85, 'instance-method');
  }
  /**
   * Languages in which an unprefixed capitalized call `Foo(args)` yields a `Foo`,
   * so the receiver of `Foo(args).method()` has type `Foo`.
   *
   * Java and C# are deliberately absent: construction there needs `new`, so a
   * bare `Foo()` is an ordinary method call. Scala's `Foo(args)` is a case-class /
   * companion `apply`, which conventionally returns `Foo`. Pascal/Delphi's
   * `TFoo(x)` is a TYPECAST whose result is a `TFoo`, which has the same shape.
   * In every case the method lookup is validated against the type afterwards, so
   * an unconventional result simply produces no edge rather than a wrong one.
   */
  const CONSTRUCTS_VIA_BARE_CALL = new Set([
    'kotlin',
    'swift',
    'scala',
    'dart',
    'pascal',
  ]);
  /**
   * Resolve a dotted chained call whose receiver is itself a call — a reference
   * named `<inner>().<method>`, e.g. `Foo.getInstance().bar` (#645/#608
   * mechanism). Three receiver shapes are handled:
   *
   *  1. `Receiver.factory().method` — the receiver's type is the recorded return
   *     type of `Receiver::factory`. If none is recorded, Objective-C
   *     (capitalized receiver) and Pascal (`T`/`I`-prefixed receiver) fall back
   *     to the receiver class itself, at confidence 0.8.
   *  2. Go bare `New().method` — the return type of the package-level function,
   *     else a bare-name fallback on the method.
   *  3. Bare capitalized `Foo().method` in a CONSTRUCTS_VIA_BARE_CALL language —
   *     the receiver's type is `Foo`.
   *
   * Every typed path goes through resolveMethodOnType, passing along the FQN the
   * call-site file imports for that type name (if any).
   *
   * @returns The resolved reference, or null when nothing could be resolved.
   */
  export function matchDottedCallChain(
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): ResolvedRef | null {
    const parsed = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
    if (!parsed || !parsed[1] || !parsed[2]) return null;
    const inner = parsed[1]; // `Foo.getInstance`
    const method = parsed[2]; // `bar`

    // Resolve `method` on `typeName`, disambiguated by the call-site file's
    // import of that type.
    const resolveOn = (typeName: string, confidence: number) =>
      resolveMethodOnType(
        typeName,
        method,
        ref,
        context,
        confidence,
        'instance-method',
        importedFqnOf(typeName, ref, context),
      );

    const lastDot = inner.lastIndexOf('.');

    if (lastDot > 0) {
      // Factory/fluent receiver `Receiver.factory(args).method()`: the
      // receiver's type is what `Receiver.factory` returns.
      const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
      const factoryMethod = inner.slice(lastDot + 1);
      if (!factoryClass || !factoryMethod) return null;

      const declaredReturn = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
      // A captured return type is authoritative: if the method is absent on it
      // we return no edge and never reach the convention fallback below, so a
      // same-named decoy elsewhere is never matched.
      if (declaredReturn) return resolveOn(declaredReturn, 0.85);

      // No return type was captured. Two conventions let the receiver class
      // stand in for the result type:
      //  - Objective-C: a class-message factory (`[X alloc]`, `[X new]`,
      //    `[X sharedFoo]`) returns an instance of the receiver class `X`
      //    (`instancetype`), and `alloc`/`new` are not user-defined nodes at
      //    all. This covers the ubiquitous `[[X alloc] init]` and singleton
      //    chains.
      //  - Pascal/Delphi: the extractor only re-encodes `TFoo`/`IFoo`-prefixed
      //    chains, and a factory without a captured return type is a
      //    constructor (`constructor Create` has no `: TBar` annotation but
      //    returns its own class) or an unannotated function.
      const receiverIsResultType =
        (ref.language === 'objc' && /^[A-Z]/.test(factoryClass)) ||
        (ref.language === 'pascal' && /^[TI]/.test(factoryClass));
      return receiverIsResultType ? resolveOn(factoryClass, 0.8) : null;
    }

    if (ref.language === 'go') {
      // Go: bare package-level factory FUNCTION `New().method()` — the
      // receiver's type is what `New` returns. As above, a captured return type
      // that lacks the method yields no edge.
      const declaredReturn = lookupCalleeReturnType(inner, ref, context);
      if (declaredReturn) return resolveOn(declaredReturn, 0.85);

      // `inner` has no captured return type — typically a package-level
      // VARIABLE holding a function value (e.g. gin's `engine()`), whose type
      // cannot be recovered. Fall back to bare-name resolution of the method so
      // an edge the un-re-encoded bare path would have found is not dropped.
      //
      // CRITICAL: resolve the TARGET through a synthetic bare-name ref, but
      // return the match tied to the ORIGINAL `ref` (referenceName
      // `inner().method`). The batched resolver (resolveAndPersistBatched)
      // reads unresolved rows from offset 0 every pass and relies on
      // deleteSpecificResolvedReferences — keyed on referenceName — to clear
      // each resolved row so the batch empties. Propagating the synthetic ref's
      // bare `method` as `.original` would leave the stored `inner().method`
      // row undeleted; the batch would never drain and the loop would
      // re-resolve + re-insert forever (a runaway that grew gin's graph to 5M
      // edges / 1.4 GB before this fix).
      const bareRef = { ...ref, referenceName: method };
      const bareMatch = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
      return bareMatch ? { ...bareMatch, original: ref } : null;
    }

    // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
    // capitalized inner yields the class itself, but only in the languages listed
    // in CONSTRUCTS_VIA_BARE_CALL. In Java/C# a bare `Foo()` is a method call
    // (constructors need `new`). A lowercase bare inner is a top-level
    // `factory().method()` whose type we can't recover — bail.
    if (CONSTRUCTS_VIA_BARE_CALL.has(ref.language) && /^[A-Z]/.test(inner)) {
      return resolveOn(inner, 0.85);
    }
    return null;
  }
  /**
   * Find what the call-site file imports under the local name `typeName`.
   *
   * When several classes share a simple type name, the caller file's import of
   * that type is the only signal that names WHICH one (#314).
   *
   * @returns The `source` of the first import mapping in the ref's file whose
   *          `localName` equals `typeName`, or undefined when there is none.
   */
  function importedFqnOf(
    typeName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): string | undefined {
    for (const mapping of context.getImportMappings(ref.filePath, ref.language)) {
      if (mapping.localName === typeName) return mapping.source;
    }
    return undefined;
  }
  /**
   * Java/Kotlin: infer a receiver's declared type from the field declarations of
   * the class enclosing the call site. The field's `signature` is expected in the
   * form "<TypeName> <fieldName>" (set by tree-sitter.ts extractField), so the
   * type is the text in front of the field name. Handles Spring
   * `@Resource UserBO userbo;` / `@Autowired private UserService userService;`,
   * where the receiver field name doesn't match the class name by convention.
   *
   * @param receiverName Name of the field the method is invoked on.
   * @param ref          The unresolved reference (file path, language, line).
   * @param context      Resolution context used to list the file's nodes.
   * @returns The bare type name (generics, package qualification and
   *          array/varargs markers stripped), or null when there is no enclosing
   *          class, no matching field, the signature does not contain the field
   *          name, or the type does not start with an uppercase letter.
   */
  function inferJavaFieldReceiverType(
    receiverName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): string | null {
    const inFile = context.getNodesInFile(ref.filePath);
    if (inFile.length === 0) return null;

    // Find the class enclosing the call line (tightest match by latest start).
    let enclosing: Node | null = null;
    for (const n of inFile) {
      if (n.kind !== 'class' && n.kind !== 'interface') continue;
      if (n.language !== ref.language) continue;
      const end = n.endLine ?? n.startLine;
      if (n.startLine <= ref.line && end >= ref.line) {
        if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
      }
    }
    if (!enclosing) return null;

    // Capture the bounds in constants so the callback below does not depend on
    // the mutable `enclosing` binding.
    const enclosingStart = enclosing.startLine;
    const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
    const field = inFile.find(
      (n) =>
        n.kind === 'field' &&
        n.name === receiverName &&
        n.language === ref.language &&
        n.startLine >= enclosingStart &&
        (n.endLine ?? n.startLine) <= enclosingEnd,
    );
    if (!field || !field.signature) return null;

    // Signature shape: "<TypeName> <fieldName>". If the name is absent the shape
    // is not what we expect; bail instead of letting `slice(0, -1)` silently
    // chop the last character and return a garbage type.
    const nameAt = field.signature.lastIndexOf(field.name);
    if (nameAt < 0) return null;
    const typeRaw = field.signature.slice(0, nameAt).trim();
    if (!typeRaw) return null;

    // Strip generic argument lists innermost-first: a single `<[^>]*>` pass
    // stops at the first `>` and turns `Map<String, List<Foo>>` into `Map>`.
    let typeNoGenerics = typeRaw;
    let previous: string;
    do {
      previous = typeNoGenerics;
      typeNoGenerics = typeNoGenerics.replace(/<[^<>]*>/g, '');
    } while (typeNoGenerics !== previous);
    typeNoGenerics = typeNoGenerics.trim();

    // Drop array (`[]`) and varargs (`...`) markers, then keep the last token
    // after splitting on dots/whitespace (discards modifiers, annotations and
    // package qualification).
    const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
    const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
    const lastPart = parts[parts.length - 1];
    if (!lastPart) return null;
    if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
    return lastPart;
  }
  949. // ── Local-variable receiver-type inference (#1108) ──────────────────────────
  950. //
  951. // Instance calls through a local variable (`const lg = new Logger(); lg.log()`)
  952. // only resolved in C++ before this — no other language could learn the
  953. // receiver's type. Local variables are not indexed as nodes (node-explosion),
  954. // so, like the C++ inferrer above, we read the enclosing function's source and
  955. // match the receiver's declaration/initializer to recover its type. The type is
  956. // then handed to resolveMethodOnType, which VALIDATES that the type actually
  957. // declares the method, so a mis-inference produces NO edge — the safety net
  958. // that lets the patterns below stay simple. C++ keeps its dedicated inferrer
  959. // (header scan + `auto`); this covers every other language.
  // Tokens a loose pattern might capture that are never a user-defined type.
  const NON_TYPE_RECEIVER_TOKENS = new Set([
    'this', 'self', 'super', 'new', 'return', 'await', 'yield', 'typeof',
    'null', 'nil', 'None', 'true', 'false', 'True', 'False', 'undefined',
  ]);

  /**
   * Normalize a captured type expression to a simple type name: drop generic
   * argument lists (nested ones included) and pointer/ref markers, take the last
   * segment of a `.`-, `::`- or `\`-qualified name, and reject obvious non-types.
   *
   * @param raw Type text as captured by a receiver-type pattern.
   * @returns The simple type name, or null when nothing remains or the result is
   *          one of NON_TYPE_RECEIVER_TOKENS.
   */
  function normalizeInferredTypeName(raw: string): string | null {
    // Strip generics innermost-first; a single `<[^>]*>` pass would stop at the
    // first `>` and leave `Box<Vec<Item>>` as `Box>`.
    let stripped = raw;
    let previous: string;
    do {
      previous = stripped;
      stripped = stripped.replace(/<[^<>]*>/g, '');
    } while (stripped !== previous);

    const cleaned = stripped.replace(/[&*]/g, '').trim();
    // `\` is PHP's namespace separator: the PHP pattern captures `\App\Logger`
    // whole, so it has to be split here just like `.` and `::`.
    const seg = cleaned.split(/[.:\\]+/).filter(Boolean).pop();
    if (!seg) return null;
    if (NON_TYPE_RECEIVER_TOKENS.has(seg)) return null;
    return seg;
  }
  /**
   * Per-language patterns that recover a local variable's (or typed parameter's)
   * type from its declaration/initializer. Each regex captures the type in group
   * 1. Patterns are ordered most-specific first. PascalCase is required in the
   * capture where the language convention allows, as a cheap false-positive
   * guard.
   *
   * @param language Language of the call site.
   * @param r        Receiver name, already escaped for use inside a RegExp.
   * @returns The patterns to try in order; empty for languages without patterns.
   */
  function localReceiverTypePatterns(language: Language, r: string): RegExp[] {
    switch (language) {
      case 'typescript':
      case 'javascript':
      case 'tsx':
      case 'jsx':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_$][\\w.$]*)`), // = new Logger()
          new RegExp(`\\b(?:const|let|var)\\s+${r}\\s*:\\s*([A-Z][\\w.$]*)`), // lg: Logger
        ];
      // Python, Kotlin and Swift share the same two shapes.
      case 'python':
      case 'kotlin':
      case 'swift':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // lg = Logger(...)
          new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // lg: Logger / param
        ];
      // Java and C# share the same two shapes.
      case 'java':
      case 'csharp':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`), // = new Logger()
          new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`), // Logger lg; / param
        ];
      case 'rust':
        return [
          // `\\s*` before `=` is essential: without it the unannotated form
          // `let lg = Logger::new()` (space before `=`) never matched, and only
          // `let lg=...` or the annotated `let lg: T = ...` did.
          new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\b(?:\\s*:[^=]+)?\\s*=\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg = Logger::new()/Logger{}/Logger
          new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\s*:\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg: Logger
        ];
      case 'go':
        return [
          new RegExp(`\\b${r}\\b\\s*:=\\s*&?([A-Za-z_][\\w.]*)\\s*{`), // lg := Logger{} / &Logger{}
          new RegExp(`\\bvar\\s+${r}\\s+\\*?([A-Za-z_][\\w.]*)`), // var lg Logger / *Logger
        ];
      case 'ruby':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w:]*)\\.new\\b`), // lg = Logger.new
        ];
      case 'scala':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*(?:new\\s+)?([A-Z][\\w.]*)`), // val lg = new Logger / Logger(...)
          new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`), // val lg: Logger / param
        ];
      case 'dart':
        return [
          new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`), // var lg = Logger(...)
          new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;]`), // Logger lg = ...
        ];
      case 'php':
        return [
          new RegExp(`\\$?${r}\\b\\s*=\\s*new\\s+([A-Za-z_\\\\][\\w\\\\]*)`), // $lg = new Logger()
        ];
      default:
        return [];
    }
  }
  /**
   * 1-based start line of the tightest function/method enclosing the call.
   *
   * Considers only function/method nodes in the reference's language whose line
   * range contains `ref.line` (a node without `endLine` spans just its start
   * line) and returns the greatest start line among them. Falls back to 1 when
   * no such node exists.
   */
  function enclosingScopeStartLine(ref: UnresolvedRef, context: ResolutionContext): number {
    return context.getNodesInFile(ref.filePath).reduce((latestStart, node) => {
      const isCallable = node.kind === 'function' || node.kind === 'method';
      if (!isCallable || node.language !== ref.language) return latestStart;
      const lastLine = node.endLine ?? node.startLine;
      const containsCall = node.startLine <= ref.line && lastLine >= ref.line;
      return containsCall && node.startLine >= latestStart ? node.startLine : latestStart;
    }, 1);
  }
  /**
   * Infer a receiver's type from its local declaration/initializer in the
   * enclosing function body.
   *
   * The scan runs backward from the call line to the first line of the tightest
   * enclosing function/method (or the top of the file when there is none), so the
   * nearest declaration wins and a same-named variable in another function
   * cannot leak in. On each line the language's patterns are tried in order; the
   * first capture that normalizes to a type name is returned.
   *
   * @returns The simple type name, or null when the language has no patterns,
   *          the file cannot be read, or no declaration is found in scope.
   */
  function inferLocalReceiverType(
    receiverName: string,
    ref: UnresolvedRef,
    context: ResolutionContext,
  ): string | null {
    const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const patterns = localReceiverTypePatterns(ref.language, escapedReceiver);
    if (patterns.length === 0) return null;

    const fileText = context.readFile(ref.filePath);
    if (!fileText) return null;

    const fileLines = fileText.split(/\r?\n/);
    let idx = Math.max(0, Math.min(fileLines.length - 1, ref.line - 1));
    const firstIdx = Math.max(0, enclosingScopeStartLine(ref, context) - 1);

    while (idx >= firstIdx) {
      const text = fileLines[idx];
      idx -= 1;
      if (!text) continue;
      for (const pattern of patterns) {
        const captured = pattern.exec(text)?.[1];
        if (!captured) continue;
        const typeName = normalizeInferredTypeName(captured);
        if (typeName) return typeName;
      }
    }
    return null;
  }
  1099. /**
  1100. * Try to resolve by method name on a class/object
  1101. */
  1102. export function matchMethodCall(
  1103. ref: UnresolvedRef,
  1104. context: ResolutionContext
  1105. ): ResolvedRef | null {
  1106. // Parse method call patterns like "obj.method" or "Class::method". The method
  1107. // part allows trailing `:` keywords so Objective-C selectors resolve
  1108. // (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in other
  1109. // languages' method refs, so this is a no-op for them.
  1110. // The receiver allows dots (`builder.Services.AddCoreServices`) so a CHAINED
  1111. // call resolves by its last segment — Strategy 3 below name-matches the method
  1112. // (with its existing single-candidate / receiver-overlap guards). Without this
  1113. // a multi-dot extension-method call (C# DI `builder.Services.AddCoreServices()`,
  1114. // `Guard.Against.X()`) matched no pattern and never resolved.
  1115. const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  1116. const colonMatch = ref.referenceName.match(/^(\w+)::(\w+)$/);
  1117. const match = dotMatch || colonMatch;
  1118. if (!match) {
  1119. return null;
  1120. }
  1121. const [, objectOrClass, methodName] = match;
  1122. // Infer the receiver's type from its local declaration/initializer in the
  1123. // enclosing scope, then resolve the method on that type (#1108). C++ keeps its
  1124. // dedicated inferrer (header scan + `auto`); every other language uses the
  1125. // shared source-based inferrer. resolveMethodOnType validates the method
  1126. // exists on the inferred type, so a mis-inference produces no edge.
  1127. if (dotMatch) {
  1128. const inferredType =
  1129. ref.language === 'cpp'
  1130. ? inferCppReceiverType(objectOrClass!, ref, context)
  1131. : inferLocalReceiverType(objectOrClass!, ref, context);
  1132. if (inferredType) {
  1133. // Java/Kotlin: when two classes share the simple name, the file's import
  1134. // pins WHICH one (#314). Other languages disambiguate by call-site file.
  1135. const importedFqn =
  1136. ref.language === 'java' || ref.language === 'kotlin'
  1137. ? context
  1138. .getImportMappings(ref.filePath, ref.language)
  1139. .find((i) => i.localName === inferredType)?.source
  1140. : undefined;
  1141. const typedMatch = resolveMethodOnType(
  1142. inferredType,
  1143. methodName!,
  1144. ref,
  1145. context,
  1146. 0.9,
  1147. 'instance-method',
  1148. importedFqn,
  1149. );
  1150. if (typedMatch) {
  1151. return typedMatch;
  1152. }
  1153. }
  1154. }
  1155. // Java/Kotlin: receiver may be a field whose name doesn't match the type by
  1156. // Java naming convention (`userbo` → class `UserBO`, abbreviated). Look up
  1157. // the field in the enclosing class to get its declared type, then resolve
  1158. // the method on that type. Covers Spring `@Resource`/`@Autowired` field
  1159. // injection where the field type is the concrete bean class.
  1160. if ((ref.language === 'java' || ref.language === 'kotlin') && dotMatch) {
  1161. const inferredType = inferJavaFieldReceiverType(objectOrClass!, ref, context);
  1162. if (inferredType) {
  1163. // When two classes share the same simple name, the caller file's
  1164. // import is the only signal that names WHICH one — pass the
  1165. // imported FQN so resolveMethodOnType can disambiguate (#314).
  1166. const imports = context.getImportMappings(ref.filePath, ref.language);
  1167. const importedFqn = imports.find((i) => i.localName === inferredType)?.source;
  1168. const typedMatch = resolveMethodOnType(
  1169. inferredType,
  1170. methodName!,
  1171. ref,
  1172. context,
  1173. 0.9,
  1174. 'instance-method',
  1175. importedFqn,
  1176. );
  1177. if (typedMatch) {
  1178. return typedMatch;
  1179. }
  1180. }
  1181. }
  1182. // Strategy 1: Direct class name match (existing logic). When the receiver
  1183. // names a class that exists in several files (`Logger.log()` / `Logger::log()`
  1184. // with a `Logger` in both `a/` and `b/`), try the class in the call site's
  1185. // own file first — otherwise the first-indexed class wins and a call in `b/`
  1186. // resolves to `a/`'s method (#1079).
  1187. const classCandidates = preferCallSiteFile(
  1188. context.getNodesByName(objectOrClass!),
  1189. ref.filePath,
  1190. );
  1191. for (const classNode of classCandidates) {
  1192. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  1193. // Skip cross-language class matches
  1194. if (classNode.language !== ref.language) continue;
  1195. const nodesInFile = context.getNodesInFile(classNode.filePath);
  1196. const methodNode = nodesInFile.find(
  1197. (n) =>
  1198. n.kind === 'method' &&
  1199. n.name === methodName &&
  1200. n.qualifiedName.includes(classNode.name)
  1201. );
  1202. if (methodNode) {
  1203. return {
  1204. original: ref,
  1205. targetNodeId: methodNode.id,
  1206. confidence: 0.85,
  1207. resolvedBy: 'qualified-name',
  1208. };
  1209. }
  1210. }
  1211. }
  1212. // Strategy 2: Instance variable receiver - try capitalized form to find class
  1213. // e.g., "permissionEngine" → look for classes containing "PermissionEngine"
  1214. const capitalizedReceiver = objectOrClass!.charAt(0).toUpperCase() + objectOrClass!.slice(1);
  1215. if (capitalizedReceiver !== objectOrClass) {
  1216. const fuzzyClassCandidates = preferCallSiteFile(
  1217. context.getNodesByName(capitalizedReceiver),
  1218. ref.filePath,
  1219. );
  1220. for (const classNode of fuzzyClassCandidates) {
  1221. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  1222. // Skip cross-language class matches
  1223. if (classNode.language !== ref.language) continue;
  1224. const nodesInFile = context.getNodesInFile(classNode.filePath);
  1225. const methodNode = nodesInFile.find(
  1226. (n) =>
  1227. n.kind === 'method' &&
  1228. n.name === methodName &&
  1229. n.qualifiedName.includes(classNode.name)
  1230. );
  1231. if (methodNode) {
  1232. return {
  1233. original: ref,
  1234. targetNodeId: methodNode.id,
  1235. confidence: 0.8,
  1236. resolvedBy: 'instance-method',
  1237. };
  1238. }
  1239. }
  1240. }
  1241. }
  1242. // Strategy 3: Find methods by name across the codebase, match by receiver
  1243. // name similarity with the containing class. Handles abbreviated variable
  1244. // names like permissionEngine → PermissionRuleEngine.
  1245. if (methodName) {
  1246. const methodCandidates = context.getNodesByName(methodName!);
  1247. // Ubiquitous-method ceiling (#999): a method name re-declared across a
  1248. // vendored theme/SDK (Metronic's `init`/`update`/… on every widget) yields
  1249. // K candidates that receiver-word overlap can't reliably disambiguate —
  1250. // and filtering + scoring all K per call is the O(K²) cost that wedged
  1251. // "Resolving refs" for 15-28 min. Bail before the O(K) work; Strategy 1/2
  1252. // (class-name match) already had their precise shot above.
  1253. if (methodCandidates.length > AMBIGUOUS_NAME_CEILING) {
  1254. return null;
  1255. }
  1256. const methods = methodCandidates.filter(
  1257. (n) => n.kind === 'method' && n.name === methodName
  1258. );
  1259. // Filter to same-language candidates first
  1260. const sameLanguageMethods = methods.filter(m => m.language === ref.language);
  1261. const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;
  1262. // If only one same-language method with this name exists, use it
  1263. if (targetMethods.length === 1 && targetMethods[0]!.language === ref.language) {
  1264. return {
  1265. original: ref,
  1266. targetNodeId: targetMethods[0]!.id,
  1267. confidence: 0.7,
  1268. resolvedBy: 'instance-method',
  1269. };
  1270. }
  1271. // Multiple methods: score by receiver name word overlap with class name
  1272. if (targetMethods.length > 1) {
  1273. const receiverWords = splitCamelCase(objectOrClass!);
  1274. let bestMatch: typeof targetMethods[0] | undefined;
  1275. let bestScore = 0;
  1276. // Same-file candidates first, so a score tie (`score > bestScore` keeps
  1277. // the first seen) resolves to the call site's own file rather than the
  1278. // first-indexed duplicate (#1079).
  1279. for (const method of preferCallSiteFile(targetMethods, ref.filePath)) {
  1280. const classWords = splitCamelCase(method.qualifiedName);
  1281. let score = receiverWords.filter(w =>
  1282. classWords.some(cw => cw.toLowerCase() === w.toLowerCase())
  1283. ).length;
  1284. // Bonus for same language
  1285. if (method.language === ref.language) score += 1;
  1286. if (score > bestScore) {
  1287. bestScore = score;
  1288. bestMatch = method;
  1289. }
  1290. }
  1291. if (bestMatch && bestScore >= 2) {
  1292. return {
  1293. original: ref,
  1294. targetNodeId: bestMatch.id,
  1295. confidence: 0.65,
  1296. resolvedBy: 'instance-method',
  1297. };
  1298. }
  1299. }
  1300. }
  1301. return null;
  1302. }
  /**
   * Break an identifier into its component words.
   *
   * A space is inserted at every lowercase→uppercase boundary (`fooBar` →
   * `foo Bar`) and before the final capital of an uppercase run that is
   * followed by a lowercase letter (`HTTPServer` → `HTTP Server`). The spaced
   * string is then split on runs of whitespace, `.`, `_`, `:`, `/` and `\`.
   * Fragments of one character or less (including empty ones) are discarded.
   */
  function splitCamelCase(str: string): string[] {
    const withBoundaries = str
      .replace(/([a-z])([A-Z])/g, '$1 $2')
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');

    const words: string[] = [];
    for (const fragment of withBoundaries.split(/[\s._:\/\\]+/)) {
      // Single letters carry no signal for word-overlap scoring; drop them.
      if (fragment.length > 1) {
        words.push(fragment);
      }
    }
    return words;
  }
  /**
   * Score how close two files sit in the directory tree.
   *
   * `dir1` is the first file's path already split on `/` with its filename
   * removed; `filePath2` is a full path that is split here and has its filename
   * dropped the same way. The score is 15 points per shared LEADING directory
   * segment, capped at 80 — counting stops at the first segment that differs.
   * Higher score = closer in the directory tree.
   *
   * The first path arrives pre-split because findBestMatch scores every
   * candidate against the SAME `ref.filePath`; re-splitting it per candidate was
   * a hot spot on large repos (#915), so the caller splits once and passes the
   * segments in.
   */
  function pathProximityFromDirs(dir1: string[], filePath2: string): number {
    // Everything but the last segment (the filename).
    const otherDirs = filePath2.split('/').slice(0, -1);
    const maxDepth = Math.min(dir1.length, otherDirs.length);

    // Walk the common prefix; the first mismatch ends the walk.
    let sharedDepth = 0;
    while (sharedDepth < maxDepth && dir1[sharedDepth] === otherDirs[sharedDepth]) {
      sharedDepth += 1;
    }

    // 15 points per shared segment, never more than 80.
    return Math.min(sharedDepth * 15, 80);
  }
  /**
   * Directory proximity score for two full file paths.
   *
   * Strips the filename from `filePath1` and delegates to
   * `pathProximityFromDirs`, so the result reflects how many leading directory
   * segments the two paths have in common.
   */
  function computePathProximity(filePath1: string, filePath2: string): number {
    const dirSegments = filePath1.split('/').slice(0, -1);
    return pathProximityFromDirs(dirSegments, filePath2);
  }
  /**
   * Pick the highest-scoring node among several candidates for a reference.
   *
   * Each candidate accumulates points from these signals:
   *   - same file as the reference ............................. +100
   *   - shared leading directories (pathProximityFromDirs) ..... up to +80
   *   - same language +50, otherwise ........................... −80
   *   - kind fits the reference kind:
   *       `calls`        → function/method ..................... +25
   *       `instantiates` → class/struct/interface .............. +25
   *       `decorates`    → function/method +25, class/interface  +15
   *   - exported ............................................... +10
   *   - same file and a truthy `startLine`: up to +20, shrinking by 1 point
   *     per 10 lines of distance from `ref.line`
   *
   * Ties keep the earliest candidate (strict `>` comparison). A candidate must
   * score above −1 to be chosen; otherwise `null` is returned.
   *
   * @param ref        the unresolved reference being matched
   * @param candidates nodes that could be its target
   * @param _context   unused by this function
   * @returns the best candidate, or `null` when none scores above −1
   */
  function findBestMatch(
    ref: UnresolvedRef,
    candidates: Node[],
    _context: ResolutionContext
  ): Node | null {
    // The ref's directory segments are identical for every candidate, so split
    // once here rather than once per candidate (#915 hot spot).
    const refDirs = ref.filePath.split('/');
    refDirs.pop();

    // A same-language candidate scores at least +50, while a cross-language one
    // tops out at +35 (−80 language, +80 proximity, +25 kind, +10 exported; it
    // can never share the ref's file). So whenever a same-language candidate
    // exists, cross-language ones cannot win and are skipped unscored (#915).
    // If every candidate is cross-language, nothing is skipped.
    const restrictToRefLanguage = candidates.some((c) => c.language === ref.language);

    let winner: Node | null = null;
    let winningScore = -1;

    for (const candidate of candidates) {
      const sameLanguage = candidate.language === ref.language;
      if (restrictToRefLanguage && !sameLanguage) {
        continue;
      }

      const sameFile = candidate.filePath === ref.filePath;
      const isCallable = candidate.kind === 'function' || candidate.kind === 'method';

      // Bonus for a candidate whose kind suits the way it is referenced.
      let kindBonus = 0;
      switch (ref.referenceKind) {
        case 'calls':
          if (isCallable) kindBonus = 25;
          break;
        case 'instantiates':
          // `new Foo()` — without this, a function named `Foo` in another
          // module could outscore the actual class.
          if (
            candidate.kind === 'class' ||
            candidate.kind === 'struct' ||
            candidate.kind === 'interface'
          ) {
            kindBonus = 25;
          }
          break;
        case 'decorates':
          // `@Foo` — functions are preferred; class decorators (Python
          // `@SomeClass`, Java annotation interfaces) get a smaller bonus.
          if (isCallable) {
            kindBonus = 25;
          } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
            kindBonus = 15;
          }
          break;
        default:
          break;
      }

      // Contributions are added in a fixed order so the floating-point total is
      // reproducible.
      let score = sameFile ? 100 : 0;
      score += pathProximityFromDirs(refDirs, candidate.filePath);
      score += sameLanguage ? 50 : -80;
      score += kindBonus;
      if (candidate.isExported) {
        score += 10;
      }
      if (sameFile && candidate.startLine) {
        // Within the ref's own file, nearer declarations score higher.
        const lineDistance = Math.abs(candidate.startLine - ref.line);
        score += Math.max(0, 20 - lineDistance / 10);
      }

      if (score > winningScore) {
        winningScore = score;
        winner = candidate;
      }
    }

    return winner;
  }
  /**
   * Last-resort, case-insensitive name match with low confidence.
   *
   * Looks up nodes whose lowercased name equals the lowercased reference name,
   * keeps only functions, methods and classes, and passes them through
   * `applyLanguageGate`. Same-language survivors are preferred; if there are
   * none, the gated cross-language set is used instead. A match is returned
   * only when exactly ONE candidate remains — confidence 0.5 for a
   * same-language target, 0.3 for a cross-language one. Any other count yields
   * `null`.
   */
  export function matchFuzzy(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    const fuzzyKinds = new Set(['function', 'method', 'class']);

    // The context's lowercase-name lookup avoids scanning every node.
    const sameNameNodes = context.getNodesByLowerName(ref.referenceName.toLowerCase());
    const gated = applyLanguageGate(
      sameNameNodes.filter((node) => fuzzyKinds.has(node.kind)),
      ref,
    );

    // Same-language candidates take precedence over cross-language ones.
    const native = gated.filter((node) => node.language === ref.language);
    const pool = native.length > 0 ? native : gated;

    // Only an unambiguous single survivor is trusted.
    if (pool.length !== 1) {
      return null;
    }

    const target = pool[0]!;
    const confidence = target.language !== ref.language ? 0.3 : 0.5;
    return {
      original: ref,
      targetNodeId: target.id,
      confidence,
      resolvedBy: 'fuzzy',
    };
  }
  /**
   * Resolve a reference by trying each matching strategy in turn and returning
   * the first hit.
   *
   * Order:
   *   - `function_ref` references go ONLY through matchFunctionRef and return
   *     its result directly, hit or miss
   *   0.  file-path match
   *   1.  qualified-name match
   *   1b–1d. one language-specific call-chain matcher, chosen by `ref.language`
   *   2.  method-call pattern
   *   3.  exact name
   *   4.  fuzzy match
   *
   * @returns the first successful resolution, or `null` if every strategy misses
   */
  export function matchReference(
    ref: UnresolvedRef,
    context: ResolutionContext
  ): ResolvedRef | null {
    // Function-as-value refs (#756) never fall through to the qualified/fuzzy
    // strategies below — a wrong callback edge is worse than none.
    if (ref.referenceKind === 'function_ref') {
      return matchFunctionRef(ref, context);
    }

    // 0. File path match (e.g., "snippets/drawer-menu.liquid" → file node)
    const byFilePath = matchByFilePath(ref, context);
    if (byFilePath) {
      return byFilePath;
    }

    // 1. Qualified name match (highest confidence)
    const byQualifiedName = matchByQualifiedName(ref, context);
    if (byQualifiedName) {
      return byQualifiedName;
    }

    // 1b–1d. Chained calls whose receiver is itself a call. The language groups
    // are disjoint, so at most one chain matcher runs per reference.
    let chainHit: ResolvedRef | null = null;
    switch (ref.language) {
      // 1b. C++ / C — `Foo::instance().bar()`, encoded by the extractor as
      // `Foo::instance().bar` (#645). The receiver's type comes from what the
      // inner call returns; the method is then resolved on that type.
      case 'cpp':
      case 'c':
        chainHit = matchCppCallChain(ref, context);
        break;

      // 1c. `::`-scoped factory chain — PHP `Cls::for($x)->method()` (#608) or
      // Rust `Foo::new().bar()`, both encoded as `Cls::factory().method`. The
      // receiver's type is the factory's `self` (PHP `: self`/`: static`, Rust
      // `-> Self`) or its concrete return type.
      case 'php':
      case 'rust':
        chainHit = matchScopedCallChain(ref, context);
        break;

      // 1d. Dotted static-factory / fluent chain (Java, Kotlin, C#, Swift, Go,
      // Scala, Dart, Objective-C, Pascal) — `Foo.getInstance().bar()` encoded as
      // `Foo.getInstance().bar`, Go's bare-factory `New().Method()` as
      // `New().Method`, Scala's companion factory, Dart's static factory /
      // factory-constructor, or ObjC's `[[Foo create] doIt]` encoded as
      // `Foo.create().doIt` (#645/#608 mechanism). The method's class is taken
      // from the inner call's declared return type and then validated.
      case 'java':
      case 'kotlin':
      case 'csharp':
      case 'swift':
      case 'go':
      case 'scala':
      case 'dart':
      case 'objc':
      case 'pascal':
        chainHit = matchDottedCallChain(ref, context);
        break;

      default:
        break;
    }
    if (chainHit) {
      return chainHit;
    }

    // 2. Method call pattern → 3. Exact name → 4. Fuzzy (lowest confidence)
    for (const strategy of [matchMethodCall, matchByExactName, matchFuzzy]) {
      const hit = strategy(ref, context);
      if (hit) {
        return hit;
      }
    }

    return null;
  }