// name-matcher.ts
  1. /**
  2. * Name Matcher
  3. *
  4. * Handles symbol name matching for reference resolution.
  5. */
  6. import { Node } from '../types';
  7. import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
  8. /**
  9. * Try to resolve a path-like reference (e.g., "snippets/drawer-menu.liquid")
  10. * by matching the filename against file nodes.
  11. */
  12. export function matchByFilePath(
  13. ref: UnresolvedRef,
  14. context: ResolutionContext
  15. ): ResolvedRef | null {
  16. // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  17. // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
  18. // basename). A bare ref WITHOUT an extension is a symbol name, not a file, so
  19. // leave it to the symbol-matching strategies.
  20. if (!ref.referenceName.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(ref.referenceName)) {
  21. return null;
  22. }
  23. // Extract the filename from the path
  24. const fileName = ref.referenceName.split('/').pop();
  25. if (!fileName) return null;
  26. // Search for file nodes with this name
  27. const candidates = context.getNodesByName(fileName);
  28. const fileNodes = candidates.filter(n => n.kind === 'file');
  29. if (fileNodes.length === 0) return null;
  30. // Prefer exact path match on qualified_name
  31. const exactMatch = fileNodes.find(n => n.qualifiedName === ref.referenceName || n.filePath === ref.referenceName);
  32. if (exactMatch) {
  33. return {
  34. original: ref,
  35. targetNodeId: exactMatch.id,
  36. confidence: 0.95,
  37. resolvedBy: 'file-path',
  38. };
  39. }
  40. // Fall back to suffix match (e.g., ref="snippets/foo.liquid" matches
  41. // "src/snippets/foo.liquid"). When several files share the basename — a
  42. // `#include "RNCAsyncStorage.h"` with a same-named header on another platform
  43. // (windows/code/ vs apple/) — prefer the one in the includer's own directory,
  44. // then by directory proximity / same language family. A C/C++ include (and any
  45. // bare-filename import) resolves relative to the including file, not to an
  46. // arbitrary same-named header elsewhere in the tree.
  47. const suffixMatches = fileNodes.filter(
  48. n => n.qualifiedName.endsWith(ref.referenceName) || n.filePath.endsWith(ref.referenceName)
  49. );
  50. if (suffixMatches.length > 0) {
  51. return {
  52. original: ref,
  53. targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
  54. confidence: 0.85,
  55. resolvedBy: 'file-path',
  56. };
  57. }
  58. // If only one file node with this name, use it with lower confidence
  59. if (fileNodes.length === 1) {
  60. return {
  61. original: ref,
  62. targetNodeId: fileNodes[0]!.id,
  63. confidence: 0.7,
  64. resolvedBy: 'file-path',
  65. };
  66. }
  67. return null;
  68. }
  69. /**
  70. * Among several file nodes that all match a bare include/import by basename,
  71. * pick the one closest to the referencing file: same directory first, then by
  72. * directory-tree proximity, with the same language family as a tiebreak. A
  73. * C/C++ `#include "X.h"` (and any bare-filename import) resolves relative to the
  74. * including file — not to an arbitrary same-named header on another platform.
  75. */
  76. function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  77. const dirOf = (p: string): string => {
  78. const i = p.lastIndexOf('/');
  79. return i >= 0 ? p.slice(0, i) : '';
  80. };
  81. const refDir = dirOf(ref.filePath);
  82. const sameDir = candidates.filter((c) => dirOf(c.filePath) === refDir);
  83. const pool = sameDir.length > 0 ? sameDir : candidates;
  84. let best = pool[0]!;
  85. let bestScore = -Infinity;
  86. for (const c of pool) {
  87. const score =
  88. computePathProximity(ref.filePath, c.filePath) +
  89. (sameLanguageFamily(c.language, ref.language) ? 5 : 0);
  90. if (score > bestScore) {
  91. bestScore = score;
  92. best = c;
  93. }
  94. }
  95. return best;
  96. }
  97. /**
  98. * Language families that share a type system / runtime, so a same-language-only
  99. * reference may still resolve across them (a Kotlin `Foo.BAR` can name a Java
  100. * `Foo`). Anything not listed forms its own singleton family.
  101. */
  102. const LANGUAGE_FAMILY: Record<string, string> = {
  103. java: 'jvm', kotlin: 'jvm', scala: 'jvm',
  104. swift: 'apple', objc: 'apple',
  105. typescript: 'web', tsx: 'web', javascript: 'web', jsx: 'web',
  106. c: 'c', cpp: 'c',
  107. // Razor/Blazor markup names C# types — same family so `@model Foo` /
  108. // `<MyComponent/>` resolve to their `.cs` class through the cross-family gate.
  109. csharp: 'dotnet', razor: 'dotnet',
  110. };
  111. export function sameLanguageFamily(a: string, b: string): boolean {
  112. if (a === b) return true;
  113. const fa = LANGUAGE_FAMILY[a];
  114. return fa !== undefined && fa === LANGUAGE_FAMILY[b];
  115. }
  116. /**
  117. * True when `lang` belongs to a known multi-language family (jvm/apple/web/c).
  118. * Languages not listed (php, python, go, ruby, rust, dart, …) and config
  119. * formats (yaml/xml/blade) form their own singleton families and return
  120. * `false` — used to leave config↔code framework bridges (whose config side is
  121. * never a known programming-language family) out of the cross-family gate.
  122. */
  123. export function isKnownLanguageFamily(lang: string): boolean {
  124. return LANGUAGE_FAMILY[lang] !== undefined;
  125. }
  126. /**
  127. * True when `a` and `b` are two DIFFERENT *known* language families — the
  128. * signature of a coincidental cross-language name collision (a TS `import
  129. * React` matching a Swift `import React`, a C++ `#include "X.h"` matching a
  130. * same-named ObjC header on another platform). The both-*known* test is
  131. * deliberately weaker than {@link sameLanguageFamily}'s negation: a
  132. * single-file-component language that carries its own tag (`vue`/`svelte`)
  133. * importing a `.ts` module, or any singleton-family language (php/go/ruby/…),
  134. * returns `false` here and is left alone.
  135. */
  136. export function crossesKnownFamily(a: string, b: string): boolean {
  137. return isKnownLanguageFamily(a) && isKnownLanguageFamily(b) && !sameLanguageFamily(a, b);
  138. }
  139. /**
  140. * Drop cross-language candidates from a name lookup. Two regimes:
  141. * - `references` (type-usage): a type named in language X resolves to a
  142. * SAME-family type, never a coincidentally same-named symbol in another
  143. * language (the Android `BatteryManager` system class vs a JS one). Strict
  144. * same-family filter — cross-language communication is `calls`, not refs.
  145. * - `imports` (import binding): an `import`/`#include` never crosses two
  146. * KNOWN families (TS `import React` ↮ Swift `import React`). Weaker
  147. * both-known filter so `.vue`/`.svelte` (own tag) importing `.ts` survives.
  148. */
  149. function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  150. if (ref.referenceKind === 'references') {
  151. return candidates.filter((c) => sameLanguageFamily(c.language, ref.language));
  152. }
  153. if (ref.referenceKind === 'imports') {
  154. return candidates.filter((c) => !crossesKnownFamily(c.language, ref.language));
  155. }
  156. return candidates;
  157. }
  158. /**
  159. * Try to resolve a reference by exact name match
  160. */
  161. export function matchByExactName(
  162. ref: UnresolvedRef,
  163. context: ResolutionContext
  164. ): ResolvedRef | null {
  165. const candidates = applyLanguageGate(context.getNodesByName(ref.referenceName), ref);
  166. if (candidates.length === 0) {
  167. return null;
  168. }
  169. // If only one match, use it — but penalize cross-language matches
  170. if (candidates.length === 1) {
  171. const isCrossLanguage = candidates[0]!.language !== ref.language;
  172. return {
  173. original: ref,
  174. targetNodeId: candidates[0]!.id,
  175. confidence: isCrossLanguage ? 0.5 : 0.9,
  176. resolvedBy: 'exact-match',
  177. };
  178. }
  179. // Multiple matches - try to narrow down
  180. const bestMatch = findBestMatch(ref, candidates, context);
  181. if (bestMatch) {
  182. // Lower confidence when the match is from a distant/unrelated module
  183. const proximity = computePathProximity(ref.filePath, bestMatch.filePath);
  184. const confidence = proximity >= 30 ? 0.7 : 0.4;
  185. return {
  186. original: ref,
  187. targetNodeId: bestMatch.id,
  188. confidence,
  189. resolvedBy: 'exact-match',
  190. };
  191. }
  192. return null;
  193. }
  194. /**
  195. * Try to resolve by qualified name
  196. */
  197. export function matchByQualifiedName(
  198. ref: UnresolvedRef,
  199. context: ResolutionContext
  200. ): ResolvedRef | null {
  201. // Check if the reference name looks qualified (contains :: or .)
  202. if (!ref.referenceName.includes('::') && !ref.referenceName.includes('.')) {
  203. return null;
  204. }
  205. const candidates = context.getNodesByQualifiedName(ref.referenceName);
  206. if (candidates.length === 1) {
  207. return {
  208. original: ref,
  209. targetNodeId: candidates[0]!.id,
  210. confidence: 0.95,
  211. resolvedBy: 'qualified-name',
  212. };
  213. }
  214. // Try partial qualified name match
  215. const parts = ref.referenceName.split(/[:.]/);
  216. const lastName = parts[parts.length - 1];
  217. if (lastName) {
  218. const partialCandidates = context.getNodesByName(lastName);
  219. for (const candidate of partialCandidates) {
  220. if (candidate.qualifiedName.endsWith(ref.referenceName)) {
  221. return {
  222. original: ref,
  223. targetNodeId: candidate.id,
  224. confidence: 0.85,
  225. resolvedBy: 'qualified-name',
  226. };
  227. }
  228. }
  229. }
  230. return null;
  231. }
  232. function resolveMethodOnType(
  233. typeName: string,
  234. methodName: string,
  235. ref: UnresolvedRef,
  236. context: ResolutionContext,
  237. confidence: number,
  238. resolvedBy: ResolvedRef['resolvedBy'],
  239. /**
  240. * Optional FQN that identifies WHICH class declaration `typeName`
  241. * refers to in the caller's file. When multiple candidates share
  242. * the same qualifiedName (`FooConverter::convert` in both
  243. * `dao/converter/` and `service/converter/`), the FQN's
  244. * file-path-suffix picks the right one — the disambiguation
  245. * signal Java imports carry but the call site doesn't (#314).
  246. */
  247. preferredFqn?: string,
  248. /** Recursion guard for the supertype/conformance walk. */
  249. depth = 0,
  250. ): ResolvedRef | null {
  251. // Look up methods by name and match by qualifiedName ending in
  252. // `<typeName>::<methodName>`. This works whether the method is defined
  253. // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  254. // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  255. // The previous same-file approach missed the latter — the typical C++ layout.
  256. const methodCandidates = context.getNodesByName(methodName);
  257. const want = `${typeName}::${methodName}`;
  258. const matches: Node[] = [];
  259. for (const m of methodCandidates) {
  260. if (m.kind !== 'method') continue;
  261. if (m.language !== ref.language) continue;
  262. const qn = m.qualifiedName;
  263. if (qn === want || qn.endsWith(`::${want}`)) {
  264. matches.push(m);
  265. }
  266. }
  267. if (matches.length === 0) {
  268. // Conformance fallback: the method may be defined on a supertype `typeName`
  269. // extends, or on a protocol / trait it conforms to (e.g. a Swift protocol-
  270. // extension method, a C# default-interface or extension method, a Kotlin
  271. // extension on a supertype). Walk supertypes transitively (depth-capped) via
  272. // the resolved implements/extends edges — empty in the first resolution pass,
  273. // populated in the conformance pass. Still VALIDATED (the method must exist on
  274. // a supertype), so a wrong inference produces no edge.
  275. if (depth < 4 && context.getSupertypes) {
  276. for (const supertype of context.getSupertypes(typeName, ref.language)) {
  277. const via = resolveMethodOnType(
  278. supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
  279. );
  280. if (via) return via;
  281. }
  282. }
  283. return null;
  284. }
  285. if (matches.length > 1 && preferredFqn) {
  286. const ext = ref.language === 'kotlin' ? '.kt' : '.java';
  287. const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
  288. const chosen = matches.find((m) => {
  289. const fp = m.filePath.replace(/\\/g, '/');
  290. return fp.endsWith(fqnPath) || fp.endsWith('/' + fqnPath);
  291. });
  292. if (chosen) {
  293. return {
  294. original: ref,
  295. targetNodeId: chosen.id,
  296. confidence,
  297. resolvedBy,
  298. };
  299. }
  300. }
  301. return {
  302. original: ref,
  303. targetNodeId: matches[0]!.id,
  304. confidence,
  305. resolvedBy,
  306. };
  307. }
  308. // C++ keywords/control-flow tokens that can appear right before a receiver
  309. // (e.g. `return ptr->m()`) and must NOT be treated as a type.
  310. const CPP_NON_TYPE_TOKENS = new Set([
  311. 'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  312. 'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await', 'co_yield',
  313. 'co_return', 'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
  314. 'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
  315. ]);
  316. function normalizeCppTypeName(typeName: string): string | null {
  317. const normalized = typeName
  318. .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
  319. .replace(/[&*]+/g, ' ')
  320. .replace(/<[^>]*>/g, ' ')
  321. .replace(/\s+/g, ' ')
  322. .trim();
  323. if (!normalized) return null;
  324. const parts = normalized.split(/::/).filter(Boolean);
  325. const last = parts[parts.length - 1];
  326. if (!last) return null;
  327. if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  328. return last;
  329. }
  330. // Declarator regex: matches `Type receiver`, `Type* receiver`, `Type *receiver`,
  331. // `Type*receiver`, `Type<X> receiver`, etc., REQUIRING a declarator terminator
  332. // (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end-of-line) after the receiver. The
  333. // terminator rules out uses like `return receiver->m()` where the preceding
  334. // token is a keyword, not a type.
  335. function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  336. return new RegExp(
  337. `([A-Za-z_][\\w:]*(?:\\s*<[^;=(){}]+>)?(?:\\s*[*&]+)?)\\s*\\b${escapedReceiver}\\b\\s*(?=[;=,)\\[{(]|$)`,
  338. );
  339. }
  340. function inferCppReceiverType(
  341. receiverName: string,
  342. ref: UnresolvedRef,
  343. context: ResolutionContext,
  344. depth = 0,
  345. ): string | null {
  346. const source = context.readFile(ref.filePath);
  347. if (!source) return null;
  348. const lines = source.split(/\r?\n/);
  349. const callLineIndex = Math.max(0, Math.min(lines.length - 1, ref.line - 1));
  350. const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  351. const receiverPattern = new RegExp(`\\b${escapedReceiver}\\b`);
  352. const declaratorRegex = buildDeclaratorRegex(escapedReceiver);
  353. for (let i = callLineIndex; i >= 0; i--) {
  354. const line = lines[i];
  355. if (!line || !receiverPattern.test(line)) continue;
  356. const declaratorMatch = line.match(declaratorRegex);
  357. if (declaratorMatch) {
  358. const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
  359. if (normalized === 'auto') {
  360. // `auto x = Foo::instance();` — the declared type is deduced; recover it
  361. // from the initializer (call return type / construction) (#645).
  362. const initType = inferCppAutoInitializerType(line, receiverName, ref, context, depth);
  363. if (initType) return initType;
  364. // No usable initializer on this line — keep scanning earlier ones.
  365. } else if (normalized) {
  366. return normalized;
  367. }
  368. }
  369. }
  370. const headerCandidates = [
  371. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.h'),
  372. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.hpp'),
  373. ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, '.hxx'),
  374. ].filter((candidate, index, arr) => arr.indexOf(candidate) === index && candidate !== ref.filePath);
  375. for (const headerPath of headerCandidates) {
  376. if (!context.fileExists(headerPath)) continue;
  377. const headerSource = context.readFile(headerPath);
  378. if (!headerSource) continue;
  379. for (const line of headerSource.split(/\r?\n/)) {
  380. if (!receiverPattern.test(line)) continue;
  381. const declaratorMatch = line.match(declaratorRegex);
  382. if (!declaratorMatch) continue;
  383. const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
  384. if (normalized && normalized !== 'auto') return normalized;
  385. }
  386. }
  387. return null;
  388. }
  389. /**
  390. * Last `::`-separated segment of a (possibly namespace-qualified) C++ name.
  391. */
  392. function cppLastSegment(name: string): string {
  393. const parts = name.split('::').filter(Boolean);
  394. return parts[parts.length - 1] ?? name;
  395. }
  396. /**
  397. * Return type captured at extraction for `Class::method` (or a free function),
  398. * read off the indexed node's `returnType` — used by the C++ (#645) and PHP
  399. * (#608) chained-call resolvers. Language-filtered. Null when not indexed or no
  400. * return type was recorded (a `void`/primitive return).
  401. */
  402. function lookupCalleeReturnType(
  403. callee: string,
  404. ref: UnresolvedRef,
  405. context: ResolutionContext,
  406. ): string | null {
  407. let method = callee;
  408. let cls: string | null = null;
  409. if (callee.includes('::')) {
  410. const parts = callee.split('::').filter(Boolean);
  411. method = parts[parts.length - 1] ?? callee;
  412. cls = parts.slice(0, -1).join('::');
  413. }
  414. const candidates = context.getNodesByName(method).filter(
  415. (n) =>
  416. (n.kind === 'method' || n.kind === 'function') &&
  417. n.language === ref.language &&
  418. !!n.returnType,
  419. );
  420. if (cls) {
  421. const want = `${cls}::${method}`;
  422. // The call site may name the class with MORE namespace qualification than
  423. // the stored node (`details::registry::instance` at the call vs
  424. // `registry::instance` on the node — the receiver type only carries the
  425. // immediate class), or LESS. Accept an exact match or either being a
  426. // namespace-suffix of the other; the shared `::<class>::<method>` tail keeps
  427. // it specific.
  428. const m = candidates.find(
  429. (n) =>
  430. n.qualifiedName === want ||
  431. n.qualifiedName.endsWith(`::${want}`) ||
  432. want.endsWith(`::${n.qualifiedName}`),
  433. );
  434. return m?.returnType ?? null;
  435. }
  436. return candidates.find((n) => n.kind === 'function')?.returnType ?? null;
  437. }
  438. /** Does the graph contain a class/struct named `name`'s last segment? */
  439. function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  440. const last = cppLastSegment(name);
  441. return context
  442. .getNodesByName(last)
  443. .some((n) => (n.kind === 'class' || n.kind === 'struct') && n.language === ref.language);
  444. }
  445. /**
  446. * Infer the class produced by a C++ call/construction expression, using return
  447. * types captured at extraction (#645). Handles, in order:
  448. * - `make_unique<T>()` / `make_shared<T>()` → T
  449. * - single-level member call `recv.method()` → recv's type, then method's return
  450. * - `Class::method()` / free `func()` → the callee's recorded return type
  451. * - direct construction `Type()` / `ns::Type()` → Type
  452. * Returns null when undeterminable. Callers MUST still validate the outer method
  453. * exists on the result before creating an edge, so a wrong guess stays silent.
  454. */
  455. function resolveCppCallResultType(
  456. inner: string,
  457. ref: UnresolvedRef,
  458. context: ResolutionContext,
  459. depth = 0,
  460. ): string | null {
  461. if (depth > 3) return null; // guard against pathological mutual recursion
  462. const expr = inner.trim();
  463. const make = expr.match(/(?:^|::)(?:make_unique|make_shared)\s*<\s*([A-Za-z_]\w*)/);
  464. if (make) return make[1] ?? null;
  465. // Single-level member call `recv.method` (the `manager.view().render()` shape).
  466. const dotIdx = expr.lastIndexOf('.');
  467. if (dotIdx > 0) {
  468. const recv = expr.slice(0, dotIdx);
  469. const method = expr.slice(dotIdx + 1);
  470. if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
  471. const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
  472. if (!recvType) return null;
  473. return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  474. }
  475. const ret = lookupCalleeReturnType(expr, ref, context);
  476. if (ret) return ret;
  477. // Direct construction — the callee itself names a class/struct.
  478. if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  479. return null;
  480. }
  481. /**
  482. * Recover the type of an `auto`-declared local from its initializer on the
  483. * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
  484. * `auto p = new W();`, `auto w = Widget();` (#645).
  485. */
  486. function inferCppAutoInitializerType(
  487. line: string,
  488. receiverName: string,
  489. ref: UnresolvedRef,
  490. context: ResolutionContext,
  491. depth: number,
  492. ): string | null {
  493. const escaped = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  494. const m = line.match(new RegExp(`\\b${escaped}\\b\\s*=\\s*([^;]+)`));
  495. if (!m || !m[1]) return null;
  496. const init = m[1].trim();
  497. const neu = init.match(/^new\s+([A-Za-z_][\w:]*)/);
  498. if (neu && neu[1]) return cppLastSegment(neu[1]);
  499. // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  500. const call = init.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/);
  501. if (call && call[1]) return resolveCppCallResultType(call[1].replace(/\s+/g, ''), ref, context, depth + 1);
  502. return null;
  503. }
  504. /**
  505. * Resolve a C++ chained call whose receiver is itself a call — encoded by the
  506. * extractor as `<innerCallee>().<method>` (#645). The receiver's type is what
  507. * the inner call returns; the outer method is then resolved and VALIDATED on it
  508. * (resolveMethodOnType requires `cls::method` to exist), so a wrong inference
  509. * produces no edge rather than a wrong one.
  510. */
  511. export function matchCppCallChain(
  512. ref: UnresolvedRef,
  513. context: ResolutionContext,
  514. ): ResolvedRef | null {
  515. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  516. if (!m || !m[1] || !m[2]) return null;
  517. const cls = resolveCppCallResultType(m[1], ref, context);
  518. if (!cls) return null;
  519. return resolveMethodOnType(cls, m[2], ref, context, 0.85, 'instance-method');
  520. }
  521. /**
  522. * Resolve a PHP fluent static-factory chain whose receiver is a static call —
  523. * `Cls::for($x)->method()`, encoded by the extractor as `Cls::for().method`
  524. * (#608, the per-credential Laravel client idiom). The receiver's type is what
  525. * `Cls::for` returns: a `: self` / `: static` resolves to `Cls` itself, a
  526. * concrete `: Type` to that type. The outer method is then resolved and
  527. * VALIDATED on it (resolveMethodOnType requires the method to exist), so a
  528. * wrong inference yields no edge rather than a wrong one.
  529. */
  530. export function matchPhpCallChain(
  531. ref: UnresolvedRef,
  532. context: ResolutionContext,
  533. ): ResolvedRef | null {
  534. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  535. if (!m || !m[1] || !m[2]) return null;
  536. const inner = m[1];
  537. const method = m[2];
  538. if (!inner.includes('::')) return null; // only static-factory (`Cls::method`) chains
  539. const factoryClass = inner.slice(0, inner.lastIndexOf('::'));
  540. const ret = lookupCalleeReturnType(inner, ref, context);
  541. if (!ret) return null;
  542. // `self` (the extractor's marker for self/static/$this) → the factory's class.
  543. const resolvedClass = ret === 'self' ? factoryClass : ret;
  544. return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
  545. }
  546. /**
  547. * Resolve a dotted chained call whose receiver is a static factory / fluent call —
  548. * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
  549. * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
  550. * (its declared return type); the outer method is then resolved and VALIDATED on
  551. * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
  552. * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
  553. * unrelated class is never matched). Shared by the dot-notation languages
  554. * (Java, Kotlin, C#) — same receiver shape, same `Class::method` qualified names.
  555. */
  556. export function matchDottedCallChain(
  557. ref: UnresolvedRef,
  558. context: ResolutionContext,
  559. ): ResolvedRef | null {
  560. const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  561. if (!m || !m[1] || !m[2]) return null;
  562. const inner = m[1]; // `Foo.getInstance`
  563. const method = m[2]; // `bar`
  564. const lastDot = inner.lastIndexOf('.');
  565. // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
  566. // capitalized inner is a class construction, so the receiver's type is the
  567. // class itself — resolve the method on it. Kotlin only: there an unprefixed
  568. // capitalized call constructs the class, whereas in Java a bare `Foo()` is a
  569. // method call (constructors need `new`), so we must not assume construction.
  570. // A lowercase bare inner is a top-level `factory().method()` whose type we
  571. // can't recover — bail.
  572. if (lastDot <= 0) {
  573. if (ref.language !== 'kotlin' || !/^[A-Z]/.test(inner)) return null;
  574. return resolveMethodOnType(inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context));
  575. }
  576. // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
  577. // type is what `Receiver.factory` returns (its declared return type).
  578. const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
  579. const factoryMethod = inner.slice(lastDot + 1);
  580. if (!factoryClass || !factoryMethod) return null;
  581. const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
  582. if (!ret) return null;
  583. return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
  584. }
  585. /**
  586. * When several classes share a simple type name, the caller file's import of
  587. * that type is the only signal that names WHICH one (#314). Returns the imported
  588. * FQN for `typeName` in the ref's file, or undefined.
  589. */
  590. function importedFqnOf(
  591. typeName: string,
  592. ref: UnresolvedRef,
  593. context: ResolutionContext,
  594. ): string | undefined {
  595. const imports = context.getImportMappings(ref.filePath, ref.language);
  596. return imports.find((i) => i.localName === typeName)?.source;
  597. }
  598. /**
  599. * Java/Kotlin: infer a receiver's declared type by walking field declarations
  600. * in the class enclosing the call site. The field's `signature` is already in
  601. * the form "<TypeName> <fieldName>" (set by tree-sitter.ts extractField), so we
  602. * pull the type from there. Handles Spring `@Resource UserBO userbo;` /
  603. * `@Autowired private UserService userService;` where the receiver field name
  604. * doesn't match the class name by Java naming convention.
  605. *
  606. * Returns the bare type name (generics stripped, dotted package stripped) or
  607. * null when no matching field is in the enclosing class.
  608. */
  609. function inferJavaFieldReceiverType(
  610. receiverName: string,
  611. ref: UnresolvedRef,
  612. context: ResolutionContext,
  613. ): string | null {
  614. const inFile = context.getNodesInFile(ref.filePath);
  615. if (inFile.length === 0) return null;
  616. // Find the class enclosing the call line (tightest match by latest start).
  617. let enclosing: Node | null = null;
  618. for (const n of inFile) {
  619. if (n.kind !== 'class' && n.kind !== 'interface') continue;
  620. if (n.language !== ref.language) continue;
  621. const end = n.endLine ?? n.startLine;
  622. if (n.startLine <= ref.line && end >= ref.line) {
  623. if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
  624. }
  625. }
  626. if (!enclosing) return null;
  627. const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
  628. const field = inFile.find(
  629. (n) =>
  630. n.kind === 'field' &&
  631. n.name === receiverName &&
  632. n.language === ref.language &&
  633. n.startLine >= enclosing.startLine &&
  634. (n.endLine ?? n.startLine) <= enclosingEnd,
  635. );
  636. if (!field || !field.signature) return null;
  637. // Signature shape: "<TypeName> <fieldName>" (extractField). Pull the type,
  638. // strip generics + dotted package, drop array/varargs markers.
  639. const beforeName = field.signature.slice(
  640. 0,
  641. field.signature.lastIndexOf(field.name),
  642. );
  643. const typeRaw = beforeName.trim();
  644. if (!typeRaw) return null;
  645. const typeNoGenerics = typeRaw.replace(/<[^>]*>/g, '').trim();
  646. const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
  647. const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  648. const lastPart = parts[parts.length - 1];
  649. if (!lastPart) return null;
  650. if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  651. return lastPart;
  652. }
  653. /**
  654. * Try to resolve by method name on a class/object
  655. */
  656. export function matchMethodCall(
  657. ref: UnresolvedRef,
  658. context: ResolutionContext
  659. ): ResolvedRef | null {
  660. // Parse method call patterns like "obj.method" or "Class::method". The method
  661. // part allows trailing `:` keywords so Objective-C selectors resolve
  662. // (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in other
  663. // languages' method refs, so this is a no-op for them.
  664. // The receiver allows dots (`builder.Services.AddCoreServices`) so a CHAINED
  665. // call resolves by its last segment — Strategy 3 below name-matches the method
  666. // (with its existing single-candidate / receiver-overlap guards). Without this
  667. // a multi-dot extension-method call (C# DI `builder.Services.AddCoreServices()`,
  668. // `Guard.Against.X()`) matched no pattern and never resolved.
  669. const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  670. const colonMatch = ref.referenceName.match(/^(\w+)::(\w+)$/);
  671. const match = dotMatch || colonMatch;
  672. if (!match) {
  673. return null;
  674. }
  675. const [, objectOrClass, methodName] = match;
  676. if (ref.language === 'cpp' && dotMatch) {
  677. const inferredType = inferCppReceiverType(objectOrClass!, ref, context);
  678. if (inferredType) {
  679. const typedMatch = resolveMethodOnType(
  680. inferredType,
  681. methodName!,
  682. ref,
  683. context,
  684. 0.9,
  685. 'instance-method',
  686. );
  687. if (typedMatch) {
  688. return typedMatch;
  689. }
  690. }
  691. }
  692. // Java/Kotlin: receiver may be a field whose name doesn't match the type by
  693. // Java naming convention (`userbo` → class `UserBO`, abbreviated). Look up
  694. // the field in the enclosing class to get its declared type, then resolve
  695. // the method on that type. Covers Spring `@Resource`/`@Autowired` field
  696. // injection where the field type is the concrete bean class.
  697. if ((ref.language === 'java' || ref.language === 'kotlin') && dotMatch) {
  698. const inferredType = inferJavaFieldReceiverType(objectOrClass!, ref, context);
  699. if (inferredType) {
  700. // When two classes share the same simple name, the caller file's
  701. // import is the only signal that names WHICH one — pass the
  702. // imported FQN so resolveMethodOnType can disambiguate (#314).
  703. const imports = context.getImportMappings(ref.filePath, ref.language);
  704. const importedFqn = imports.find((i) => i.localName === inferredType)?.source;
  705. const typedMatch = resolveMethodOnType(
  706. inferredType,
  707. methodName!,
  708. ref,
  709. context,
  710. 0.9,
  711. 'instance-method',
  712. importedFqn,
  713. );
  714. if (typedMatch) {
  715. return typedMatch;
  716. }
  717. }
  718. }
  719. // Strategy 1: Direct class name match (existing logic)
  720. const classCandidates = context.getNodesByName(objectOrClass!);
  721. for (const classNode of classCandidates) {
  722. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  723. // Skip cross-language class matches
  724. if (classNode.language !== ref.language) continue;
  725. const nodesInFile = context.getNodesInFile(classNode.filePath);
  726. const methodNode = nodesInFile.find(
  727. (n) =>
  728. n.kind === 'method' &&
  729. n.name === methodName &&
  730. n.qualifiedName.includes(classNode.name)
  731. );
  732. if (methodNode) {
  733. return {
  734. original: ref,
  735. targetNodeId: methodNode.id,
  736. confidence: 0.85,
  737. resolvedBy: 'qualified-name',
  738. };
  739. }
  740. }
  741. }
  742. // Strategy 2: Instance variable receiver - try capitalized form to find class
  743. // e.g., "permissionEngine" → look for classes containing "PermissionEngine"
  744. const capitalizedReceiver = objectOrClass!.charAt(0).toUpperCase() + objectOrClass!.slice(1);
  745. if (capitalizedReceiver !== objectOrClass) {
  746. const fuzzyClassCandidates = context.getNodesByName(capitalizedReceiver);
  747. for (const classNode of fuzzyClassCandidates) {
  748. if (classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface') {
  749. // Skip cross-language class matches
  750. if (classNode.language !== ref.language) continue;
  751. const nodesInFile = context.getNodesInFile(classNode.filePath);
  752. const methodNode = nodesInFile.find(
  753. (n) =>
  754. n.kind === 'method' &&
  755. n.name === methodName &&
  756. n.qualifiedName.includes(classNode.name)
  757. );
  758. if (methodNode) {
  759. return {
  760. original: ref,
  761. targetNodeId: methodNode.id,
  762. confidence: 0.8,
  763. resolvedBy: 'instance-method',
  764. };
  765. }
  766. }
  767. }
  768. }
  769. // Strategy 3: Find methods by name across the codebase, match by receiver
  770. // name similarity with the containing class. Handles abbreviated variable
  771. // names like permissionEngine → PermissionRuleEngine.
  772. if (methodName) {
  773. const methodCandidates = context.getNodesByName(methodName!);
  774. const methods = methodCandidates.filter(
  775. (n) => n.kind === 'method' && n.name === methodName
  776. );
  777. // Filter to same-language candidates first
  778. const sameLanguageMethods = methods.filter(m => m.language === ref.language);
  779. const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;
  780. // If only one same-language method with this name exists, use it
  781. if (targetMethods.length === 1 && targetMethods[0]!.language === ref.language) {
  782. return {
  783. original: ref,
  784. targetNodeId: targetMethods[0]!.id,
  785. confidence: 0.7,
  786. resolvedBy: 'instance-method',
  787. };
  788. }
  789. // Multiple methods: score by receiver name word overlap with class name
  790. if (targetMethods.length > 1) {
  791. const receiverWords = splitCamelCase(objectOrClass!);
  792. let bestMatch: typeof targetMethods[0] | undefined;
  793. let bestScore = 0;
  794. for (const method of targetMethods) {
  795. const classWords = splitCamelCase(method.qualifiedName);
  796. let score = receiverWords.filter(w =>
  797. classWords.some(cw => cw.toLowerCase() === w.toLowerCase())
  798. ).length;
  799. // Bonus for same language
  800. if (method.language === ref.language) score += 1;
  801. if (score > bestScore) {
  802. bestScore = score;
  803. bestMatch = method;
  804. }
  805. }
  806. if (bestMatch && bestScore >= 2) {
  807. return {
  808. original: ref,
  809. targetNodeId: bestMatch.id,
  810. confidence: 0.65,
  811. resolvedBy: 'instance-method',
  812. };
  813. }
  814. }
  815. }
  816. return null;
  817. }
  818. /**
  819. * Split a camelCase or PascalCase string into words.
  820. */
  821. function splitCamelCase(str: string): string[] {
  822. return str.replace(/([a-z])([A-Z])/g, '$1 $2')
  823. .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
  824. .split(/[\s._:\/\\]+/)
  825. .filter(w => w.length > 1);
  826. }
  827. /**
  828. * Compute directory proximity between two file paths.
  829. * Returns a score based on the number of shared directory segments.
  830. * Higher score = closer in directory tree.
  831. */
  832. function computePathProximity(filePath1: string, filePath2: string): number {
  833. const dir1 = filePath1.split('/').slice(0, -1);
  834. const dir2 = filePath2.split('/').slice(0, -1);
  835. let shared = 0;
  836. for (let i = 0; i < Math.min(dir1.length, dir2.length); i++) {
  837. if (dir1[i] === dir2[i]) {
  838. shared++;
  839. } else {
  840. break;
  841. }
  842. }
  843. // Each shared directory segment contributes 15 points, capped at 80
  844. return Math.min(shared * 15, 80);
  845. }
  846. /**
  847. * Find the best matching node when there are multiple candidates
  848. */
  849. function findBestMatch(
  850. ref: UnresolvedRef,
  851. candidates: Node[],
  852. _context: ResolutionContext
  853. ): Node | null {
  854. // Prioritization rules:
  855. // 1. Same file > different file
  856. // 2. Directory proximity (same module/package > different module)
  857. // 3. Same language > different language
  858. // 4. Functions/methods > classes/types (for call references)
  859. // 5. Exported > non-exported
  860. let bestScore = -1;
  861. let bestNode: Node | null = null;
  862. for (const candidate of candidates) {
  863. let score = 0;
  864. // Same file bonus
  865. if (candidate.filePath === ref.filePath) {
  866. score += 100;
  867. }
  868. // Directory proximity bonus — strongly prefer same module/package
  869. score += computePathProximity(ref.filePath, candidate.filePath);
  870. // Language matching: strongly prefer same language, penalize cross-language
  871. if (candidate.language === ref.language) {
  872. score += 50;
  873. } else {
  874. score -= 80;
  875. }
  876. // For call references, prefer functions/methods
  877. if (ref.referenceKind === 'calls') {
  878. if (candidate.kind === 'function' || candidate.kind === 'method') {
  879. score += 25;
  880. }
  881. }
  882. // For instantiation references (`new Foo()`), prefer class-like
  883. // targets — without this, a function named `Foo` in another module
  884. // could outscore the actual class.
  885. if (ref.referenceKind === 'instantiates') {
  886. if (
  887. candidate.kind === 'class' ||
  888. candidate.kind === 'struct' ||
  889. candidate.kind === 'interface'
  890. ) {
  891. score += 25;
  892. }
  893. }
  894. // For decorator references (`@Foo`), prefer functions. Class
  895. // decorators (Python `@SomeClass`, Java annotation interfaces)
  896. // also resolve here, hence the smaller class bonus.
  897. if (ref.referenceKind === 'decorates') {
  898. if (candidate.kind === 'function' || candidate.kind === 'method') {
  899. score += 25;
  900. } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
  901. score += 15;
  902. }
  903. }
  904. // Exported bonus
  905. if (candidate.isExported) {
  906. score += 10;
  907. }
  908. // Closer line number (within same file)
  909. if (candidate.filePath === ref.filePath && candidate.startLine) {
  910. const distance = Math.abs(candidate.startLine - ref.line);
  911. score += Math.max(0, 20 - distance / 10);
  912. }
  913. if (score > bestScore) {
  914. bestScore = score;
  915. bestNode = candidate;
  916. }
  917. }
  918. return bestNode;
  919. }
  920. /**
  921. * Fuzzy match - last resort with lower confidence
  922. */
  923. export function matchFuzzy(
  924. ref: UnresolvedRef,
  925. context: ResolutionContext
  926. ): ResolvedRef | null {
  927. const lowerName = ref.referenceName.toLowerCase();
  928. // Use pre-built lowercase index for O(1) lookup instead of scanning all nodes
  929. const candidates = context.getNodesByLowerName(lowerName);
  930. // Filter to callable kinds only (function, method, class)
  931. const callableKinds = new Set(['function', 'method', 'class']);
  932. const callableCandidates = applyLanguageGate(candidates.filter((n) => callableKinds.has(n.kind)), ref);
  933. // Prefer same-language matches
  934. const sameLanguageCandidates = callableCandidates.filter(n => n.language === ref.language);
  935. const finalCandidates = sameLanguageCandidates.length > 0 ? sameLanguageCandidates : callableCandidates;
  936. if (finalCandidates.length === 1) {
  937. const isCrossLanguage = finalCandidates[0]!.language !== ref.language;
  938. return {
  939. original: ref,
  940. targetNodeId: finalCandidates[0]!.id,
  941. confidence: isCrossLanguage ? 0.3 : 0.5,
  942. resolvedBy: 'fuzzy',
  943. };
  944. }
  945. return null;
  946. }
  947. /**
  948. * Match all strategies in order of confidence
  949. */
  950. export function matchReference(
  951. ref: UnresolvedRef,
  952. context: ResolutionContext
  953. ): ResolvedRef | null {
  954. // Try strategies in order of confidence
  955. let result: ResolvedRef | null;
  956. // 0. File path match (e.g., "snippets/drawer-menu.liquid" → file node)
  957. result = matchByFilePath(ref, context);
  958. if (result) return result;
  959. // 1. Qualified name match (highest confidence)
  960. result = matchByQualifiedName(ref, context);
  961. if (result) return result;
  962. // 1b. C++ chained call whose receiver is another call — `Foo::instance().bar()`
  963. // encoded as `Foo::instance().bar` by the extractor (#645). Resolve the
  964. // receiver's type from what the inner call returns, then the method on it.
  965. if (ref.language === 'cpp' || ref.language === 'c') {
  966. result = matchCppCallChain(ref, context);
  967. if (result) return result;
  968. }
  969. // 1c. PHP fluent static-factory chain — `Cls::for($x)->method()` encoded as
  970. // `Cls::for().method` (#608). Same idea as 1b: the receiver's type is the
  971. // factory's `: self` / `: Type` return.
  972. if (ref.language === 'php') {
  973. result = matchPhpCallChain(ref, context);
  974. if (result) return result;
  975. }
  976. // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C#) —
  977. // `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar` (#645/#608
  978. // mechanism). Resolve bar's class from getInstance's declared return type, then
  979. // validate the method on it.
  980. if (ref.language === 'java' || ref.language === 'kotlin' || ref.language === 'csharp') {
  981. result = matchDottedCallChain(ref, context);
  982. if (result) return result;
  983. }
  984. // 2. Method call pattern
  985. result = matchMethodCall(ref, context);
  986. if (result) return result;
  987. // 3. Exact name match
  988. result = matchByExactName(ref, context);
  989. if (result) return result;
  990. // 4. Fuzzy match (lowest confidence)
  991. result = matchFuzzy(ref, context);
  992. if (result) return result;
  993. return null;
  994. }