/**
 * Name Matcher
 *
 * Handles symbol name matching for reference resolution.
 */
- import { Language, Node } from '../types';
- import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
/**
 * Upper bound on how many same-named definitions a FUZZY name-match strategy
 * is willing to score.
 *
 * A name defined more often than this is treated as "ubiquitous": a
 * method/symbol re-declared all over a vendored theme or SDK (for example
 * `init`/`update`/`render` on every widget of a committed Metronic theme —
 * #999). Neither directory proximity nor receiver-word overlap can reliably
 * single out THE right target among thousands, so the fuzzy strategies
 * (matchByExactName's findBestMatch and matchMethodCall Strategy 3) decline
 * above the ceiling rather than emit a low-confidence, almost-certainly-wrong
 * edge.
 *
 * The ceiling also bounds their per-ref cost at O(ceiling). Without it, K
 * same-named refs each scored K candidates — the O(K²) blow-up that pinned a
 * core for 15-28 min at "Resolving refs … 94%" on a repo vendoring a large
 * JS/TS theme (#999).
 *
 * The PRECISE strategies are not affected: qualified-name, import-based and
 * class-name (Strategy 1/2) resolution still run and still resolve a
 * ubiquitous name whenever the context names its exact target. Real repos top
 * out near ~40 same-named methods, so only bulk-vendored code ever reaches the
 * ceiling. Override with `CODEGRAPH_AMBIGUOUS_NAME_CEILING`.
 */
const DEFAULT_AMBIGUOUS_NAME_CEILING = 500;

/**
 * Read the ceiling from `CODEGRAPH_AMBIGUOUS_NAME_CEILING`.
 *
 * @returns The parsed override when it is a finite integer greater than zero;
 *          otherwise (unset, empty, non-numeric, zero or negative) the default.
 */
function resolveAmbiguousNameCeiling(): number {
  const override = process.env.CODEGRAPH_AMBIGUOUS_NAME_CEILING;
  if (override) {
    const value = Number.parseInt(override, 10);
    if (Number.isFinite(value) && value > 0) {
      return value;
    }
  }
  return DEFAULT_AMBIGUOUS_NAME_CEILING;
}

// Resolved once at module load; later changes to the env var are not observed.
const AMBIGUOUS_NAME_CEILING = resolveAmbiguousNameCeiling();
/**
 * Try to resolve a path-like reference (e.g. "snippets/drawer-menu.liquid")
 * by matching its filename against file nodes.
 *
 * Resolution order, highest confidence first:
 *   1. exact match on `qualifiedName` or `filePath`            → 0.95
 *   2. suffix match on a path-segment boundary, closest file   → 0.85
 *   3. the only file node carrying that basename               → 0.70
 *
 * @param ref     The unresolved reference; `referenceName` is the path text.
 * @param context Lookup context; only `getNodesByName` is used directly.
 * @returns The resolved reference, or `null` when the name is not file-like
 *          or no file node can be picked.
 */
export function matchByFilePath(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const refPath = ref.referenceName;

  // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  // (`Foo.h` — an Objective-C `#import "Foo.h"`, resolved to the header by
  // basename). A bare ref WITHOUT an extension is a symbol name, not a file,
  // so it is left to the symbol-matching strategies.
  const isPathLike = refPath.includes('/');
  const hasShortExtension = /\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(refPath);
  if (!isPathLike && !hasShortExtension) {
    return null;
  }

  // The basename is what file nodes are indexed under.
  const fileName = refPath.split('/').pop();
  if (!fileName) return null;

  const fileNodes = context.getNodesByName(fileName).filter((n) => n.kind === 'file');
  if (fileNodes.length === 0) return null;

  // 1. Exact path match.
  const exactMatch = fileNodes.find(
    (n) => n.qualifiedName === refPath || n.filePath === refPath
  );
  if (exactMatch) {
    return {
      original: ref,
      targetNodeId: exactMatch.id,
      confidence: 0.95,
      resolvedBy: 'file-path',
    };
  }

  // 2. Suffix match (ref="snippets/foo.liquid" matches
  // "src/snippets/foo.liquid"). The suffix is anchored on a `/` so it can only
  // match whole path segments: a plain `endsWith(refPath)` would let
  // "b/foo.h" match "ab/foo.h". Equality was already handled above, so only
  // the strictly-longer case remains here.
  //
  // When several files share the basename — a `#include "RNCAsyncStorage.h"`
  // with a same-named header on another platform (windows/code/ vs apple/) —
  // prefer the one in the includer's own directory, then directory proximity /
  // same language family. A C/C++ include (and any bare-filename import)
  // resolves relative to the including file, not to an arbitrary same-named
  // header elsewhere in the tree.
  const anchoredSuffix = refPath.startsWith('/') ? refPath : `/${refPath}`;
  const suffixMatches = fileNodes.filter(
    (n) => n.qualifiedName.endsWith(anchoredSuffix) || n.filePath.endsWith(anchoredSuffix)
  );
  if (suffixMatches.length > 0) {
    return {
      original: ref,
      targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
      confidence: 0.85,
      resolvedBy: 'file-path',
    };
  }

  // 3. A single file with this basename: use it, with lower confidence.
  if (fileNodes.length === 1) {
    return {
      original: ref,
      targetNodeId: fileNodes[0]!.id,
      confidence: 0.7,
      resolvedBy: 'file-path',
    };
  }

  return null;
}
/**
 * Choose, among file nodes that all match a bare include/import by basename,
 * the one nearest to the referencing file.
 *
 * Candidates living in the referencing file's own directory form the pool when
 * there are any; otherwise every candidate is considered. Within the pool the
 * highest score wins, where score = path proximity + 5 when the candidate is
 * in the same language family as the reference. Ties keep the earliest
 * candidate. A C/C++ `#include "X.h"` (and any bare-filename import) resolves
 * relative to the including file — not to an arbitrary same-named header on
 * another platform.
 *
 * @param candidates File nodes to choose from (callers pass a non-empty list).
 * @param ref        The reference whose `filePath`/`language` anchor the choice.
 */
function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  const parentDir = (path: string): string => {
    const slash = path.lastIndexOf('/');
    return slash < 0 ? '' : path.slice(0, slash);
  };

  const includerDir = parentDir(ref.filePath);
  const siblings = candidates.filter(
    (candidate) => parentDir(candidate.filePath) === includerDir
  );
  const contenders = siblings.length > 0 ? siblings : candidates;

  const scoreOf = (candidate: Node): number => {
    const proximity = computePathProximity(ref.filePath, candidate.filePath);
    const familyBonus = sameLanguageFamily(candidate.language, ref.language) ? 5 : 0;
    return proximity + familyBonus;
  };

  let winner = contenders[0]!;
  let winnerScore = -Infinity;
  contenders.forEach((candidate) => {
    const candidateScore = scoreOf(candidate);
    // Strictly greater: on a tie the earlier contender is kept.
    if (candidateScore > winnerScore) {
      winnerScore = candidateScore;
      winner = candidate;
    }
  });
  return winner;
}
/**
 * Language → family table. Languages in one family share a type system /
 * runtime, so a same-language-only reference may still resolve across them
 * (a Kotlin `Foo.BAR` can name a Java `Foo`). A language that is not listed
 * forms its own singleton family.
 */
const LANGUAGE_FAMILY: Record<string, string> = {
  // JVM
  java: 'jvm',
  kotlin: 'jvm',
  scala: 'jvm',
  // Apple platforms
  swift: 'apple',
  objc: 'apple',
  // Web
  typescript: 'web',
  tsx: 'web',
  javascript: 'web',
  jsx: 'web',
  // C / C++
  c: 'c',
  cpp: 'c',
  // Razor/Blazor markup names C# types — same family so `@model Foo` /
  // `<MyComponent/>` resolve to their `.cs` class through the cross-family gate.
  csharp: 'dotnet',
  razor: 'dotnet',
};

/**
 * Whether two languages belong to the same family.
 *
 * @returns `true` when the languages are identical, or when both are listed in
 *          {@link LANGUAGE_FAMILY} under the same family; `false` otherwise.
 */
export function sameLanguageFamily(a: string, b: string): boolean {
  if (a === b) {
    return true;
  }
  const familyOfA = LANGUAGE_FAMILY[a];
  if (familyOfA === undefined) {
    // Unlisted languages are singleton families: only identity matches.
    return false;
  }
  return LANGUAGE_FAMILY[b] === familyOfA;
}
/**
 * Whether `lang` is listed in the language-family table, i.e. belongs to one
 * of the known multi-language families (jvm / apple / web / c / dotnet).
 *
 * Unlisted languages (php, python, go, ruby, rust, dart, …) and config formats
 * (yaml/xml/blade) are singleton families and yield `false`. This is used to
 * keep config↔code framework bridges — whose config side is never a known
 * programming-language family — out of the cross-family gate.
 */
export function isKnownLanguageFamily(lang: string): boolean {
  const family = LANGUAGE_FAMILY[lang];
  return family !== undefined;
}
/**
 * Whether `a` and `b` sit in two DIFFERENT *known* language families — the
 * signature of a coincidental cross-language name collision (a TS
 * `import React` matching a Swift `import React`, a C++ `#include "X.h"`
 * matching a same-named ObjC header on another platform).
 *
 * Requiring BOTH sides to be known makes this deliberately weaker than the
 * negation of {@link sameLanguageFamily}: a single-file-component language
 * with its own tag (`vue`/`svelte`) importing a `.ts` module, or any
 * singleton-family language (php/go/ruby/…), yields `false` and is left alone.
 */
export function crossesKnownFamily(a: string, b: string): boolean {
  if (!isKnownLanguageFamily(a)) return false;
  if (!isKnownLanguageFamily(b)) return false;
  return !sameLanguageFamily(a, b);
}
/**
 * Remove cross-language candidates from a name lookup, according to the kind
 * of reference:
 *
 *   - `references` (type usage) and `function_ref`: strict same-family filter.
 *     A type named in language X resolves to a SAME-family type, never to a
 *     coincidentally same-named symbol in another language (the Android
 *     `BatteryManager` system class vs a JS one) — cross-language
 *     communication is `calls`, not refs.
 *   - `imports` (import binding): weaker both-known filter. An
 *     `import`/`#include` never crosses two KNOWN families (TS `import React`
 *     ↮ Swift `import React`), but `.vue`/`.svelte` (own tag) importing `.ts`
 *     survives.
 *   - any other kind: the candidate list is returned unchanged.
 */
function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  switch (ref.referenceKind) {
    case 'references':
    case 'function_ref':
      return candidates.filter((candidate) =>
        sameLanguageFamily(candidate.language, ref.language)
      );
    case 'imports':
      return candidates.filter(
        (candidate) => !crossesKnownFamily(candidate.language, ref.language)
      );
    default:
      return candidates;
  }
}
/**
 * Resolve a function-as-value reference (#756): a function name used as a
 * callback / function-pointer value (`register(handler)`, `o->cb = handler`,
 * `{ .cb = handler }`, `signal(SIGINT, handler)`).
 *
 * This is the ONLY strategy allowed for `function_ref` refs. It matches by
 * exact name, accepts function/method targets only, stays inside the
 * reference's language family, prefers a same-file definition, and resolves
 * across files only when the match is UNIQUE. There is no fuzzy fallback and
 * no qualified-name walking — a wrong callback edge is worse than none.
 *
 * Confidence: 0.95 for a single same-file definition, 0.9 for same-file
 * overloads or a `Scope::member` match, 0.8 for a unique cross-file match.
 *
 * @returns The resolved reference, or `null` when nothing unambiguous matches.
 */
export function matchFunctionRef(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const { referenceName, language, filePath, fromNodeId } = ref;

  // `this.<member>` refs belong exclusively to the class-scoped resolver in
  // resolveOne (resolveThisMemberFnRef) — never to name matching here.
  if (referenceName.startsWith('this.')) return null;

  const toResolved = (target: Node, confidence: number): ResolvedRef => ({
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy: 'function-ref',
  });
  // Same-name overloads in one file are the same conceptual symbol; the one
  // that starts first is taken for determinism. Requires a non-empty list.
  const earliestOf = (nodes: Node[]): Node =>
    nodes.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
  // Owning scope of a qualified name (`A::B::m` → `A::B`), or null when the
  // name has no non-empty scope part.
  const scopeOf = (qualifiedName: string): string | null => {
    const sep = qualifiedName.lastIndexOf('::');
    return sep > 0 ? qualifiedName.slice(0, sep) : null;
  };

  // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
  // resolve the member ON THAT SCOPE. This shape is an explicit member
  // reference, so it is exempt from the bare-function-only restriction below.
  // Unique-or-drop like everything else.
  const scopeSeparator = referenceName.lastIndexOf('::');
  if (scopeSeparator !== -1) {
    const memberName = referenceName.slice(scopeSeparator + 2);
    const scoped = context.getNodesByName(memberName).filter(
      (n) =>
        (n.kind === 'function' || n.kind === 'method') &&
        sameLanguageFamily(n.language, language) &&
        n.id !== fromNodeId &&
        (n.qualifiedName === referenceName ||
          n.qualifiedName.endsWith(`::${referenceName}`))
    );
    if (scoped.length === 0) return null;

    const scopedHere = scoped.filter((n) => n.filePath === filePath);
    if (scopedHere.length > 0) return toResolved(earliestOf(scopedHere), 0.9);
    // No same-file definition: only a unique cross-file match resolves.
    return scoped.length === 1 ? toResolved(scoped[0]!, 0.9) : null;
  }

  // In JS/TS/Python a bare identifier can never be a method value (methods are
  // reachable only through a receiver — `this.m` / `self.m` / `Cls.m`), so
  // bare fn-refs match FUNCTIONS only. This also sidesteps the pre-existing TS
  // quirk of class fields extracting as method-kind nodes, which otherwise
  // soaked up local names passed as arguments (excalidraw A/B finding; same
  // pattern in vendored docopt.py). Python's `self.m` form keeps method
  // targets via its own capture shape. C++ likewise: a bare identifier can
  // only be a FREE function (member values need `&Cls::method`). PHP string
  // callables name global FUNCTIONS (methods need the `[$obj, 'm']` array
  // form, which carries its own shape). Other languages keep method targets:
  // C# method groups, Swift/Dart implicit-self, Java/Kotlin method references.
  const bareFnOnly = [
    'typescript', 'tsx', 'javascript', 'jsx', 'cpp', 'python', 'php',
  ].includes(language);

  let candidates = context.getNodesByName(referenceName).filter((n) => {
    const kindAllowed = n.kind === 'function' || (!bareFnOnly && n.kind === 'method');
    return (
      kindAllowed &&
      sameLanguageFamily(n.language, language) &&
      // A function registering itself is not a dependency edge.
      n.id !== fromNodeId
    );
  });
  if (candidates.length === 0) return null;

  // Swift implicit-self: a bare identifier can name a METHOD only of the
  // ENCLOSING type (`Button(action: handleTap)` written inside that type) — a
  // same-named method on any OTHER class is a parameter collision (Alamofire:
  // a `request` parameter resolving to EventMonitor::request). Method
  // candidates are scoped to the from-symbol's type; top-level code has no
  // implicit self, so method targets are excluded there entirely. Free
  // functions are unaffected.
  if (language === 'swift' && candidates.some((n) => n.kind === 'method')) {
    const fromNode = context.getNodeById?.(fromNodeId);
    const enclosingScope = fromNode ? scopeOf(fromNode.qualifiedName) : null;
    candidates = candidates.filter((n) => {
      if (n.kind !== 'method') return true;
      if (!enclosingScope) return false;
      const methodScope = scopeOf(n.qualifiedName);
      if (methodScope === null) return false;
      // Accept exact-scope matches plus suffix relationships either way, so
      // extension-declared members (`Holder::m`) still match a nested
      // from-scope (`Module::Holder::wire`) and vice versa.
      return (
        methodScope === enclosingScope ||
        methodScope.endsWith(`::${enclosingScope}`) ||
        enclosingScope.endsWith(`::${methodScope}`)
      );
    });
    if (candidates.length === 0) return null;
  }

  const definedHere = candidates.filter((n) => n.filePath === filePath);
  if (definedHere.length === 0) {
    // Cross-file (imported names the import resolver did not already claim):
    // only an unambiguous match resolves.
    return candidates.length === 1 ? toResolved(candidates[0]!, 0.8) : null;
  }

  // Same-file definition wins — the extraction gate guarantees most survivors
  // have one, and it is the dominant C pattern (static callback registered in
  // a same-file ops struct).
  //
  // Swift exception: several same-named METHODS in one file is an API overload
  // family (`Session.request(...)` × N), and a bare identifier hitting it is
  // almost always a same-named parameter, not a method value (Alamofire A/B
  // finding) — refuse rather than guess. A single method (SwiftUI's
  // `action: handleTap`) still resolves.
  const isSwiftOverloadFamily =
    language === 'swift' &&
    definedHere.length > 1 &&
    definedHere.every((n) => n.kind === 'method');
  if (isSwiftOverloadFamily) return null;

  return toResolved(earliestOf(definedHere), definedHere.length === 1 ? 0.95 : 0.9);
}
/**
 * Try to resolve a reference by exact name match.
 *
 * Confidence: 0.9 for a unique same-language match, 0.5 for a unique match in
 * another language, 0.7 / 0.4 for the best of several matches depending on
 * whether its path proximity to the referencing file reaches 30.
 *
 * @returns The resolved reference, or `null` when there is no candidate, the
 *          name is defined more often than the ambiguity ceiling allows, or no
 *          best match can be chosen.
 */
export function matchByExactName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // `import`-kind nodes are import STATEMENTS, not definitions, so resolving a
  // reference to a sibling file's `import` is a meaningless edge — genuine
  // import→definition resolution is the import resolver's job
  // (resolveViaImport), never name matching here. Dropping them also removes a
  // quadratic blow-up: a ubiquitous package (`react`, `@superset-ui/core`,
  // Python `logging`/`typing`) is re-declared as an `import` node in every
  // file that imports it, so K unresolved import refs each scored K same-named
  // import candidates through findBestMatch — O(K²) per package, the dominant
  // cost of "Resolving refs" on large import-heavy (front-end + back-end)
  // repos (#915).
  const sameNamed = context.getNodesByName(ref.referenceName);
  const definitions = applyLanguageGate(sameNamed, ref).filter(
    (node) => node.kind !== 'import'
  );

  if (definitions.length === 0) {
    return null;
  }

  // A unique match is taken as-is, penalised when it is cross-language.
  if (definitions.length === 1) {
    const sole = definitions[0]!;
    return {
      original: ref,
      targetNodeId: sole.id,
      confidence: sole.language === ref.language ? 0.9 : 0.5,
      resolvedBy: 'exact-match',
    };
  }

  // Ubiquitous-name ceiling (#999): above it, picking one target among K
  // same-named defs by directory proximity is unreliable AND O(K) per ref —
  // the quadratic behind the "Resolving refs" wedge on theme/SDK-vendoring
  // repos. Decline; the precise strategies (qualified-name, import,
  // class-name) already ran. Falls through to fuzzy, which itself only
  // resolves a UNIQUE candidate.
  if (definitions.length > AMBIGUOUS_NAME_CEILING) {
    return null;
  }

  // Several matches: let findBestMatch narrow them down.
  const chosen = findBestMatch(ref, definitions, context);
  if (!chosen) {
    return null;
  }

  // Lower confidence when the match lives in a distant/unrelated module.
  const isNearby = computePathProximity(ref.filePath, chosen.filePath) >= 30;
  return {
    original: ref,
    targetNodeId: chosen.id,
    confidence: isNearby ? 0.7 : 0.4,
    resolvedBy: 'exact-match',
  };
}
/**
 * Try to resolve a reference by its qualified name.
 *
 * Steps, in order:
 *   1. the name must look qualified (contain `::` or `.`), else `null`;
 *   2. exactly one node with that qualified name            → 0.95;
 *   3. several such nodes, one in the call site's own file  → 0.95;
 *   4. partial match: a node named like the last segment whose
 *      `qualifiedName` ends with the reference, same-file first → 0.85.
 */
export function matchByQualifiedName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const qualified = ref.referenceName;
  const looksQualified = qualified.includes('::') || qualified.includes('.');
  if (!looksQualified) {
    return null;
  }

  const toResolved = (target: Node, confidence: number): ResolvedRef => ({
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy: 'qualified-name',
  });

  const exact = context.getNodesByQualifiedName(qualified);
  if (exact.length === 1) {
    return toResolved(exact[0]!, 0.95);
  }

  // Several symbols share this exact qualified name (e.g. `Logger::log`
  // declared in two files — an ODR clash or separate translation units):
  // prefer the one in the call site's own file before the partial-match
  // fallback, otherwise the first-indexed def wins and a call in `b/svc`
  // targets `a/svc` (#1079).
  if (exact.length > 1) {
    const front = preferCallSiteFile(exact, ref.filePath)[0]!;
    if (front.filePath === ref.filePath) {
      return toResolved(front, 0.95);
    }
  }

  // Partial qualified-name match — again preferring the call site's own file
  // when more than one symbol's qualifiedName ends with the reference.
  // NOTE(review): the `endsWith` test is not anchored on a separator, so
  // `b::foo` would also accept `Cab::foo` — confirm whether that is intended.
  const lastSegment = qualified.split(/[:.]/).pop();
  if (!lastSegment) {
    return null;
  }
  const partial = context
    .getNodesByName(lastSegment)
    .filter((node) => node.qualifiedName.endsWith(qualified));
  const chosen = preferCallSiteFile(partial, ref.filePath)[0];
  return chosen ? toResolved(chosen, 0.85) : null;
}
/**
 * Move candidates declared in the call site's own file to the front, keeping
 * the relative order inside both groups (#1079).
 *
 * A same-file definition is the strongest language-agnostic signal for which
 * of several same-named symbols a call means; without it, resolution collapses
 * onto whichever was indexed first, so a call in `b/svc` wrongly targets
 * `a/svc`.
 *
 * @returns The input array itself when it has fewer than two entries or none
 *          of them is in `callSiteFile`; otherwise a new, reordered array.
 */
export function preferCallSiteFile(nodes: Node[], callSiteFile: string): Node[] {
  if (nodes.length < 2) {
    return nodes;
  }
  const inCallSiteFile = nodes.filter((node) => node.filePath === callSiteFile);
  if (inCallSiteFile.length === 0) {
    return nodes;
  }
  const elsewhere = nodes.filter((node) => node.filePath !== callSiteFile);
  return [...inCallSiteFile, ...elsewhere];
}
// Exported for the precedence unit tests (#1079): they assert the
// preferredFqn → same-file → matches[0] ordering directly.
/**
 * Resolve `typeName.methodName` to a method node.
 *
 * Methods are looked up by name and kept when they are `method`-kind, in the
 * reference's exact language, and their qualifiedName is, or ends on a `::`
 * boundary with, `<typeName>::<methodName>`. When nothing matches, the
 * supertypes of `typeName` are tried (depth-capped). When several match, the
 * winner is chosen by `preferredFqn`, then by the call site's own file, then
 * by index order.
 *
 * @param typeName     Receiver type the method is looked up on.
 * @param methodName   Method being called.
 * @param ref          The unresolved reference (language + call-site file).
 * @param context      Lookup context (`getNodesByName`, optional `getSupertypes`).
 * @param confidence   Confidence copied onto the result.
 * @param resolvedBy   Strategy tag copied onto the result.
 * @returns The resolved reference, or `null` when the method is found neither
 *          on the type nor on any reachable supertype.
 */
export function resolveMethodOnType(
  typeName: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  confidence: number,
  resolvedBy: ResolvedRef['resolvedBy'],
  /**
   * Optional FQN that identifies WHICH class declaration `typeName`
   * refers to in the caller's file. When multiple candidates share
   * the same qualifiedName (`FooConverter::convert` in both
   * `dao/converter/` and `service/converter/`), the FQN's
   * file-path-suffix picks the right one — the disambiguation
   * signal Java imports carry but the call site doesn't (#314).
   */
  preferredFqn?: string,
  /** Recursion guard for the supertype/conformance walk. */
  depth = 0,
): ResolvedRef | null {
  const toResolved = (target: Node): ResolvedRef => ({
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy,
  });

  // Look up methods by name and match by qualifiedName ending in
  // `<typeName>::<methodName>`. This works whether the method is defined
  // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  // The previous same-file approach missed the latter — the typical C++ layout.
  const want = `${typeName}::${methodName}`;
  const matches = context.getNodesByName(methodName).filter(
    (m) =>
      m.kind === 'method' &&
      m.language === ref.language &&
      (m.qualifiedName === want || m.qualifiedName.endsWith(`::${want}`)),
  );

  if (matches.length === 0) {
    // Conformance fallback: the method may be defined on a supertype
    // `typeName` extends, or on a protocol / trait it conforms to (e.g. a
    // Swift protocol-extension method, a C# default-interface or extension
    // method, a Kotlin extension on a supertype). Walk supertypes transitively
    // (depth-capped) via the resolved implements/extends edges — empty in the
    // first resolution pass, populated in the conformance pass. Still
    // VALIDATED (the method must exist on a supertype), so a wrong inference
    // produces no edge.
    if (depth < 4 && context.getSupertypes) {
      for (const supertype of context.getSupertypes(typeName, ref.language)) {
        const via = resolveMethodOnType(
          supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
        );
        if (via) return via;
      }
    }
    return null;
  }

  if (matches.length > 1 && preferredFqn) {
    const ext = ref.language === 'kotlin' ? '.kt' : '.java';
    const fqnPath = `${preferredFqn.replace(/\./g, '/')}${ext}`;
    const chosen = matches.find((m) => {
      const fp = m.filePath.replace(/\\/g, '/');
      // Match whole path segments only: the file IS the FQN path, or the FQN
      // path follows a `/`. A bare `endsWith(fqnPath)` would also accept
      // `src/notcom/acme/Foo.java` for the FQN `com.acme.Foo`.
      return fp === fqnPath || fp.endsWith(`/${fqnPath}`);
    });
    if (chosen) {
      return toResolved(chosen);
    }
  }

  // Language-agnostic disambiguation: when several same-named methods survive
  // (e.g. two files each declaring `class Logger { void log(); }` — an ODR
  // clash, an anonymous-namespace type, or separate translation units), prefer
  // the definition in the CALL SITE's own file. Without this, every ambiguous
  // call collapses onto the first-indexed definition, so a call in `b/svc.cpp`
  // wrongly points at `a/svc.cpp` (#1079). This runs AFTER the `preferredFqn`
  // block, so Java/Kotlin import disambiguation — whose target is intentionally
  // in ANOTHER file (#314) — is unaffected: that block returns early whenever
  // an import FQN pins the class.
  const ordered = preferCallSiteFile(matches, ref.filePath);
  return toResolved(ordered[0]!);
}
// C++ keywords/control-flow tokens that can appear right before a receiver
// (e.g. `return ptr->m()`) and must NOT be treated as a type.
const CPP_NON_TYPE_TOKENS = new Set([
  'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await', 'co_yield',
  'co_return', 'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
  'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
]);

/**
 * Reduce a C++ type spelling to its bare, unqualified type name.
 *
 * Strips cv/elaboration keywords (`const`, `volatile`, `mutable`, `typename`,
 * `class`, `struct`), pointer/reference markers and template argument lists,
 * then keeps the last `::`-separated segment.
 *
 * @param typeName Raw type text, e.g. `const ns::Widget<T>&`.
 * @returns The last name segment (`Widget`), or `null` when nothing is left or
 *          the segment is a keyword from {@link CPP_NON_TYPE_TOKENS}.
 */
function normalizeCppTypeName(typeName: string): string | null {
  let stripped = typeName
    .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
    .replace(/[&*]+/g, ' ');

  // Remove template argument lists innermost-first until nothing changes. A
  // single `<[^>]*>` pass stops at the FIRST `>`, so a nested list such as
  // `std::map<int, std::vector<int>>` would leave a stray `>` in the name.
  let previous: string;
  do {
    previous = stripped;
    stripped = stripped.replace(/<[^<>]*>/g, ' ');
  } while (stripped !== previous);

  const normalized = stripped.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  const parts = normalized.split(/::/).filter(Boolean);
  const last = parts[parts.length - 1];
  if (!last) return null;
  if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  return last;
}
/**
 * Build the regex that recognises a declaration of `escapedReceiver`.
 *
 * It matches `Type receiver`, `Type* receiver`, `Type *receiver`,
 * `Type*receiver`, `Type<X> receiver`, etc., and REQUIRES a declarator
 * terminator after the receiver: `;`, `=`, `,`, `)`, `[`, `{`, `(`, or the end
 * of the tested string. The terminator rules out uses such as
 * `return receiver->m()`, where the preceding token is a keyword, not a type.
 * Capture group 1 holds the type text.
 *
 * @param escapedReceiver Receiver identifier, already escaped for use in a regex.
 */
function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  const pattern = [
    // Group 1: type name, optional template arguments, optional `*` / `&`.
    '([A-Za-z_][\\w:]*(?:\\s*<[^;=(){}]+>)?(?:\\s*[*&]+)?)',
    // The receiver as a whole word, with optional surrounding whitespace.
    '\\s*\\b',
    escapedReceiver,
    '\\b\\s*',
    // Lookahead: a declarator terminator or the end of the string.
    '(?=[;=,)\\[{(]|$)',
  ].join('');
  return new RegExp(pattern);
}
/**
 * Infer the declared C++ type of `receiverName` at a call site by scanning
 * source text for its declaration.
 *
 * The referencing file is scanned upwards from the call line; the first
 * declaration whose type normalises to a usable name wins. An `auto`
 * declaration is resolved through its initializer (#645) and, failing that,
 * the scan continues on earlier lines. If the file yields nothing, sibling
 * headers (`.h`, `.hpp`, `.hxx` in place of a `.c/.cc/.cpp/.cxx` extension)
 * are scanned top to bottom, ignoring `auto` declarations.
 *
 * @param receiverName Identifier the method is called on.
 * @param ref          Reference supplying the file path and 1-based call line.
 * @param context      Provides `readFile` / `fileExists`.
 * @param depth        Forwarded unchanged to the `auto` initializer inference.
 * @returns The bare type name, or `null` when no declaration is found.
 */
function inferCppReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  const source = context.readFile(ref.filePath);
  if (!source) return null;

  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
  const declarator = buildDeclaratorRegex(escapedReceiver);

  // Normalised declared type on `line`, or null when the line holds no
  // declaration of the receiver (or the type text normalises to nothing).
  const declaredTypeOn = (line: string): string | null => {
    const found = line.match(declarator);
    return found ? normalizeCppTypeName(found[1] ?? '') : null;
  };

  // Pass 1: walk upwards from the call line (clamped into the file).
  const lines = source.split(/\r?\n/);
  let index = Math.max(0, Math.min(lines.length - 1, ref.line - 1));
  while (index >= 0) {
    const line = lines[index];
    index -= 1;
    if (!line || !mentionsReceiver.test(line)) continue;

    const declared = declaredTypeOn(line);
    if (declared === 'auto') {
      // `auto x = Foo::instance();` — the declared type is deduced; recover it
      // from the initializer (call return type / construction) (#645).
      const initType = inferCppAutoInitializerType(line, receiverName, ref, context, depth);
      if (initType) return initType;
      // No usable initializer on this line — keep scanning earlier ones.
    } else if (declared) {
      return declared;
    }
  }

  // Pass 2: the matching header(s), e.g. a member declared in the class body.
  // A path without a recognised source extension maps onto itself and is
  // dropped, as are duplicates.
  const sourceExtension = /\.(?:c|cc|cpp|cxx)$/i;
  const headerCandidates = ['.h', '.hpp', '.hxx']
    .map((headerExtension) => ref.filePath.replace(sourceExtension, headerExtension))
    .filter(
      (candidate, position, all) =>
        all.indexOf(candidate) === position && candidate !== ref.filePath
    );

  for (const headerPath of headerCandidates) {
    if (!context.fileExists(headerPath)) continue;
    const headerSource = context.readFile(headerPath);
    if (!headerSource) continue;

    for (const line of headerSource.split(/\r?\n/)) {
      if (!mentionsReceiver.test(line)) continue;
      const declared = declaredTypeOn(line);
      if (declared && declared !== 'auto') return declared;
    }
  }

  return null;
}
/**
 * Return the final `::`-separated component of a C++ name, ignoring empty
 * components (leading/trailing/doubled separators). When no non-empty
 * component exists, the input is returned unchanged.
 */
function cppLastSegment(name: string): string {
  const segments = name.split('::').filter((segment) => segment.length > 0);
  return segments.pop() ?? name;
}
/**
 * Look up the return type recorded at extraction time for `callee`, which is
 * either `Class::method` (optionally namespace-qualified) or a free function
 * name. Used by the C++ (#645) and PHP (#608) chained-call resolvers.
 *
 * Only `method`/`function` nodes in the reference's language that carry a
 * `returnType` are considered.
 *
 * @param callee Callee name as written at the call site.
 * @param ref Unresolved reference; supplies the language filter.
 * @param context Resolution context used to look nodes up by name.
 * @returns The recorded return type, or null when the callee is not indexed or
 *   has no recorded return type (a `void`/primitive return).
 */
function lookupCalleeReturnType(
  callee: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const segments = callee.includes('::') ? callee.split('::').filter(Boolean) : null;
  const method = segments ? (segments[segments.length - 1] ?? callee) : callee;
  const cls = segments ? segments.slice(0, -1).join('::') : null;

  const withReturnType = context.getNodesByName(method).filter((node) => {
    const isCallable = node.kind === 'method' || node.kind === 'function';
    return isCallable && node.language === ref.language && !!node.returnType;
  });

  if (!cls) {
    // Unqualified callee: only a free function can be meant.
    const freeFunction = withReturnType.find((node) => node.kind === 'function');
    return freeFunction?.returnType ?? null;
  }

  // The call site may name the class with MORE namespace qualification than
  // the stored node (`details::registry::instance` at the call vs
  // `registry::instance` on the node), or LESS. Accept an exact match or
  // either name being a `::`-suffix of the other.
  const wanted = `${cls}::${method}`;
  const hit = withReturnType.find((node) => {
    const stored = node.qualifiedName;
    return stored === wanted || stored.endsWith(`::${wanted}`) || wanted.endsWith(`::${stored}`);
  });
  return hit?.returnType ?? null;
}
/**
 * True when the graph holds a class or struct, in the reference's language,
 * whose name equals the last `::` segment of `name`.
 */
function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  const simpleName = cppLastSegment(name);
  for (const node of context.getNodesByName(simpleName)) {
    const isClassLike = node.kind === 'class' || node.kind === 'struct';
    if (isClassLike && node.language === ref.language) return true;
  }
  return false;
}
/**
 * Infer the class produced by a C++ call/construction expression, using return
 * types captured at extraction (#645). Handles, in order:
 *  - `make_unique<T>()` / `make_shared<T>()` → T (namespace qualification on T
 *    is dropped, so `make_unique<ui::Widget>` yields `Widget`, matching the
 *    `new ui::Widget` and direct-construction paths)
 *  - single-level member call `recv.method()` → recv's type, then method's return
 *  - `Class::method()` / free `func()` → the callee's recorded return type
 *  - direct construction `Type()` / `ns::Type()` → Type
 *
 * Callers MUST still validate that the outer method exists on the result
 * before creating an edge, so a wrong guess stays silent.
 *
 * @param inner The callee expression without its argument list.
 * @param ref Unresolved reference for the call site.
 * @param context Resolution context.
 * @param depth Current recursion depth; inference stops once it exceeds 3.
 * @returns The inferred class name, or null when undeterminable.
 */
function resolveCppCallResultType(
  inner: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  if (depth > 3) return null; // guard against pathological mutual recursion
  const expr = inner.trim();

  // Smart-pointer factories: the template argument IS the produced class. The
  // argument may be namespace-qualified (optionally with a leading `::`), so
  // capture the whole qualified name and keep only its last segment.
  const make = expr.match(
    /(?:^|::)(?:make_unique|make_shared)\s*<\s*(?:::)?([A-Za-z_][\w:]*)/,
  );
  if (make && make[1]) return cppLastSegment(make[1]);

  // Single-level member call `recv.method` (the `manager.view().render()` shape).
  const dotIdx = expr.lastIndexOf('.');
  if (dotIdx > 0) {
    const recv = expr.slice(0, dotIdx);
    const method = expr.slice(dotIdx + 1);
    // Single level only: a nested/called/scoped receiver is out of scope here.
    if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null;
    const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
    if (!recvType) return null;
    return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  }

  const ret = lookupCalleeReturnType(expr, ref, context);
  if (ret) return ret;

  // Direct construction — the callee itself names a class/struct.
  if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  return null;
}
/**
 * Recover the type of an `auto`-declared local from the initializer found on
 * its declaration line (#645). Recognized initializer shapes:
 * `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
 * `auto p = new W();`, `auto w = Widget();`.
 *
 * @param line Source line holding the declaration.
 * @param receiverName Name of the declared variable.
 * @param ref Unresolved reference for the call site.
 * @param context Resolution context.
 * @param depth Current recursion depth (incremented for the nested inference).
 * @returns The inferred type, or null when the initializer is not recognized.
 */
function inferCppAutoInitializerType(
  line: string,
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth: number,
): string | null {
  const receiverRe = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const assignment = line.match(new RegExp(`\\b${receiverRe}\\b\\s*=\\s*([^;]+)`));
  const initializer = assignment?.[1]?.trim();
  if (!initializer) return null;

  // Heap allocation: `new ns::W(...)` → `W`.
  const heapAlloc = initializer.match(/^new\s+([A-Za-z_][\w:]*)/);
  if (heapAlloc?.[1]) return cppLastSegment(heapAlloc[1]);

  // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  const callee = initializer.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/)?.[1];
  if (!callee) return null;
  return resolveCppCallResultType(callee.replace(/\s+/g, ''), ref, context, depth + 1);
}
/**
 * Resolve a C++ chained call whose receiver is itself a call. The extractor
 * encodes such a reference as `<innerCallee>().<method>` (#645).
 *
 * The receiver's type is whatever the inner call returns; the outer method is
 * then resolved on that type through resolveMethodOnType, which validates that
 * the method exists there — a wrong inference therefore produces no edge
 * rather than a wrong one.
 *
 * @returns The resolved reference, or null when the name has a different
 *   shape, the inner call's type is unknown, or the method is absent.
 */
export function matchCppCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  const innerCallee = chain?.[1];
  const outerMethod = chain?.[2];
  if (!innerCallee || !outerMethod) return null;

  const receiverType = resolveCppCallResultType(innerCallee, ref, context);
  return receiverType
    ? resolveMethodOnType(receiverType, outerMethod, ref, context, 0.85, 'instance-method')
    : null;
}
/**
 * Resolve a `::`-scoped factory chain, encoded by the extractor as
 * `Cls::factory().method`. This covers PHP `Cls::for($x)->method()` (#608, the
 * per-credential Laravel client idiom) and Rust `Foo::new().bar()` (an
 * associated-function call).
 *
 * The receiver's type is what `Cls::factory` returns. A `self` marker (PHP
 * `: self`/`: static`, Rust `-> Self`) stands for the factory's own class; any
 * other recorded return type is used as-is. The outer method is resolved via
 * resolveMethodOnType, which validates it against the type (or a supertype it
 * conforms to), so a wrong inference yields no edge rather than a wrong one.
 * Shared by the `::`-receiver languages (PHP, Rust).
 */
export function matchScopedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  const inner = chain?.[1];
  const method = chain?.[2];
  if (!inner || !method) return null;

  // Only static-factory (`Cls::method`) chains are handled here.
  const scopeSep = inner.lastIndexOf('::');
  if (scopeSep === -1) return null;

  const declaredReturn = lookupCalleeReturnType(inner, ref, context);
  if (!declaredReturn) return null;

  // `self` (the extractor's marker for self/static/$this) → the factory's class.
  const receiverType = declaredReturn === 'self' ? inner.slice(0, scopeSep) : declaredReturn;
  return resolveMethodOnType(receiverType, method, ref, context, 0.85, 'instance-method');
}
/**
 * Languages in which an unprefixed, capitalized call `Foo(args)` yields a
 * `Foo`, so the receiver of `Foo(args).method()` has type `Foo`.
 *
 *  - Kotlin, Swift, Dart: a bare `Foo(args)` constructs the class.
 *  - Scala: `Foo(args)` is a case-class / companion `apply`, which by
 *    convention returns `Foo`.
 *  - Pascal/Delphi: `TFoo(x)` is a TYPECAST whose result is a `TFoo`.
 *
 * Java and C# are deliberately absent: construction needs `new` there, so a
 * bare `Foo()` is a method call. Resolution through resolveMethodOnType still
 * validates the method, so an unconventional `apply` returning another type
 * simply yields no edge rather than a wrong one.
 */
const CONSTRUCTS_VIA_BARE_CALL = new Set([
  'kotlin',
  'swift',
  'scala',
  'dart',
  'pascal',
]);
/**
 * Resolve a dotted chained call whose receiver is itself a call —
 * `Foo.getInstance().bar()`, encoded by the extractor as
 * `Foo.getInstance().bar` (#645/#608 mechanism).
 *
 * The receiver's type is taken from the inner call, and the outer method is
 * then resolved on that type via resolveMethodOnType, which validates that the
 * method exists there. A wrong inference therefore yields no edge rather than
 * a wrong one (a same-named `bar()` on an unrelated class is never matched).
 *
 * Receiver shapes handled:
 *  - `Receiver.factory()` — the declared return type of `Receiver::factory`;
 *    Objective-C and Pascal fall back to the receiver class itself when no
 *    return type was captured.
 *  - bare `inner()` in Go — the return type of the package-level function,
 *    else a bare-name resolution of the method.
 *  - bare capitalized `Foo()` in the CONSTRUCTS_VIA_BARE_CALL languages — `Foo`.
 *
 * @returns The resolved reference tied to the ORIGINAL `ref`, or null.
 */
export function matchDottedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
  const inner = chain?.[1]; // `Foo.getInstance`
  const method = chain?.[2]; // `bar`
  if (!inner || !method) return null;

  // Resolve the outer method on `typeName`, letting the caller file's import
  // of that type pick between same-named classes.
  const resolveOn = (typeName: string, confidence: number) =>
    resolveMethodOnType(
      typeName,
      method,
      ref,
      context,
      confidence,
      'instance-method',
      importedFqnOf(typeName, ref, context),
    );

  const lastDot = inner.lastIndexOf('.');

  if (lastDot > 0) {
    // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
    // type is what `Receiver.factory` returns (its declared return type).
    const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
    const factoryMethod = inner.slice(lastDot + 1);
    if (!factoryClass || !factoryMethod) return null;

    const declaredReturn = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
    // A captured return type is authoritative: if the method is missing on it,
    // resolveOn returns null and NO fallback below is attempted (absent-method
    // safety, so a same-named decoy is never matched).
    if (declaredReturn) return resolveOn(declaredReturn, 0.85);

    // Objective-C: a class-message factory — `[X alloc]`, `[X new]`,
    // `[X sharedFoo]` — returns an instance of the RECEIVER class `X` by
    // convention (`instancetype`). When the factory's own return type isn't
    // recoverable (its selector returns `instancetype`, or `alloc`/`new`
    // aren't user-defined nodes at all), treat the receiver's type as `X`.
    // This resolves `[[X alloc] init]` and singleton chains.
    const isObjcClassFactory = ref.language === 'objc' && /^[A-Z]/.test(factoryClass);

    // Pascal/Delphi: the extractor only re-encodes a `TFoo`/`IFoo`-prefixed
    // chain (the type-naming convention), so `factoryClass` is a real class
    // here. A factory without a captured return type is a CONSTRUCTOR
    // (`TFileMem.Create().SetCachePerformance` — `constructor Create` has no
    // `: TBar` annotation but returns its own class) or an unannotated
    // function; either way the receiver's type is the class itself.
    const isPascalTypeFactory = ref.language === 'pascal' && /^[TI]/.test(factoryClass);

    // In both cases resolveMethodOnType validates against the class (and its
    // supertypes), so a method that lives elsewhere yields NO edge.
    return isObjcClassFactory || isPascalTypeFactory ? resolveOn(factoryClass, 0.8) : null;
  }

  // From here on `inner` is a bare name (no usable dot).

  if (ref.language === 'go') {
    // Go: bare package-level factory FUNCTION `New().method()` — the
    // receiver's type is what `New` returns; resolve the method on that.
    const declaredReturn = lookupCalleeReturnType(inner, ref, context);
    if (declaredReturn) return resolveOn(declaredReturn, 0.85);

    // `inner` isn't a function with a captured return type — typically a
    // package-level VARIABLE holding a function value (e.g. gin's `engine()`),
    // whose type we can't recover. Fall back to bare-name resolution of the
    // method so we don't DROP an edge the un-re-encoded bare path would have
    // found. (When `inner` IS a real factory function but the method doesn't
    // exist on its return type, we already returned above with no edge — the
    // absent-method safety guarantee is preserved.)
    //
    // CRITICAL: resolve the TARGET via a synthetic bare-name ref, but return
    // the match tied to the ORIGINAL `ref` (referenceName `inner().method`).
    // The batched resolver (resolveAndPersistBatched) reads unresolved rows
    // from offset 0 every pass and relies on
    // deleteSpecificResolvedReferences — keyed on referenceName — to clear
    // each resolved row so the batch empties. If the synthetic ref's bare
    // `method` were propagated as `.original`, the delete would never match
    // the stored `inner().method` row, the batch would never drain, and the
    // loop would re-resolve + re-insert forever (a runaway that grew gin's
    // graph to 5M edges / 1.4 GB before this fix).
    const bareRef = { ...ref, referenceName: method };
    const bareMatch = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
    return bareMatch ? { ...bareMatch, original: ref } : null;
  }

  // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
  // capitalized inner yields the class itself, but only in the languages listed
  // in CONSTRUCTS_VIA_BARE_CALL. In Java/C# a bare `Foo()` is a method call
  // (constructors need `new`), so construction must not be assumed. A lowercase
  // bare inner is a top-level `factory().method()` whose type we can't
  // recover — bail.
  const constructsClass = CONSTRUCTS_VIA_BARE_CALL.has(ref.language) && /^[A-Z]/.test(inner);
  return constructsClass ? resolveOn(inner, 0.85) : null;
}
/**
 * Find what the reference's file imports under the local name `typeName`.
 *
 * When several classes share a simple type name, the caller file's import of
 * that type is the only signal that names WHICH one (#314).
 *
 * @returns The `source` of the first import mapping whose `localName` equals
 *   `typeName`, or undefined when the file has no such import.
 */
function importedFqnOf(
  typeName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | undefined {
  for (const mapping of context.getImportMappings(ref.filePath, ref.language)) {
    if (mapping.localName === typeName) return mapping.source;
  }
  return undefined;
}
/**
 * Java/Kotlin: infer a receiver's declared type from the field declarations of
 * the class enclosing the call site. The field's `signature` is expected in
 * the form "<TypeName> <fieldName>" (set by tree-sitter.ts extractField), so
 * the type is whatever precedes the field name. Handles Spring
 * `@Resource UserBO userbo;` / `@Autowired private UserService userService;`
 * where the field name doesn't match the class name by naming convention.
 *
 * @param receiverName Field name the method is called on.
 * @param ref Unresolved reference (file path, language, 1-based call line).
 * @param context Resolution context used to list the file's nodes.
 * @returns The bare type name (generics — including nested ones — array and
 *   varargs markers, and dotted package stripped), or null when there is no
 *   enclosing class, no matching field, the signature does not contain the
 *   field name, or the type does not start with an uppercase letter.
 */
function inferJavaFieldReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const inFile = context.getNodesInFile(ref.filePath);
  if (inFile.length === 0) return null;

  // Find the class enclosing the call line (tightest match by latest start).
  let enclosing: Node | null = null;
  for (const n of inFile) {
    if (n.kind !== 'class' && n.kind !== 'interface') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line) {
      if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
    }
  }
  if (!enclosing) return null;

  const classStart = enclosing.startLine;
  const classEnd = enclosing.endLine ?? enclosing.startLine;
  const field = inFile.find(
    (n) =>
      n.kind === 'field' &&
      n.name === receiverName &&
      n.language === ref.language &&
      n.startLine >= classStart &&
      (n.endLine ?? n.startLine) <= classEnd,
  );
  if (!field || !field.signature) return null;

  // Signature shape: "<TypeName> <fieldName>" (extractField). If the name is
  // not present at all, the signature has an unexpected shape; bail instead of
  // letting `slice(0, -1)` silently chop the last character off the type.
  const nameIdx = field.signature.lastIndexOf(field.name);
  if (nameIdx < 0) return null;
  const typeRaw = field.signature.slice(0, nameIdx).trim();
  if (!typeRaw) return null;

  // Strip generic arguments innermost-first until none remain, so nested
  // generics (`Map<String, List<Foo>>`) collapse to `Map` rather than leaving
  // a stray `>` behind.
  let typeNoGenerics = typeRaw;
  let previous: string;
  do {
    previous = typeNoGenerics;
    typeNoGenerics = typeNoGenerics.replace(/<[^<>]*>/g, '');
  } while (typeNoGenerics !== previous);
  typeNoGenerics = typeNoGenerics.trim();

  // Drop array / varargs markers, then keep the last dotted / space-separated
  // token (skips modifiers, annotations, and package qualification).
  const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
  const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  const lastPart = parts[parts.length - 1];
  if (!lastPart) return null;
  if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  return lastPart;
}
// ── Local-variable receiver-type inference (#1108) ──────────────────────────
//
// An instance call made through a local variable
// (`const lg = new Logger(); lg.log()`) used to resolve only in C++; no other
// language had a way to learn the receiver's type. Local variables are not
// indexed as nodes (that would explode the node count), so — like the C++
// inferrer above — the enclosing function's source is read and the receiver's
// declaration/initializer is pattern-matched to recover its type. That type is
// handed to resolveMethodOnType, which VALIDATES that the type really declares
// the method; a mis-inference thus produces NO edge, and that safety net is
// what lets the patterns below stay simple. C++ keeps its dedicated inferrer
// (header scan + `auto`); everything here serves the other languages.

// Keywords and literals that a loose pattern may capture but that can never be
// a user-defined type name.
const NON_TYPE_RECEIVER_TOKENS = new Set([
  'this',
  'self',
  'super',
  'new',
  'return',
  'await',
  'yield',
  'typeof',
  'null',
  'nil',
  'None',
  'true',
  'false',
  'True',
  'False',
  'undefined',
]);
/**
 * Reduce a captured type expression to a simple type name.
 *
 * Generic arguments (`<...>`) and pointer/reference markers (`&`, `*`) are
 * removed, the text is split on runs of `.` / `:`, and the last non-empty
 * segment is kept.
 *
 * @returns The simple name, or null when nothing is left or the name is one of
 *   the NON_TYPE_RECEIVER_TOKENS.
 */
function normalizeInferredTypeName(raw: string): string | null {
  const withoutGenerics = raw.replace(/<[^>]*>/g, '');
  const bare = withoutGenerics.replace(/[&*]/g, '').trim();
  const segments = bare.split(/[.:]+/).filter((segment) => segment.length > 0);
  const simpleName = segments[segments.length - 1];
  if (!simpleName || NON_TYPE_RECEIVER_TOKENS.has(simpleName)) return null;
  return simpleName;
}
/**
 * Per-language patterns that recover a local variable's (or typed
 * parameter's) type from its declaration/initializer.
 *
 * Each regex captures the type in group 1 and is listed most-specific first.
 * PascalCase is required in the capture where the language convention allows,
 * as a cheap false-positive guard on top of resolveMethodOnType's validation.
 *
 * @param language Language of the call site.
 * @param r The ALREADY regex-escaped receiver name.
 * @returns The patterns to try, in order; empty for unsupported languages.
 */
function localReceiverTypePatterns(language: Language, r: string): RegExp[] {
  // Shapes that several languages share verbatim. They are built lazily so
  // only the requested language's regexes are ever compiled.
  const bareConstructorCall = () =>
    new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w.]*)\\s*\\(`); // lg = Logger(...)
  const colonAnnotation = () =>
    new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w.]*)`); // lg: Logger
  const newExpression = () =>
    new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_][\\w.]*)`); // lg = new Logger()
  const typeBeforeName = () =>
    new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;,)]`); // Logger lg; / param

  switch (language) {
    case 'typescript':
    case 'javascript':
    case 'tsx':
    case 'jsx':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*new\\s+([A-Za-z_$][\\w.$]*)`), // = new Logger()
        new RegExp(`\\b(?:const|let|var)\\s+${r}\\s*:\\s*([A-Z][\\w.$]*)`), // lg: Logger
      ];
    case 'python': // lg = Logger(...) / lg: Logger (PEP 526)
    case 'kotlin': // val lg = Logger(...) / val lg: Logger / param
    case 'swift': // let lg = Logger(...) / let lg: Logger / param
      return [bareConstructorCall(), colonAnnotation()];
    case 'java':
    case 'csharp':
      return [newExpression(), typeBeforeName()];
    case 'rust':
      return [
        // let lg = Logger::new() / Logger{} / Logger, with an optional type
        // annotation. The `\s*` before `=` is required: rustfmt-style code
        // writes `let lg = ...`, and without it only `let lg=...` or an
        // annotated declaration could match.
        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\b(?:\\s*:[^=]+)?\\s*=\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`),
        new RegExp(`\\blet\\s+(?:mut\\s+)?${r}\\s*:\\s*&?(?:mut\\s+)?([A-Z][\\w]*)`), // let lg: Logger
      ];
    case 'go':
      return [
        new RegExp(`\\b${r}\\b\\s*:=\\s*&?([A-Za-z_][\\w.]*)\\s*{`), // lg := Logger{} / &Logger{}
        new RegExp(`\\bvar\\s+${r}\\s+\\*?([A-Za-z_][\\w.]*)`), // var lg Logger / *Logger
      ];
    case 'ruby':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*([A-Z][\\w:]*)\\.new\\b`), // lg = Logger.new
      ];
    case 'scala':
      return [
        new RegExp(`\\b${r}\\b\\s*=\\s*(?:new\\s+)?([A-Z][\\w.]*)`), // val lg = new Logger / Logger(...)
        colonAnnotation(), // val lg: Logger / param
      ];
    case 'dart':
      return [
        bareConstructorCall(), // var lg = Logger(...)
        new RegExp(`\\b([A-Z][\\w.]*)\\s+${r}\\b\\s*[=;]`), // Logger lg = ...
      ];
    case 'php':
      return [
        new RegExp(`\\$?${r}\\b\\s*=\\s*new\\s+([A-Za-z_\\\\][\\w\\\\]*)`), // $lg = new Logger()
      ];
    default:
      return [];
  }
}
/**
 * 1-based start line of the tightest function/method (in the reference's
 * language) whose line range contains the call; 1 when none encloses it.
 */
function enclosingScopeStartLine(ref: UnresolvedRef, context: ResolutionContext): number {
  return context.getNodesInFile(ref.filePath).reduce((tightest, node) => {
    const isCallable = node.kind === 'function' || node.kind === 'method';
    if (!isCallable || node.language !== ref.language) return tightest;

    const lastLine = node.endLine ?? node.startLine;
    const enclosesCall = node.startLine <= ref.line && ref.line <= lastLine;
    // The latest-starting enclosing callable is the innermost one.
    return enclosesCall && node.startLine >= tightest ? node.startLine : tightest;
  }, 1);
}
/**
 * Infer a receiver's type from its local declaration/initializer inside the
 * enclosing function body.
 *
 * Dispatches on the reference's language; the scan runs backward from the call
 * line and stops at the enclosing scope's first line, so a same-named variable
 * in another function can't leak in.
 *
 * @returns The normalized type name, or null when the language has no
 *   patterns, the file can't be read, or no declaration is found.
 */
function inferLocalReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const patterns = localReceiverTypePatterns(ref.language, escapedReceiver);
  if (patterns.length === 0) return null;

  const fileText = context.readFile(ref.filePath);
  if (!fileText) return null;

  const sourceLines = fileText.split(/\r?\n/);
  const callIdx = Math.max(0, Math.min(sourceLines.length - 1, ref.line - 1));
  const scopeIdx = Math.max(0, enclosingScopeStartLine(ref, context) - 1);

  // Nearest declaration wins: scan backward from the call to the scope start.
  let idx = callIdx;
  while (idx >= scopeIdx) {
    const text = sourceLines[idx];
    idx -= 1;
    if (!text) continue;

    for (const pattern of patterns) {
      const captured = text.match(pattern)?.[1];
      if (!captured) continue;
      const typeName = normalizeInferredTypeName(captured);
      if (typeName) return typeName;
    }
  }
  return null;
}
/**
 * Resolve a receiver-qualified call — `obj.method`, `a.b.method`, or
 * `Class::method` — to the method node it targets.
 *
 * Attempts, in order (the first hit wins):
 *  - type inferred from the receiver's local declaration (dotted refs only),
 *  - Java/Kotlin: type of a same-named field in the enclosing class,
 *  - Strategy 1: the receiver names a class directly,
 *  - Strategy 2: the capitalized receiver names a class,
 *  - Strategy 3: name-match the method across the codebase, scored by
 *    receiver/class word overlap.
 *
 * @returns The resolved reference, or null when nothing matches.
 */
export function matchMethodCall(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Parse "obj.method" or "Class::method". The method part allows trailing `:`
  // keywords so Objective-C selectors resolve (`SDImageCache.storeImage:`,
  // `obj.setX:y:`); colons never appear in other languages' method refs, so
  // this is a no-op for them. The receiver allows dots
  // (`builder.Services.AddCoreServices`) so a CHAINED call resolves by its
  // last segment through Strategy 3; without this a multi-dot extension-method
  // call (C# DI `builder.Services.AddCoreServices()`, `Guard.Against.X()`)
  // matched no pattern and never resolved.
  const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  const colonMatch = ref.referenceName.match(/^(\w+)::(\w+)$/);
  const parsed = dotMatch ?? colonMatch;
  if (!parsed) {
    return null;
  }
  const receiver = parsed[1]!;
  const methodName = parsed[2]!;
  const isJvmLanguage = ref.language === 'java' || ref.language === 'kotlin';

  if (dotMatch) {
    // Infer the receiver's type from its local declaration/initializer in the
    // enclosing scope, then resolve the method on that type (#1108). C++ keeps
    // its dedicated inferrer (header scan + `auto`); every other language uses
    // the shared source-based inferrer. resolveMethodOnType validates that the
    // method exists on the inferred type, so a mis-inference produces no edge.
    const localType =
      ref.language === 'cpp'
        ? inferCppReceiverType(receiver, ref, context)
        : inferLocalReceiverType(receiver, ref, context);
    if (localType) {
      // Java/Kotlin: when two classes share the simple name, the file's import
      // pins WHICH one (#314). Other languages disambiguate by call-site file.
      const importedFqn = isJvmLanguage ? importedFqnOf(localType, ref, context) : undefined;
      const viaLocal = resolveMethodOnType(
        localType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
        importedFqn,
      );
      if (viaLocal) {
        return viaLocal;
      }
    }

    // Java/Kotlin: the receiver may be a field whose name doesn't match its
    // type by naming convention (`userbo` → class `UserBO`, abbreviated). Look
    // the field up in the enclosing class to get its declared type, then
    // resolve the method there. Covers Spring `@Resource`/`@Autowired` field
    // injection where the field type is the concrete bean class.
    if (isJvmLanguage) {
      const fieldType = inferJavaFieldReceiverType(receiver, ref, context);
      if (fieldType) {
        // The caller file's import is the only signal that names WHICH of
        // several same-named classes is meant (#314).
        const viaField = resolveMethodOnType(
          fieldType,
          methodName,
          ref,
          context,
          0.9,
          'instance-method',
          importedFqnOf(fieldType, ref, context),
        );
        if (viaField) {
          return viaField;
        }
      }
    }
  }

  // Shared by Strategy 1 and 2: find `methodName` on a same-language
  // class/struct/interface called `className`. Classes in the call site's own
  // file are tried first — otherwise the first-indexed class wins and a call
  // in `b/` resolves to `a/`'s method (#1079).
  const findMethodOnClassNamed = (className: string) => {
    const classNodes = preferCallSiteFile(context.getNodesByName(className), ref.filePath);
    for (const classNode of classNodes) {
      const isTypeNode =
        classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface';
      // Skip non-types and cross-language class matches.
      if (!isTypeNode || classNode.language !== ref.language) continue;

      const methodNode = context
        .getNodesInFile(classNode.filePath)
        .find(
          (n) =>
            n.kind === 'method' &&
            n.name === methodName &&
            n.qualifiedName.includes(classNode.name)
        );
      if (methodNode) return methodNode;
    }
    return null;
  };

  // Strategy 1: the receiver itself names a class (`Logger.log()` /
  // `Logger::log()`).
  const directHit = findMethodOnClassNamed(receiver);
  if (directHit) {
    return {
      original: ref,
      targetNodeId: directHit.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  // Strategy 2: instance-variable receiver — try its capitalized form as a
  // class name, e.g. "permissionEngine" → "PermissionEngine".
  const capitalizedReceiver = receiver.charAt(0).toUpperCase() + receiver.slice(1);
  if (capitalizedReceiver !== receiver) {
    const capitalizedHit = findMethodOnClassNamed(capitalizedReceiver);
    if (capitalizedHit) {
      return {
        original: ref,
        targetNodeId: capitalizedHit.id,
        confidence: 0.8,
        resolvedBy: 'instance-method',
      };
    }
  }

  // Strategy 3: find methods by name across the codebase and match by the
  // receiver name's similarity to the containing class. Handles abbreviated
  // variable names like permissionEngine → PermissionRuleEngine.
  const methodCandidates = context.getNodesByName(methodName);

  // Ubiquitous-method ceiling (#999): a method name re-declared across a
  // vendored theme/SDK (Metronic's `init`/`update`/… on every widget) yields K
  // candidates that receiver-word overlap can't reliably disambiguate — and
  // filtering + scoring all K per call is the O(K²) cost that wedged
  // "Resolving refs" for 15-28 min. Bail before the O(K) work; Strategy 1/2
  // (class-name match) already had their precise shot above.
  if (methodCandidates.length > AMBIGUOUS_NAME_CEILING) {
    return null;
  }

  const methods = methodCandidates.filter(
    (n) => n.kind === 'method' && n.name === methodName
  );
  // Prefer same-language candidates; fall back to all of them otherwise.
  const sameLanguageMethods = methods.filter((m) => m.language === ref.language);
  const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;

  // A single same-language method with this name: use it.
  if (targetMethods.length === 1 && targetMethods[0]!.language === ref.language) {
    return {
      original: ref,
      targetNodeId: targetMethods[0]!.id,
      confidence: 0.7,
      resolvedBy: 'instance-method',
    };
  }
  if (targetMethods.length <= 1) {
    return null;
  }

  // Several methods: score each by how many receiver words also occur
  // (case-insensitively) in its qualified name.
  const receiverWords = splitCamelCase(receiver);
  let bestMatch: typeof targetMethods[0] | undefined;
  let bestScore = 0;
  // Same-file candidates first, so a score tie (`score > bestScore` keeps the
  // first seen) resolves to the call site's own file rather than the
  // first-indexed duplicate (#1079).
  for (const candidate of preferCallSiteFile(targetMethods, ref.filePath)) {
    const classWords = new Set(
      splitCamelCase(candidate.qualifiedName).map((word) => word.toLowerCase())
    );
    const overlap = receiverWords.filter((word) => classWords.has(word.toLowerCase())).length;
    // Bonus for same language.
    const score = candidate.language === ref.language ? overlap + 1 : overlap;
    if (score > bestScore) {
      bestScore = score;
      bestMatch = candidate;
    }
  }

  if (bestMatch && bestScore >= 2) {
    return {
      original: ref,
      targetNodeId: bestMatch.id,
      confidence: 0.65,
      resolvedBy: 'instance-method',
    };
  }
  return null;
}
/**
 * Break a camelCase / PascalCase identifier (optionally qualified with `.`,
 * `_`, `:`, `/` or `\`) into its words. Acronym runs stay together
 * (`HTTPServer` → `HTTP`, `Server`); single-character pieces are dropped.
 */
function splitCamelCase(str: string): string[] {
  // Insert a space at every lower→Upper boundary, then between an acronym run
  // and the capitalized word that follows it.
  const spaced = str
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');

  const words: string[] = [];
  for (const piece of spaced.split(/[\s._:\/\\]+/)) {
    if (piece.length > 1) words.push(piece);
  }
  return words;
}
/**
 * Directory proximity between an already-split directory list (`filePath1`
 * without its filename) and a second file path. The score grows with the
 * number of shared LEADING directory segments; higher means closer in the
 * directory tree.
 *
 * This pre-split variant exists because findBestMatch scores every candidate
 * against the SAME `ref.filePath`; re-splitting that path per candidate was a
 * hot spot on large repos (#915), so the caller splits once and passes the
 * segments in.
 *
 * @param dir1 Directory segments of the first path (filename removed).
 * @param filePath2 Second file path, `/`-separated, including its filename.
 * @returns 15 points per shared leading segment, capped at 80.
 */
function pathProximityFromDirs(dir1: string[], filePath2: string): number {
  // Drop the filename so both sides hold directory segments only.
  const otherDirs = filePath2.split('/').slice(0, -1);
  const comparable = Math.min(dir1.length, otherDirs.length);

  let shared = 0;
  while (shared < comparable && dir1[shared] === otherDirs[shared]) {
    shared += 1;
  }
  return Math.min(shared * 15, 80);
}
/**
 * Score the directory proximity of two full file paths.
 *
 * Convenience wrapper around pathProximityFromDirs for callers that have not
 * pre-split the first path.
 *
 * @param filePath1 - First `/`-separated file path.
 * @param filePath2 - Second `/`-separated file path.
 * @returns The pathProximityFromDirs score for the two paths' directories.
 */
function computePathProximity(filePath1: string, filePath2: string): number {
  // Strip the filename segment, leaving only the directories.
  const firstDirs = filePath1.split('/').slice(0, -1);
  return pathProximityFromDirs(firstDirs, filePath2);
}
/**
 * Pick the highest-scoring node when a reference has several candidates.
 *
 * Score components, added in this order:
 *   1. +100 if the candidate is in the same file as the reference.
 *   2. 0–80 directory proximity (pathProximityFromDirs).
 *   3. +50 for the same language, −80 for a different one.
 *   4. A kind bonus chosen by `ref.referenceKind`:
 *        - `calls`        → +25 for a function or method
 *        - `instantiates` → +25 for a class, struct or interface, so a
 *                           same-named function elsewhere cannot outscore the
 *                           actual class behind `new Foo()`
 *        - `decorates`    → +25 for a function or method, +15 for a class or
 *                           interface (class decorators and annotation
 *                           interfaces also resolve here, with a smaller bonus)
 *   5. +10 if the candidate is exported.
 *   6. For same-file candidates with a truthy `startLine`: up to +20, reduced
 *      by 0.1 per line of distance from `ref.line`, never below 0.
 *
 * Ties keep the earliest candidate. The running best starts at −1, so a
 * candidate whose total does not exceed −1 is never chosen; the function can
 * therefore return null even when `candidates` is non-empty.
 *
 * @param ref - The unresolved reference being matched.
 * @param candidates - Nodes that could be the reference's target.
 * @param _context - Unused by this function.
 * @returns The best-scoring candidate, or null when none qualifies.
 */
function findBestMatch(
  ref: UnresolvedRef,
  candidates: Node[],
  _context: ResolutionContext
): Node | null {
  // The ref's directories are identical for every candidate, so split them
  // once here instead of once per candidate (#915 hot spot).
  const refDirs = ref.filePath.split('/').slice(0, -1);

  // A same-language candidate scores at least +50, while a cross-language one
  // tops out at +35 (−80 language, +80 proximity, +25 kind, +10 exported) as
  // long as it is not in the ref's own file. So whenever a same-language
  // candidate exists, cross-language ones cannot win and are skipped without
  // scoring (#915). When every candidate is cross-language none are skipped.
  const sameLanguageExists = candidates.some((c) => c.language === ref.language);

  let winner: Node | null = null;
  let winnerScore = -1;

  for (const candidate of candidates) {
    const sameLanguage = candidate.language === ref.language;
    if (sameLanguageExists && !sameLanguage) {
      continue;
    }

    const sameFile = candidate.filePath === ref.filePath;
    const isFunctionLike = candidate.kind === 'function' || candidate.kind === 'method';

    let score = sameFile ? 100 : 0;
    score += pathProximityFromDirs(refDirs, candidate.filePath);
    score += sameLanguage ? 50 : -80;

    switch (ref.referenceKind) {
      case 'calls':
        if (isFunctionLike) {
          score += 25;
        }
        break;
      case 'instantiates':
        if (
          candidate.kind === 'class' ||
          candidate.kind === 'struct' ||
          candidate.kind === 'interface'
        ) {
          score += 25;
        }
        break;
      case 'decorates':
        if (isFunctionLike) {
          score += 25;
        } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
          score += 15;
        }
        break;
      default:
        break;
    }

    if (candidate.isExported) {
      score += 10;
    }

    // Within the same file, a definition nearer the reference scores higher.
    if (sameFile && candidate.startLine) {
      const lineGap = Math.abs(candidate.startLine - ref.line);
      score += Math.max(0, 20 - lineGap / 10);
    }

    // Strictly greater: on a tie the earlier candidate stays the winner.
    if (score > winnerScore) {
      winnerScore = score;
      winner = candidate;
    }
  }

  return winner;
}
/**
 * Last-resort, case-insensitive name match with low confidence.
 *
 * Looks up nodes by the lower-cased reference name, keeps only functions,
 * methods and classes, passes them through applyLanguageGate, and then
 * prefers same-language nodes when any exist. A result is produced only when
 * exactly one node remains; anything ambiguous yields null.
 *
 * @param ref - The unresolved reference to match.
 * @param context - Resolution context providing the lower-case name index.
 * @returns A `'fuzzy'` resolution with confidence 0.5 (same language) or 0.3
 *   (cross-language), or null when there is no unique candidate.
 */
export function matchFuzzy(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Pre-built lowercase index: a direct lookup instead of scanning all nodes.
  const nameHits = context.getNodesByLowerName(ref.referenceName.toLowerCase());

  const fuzzyKinds = new Set(['function', 'method', 'class']);
  // NOTE(review): applyLanguageGate is defined outside this block; its exact
  // filtering rule is not visible here.
  const gated = applyLanguageGate(
    nameHits.filter((node) => fuzzyKinds.has(node.kind)),
    ref
  );

  // Narrow to the ref's own language when that leaves anything at all.
  const sameLanguage = gated.filter((node) => node.language === ref.language);
  const pool = sameLanguage.length > 0 ? sameLanguage : gated;

  // Only an unambiguous single survivor is accepted.
  if (pool.length !== 1) {
    return null;
  }

  const only = pool[0]!;
  return {
    original: ref,
    targetNodeId: only.id,
    confidence: only.language !== ref.language ? 0.3 : 0.5,
    resolvedBy: 'fuzzy',
  };
}
/**
 * Resolve a reference by trying each matching strategy in descending order of
 * confidence and returning the first hit.
 *
 * Order:
 *   - `function_ref` references go exclusively through matchFunctionRef.
 *   - 0.  File path match (e.g. "snippets/drawer-menu.liquid" → file node).
 *   - 1.  Qualified name match.
 *   - 1b–1d. One language-specific call-chain matcher (see the switch below).
 *   - 2.  Method call pattern.
 *   - 3.  Exact name match.
 *   - 4.  Fuzzy match.
 *
 * @param ref - The unresolved reference to match.
 * @param context - Resolution context handed to every strategy.
 * @returns The first successful resolution, or null when every strategy fails.
 */
export function matchReference(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Function-as-value refs (#756) never fall through to the qualified/fuzzy
  // strategies: a wrong callback edge is worse than no edge.
  if (ref.referenceKind === 'function_ref') {
    return matchFunctionRef(ref, context);
  }

  // 0 + 1: file path first, then qualified name.
  const direct = matchByFilePath(ref, context) ?? matchByQualifiedName(ref, context);
  if (direct) {
    return direct;
  }

  // 1b–1d: a language belongs to at most one chained-call family.
  let chained: ResolvedRef | null = null;
  switch (ref.language) {
    // 1b. C/C++ call whose receiver is another call: `Foo::instance().bar()`,
    // encoded by the extractor as `Foo::instance().bar` (#645). The receiver's
    // type comes from what the inner call returns.
    case 'cpp':
    case 'c':
      chained = matchCppCallChain(ref, context);
      break;

    // 1c. `::`-scoped factory chain: PHP `Cls::for($x)->method()` (#608) or
    // Rust `Foo::new().bar()`, both encoded as `Cls::factory().method`. The
    // receiver's type is the factory's `self`/`static`/`Self` or its concrete
    // return type.
    case 'php':
    case 'rust':
      chained = matchScopedCallChain(ref, context);
      break;

    // 1d. Dotted static-factory / fluent chain: `Foo.getInstance().bar()`
    // encoded as `Foo.getInstance().bar`; Go's bare factory `New().Method()`;
    // Scala companion factories; Dart static factories and factory
    // constructors; ObjC `[[Foo create] doIt]` encoded as `Foo.create().doIt`
    // (#645/#608 mechanism). Pascal references take this path as well. The
    // method's class is taken from the inner call's declared return type.
    case 'java':
    case 'kotlin':
    case 'csharp':
    case 'swift':
    case 'go':
    case 'scala':
    case 'dart':
    case 'objc':
    case 'pascal':
      chained = matchDottedCallChain(ref, context);
      break;

    default:
      break;
  }
  if (chained) {
    return chained;
  }

  // 2 → 3 → 4: method-call pattern, exact name, then fuzzy as the last resort.
  return (
    matchMethodCall(ref, context) ??
    matchByExactName(ref, context) ??
    matchFuzzy(ref, context)
  );
}
|