| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340 |
- /**
- * Name Matcher
- *
- * Handles symbol name matching for reference resolution.
- */
- import { Node } from '../types';
- import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
/**
 * Try to resolve a file-like reference (e.g. "snippets/drawer-menu.liquid", or
 * a bare `Foo.h` from an Objective-C `#import "Foo.h"`) to a `file` node.
 *
 * Strategies, tried in order:
 *  1. exact match of the reference against a node's `qualifiedName` or
 *     `filePath` (confidence 0.95);
 *  2. path-suffix match anchored on a `/` segment boundary, the closest file
 *     winning when several match (confidence 0.85);
 *  3. the only file node carrying that basename (confidence 0.7).
 *
 * @param ref - the unresolved reference; `referenceName` is the path or file name.
 * @param context - lookup access to indexed nodes (`getNodesByName`).
 * @returns the resolved reference, or `null` when the name is not file-like or
 *   no strategy produced a match.
 */
export function matchByFilePath(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const refName = ref.referenceName;

  // Path-like (`a/b.liquid`) OR a bare filename ending in a short extension
  // (`Foo.h`). A bare ref WITHOUT an extension is a symbol name, not a file, so
  // leave it to the symbol-matching strategies.
  if (!refName.includes('/') && !/\.[A-Za-z][A-Za-z0-9]{0,3}$/.test(refName)) {
    return null;
  }

  // File nodes are looked up by basename.
  const fileName = refName.split('/').pop();
  if (!fileName) return null;

  const fileNodes = context.getNodesByName(fileName).filter((n) => n.kind === 'file');
  if (fileNodes.length === 0) return null;

  // Prefer an exact path match.
  const exactMatch = fileNodes.find(
    (n) => n.qualifiedName === refName || n.filePath === refName
  );
  if (exactMatch) {
    return {
      original: ref,
      targetNodeId: exactMatch.id,
      confidence: 0.95,
      resolvedBy: 'file-path',
    };
  }

  // Fall back to a suffix match (ref="snippets/foo.liquid" matches
  // "src/snippets/foo.liquid"). The suffix must start on a path-segment
  // boundary: a raw `endsWith` would let "b/foo.h" match "src/ab/foo.h", which
  // is a different directory. When several files match, prefer the one closest
  // to the referencing file.
  const anchoredSuffix = refName.startsWith('/') ? refName : `/${refName}`;
  const endsOnSegment = (path: string): boolean =>
    path === refName || path.endsWith(anchoredSuffix);
  const suffixMatches = fileNodes.filter(
    (n) => endsOnSegment(n.qualifiedName) || endsOnSegment(n.filePath)
  );
  if (suffixMatches.length > 0) {
    return {
      original: ref,
      targetNodeId: pickClosestFileNode(suffixMatches, ref).id,
      confidence: 0.85,
      resolvedBy: 'file-path',
    };
  }

  // If only one file node has this name, use it with lower confidence.
  if (fileNodes.length === 1) {
    return {
      original: ref,
      targetNodeId: fileNodes[0]!.id,
      confidence: 0.7,
      resolvedBy: 'file-path',
    };
  }

  return null;
}
/**
 * Choose, among file nodes that all match an include/import by basename, the
 * one nearest the referencing file. Candidates living in the referencing
 * file's own directory are preferred when any exist; within the chosen pool the
 * winner is the highest `computePathProximity` score, with +5 for sharing the
 * reference's language family. Ties keep the earliest candidate.
 */
function pickClosestFileNode(candidates: Node[], ref: UnresolvedRef): Node {
  const parentDir = (path: string): string => {
    const slash = path.lastIndexOf('/');
    return slash < 0 ? '' : path.slice(0, slash);
  };

  const originDir = parentDir(ref.filePath);
  const sameDirectory = candidates.filter((node) => parentDir(node.filePath) === originDir);
  const contenders = sameDirectory.length === 0 ? candidates : sameDirectory;

  let winner = contenders[0]!;
  let topScore = -Infinity;
  contenders.forEach((node) => {
    const proximity = computePathProximity(ref.filePath, node.filePath);
    const familyBonus = sameLanguageFamily(node.language, ref.language) ? 5 : 0;
    const total = proximity + familyBonus;
    // Strictly greater: the first candidate with the top score is kept.
    if (total > topScore) {
      topScore = total;
      winner = node;
    }
  });
  return winner;
}
/**
 * Language families that share a type system / runtime, so a same-language-only
 * reference may still resolve across them (a Kotlin `Foo.BAR` can name a Java
 * `Foo`). Anything not listed forms its own singleton family.
 *
 * Built on a null-prototype object so that a lookup with an arbitrary language
 * tag never hits an inherited `Object.prototype` member (`constructor`,
 * `toString`, …) and gets mistaken for a known family.
 */
const LANGUAGE_FAMILY: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    java: 'jvm', kotlin: 'jvm', scala: 'jvm',
    swift: 'apple', objc: 'apple',
    typescript: 'web', tsx: 'web', javascript: 'web', jsx: 'web',
    c: 'c', cpp: 'c',
    // Razor/Blazor markup names C# types — same family so `@model Foo` /
    // `<MyComponent/>` resolve to their `.cs` class through the cross-family gate.
    csharp: 'dotnet', razor: 'dotnet',
  },
);
/**
 * True when `a` and `b` are the same language, or both are listed in
 * `LANGUAGE_FAMILY` under the same family. An unlisted language only ever
 * matches itself.
 */
export function sameLanguageFamily(a: string, b: string): boolean {
  if (a === b) return true;
  const familyOfA = LANGUAGE_FAMILY[a];
  if (familyOfA === undefined) return false;
  return LANGUAGE_FAMILY[b] === familyOfA;
}
/**
 * True when `lang` has an entry in `LANGUAGE_FAMILY`, i.e. belongs to one of
 * the known multi-language families (jvm/apple/web/c/dotnet). Languages not
 * listed (php, python, go, ruby, rust, dart, …) and config formats
 * (yaml/xml/blade) form their own singleton families and return `false` — used
 * to leave config↔code framework bridges (whose config side is never a known
 * programming-language family) out of the cross-family gate.
 */
export function isKnownLanguageFamily(lang: string): boolean {
  const family: string | undefined = LANGUAGE_FAMILY[lang];
  return family !== undefined;
}
/**
 * True when `a` and `b` belong to two DIFFERENT *known* language families — the
 * signature of a coincidental cross-language name collision (a TS `import
 * React` matching a Swift `import React`, a C++ `#include "X.h"` matching a
 * same-named ObjC header on another platform). Requiring both sides to be known
 * makes this deliberately weaker than the negation of
 * {@link sameLanguageFamily}: a single-file-component language with its own tag
 * (`vue`/`svelte`) importing a `.ts` module, or any singleton-family language
 * (php/go/ruby/…), yields `false` and is left alone.
 */
export function crossesKnownFamily(a: string, b: string): boolean {
  // If either side is outside the known families there is nothing to cross.
  if (!isKnownLanguageFamily(a)) return false;
  if (!isKnownLanguageFamily(b)) return false;
  return !sameLanguageFamily(a, b);
}
/**
 * Drop cross-language candidates from a name lookup, depending on the
 * reference kind:
 *   - `references` / `function_ref`: strict same-family filter. A type (or
 *     function value) named in language X resolves to a SAME-family symbol,
 *     never a coincidentally same-named one in another language (the Android
 *     `BatteryManager` system class vs a JS one).
 *   - `imports`: weaker both-known filter. An `import`/`#include` never crosses
 *     two KNOWN families (TS `import React` ↮ Swift `import React`), but
 *     `.vue`/`.svelte` (own tag) importing `.ts` survives.
 *   - anything else: candidates are returned untouched (same array).
 */
function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
  switch (ref.referenceKind) {
    case 'references':
    case 'function_ref':
      return candidates.filter((node) => sameLanguageFamily(node.language, ref.language));
    case 'imports':
      return candidates.filter((node) => !crossesKnownFamily(node.language, ref.language));
    default:
      return candidates;
  }
}
/**
 * Resolve a function-as-value reference (#756) — a function name used as a
 * callback/function-pointer value (`register(handler)`, `o->cb = handler`,
 * `{ .cb = handler }`, `signal(SIGINT, handler)`). The ONLY strategy allowed
 * for `function_ref` refs: exact name, function/method targets only, same
 * language family, same-file first, and cross-file only when the match is
 * UNIQUE. No fuzzy fallback, no qualified-name walking — a wrong callback
 * edge is worse than none.
 *
 * Confidence: 0.95 for a single same-file definition, 0.9 for same-file
 * overloads and for `::`-qualified members, 0.8 for a unique cross-file match.
 */
export function matchFunctionRef(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const refName = ref.referenceName;

  // `this.<member>` refs are resolved ONLY by the class-scoped resolver in
  // resolveOne (resolveThisMemberFnRef) — never by name matching here.
  if (refName.startsWith('this.')) return null;

  const resolved = (target: Node, confidence: number): ResolvedRef => ({
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy: 'function-ref',
  });
  // Deterministic pick among same-named definitions: lowest start line, first on ties.
  const earliest = (nodes: Node[]): Node =>
    nodes.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
  const inRefFile = (n: Node): boolean => n.filePath === ref.filePath;
  // Everything before the last `::` of a qualified name, or null when there is
  // no (non-empty) enclosing scope.
  const scopeOf = (qualifiedName: string | undefined): string | null => {
    if (qualifiedName === undefined) return null;
    const cut = qualifiedName.lastIndexOf('::');
    return cut > 0 ? qualifiedName.slice(0, cut) : null;
  };

  // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
  // resolve the member ON THAT SCOPE — exempt from the bare-function-only rule
  // below (the `&Cls::m` shape is an explicit member reference).
  // Unique-or-drop like everything else.
  const lastScopeSep = refName.lastIndexOf('::');
  if (lastScopeSep !== -1) {
    const memberName = refName.slice(lastScopeSep + 2);
    const qualifiedTail = `::${refName}`;
    const scoped = context.getNodesByName(memberName).filter((n) => {
      if (n.kind !== 'function' && n.kind !== 'method') return false;
      if (!sameLanguageFamily(n.language, ref.language)) return false;
      if (n.id === ref.fromNodeId) return false;
      return n.qualifiedName === refName || n.qualifiedName.endsWith(qualifiedTail);
    });
    if (scoped.length === 0) return null;
    const localScoped = scoped.filter(inRefFile);
    if (localScoped.length > 0) return resolved(earliest(localScoped), 0.9);
    // Cross-file: only an unambiguous member resolves.
    return scoped.length === 1 ? resolved(scoped[0]!, 0.9) : null;
  }

  // In JS/TS/Python a bare identifier can never be a method value (methods
  // are only reachable through a receiver — `this.m` / `self.m` / `Cls.m`),
  // so bare fn-refs match FUNCTIONS only. This also sidesteps the TS quirk of
  // class fields extracting as method-kind nodes, which otherwise soaked up
  // local names passed as arguments. C++ likewise: a bare identifier can only
  // be a FREE function (member values need `&Cls::method`). PHP string
  // callables name global FUNCTIONS (methods need the `[$obj, 'm']` array
  // form). Other languages keep method targets: C# method groups, Swift/Dart
  // implicit-self, Java/Kotlin method references.
  const bareFnOnly = ['typescript', 'tsx', 'javascript', 'jsx', 'cpp', 'python', 'php']
    .includes(ref.language);

  let candidates = context.getNodesByName(refName).filter((n) => {
    const kindAllowed = n.kind === 'function' || (!bareFnOnly && n.kind === 'method');
    return (
      kindAllowed &&
      sameLanguageFamily(n.language, ref.language) &&
      n.id !== ref.fromNodeId // a function registering itself is not a dependency edge
    );
  });
  if (candidates.length === 0) return null;

  // Swift implicit-self: a bare identifier can name a METHOD only of the
  // ENCLOSING type (`Button(action: handleTap)` written inside that type) —
  // a same-named method on any OTHER class is a parameter collision. Scope
  // method candidates to the from-symbol's type; top-level code has no
  // implicit self, so method targets are excluded there entirely. Free
  // functions are unaffected.
  if (ref.language === 'swift' && candidates.some((n) => n.kind === 'method')) {
    const enclosingScope = scopeOf(context.getNodeById?.(ref.fromNodeId)?.qualifiedName);
    candidates = candidates.filter((n) => {
      if (n.kind !== 'method') return true;
      if (enclosingScope === null) return false;
      const ownerScope = scopeOf(n.qualifiedName);
      if (ownerScope === null) return false;
      // Accept exact-scope matches plus suffix relationships either way, so
      // extension-declared members (`Holder::m`) still match a nested
      // from-scope (`Module::Holder::wire`) and vice versa.
      return (
        ownerScope === enclosingScope ||
        ownerScope.endsWith(`::${enclosingScope}`) ||
        enclosingScope.endsWith(`::${ownerScope}`)
      );
    });
    if (candidates.length === 0) return null;
  }

  // Same-file definition wins (e.g. a static callback registered in a
  // same-file ops struct).
  const sameFile = candidates.filter(inRefFile);
  if (sameFile.length > 0) {
    // Swift: several same-named METHODS in one file is an API overload family
    // (`Session.request(...)` × N), and a bare identifier hitting it is almost
    // always a same-named parameter, not a method value — refuse rather than
    // guess. A single method (SwiftUI's `action: handleTap`) still resolves.
    const swiftOverloadFamily =
      ref.language === 'swift' &&
      sameFile.length > 1 &&
      sameFile.every((n) => n.kind === 'method');
    if (swiftOverloadFamily) return null;

    // Same-name overloads in one file are the same conceptual symbol; pick
    // the first by position for determinism.
    return resolved(earliest(sameFile), sameFile.length === 1 ? 0.95 : 0.9);
  }

  // Cross-file (imported names the import resolver didn't already claim):
  // only an unambiguous match resolves.
  return candidates.length === 1 ? resolved(candidates[0]!, 0.8) : null;
}
/**
 * Try to resolve a reference by exact name match.
 *
 * Candidates with the reference's name are first passed through the language
 * gate. A sole survivor resolves at 0.9, or 0.5 when its language differs from
 * the reference's. With several survivors, `findBestMatch` picks one and the
 * confidence reflects path proximity to the referencing file (0.7 when the
 * proximity score is at least 30, otherwise 0.4).
 */
export function matchByExactName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const survivors = applyLanguageGate(context.getNodesByName(ref.referenceName), ref);
  const answer = (node: Node, confidence: number): ResolvedRef => ({
    original: ref,
    targetNodeId: node.id,
    confidence,
    resolvedBy: 'exact-match',
  });

  if (survivors.length === 0) return null;

  // A single match is used directly — but cross-language matches are penalized.
  if (survivors.length === 1) {
    const only = survivors[0]!;
    return answer(only, only.language === ref.language ? 0.9 : 0.5);
  }

  // Several matches — try to narrow down.
  const chosen = findBestMatch(ref, survivors, context);
  if (!chosen) return null;

  // Lower confidence when the match is from a distant/unrelated module.
  const isNearby = computePathProximity(ref.filePath, chosen.filePath) >= 30;
  return answer(chosen, isNearby ? 0.7 : 0.4);
}
/**
 * Try to resolve a reference by qualified name.
 *
 * Only names that look qualified (contain `::` or `.`) are considered. A unique
 * hit from `getNodesByQualifiedName` resolves at 0.95. Otherwise nodes named
 * like the last segment are scanned and the first whose `qualifiedName` ends
 * with the reference ON AN IDENTIFIER BOUNDARY resolves at 0.85 — `b::c`
 * matches `ns::b::c` but not `ab::c`, which names a different scope.
 *
 * @returns the resolved reference, or `null` when nothing matched.
 */
export function matchByQualifiedName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const refName = ref.referenceName;

  // Check if the reference name looks qualified (contains :: or .)
  if (!refName.includes('::') && !refName.includes('.')) {
    return null;
  }

  const exact = context.getNodesByQualifiedName(refName);
  if (exact.length === 1) {
    return {
      original: ref,
      targetNodeId: exact[0]!.id,
      confidence: 0.95,
      resolvedBy: 'qualified-name',
    };
  }

  // Try partial qualified name match, looked up by the last segment.
  const segments = refName.split(/[:.]/);
  const lastName = segments[segments.length - 1];
  if (!lastName) return null;

  const identifierChar = /[\p{L}\p{N}_$]/u;
  // A reference that itself starts with a separator already carries its boundary.
  const needsBoundary = identifierChar.test(refName.charAt(0));

  for (const candidate of context.getNodesByName(lastName)) {
    const qualified = candidate.qualifiedName;
    if (!qualified.endsWith(refName)) continue;
    const suffixStart = qualified.length - refName.length;
    // Reject a suffix that begins mid-identifier (`ab::c` for ref `b::c`).
    if (
      needsBoundary &&
      suffixStart > 0 &&
      identifierChar.test(qualified.charAt(suffixStart - 1))
    ) {
      continue;
    }
    return {
      original: ref,
      targetNodeId: candidate.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  return null;
}
/**
 * Resolve `methodName` as a method of `typeName`, validated against the index:
 * a method node in the reference's language must exist whose `qualifiedName`
 * is `<typeName>::<methodName>` (optionally preceded by further `::` scopes).
 * When the type itself has no such method, its supertypes are tried through
 * `context.getSupertypes` (when the context provides it), up to four levels.
 *
 * @returns a resolved reference carrying the given `confidence`/`resolvedBy`,
 *   or `null` when no matching method exists.
 */
function resolveMethodOnType(
  typeName: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  confidence: number,
  resolvedBy: ResolvedRef['resolvedBy'],
  /**
   * Optional FQN that identifies WHICH class declaration `typeName`
   * refers to in the caller's file. When multiple candidates share
   * the same qualifiedName (`FooConverter::convert` in both
   * `dao/converter/` and `service/converter/`), the FQN's
   * file-path-suffix picks the right one — the disambiguation
   * signal Java imports carry but the call site doesn't (#314).
   */
  preferredFqn?: string,
  /** Recursion guard for the supertype/conformance walk. */
  depth = 0,
): ResolvedRef | null {
  // Look up methods by name and match by qualifiedName ending in
  // `<typeName>::<methodName>`. This works whether the method is defined
  // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate
  // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp).
  const want = `${typeName}::${methodName}`;
  const matches: Node[] = context.getNodesByName(methodName).filter(
    (m) =>
      m.kind === 'method' &&
      m.language === ref.language &&
      (m.qualifiedName === want || m.qualifiedName.endsWith(`::${want}`)),
  );

  if (matches.length === 0) {
    // Conformance fallback: the method may be defined on a supertype `typeName`
    // extends, or on a protocol / trait it conforms to. Walk supertypes
    // transitively (depth-capped). Still VALIDATED (the method must exist on a
    // supertype), so a wrong inference produces no edge.
    if (depth < 4 && context.getSupertypes) {
      for (const supertype of context.getSupertypes(typeName, ref.language)) {
        const via = resolveMethodOnType(
          supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
        );
        if (via) return via;
      }
    }
    return null;
  }

  if (matches.length > 1 && preferredFqn) {
    const ext = ref.language === 'kotlin' ? '.kt' : '.java';
    const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
    const chosen = matches.find((m) => {
      const fp = m.filePath.replace(/\\/g, '/');
      // Anchor on a directory boundary: an unanchored `endsWith(fqnPath)` would
      // let `service/converter/Foo.java` also match
      // `microservice/converter/Foo.java`, a different package.
      return fp === fqnPath || fp.endsWith('/' + fqnPath);
    });
    if (chosen) {
      return {
        original: ref,
        targetNodeId: chosen.id,
        confidence,
        resolvedBy,
      };
    }
  }

  // No FQN hint (or it matched no file): fall back to the first match.
  return {
    original: ref,
    targetNodeId: matches[0]!.id,
    confidence,
    resolvedBy,
  };
}
// C++ keywords/control-flow tokens that can appear right before a receiver
// (e.g. `return ptr->m()`) and must NOT be treated as a type.
const CPP_NON_TYPE_TOKENS = new Set([
  'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  'break', 'continue', 'goto', 'throw', 'new', 'delete', 'co_await', 'co_yield',
  'co_return', 'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
  'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
]);

/**
 * Reduce a C++ declarator type (`const ns::Widget*`, `std::vector<Foo>&`) to
 * its bare class name: qualifiers, pointer/reference markers and template
 * arguments are dropped and only the last `::` segment is kept.
 *
 * @returns the bare name, or `null` when nothing is left or the remaining
 *   token is a keyword from `CPP_NON_TYPE_TOKENS` rather than a type.
 */
function normalizeCppTypeName(typeName: string): string | null {
  let stripped = typeName
    .replace(/\b(const|volatile|mutable|typename|class|struct)\b/g, ' ')
    .replace(/[&*]+/g, ' ');

  // Remove template argument lists innermost-first until none are left. A
  // single `<[^>]*>` pass stops at the first `>` and leaves a stray `>` behind
  // for nested arguments (`map<string, vector<int>>` → `map >`).
  let previous: string;
  do {
    previous = stripped;
    stripped = stripped.replace(/<[^<>]*>/g, ' ');
  } while (stripped !== previous);

  const normalized = stripped.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  const parts = normalized.split(/::/).filter(Boolean);
  const last = parts[parts.length - 1];
  if (!last) return null;
  if (CPP_NON_TYPE_TOKENS.has(last)) return null;
  return last;
}
// Builds the declarator pattern for one receiver name. It matches
// `Type receiver`, `Type* receiver`, `Type *receiver`, `Type*receiver`,
// `Type<X> receiver`, etc., and captures the type text in group 1. A declarator
// terminator (`;`, `=`, `,`, `)`, `[`, `{`, `(`, or end of input) is REQUIRED
// after the receiver. `escapedReceiver` must already be regex-escaped.
function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  const typeHead = '[A-Za-z_][\\w:]*';
  const templateArgs = '(?:\\s*<[^;=(){}]+>)?';
  const pointerOrRef = '(?:\\s*[*&]+)?';
  const terminatorAhead = '(?=[;=,)\\[{(]|$)';
  const pattern =
    '(' + typeHead + templateArgs + pointerOrRef + ')' +
    '\\s*\\b' + escapedReceiver + '\\b\\s*' +
    terminatorAhead;
  return new RegExp(pattern);
}
/**
 * Infer the declared type of `receiverName` at a C/C++ call site by scanning
 * source text for a declarator (`Type receiver`, `Type* receiver`, …).
 *
 * The referencing file is scanned upward from the reference's line; the first
 * declarator yielding a usable type wins. An `auto` declaration is resolved
 * from its initializer when possible, otherwise the scan continues upward. If
 * the file gives nothing, the same-stem `.h` / `.hpp` / `.hxx` headers that
 * exist are scanned top to bottom.
 *
 * @param depth - recursion depth, forwarded to the `auto` initializer inference.
 * @returns the bare type name, or `null` when the source is unavailable or no
 *   declaration was found.
 */
function inferCppReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  const source = context.readFile(ref.filePath);
  if (!source) return null;

  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
  const declarator = buildDeclaratorRegex(escapedReceiver);

  const sourceLines = source.split(/\r?\n/);
  // `ref.line` is 1-based; clamp into the file in case it is out of range.
  const startIndex = Math.max(0, Math.min(sourceLines.length - 1, ref.line - 1));

  for (let idx = startIndex; idx >= 0; idx -= 1) {
    const text = sourceLines[idx];
    if (!text || !mentionsReceiver.test(text)) continue;
    const hit = declarator.exec(text);
    if (!hit) continue;
    const typeName = normalizeCppTypeName(hit[1] ?? '');
    if (!typeName) continue;
    if (typeName !== 'auto') return typeName;
    // `auto x = Foo::instance();` — the declared type is deduced; recover it
    // from the initializer (call return type / construction) (#645).
    const deduced = inferCppAutoInitializerType(text, receiverName, ref, context, depth);
    if (deduced) return deduced;
    // No usable initializer on this line — keep scanning earlier ones.
  }

  // Nothing in the file itself: try the headers sharing its stem.
  const sourceExtension = /\.(?:c|cc|cpp|cxx)$/i;
  const headerPaths = ['.h', '.hpp', '.hxx']
    .map((headerExt) => ref.filePath.replace(sourceExtension, headerExt))
    .filter((path, index, all) => all.indexOf(path) === index && path !== ref.filePath);

  for (const headerPath of headerPaths) {
    if (!context.fileExists(headerPath)) continue;
    const headerSource = context.readFile(headerPath);
    if (!headerSource) continue;
    for (const text of headerSource.split(/\r?\n/)) {
      if (!mentionsReceiver.test(text)) continue;
      const hit = declarator.exec(text);
      if (!hit) continue;
      const typeName = normalizeCppTypeName(hit[1] ?? '');
      if (typeName && typeName !== 'auto') return typeName;
    }
  }
  return null;
}
/**
 * Last non-empty `::`-separated segment of a (possibly namespace-qualified)
 * C++ name; the input itself when it has no non-empty segment.
 */
function cppLastSegment(name: string): string {
  const pieces = name.split('::');
  for (let i = pieces.length - 1; i >= 0; i -= 1) {
    const piece = pieces[i];
    if (piece) return piece;
  }
  return name;
}
/**
 * Return type recorded on the indexed node for `Class::method` (or a free
 * function), read from the node's `returnType` — used by the C++ (#645) and PHP
 * (#608) chained-call resolvers. Only function/method nodes in the reference's
 * language that carry a return type are considered. Null when nothing suitable
 * is indexed.
 */
function lookupCalleeReturnType(
  callee: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  // Split a scoped callee into owning scope and method name.
  const segments = callee.includes('::') ? callee.split('::').filter(Boolean) : null;
  const method = segments ? (segments[segments.length - 1] ?? callee) : callee;
  const cls = segments ? segments.slice(0, -1).join('::') : null;

  const typed = context.getNodesByName(method).filter((n) => {
    if (n.kind !== 'method' && n.kind !== 'function') return false;
    if (n.language !== ref.language) return false;
    return !!n.returnType;
  });

  if (!cls) {
    // Unscoped callee: only a free function qualifies.
    const freeFunction = typed.find((n) => n.kind === 'function');
    return freeFunction?.returnType ?? null;
  }

  // The call site may name the class with MORE namespace qualification than
  // the stored node (`details::registry::instance` at the call vs
  // `registry::instance` on the node), or LESS. Accept an exact match or
  // either one being a `::`-suffix of the other.
  const want = `${cls}::${method}`;
  const scopedTail = `::${want}`;
  const owner = typed.find((n) => {
    const qualified = n.qualifiedName;
    return qualified === want || qualified.endsWith(scopedTail) || want.endsWith(`::${qualified}`);
  });
  return owner?.returnType ?? null;
}
/**
 * Does the graph contain a class or struct, in the reference's language, named
 * like the last `::` segment of `name`?
 */
function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
  const bareName = cppLastSegment(name);
  for (const node of context.getNodesByName(bareName)) {
    const isTypeNode = node.kind === 'class' || node.kind === 'struct';
    if (isTypeNode && node.language === ref.language) return true;
  }
  return false;
}
/**
 * Infer the class produced by a C++ call/construction expression, using return
 * types captured at extraction (#645). Handles, in order:
 *   - `make_unique<T>()` / `make_shared<T>()` → T (last `::` segment when T is
 *     namespace-qualified, e.g. `make_unique<ui::Widget>` → `Widget`)
 *   - single-level member call `recv.method()` → recv's type, then method's return
 *   - `Class::method()` / free `func()` → the callee's recorded return type
 *   - direct construction `Type()` / `ns::Type()` → Type
 * Returns null when undeterminable. Callers MUST still validate the outer method
 * exists on the result before creating an edge, so a wrong guess stays silent.
 *
 * @param inner - the callee expression, without its argument list.
 * @param depth - recursion depth; inference gives up beyond 3.
 */
function resolveCppCallResultType(
  inner: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth = 0,
): string | null {
  if (depth > 3) return null; // guard against pathological mutual recursion
  const expr = inner.trim();

  // Skip any namespace qualifiers on the template argument and capture the
  // class name itself. Capturing only the first identifier would yield the
  // namespace (`ui`) for `make_unique<ui::Widget>`.
  const make = expr.match(
    /(?:^|::)(?:make_unique|make_shared)\s*<\s*(?:::\s*)?(?:[A-Za-z_]\w*\s*::\s*)*([A-Za-z_]\w*)/,
  );
  if (make) return make[1] ?? null;

  // Single-level member call `recv.method` (the `manager.view().render()` shape).
  const dotIdx = expr.lastIndexOf('.');
  if (dotIdx > 0) {
    const recv = expr.slice(0, dotIdx);
    const method = expr.slice(dotIdx + 1);
    if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
    const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
    if (!recvType) return null;
    return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
  }

  const ret = lookupCalleeReturnType(expr, ref, context);
  if (ret) return ret;

  // Direct construction — the callee itself names a class/struct.
  if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
  return null;
}
/**
 * Recover the type of an `auto`-declared local from the initializer on its
 * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
 * `auto p = new W();`, `auto w = Widget();` (#645). Returns null when the line
 * has no `receiver = …` initializer or the initializer is neither a `new`
 * expression nor a call/construction.
 */
function inferCppAutoInitializerType(
  line: string,
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  depth: number,
): string | null {
  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const assignment = new RegExp(`\\b${escapedReceiver}\\b\\s*=\\s*([^;]+)`).exec(line);
  const captured = assignment ? assignment[1] : undefined;
  if (!captured) return null;
  const initializer = captured.trim();

  // `new ns::W(...)` — the allocated class is named directly.
  const allocation = /^new\s+([A-Za-z_][\w:]*)/.exec(initializer);
  if (allocation && allocation[1]) return cppLastSegment(allocation[1]);

  // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
  const invocation = /^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/.exec(initializer);
  const calleeText = invocation ? invocation[1] : undefined;
  if (!calleeText) return null;
  return resolveCppCallResultType(calleeText.replace(/\s+/g, ''), ref, context, depth + 1);
}
/**
 * Resolve a C++ chained call whose receiver is itself a call — encoded by the
 * extractor as `<innerCallee>().<method>` (#645). The receiver's type is
 * whatever the inner call returns; the outer method is then resolved and
 * VALIDATED on that type (resolveMethodOnType requires `cls::method` to exist),
 * so a wrong inference produces no edge rather than a wrong one. Resolves at
 * confidence 0.85.
 */
export function matchCppCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
  if (chain === null) return null;
  const [, innerCallee, outerMethod] = chain;
  if (!innerCallee || !outerMethod) return null;

  const receiverClass = resolveCppCallResultType(innerCallee, ref, context);
  if (!receiverClass) return null;
  return resolveMethodOnType(receiverClass, outerMethod, ref, context, 0.85, 'instance-method');
}
/**
 * Resolve a `::`-scoped factory chain whose receiver is a scoped/static call —
 * PHP `Cls::for($x)->method()` (#608) or Rust `Foo::new().bar()` — both encoded
 * by the extractor as `Cls::factory().method`. The receiver's type is what
 * `Cls::factory` returns: the `self` marker (PHP `: self`/`: static`, Rust
 * `-> Self`) stands for the factory's own class, anything else is taken as the
 * concrete type. The outer method is then resolved and VALIDATED on that type
 * by resolveMethodOnType, so a wrong inference yields no edge rather than a
 * wrong one. Resolves at confidence 0.85.
 */
export function matchScopedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const chain = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
  if (chain === null) return null;
  const [, factoryCall, outerMethod] = chain;
  if (!factoryCall || !outerMethod) return null;

  // Only static-factory (`Cls::method`) chains are handled here.
  const scopeSep = factoryCall.lastIndexOf('::');
  if (scopeSep === -1) return null;

  const returned = lookupCalleeReturnType(factoryCall, ref, context);
  if (!returned) return null;

  // `self` (the extractor's marker for self/static/$this) → the factory's class.
  const receiverClass = returned === 'self' ? factoryCall.slice(0, scopeSep) : returned;
  return resolveMethodOnType(receiverClass, outerMethod, ref, context, 0.85, 'instance-method');
}
/**
 * Languages in which an unprefixed, capitalized call `Foo(args)` yields a
 * `Foo`, so the receiver of `Foo(args).method()` has type `Foo`.
 *
 * - Java / C# are deliberately absent: they need `new`, so a bare `Foo()`
 *   there is a method call, not a construction.
 * - Scala: `Foo(args)` is a case-class / companion `apply`, which
 *   conventionally returns `Foo`. resolveMethodOnType validates the method, so
 *   a non-conventional `apply` returning another type simply yields no edge.
 * - Pascal/Delphi: `TFoo(x)` is a TYPECAST whose result is a `TFoo`, so
 *   `TFoo(x).method()` resolves on `TFoo` — same shape, same validation.
 */
const CONSTRUCTS_VIA_BARE_CALL = new Set<string>([
  'kotlin',
  'swift',
  'scala',
  'dart',
  'pascal',
]);
/**
 * Resolve a dotted chained call whose receiver is itself a call —
 * `Foo.getInstance().bar()`, encoded by the extractor as
 * `Foo.getInstance().bar` (#645/#608 mechanism).
 *
 * Two receiver shapes are handled:
 *
 * 1. Qualified receiver `Receiver.factory().method`: the receiver's type is
 *    the declared return type of `Receiver::factory`. When no return type was
 *    captured, Objective-C and Pascal fall back to the receiver class itself
 *    (see the inline notes).
 * 2. Bare receiver `inner().method`: for Go, `inner` is a package-level factory
 *    function (with a bare-name fallback); for the languages in
 *    CONSTRUCTS_VIA_BARE_CALL a capitalized `inner` is the class being built.
 *
 * In every typed path the outer method is resolved and VALIDATED by
 * resolveMethodOnType (`Type::method` must exist), so a wrong inference yields
 * no edge rather than a wrong one — e.g. a same-named `bar()` on an unrelated
 * class is never matched.
 *
 * @param ref - The unresolved reference (`<receiverCall>().<method>` shape).
 * @param context - Resolution context passed through to the lookups.
 * @returns The resolved reference, or `null` when nothing could be validated.
 */
export function matchDottedCallChain(
  ref: UnresolvedRef,
  context: ResolutionContext,
): ResolvedRef | null {
  const parsed = /^(.+)\(\)\.(\w+)$/.exec(ref.referenceName);
  const receiverCall = parsed?.[1]; // `Foo.getInstance`
  const outerMethod = parsed?.[2]; // `bar`
  if (!receiverCall || !outerMethod) return null;

  // Shared tail of every typed path: resolve `outerMethod` on `typeName`,
  // passing the caller file's imported FQN for that type as a disambiguator.
  const resolveOn = (typeName: string, confidence: number) =>
    resolveMethodOnType(
      typeName,
      outerMethod,
      ref,
      context,
      confidence,
      'instance-method',
      importedFqnOf(typeName, ref, context),
    );

  const dotAt = receiverCall.lastIndexOf('.');

  if (dotAt > 0) {
    // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
    // type is what `Receiver.factory` returns (its declared return type).
    const ownerClass = receiverCall.slice(0, dotAt).split('.').pop(); // simple class name
    const factoryName = receiverCall.slice(dotAt + 1);
    if (!ownerClass || !factoryName) return null;

    const declaredReturn = lookupCalleeReturnType(`${ownerClass}::${factoryName}`, ref, context);
    if (declaredReturn) {
      // A captured return type is authoritative: if the method is absent on it
      // we return no edge (absent-method safety) and never try the fallbacks.
      return resolveOn(declaredReturn, 0.85);
    }

    // No return type captured. Two languages treat the receiver class itself
    // as the result type:
    //
    // Objective-C: a class-message factory — `[X alloc]`, `[X new]`,
    // `[X sharedFoo]` — returns an instance of the RECEIVER class `X` by
    // convention (`instancetype`), and `alloc`/`new` aren't user-defined nodes
    // at all. This resolves the ubiquitous `[[X alloc] init]` and singleton
    // chains.
    //
    // Pascal/Delphi: the extractor only re-encodes a `TFoo`/`IFoo`-prefixed
    // chain (the type-naming convention), so the owner is always a real class
    // here. An uncaptured return type means a CONSTRUCTOR
    // (`TFileMem.Create().SetCachePerformance` — `constructor Create` has no
    // `: TBar` annotation but returns its own class) or an unannotated function.
    //
    // In both cases resolveMethodOnType validates against the owner class (and
    // its supertypes), so a method that actually lives elsewhere yields NO
    // edge, not a wrong one.
    const ownerIsReceiverType =
      (ref.language === 'objc' && /^[A-Z]/.test(ownerClass)) ||
      (ref.language === 'pascal' && /^[TI]/.test(ownerClass));
    return ownerIsReceiverType ? resolveOn(ownerClass, 0.8) : null;
  }

  // From here on the receiver is a bare call: `inner().method`.
  if (ref.language === 'go') {
    // Go: bare package-level factory FUNCTION `New().method()` — the receiver's
    // type is what `New` returns; resolve the method on that.
    const declaredReturn = lookupCalleeReturnType(receiverCall, ref, context);
    if (declaredReturn) {
      return resolveOn(declaredReturn, 0.85);
    }
    // `receiverCall` isn't a function with a captured return type — typically a
    // package-level VARIABLE holding a function value (e.g. gin's `engine()`),
    // whose type we can't recover. Fall back to bare-name resolution of the
    // method so we don't DROP an edge the un-re-encoded bare path would have
    // found. (When it IS a real factory function but the method doesn't exist
    // on its return type, we already returned above — the absent-method safety
    // guarantee is preserved.)
    //
    // CRITICAL: resolve the TARGET via a synthetic bare-name ref, but return the
    // match tied to the ORIGINAL `ref` (referenceName `inner().method`). The
    // batched resolver (resolveAndPersistBatched) reads unresolved rows from
    // offset 0 every pass and relies on deleteSpecificResolvedReferences —
    // keyed on referenceName — to clear each resolved row so the batch empties.
    // If we propagated the synthetic ref's bare method name as `.original`, the
    // delete would never match the stored `inner().method` row, the batch would
    // never drain, and the loop would re-resolve + re-insert forever (a runaway
    // that grew gin's graph to 5M edges / 1.4 GB before this fix).
    const bareRef = { ...ref, referenceName: outerMethod };
    const bareHit = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
    return bareHit ? { ...bareHit, original: ref } : null;
  }

  // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
  // capitalized inner is a class construction, so the receiver's type is the
  // class itself. This holds only in the CONSTRUCTS_VIA_BARE_CALL languages; in
  // Java/C# a bare `Foo()` is a method call (constructors need `new`), so we
  // must not assume construction. A lowercase bare inner is a top-level
  // `factory().method()` whose type we can't recover — bail.
  if (CONSTRUCTS_VIA_BARE_CALL.has(ref.language) && /^[A-Z]/.test(receiverCall)) {
    return resolveOn(receiverCall, 0.85);
  }
  return null;
}
/**
 * Look up the fully-qualified name under which `typeName` is imported in the
 * file that contains `ref`.
 *
 * When several classes share a simple type name, the caller file's import of
 * that type is the only signal that names WHICH one (#314).
 *
 * @param typeName - Simple (local) type name to look for.
 * @param ref - Reference whose `filePath` / `language` select the import list.
 * @param context - Supplies the import mappings.
 * @returns The `source` of the first import mapping whose `localName` equals
 *   `typeName`, or `undefined` when there is none.
 */
function importedFqnOf(
  typeName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | undefined {
  for (const mapping of context.getImportMappings(ref.filePath, ref.language)) {
    if (mapping.localName === typeName) {
      return mapping.source;
    }
  }
  return undefined;
}
/**
 * Java/Kotlin: infer a receiver's declared type from the field declarations of
 * the class enclosing the call site.
 *
 * The field's `signature` is expected in the form "<TypeName> <fieldName>"
 * (set by tree-sitter.ts extractField), so the type is whatever precedes the
 * field name. This handles Spring `@Resource UserBO userbo;` /
 * `@Autowired private UserService userService;`, where the receiver field name
 * doesn't match the class name by Java naming convention.
 *
 * @param receiverName - Name of the receiver as written at the call site.
 * @param ref - The reference; `filePath`, `language` and `line` locate the
 *   enclosing class.
 * @param context - Supplies the nodes of the file.
 * @returns The bare type name (generics, array markers and dotted package
 *   stripped), or `null` when there is no enclosing class, no matching field,
 *   the signature does not contain the field name, or the type is lowercase
 *   (primitive).
 */
function inferJavaFieldReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const inFile = context.getNodesInFile(ref.filePath);
  if (inFile.length === 0) return null;

  // Find the class enclosing the call line (tightest match by latest start;
  // on equal start lines the later node in the list wins).
  let enclosing: Node | null = null;
  for (const n of inFile) {
    if (n.kind !== 'class' && n.kind !== 'interface') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line) {
      if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
    }
  }
  if (!enclosing) return null;

  // Captured as constants so the predicate below does not close over the
  // mutable `enclosing` binding.
  const classStart = enclosing.startLine;
  const classEnd = enclosing.endLine ?? enclosing.startLine;
  const field = inFile.find(
    (n) =>
      n.kind === 'field' &&
      n.name === receiverName &&
      n.language === ref.language &&
      n.startLine >= classStart &&
      (n.endLine ?? n.startLine) <= classEnd,
  );
  if (!field || !field.signature) return null;

  // Signature shape: "<TypeName> <fieldName>" (extractField). Everything before
  // the last occurrence of the field name is modifiers + type. When the name is
  // absent (-1) or leads the signature (0) there is no type text to read;
  // slicing with -1 would silently chop the final character instead.
  const nameAt = field.signature.lastIndexOf(field.name);
  if (nameAt <= 0) return null;
  let typeText = field.signature.slice(0, nameAt).trim();
  if (!typeText) return null;

  // Strip generic arguments innermost-first until none remain, so nested
  // arguments such as `Map<String, List<Foo>>` do not leave a stray `>`.
  let previous: string;
  do {
    previous = typeText;
    typeText = typeText.replace(/<[^<>]*>/g, '');
  } while (typeText !== previous);

  // Drop array / varargs markers, then keep the last dotted/space-separated
  // segment (this discards modifiers, annotations and package qualifiers).
  const typeNoArray = typeText
    .trim()
    .replace(/\[\s*\]/g, '')
    .replace(/\.\.\.$/, '')
    .trim();
  const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  const lastPart = parts[parts.length - 1];
  if (!lastPart) return null;
  if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  return lastPart;
}
/**
 * Look for `methodName` on a class-like node called `className`.
 *
 * Scans the nodes named `className` in order; for each class / struct /
 * interface in the same language as `ref`, searches that node's file for a
 * method with the requested name whose qualified name contains the class name.
 *
 * @returns The first such method node, or `undefined` when none is found.
 */
function findMethodOnClassNamed(
  className: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): Node | undefined {
  for (const classNode of context.getNodesByName(className)) {
    const isClassLike =
      classNode.kind === 'class' || classNode.kind === 'struct' || classNode.kind === 'interface';
    if (!isClassLike) continue;
    // Skip cross-language class matches
    if (classNode.language !== ref.language) continue;
    const methodNode = context
      .getNodesInFile(classNode.filePath)
      .find(
        (n) =>
          n.kind === 'method' &&
          n.name === methodName &&
          n.qualifiedName.includes(classNode.name),
      );
    if (methodNode) return methodNode;
  }
  return undefined;
}

/**
 * Try to resolve a reference of the form `obj.method` or `Class::method` to a
 * method node.
 *
 * Resolution order (first hit wins):
 *  - C++ dotted refs: infer the receiver's type, resolve the method on it (0.9).
 *  - Java/Kotlin dotted refs: take the receiver's type from a field of the
 *    enclosing class, resolve the method on it (0.9).
 *  - Strategy 1: the receiver is itself a class name (0.85, `qualified-name`).
 *  - Strategy 2: the receiver with its first letter upper-cased is a class
 *    name (0.8, `instance-method`).
 *  - Strategy 3: match by method name across the codebase — a single
 *    same-language candidate (0.7), or the candidate whose qualified name
 *    shares the most words with the receiver name (0.65).
 *
 * @param ref - The unresolved reference.
 * @param context - Resolution context used for all node lookups.
 * @returns The resolved reference, or `null` when the name matches neither
 *   pattern or no strategy finds a target.
 */
export function matchMethodCall(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Parse method call patterns like "obj.method" or "Class::method". The method
  // part allows trailing `:` keywords so Objective-C selectors resolve
  // (`SDImageCache.storeImage:`, `obj.setX:y:`); colons never appear in other
  // languages' method refs, so this is a no-op for them.
  // The receiver allows dots (`builder.Services.AddCoreServices`) so a CHAINED
  // call resolves by its last segment — Strategy 3 below name-matches the method
  // (with its existing single-candidate / receiver-overlap guards). Without this
  // a multi-dot extension-method call (C# DI `builder.Services.AddCoreServices()`,
  // `Guard.Against.X()`) matched no pattern and never resolved.
  const dotMatch = ref.referenceName.match(/^([\w.]+)\.(\w+:?(?:\w+:)*)$/);
  const match = dotMatch ?? ref.referenceName.match(/^(\w+)::(\w+)$/);
  if (!match) {
    return null;
  }
  const [, objectOrClass, methodName] = match;
  // Both groups are mandatory and non-empty in either pattern; this guard only
  // narrows the types so no non-null assertions are needed below.
  if (!objectOrClass || !methodName) {
    return null;
  }

  if (ref.language === 'cpp' && dotMatch) {
    const inferredType = inferCppReceiverType(objectOrClass, ref, context);
    if (inferredType) {
      const typedMatch = resolveMethodOnType(
        inferredType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
      );
      if (typedMatch) {
        return typedMatch;
      }
    }
  }

  // Java/Kotlin: receiver may be a field whose name doesn't match the type by
  // Java naming convention (`userbo` → class `UserBO`, abbreviated). Look up
  // the field in the enclosing class to get its declared type, then resolve
  // the method on that type. Covers Spring `@Resource`/`@Autowired` field
  // injection where the field type is the concrete bean class.
  if ((ref.language === 'java' || ref.language === 'kotlin') && dotMatch) {
    const inferredType = inferJavaFieldReceiverType(objectOrClass, ref, context);
    if (inferredType) {
      // When two classes share the same simple name, the caller file's
      // import is the only signal that names WHICH one — pass the
      // imported FQN so resolveMethodOnType can disambiguate (#314).
      const typedMatch = resolveMethodOnType(
        inferredType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
        importedFqnOf(inferredType, ref, context),
      );
      if (typedMatch) {
        return typedMatch;
      }
    }
  }

  // Strategy 1: Direct class name match
  const direct = findMethodOnClassNamed(objectOrClass, methodName, ref, context);
  if (direct) {
    return {
      original: ref,
      targetNodeId: direct.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  // Strategy 2: Instance variable receiver - try capitalized form to find class
  // e.g., "permissionEngine" → look for classes named "PermissionEngine"
  const capitalizedReceiver = objectOrClass.charAt(0).toUpperCase() + objectOrClass.slice(1);
  if (capitalizedReceiver !== objectOrClass) {
    const viaCapitalized = findMethodOnClassNamed(capitalizedReceiver, methodName, ref, context);
    if (viaCapitalized) {
      return {
        original: ref,
        targetNodeId: viaCapitalized.id,
        confidence: 0.8,
        resolvedBy: 'instance-method',
      };
    }
  }

  // Strategy 3: Find methods by name across the codebase, match by receiver
  // name similarity with the containing class. Handles abbreviated variable
  // names like permissionEngine → PermissionRuleEngine.
  const methods = context
    .getNodesByName(methodName)
    .filter((n) => n.kind === 'method' && n.name === methodName);
  // Prefer same-language candidates; fall back to all only when there are none.
  const sameLanguageMethods = methods.filter((m) => m.language === ref.language);
  const targetMethods = sameLanguageMethods.length > 0 ? sameLanguageMethods : methods;

  // If only one same-language method with this name exists, use it
  const soleCandidate = targetMethods.length === 1 ? targetMethods[0] : undefined;
  if (soleCandidate && soleCandidate.language === ref.language) {
    return {
      original: ref,
      targetNodeId: soleCandidate.id,
      confidence: 0.7,
      resolvedBy: 'instance-method',
    };
  }

  // Multiple methods: score by receiver name word overlap with class name
  if (targetMethods.length > 1) {
    const receiverWords = splitCamelCase(objectOrClass).map((w) => w.toLowerCase());
    let bestMatch: (typeof targetMethods)[number] | undefined;
    let bestScore = 0;
    for (const method of targetMethods) {
      const classWords = new Set(
        splitCamelCase(method.qualifiedName).map((w) => w.toLowerCase()),
      );
      let score = receiverWords.filter((w) => classWords.has(w)).length;
      // Bonus for same language
      if (method.language === ref.language) score += 1;
      // Strict `>` keeps the first candidate on ties.
      if (score > bestScore) {
        bestScore = score;
        bestMatch = method;
      }
    }
    if (bestMatch && bestScore >= 2) {
      return {
        original: ref,
        targetNodeId: bestMatch.id,
        confidence: 0.65,
        resolvedBy: 'instance-method',
      };
    }
  }

  return null;
}
/**
 * Break an identifier into its words.
 *
 * A space is inserted at every lower→upper transition (`fooBar` → `foo Bar`)
 * and between an upper-case run and a following capitalized word
 * (`HTTPServer` → `HTTP Server`). The text is then split on whitespace and on
 * `.`, `_`, `:`, `/`, `\`. Pieces of length 0 or 1 are discarded.
 *
 * @param str - Identifier, qualified name or path-like string.
 * @returns The words in order of appearance, original casing preserved.
 */
function splitCamelCase(str: string): string[] {
  const lowerUpperSplit = str.replace(/([a-z])([A-Z])/g, '$1 $2');
  const acronymSplit = lowerUpperSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const words: string[] = [];
  for (const piece of acronymSplit.split(/[\s._:\/\\]+/)) {
    if (piece.length > 1) words.push(piece);
  }
  return words;
}
/**
 * Score how close two files sit in the directory tree.
 *
 * Both paths are split on `/`, the final (file name) segment is dropped, and
 * the leading directory segments are compared pairwise until the first
 * mismatch.
 *
 * @param filePath1 - First `/`-separated file path.
 * @param filePath2 - Second `/`-separated file path.
 * @returns 15 points per shared leading directory segment, capped at 80.
 *   Higher means closer.
 */
function computePathProximity(filePath1: string, filePath2: string): number {
  const dirsA = filePath1.split('/');
  dirsA.pop(); // drop the file name
  const dirsB = filePath2.split('/');
  dirsB.pop();

  const limit = Math.min(dirsA.length, dirsB.length);
  let common = 0;
  while (common < limit && dirsA[common] === dirsB[common]) {
    common += 1;
  }
  return Math.min(common * 15, 80);
}
/**
 * Pick the most plausible target among several candidate nodes.
 *
 * Every candidate gets an additive score:
 *  - same file as the reference: +100
 *  - directory proximity (computePathProximity): +0…80
 *  - same language: +50, otherwise −80
 *  - `calls` reference and the candidate is a function/method: +25
 *  - `instantiates` reference and the candidate is a class/struct/interface: +25
 *  - `decorates` reference: function/method +25, class/interface +15
 *  - exported: +10
 *  - same file with a truthy `startLine`: up to +20, shrinking by 1 for every
 *    10 lines of distance from the reference
 *
 * @param ref - The reference being resolved.
 * @param candidates - Nodes to choose from.
 * @param _context - Unused.
 * @returns The highest-scoring candidate (the earliest one on ties), or `null`
 *   when `candidates` is empty or no candidate scores above −1.
 */
function findBestMatch(
  ref: UnresolvedRef,
  candidates: Node[],
  _context: ResolutionContext
): Node | null {
  const scoreOf = (candidate: Node): number => {
    const sameFile = candidate.filePath === ref.filePath;
    const isCallable = candidate.kind === 'function' || candidate.kind === 'method';

    // Directory proximity — strongly prefer same module/package.
    let total = computePathProximity(ref.filePath, candidate.filePath);
    if (sameFile) total += 100;

    // Strongly prefer same language, penalize cross-language.
    total += candidate.language === ref.language ? 50 : -80;

    switch (ref.referenceKind) {
      case 'calls':
        // Call references prefer functions/methods.
        if (isCallable) total += 25;
        break;
      case 'instantiates':
        // `new Foo()` prefers class-like targets — without this, a function
        // named `Foo` in another module could outscore the actual class.
        if (
          candidate.kind === 'class' ||
          candidate.kind === 'struct' ||
          candidate.kind === 'interface'
        ) {
          total += 25;
        }
        break;
      case 'decorates':
        // `@Foo` prefers functions. Class decorators (Python `@SomeClass`,
        // Java annotation interfaces) also resolve here, hence the smaller
        // class bonus.
        if (isCallable) {
          total += 25;
        } else if (candidate.kind === 'class' || candidate.kind === 'interface') {
          total += 15;
        }
        break;
    }

    if (candidate.isExported) total += 10;

    // Within the same file, favour the declaration closest to the reference.
    if (sameFile && candidate.startLine) {
      const distance = Math.abs(candidate.startLine - ref.line);
      total += Math.max(0, 20 - distance / 10);
    }
    return total;
  };

  let winner: Node | null = null;
  let winnerScore = -1;
  for (const candidate of candidates) {
    const candidateScore = scoreOf(candidate);
    if (candidateScore > winnerScore) {
      winnerScore = candidateScore;
      winner = candidate;
    }
  }
  return winner;
}
/**
 * Fuzzy match — the last-resort strategy, with the lowest confidence.
 *
 * Looks the reference name up case-insensitively, keeps only function /
 * method / class nodes, applies the language gate, and prefers same-language
 * candidates when there are any. A match is reported only when exactly one
 * candidate remains.
 *
 * @param ref - The unresolved reference.
 * @param context - Supplies the lowercase-name index.
 * @returns A `fuzzy` resolution with confidence 0.5 (same language) or 0.3
 *   (cross-language), or `null` when zero or several candidates remain.
 */
export function matchFuzzy(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Pre-built lowercase index: O(1) lookup instead of scanning all nodes.
  const byLowerName = context.getNodesByLowerName(ref.referenceName.toLowerCase());

  // Callable kinds only (function, method, class), then the language gate.
  const gated = applyLanguageGate(
    byLowerName.filter(
      (n) => n.kind === 'function' || n.kind === 'method' || n.kind === 'class',
    ),
    ref,
  );

  // Prefer same-language matches; fall back to the full gated list otherwise.
  const sameLanguage = gated.filter((n) => n.language === ref.language);
  const pool = sameLanguage.length > 0 ? sameLanguage : gated;
  if (pool.length !== 1) {
    return null;
  }

  const only = pool[0]!;
  return {
    original: ref,
    targetNodeId: only.id,
    confidence: only.language !== ref.language ? 0.3 : 0.5,
    resolvedBy: 'fuzzy',
  };
}
/**
 * Run the resolution strategies against `ref` in order of confidence and
 * return the first hit.
 *
 * Order: file path → qualified name → language-specific call-chain matcher (at
 * most one applies) → method-call pattern → exact name → fuzzy.
 * `function_ref` references are the exception: they are resolved by
 * matchFunctionRef alone.
 *
 * @param ref - The unresolved reference.
 * @param context - Resolution context handed to every strategy.
 * @returns The first successful resolution, or `null` when every strategy fails.
 */
export function matchReference(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  // Function-as-value refs (#756) resolve ONLY through the dedicated matcher —
  // never the fuzzy/qualified fallthrough below (a wrong callback edge is
  // worse than none).
  if (ref.referenceKind === 'function_ref') {
    return matchFunctionRef(ref, context);
  }

  // Pick the call-chain matcher for this language, if any. The language groups
  // are disjoint, so at most one matcher ever applies.
  let chainMatcher:
    | ((r: UnresolvedRef, c: ResolutionContext) => ResolvedRef | null)
    | null = null;
  switch (ref.language) {
    // C++ chained call whose receiver is another call — `Foo::instance().bar()`
    // encoded as `Foo::instance().bar` by the extractor (#645). The receiver's
    // type comes from what the inner call returns.
    case 'cpp':
    case 'c':
      chainMatcher = matchCppCallChain;
      break;

    // `::`-scoped factory chain — PHP `Cls::for($x)->method()` (#608) or Rust
    // `Foo::new().bar()`, both encoded as `Cls::factory().method`. The
    // receiver's type is the factory's `self` (PHP `: self`/`: static`, Rust
    // `-> Self`) or concrete return type.
    case 'php':
    case 'rust':
      chainMatcher = matchScopedCallChain;
      break;

    // Dotted chained static-factory / fluent call — `Foo.getInstance().bar()`
    // encoded as `Foo.getInstance().bar`, Go's bare-factory `New().Method()` as
    // `New().Method`, Scala's companion factory, Dart's static factory /
    // factory-constructor, or ObjC's chained message send `[[Foo create] doIt]`
    // encoded as `Foo.create().doIt` (#645/#608 mechanism). The method's class
    // comes from the inner call's declared return type and is then validated.
    case 'java':
    case 'kotlin':
    case 'csharp':
    case 'swift':
    case 'go':
    case 'scala':
    case 'dart':
    case 'objc':
    case 'pascal':
      chainMatcher = matchDottedCallChain;
      break;
  }

  // `??` short-circuits, so later strategies run only when earlier ones miss.
  return (
    // 0. File path match (e.g., "snippets/drawer-menu.liquid" → file node)
    matchByFilePath(ref, context) ??
    // 1. Qualified name match (highest confidence)
    matchByQualifiedName(ref, context) ??
    // 1b–1d. Language-specific call-chain matcher selected above
    chainMatcher?.(ref, context) ??
    // 2. Method call pattern
    matchMethodCall(ref, context) ??
    // 3. Exact name match
    matchByExactName(ref, context) ??
    // 4. Fuzzy match (lowest confidence)
    matchFuzzy(ref, context) ??
    null
  );
}
|