| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712 |
- /**
- * Name Matcher
- *
- * Handles symbol name matching for reference resolution.
- */
- import { Node } from '../types';
- import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types';
/**
 * Try to resolve a path-like reference (e.g., "snippets/drawer-menu.liquid")
 * by matching its filename against nodes of kind `file`.
 *
 * Resolution tiers, first hit wins:
 *  1. `qualifiedName` or `filePath` equals the reference exactly (0.95).
 *  2. `qualifiedName` or `filePath` ends with the reference on a path-segment
 *     boundary, e.g. "snippets/foo.liquid" matches "src/snippets/foo.liquid"
 *     but NOT "src/my-snippets/foo.liquid" (0.85).
 *  3. Exactly one file node carries that filename (0.7).
 *
 * @param ref - The unresolved reference; only `referenceName` is inspected.
 * @param context - Lookup provider; only `getNodesByName` is called.
 * @returns The resolved reference, or null when the name contains no `/`,
 *          has an empty last segment, or no tier matches.
 */
export function matchByFilePath(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const wanted = ref.referenceName;
  if (!wanted.includes('/')) return null;

  // The last path segment is the filename used for the index lookup.
  const fileName = wanted.split('/').pop();
  if (!fileName) return null;

  const fileNodes = context
    .getNodesByName(fileName)
    .filter((n) => n.kind === 'file');
  if (fileNodes.length === 0) return null;

  const resolved = (target: Node, confidence: number): ResolvedRef => ({
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy: 'file-path',
  });

  // Tier 1: exact path match.
  const exactMatch = fileNodes.find(
    (n) => n.qualifiedName === wanted || n.filePath === wanted
  );
  if (exactMatch) return resolved(exactMatch, 0.95);

  // Tier 2: suffix match anchored on a '/' so a partial directory name
  // ("my-snippets" vs "snippets") cannot satisfy the match.
  const boundedSuffix = wanted.startsWith('/') ? wanted : `/${wanted}`;
  const suffixMatch = fileNodes.find(
    (n) => n.qualifiedName.endsWith(boundedSuffix) || n.filePath.endsWith(boundedSuffix)
  );
  if (suffixMatch) return resolved(suffixMatch, 0.85);

  // Tier 3: a unique filename is accepted with lower confidence.
  if (fileNodes.length === 1) return resolved(fileNodes[0]!, 0.7);

  return null;
}
/**
 * Resolve a reference by looking up nodes whose name equals the reference name.
 *
 * - No candidate: null.
 * - One candidate: accepted with confidence 0.9, or 0.5 when its language
 *   differs from the reference's language.
 * - Several candidates: `findBestMatch` picks one; confidence is 0.7 when the
 *   path proximity between the two files is at least 30, otherwise 0.4.
 *
 * @param ref - The unresolved reference.
 * @param context - Lookup provider (`getNodesByName`).
 * @returns The resolved reference, or null when nothing qualifies.
 */
export function matchByExactName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const named = context.getNodesByName(ref.referenceName);
  if (named.length === 0) return null;

  if (named.length === 1) {
    const only = named[0]!;
    const sameLanguage = only.language === ref.language;
    return {
      original: ref,
      targetNodeId: only.id,
      // A lone cross-language hit is kept but penalized.
      confidence: sameLanguage ? 0.9 : 0.5,
      resolvedBy: 'exact-match',
    };
  }

  // Ambiguous name: let the scoring heuristic choose.
  const winner = findBestMatch(ref, named, context);
  if (!winner) return null;

  // A winner from a distant directory gets the lower confidence.
  const isNearby = computePathProximity(ref.filePath, winner.filePath) >= 30;
  return {
    original: ref,
    targetNodeId: winner.id,
    confidence: isNearby ? 0.7 : 0.4,
    resolvedBy: 'exact-match',
  };
}
/**
 * Try to resolve a reference whose name looks qualified (contains `::` or `.`).
 *
 * 1. If `getNodesByQualifiedName` returns exactly one node, use it (0.95).
 * 2. Otherwise look up nodes by the last name segment and accept the first
 *    whose `qualifiedName` ends with the full reference name on an identifier
 *    boundary (0.85). The boundary check keeps "Foo::bar" from matching
 *    "BigFoo::bar".
 *
 * @param ref - The unresolved reference; only `referenceName` is inspected.
 * @param context - Lookup provider (`getNodesByQualifiedName`, `getNodesByName`).
 * @returns The resolved reference, or null when the name is unqualified or
 *          nothing matches.
 */
export function matchByQualifiedName(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const wanted = ref.referenceName;

  // Only names containing a qualifier separator are handled here.
  if (!wanted.includes('::') && !wanted.includes('.')) {
    return null;
  }

  const candidates = context.getNodesByQualifiedName(wanted);
  if (candidates.length === 1) {
    return {
      original: ref,
      targetNodeId: candidates[0]!.id,
      confidence: 0.95,
      resolvedBy: 'qualified-name',
    };
  }

  // True when `qualified` ends with `wanted` and the character just before
  // the suffix (if any) is not part of an identifier.
  const endsOnNameBoundary = (qualified: string): boolean => {
    if (!qualified.endsWith(wanted)) return false;
    const prefixLength = qualified.length - wanted.length;
    if (prefixLength === 0) return true;
    return !/[\w$]/.test(qualified.charAt(prefixLength - 1));
  };

  // Partial match: index lookup by the trailing segment, then suffix check.
  const parts = wanted.split(/[:.]/);
  const lastName = parts[parts.length - 1];
  if (lastName) {
    const partial = context
      .getNodesByName(lastName)
      .find((candidate) => endsOnNameBoundary(candidate.qualifiedName));
    if (partial) {
      return {
        original: ref,
        targetNodeId: partial.id,
        confidence: 0.85,
        resolvedBy: 'qualified-name',
      };
    }
  }

  return null;
}
/**
 * Resolve `<typeName>::<methodName>` to a method node.
 *
 * Methods are looked up by name and kept when they are of kind `method`,
 * share the reference's language, and their `qualifiedName` is
 * `<typeName>::<methodName>` or ends with `::<typeName>::<methodName>`.
 * Matching on the qualified name (rather than on the file that declares the
 * type) also finds methods defined out-of-line in a different file, e.g.
 * `int Foo::bar() { ... }` in foo.cpp while class Foo lives in foo.hpp.
 *
 * @param typeName - Bare type name the method is expected to belong to.
 * @param methodName - Method name to look up.
 * @param ref - The unresolved reference; supplies the language filter.
 * @param context - Lookup provider (`getNodesByName`).
 * @param confidence - Confidence to stamp on the result.
 * @param resolvedBy - Strategy label to stamp on the result.
 * @returns The resolved reference, or null when no method matches.
 */
function resolveMethodOnType(
  typeName: string,
  methodName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
  confidence: number,
  resolvedBy: ResolvedRef['resolvedBy'],
  /**
   * Optional FQN that identifies WHICH class declaration `typeName`
   * refers to in the caller's file. When multiple candidates share
   * the same qualifiedName (`FooConverter::convert` in both
   * `dao/converter/` and `service/converter/`), the FQN's
   * file-path-suffix picks the right one — the disambiguation
   * signal Java imports carry but the call site doesn't (#314).
   */
  preferredFqn?: string,
): ResolvedRef | null {
  const want = `${typeName}::${methodName}`;
  const matches = context.getNodesByName(methodName).filter(
    (m) =>
      m.kind === 'method' &&
      m.language === ref.language &&
      (m.qualifiedName === want || m.qualifiedName.endsWith(`::${want}`)),
  );
  if (matches.length === 0) return null;

  // Default to the first match; the FQN (if given) may override it.
  let target = matches[0]!;

  if (matches.length > 1 && preferredFqn) {
    // `com.acme.Foo` -> `com/acme/Foo.java` (or `.kt` for Kotlin).
    const ext = ref.language === 'kotlin' ? '.kt' : '.java';
    const fqnPath = preferredFqn.replace(/\./g, '/') + ext;
    const chosen = matches.find((m) => {
      const fp = m.filePath.replace(/\\/g, '/');
      // Require a directory boundary so `xcom/acme/Foo.java` does not
      // satisfy the FQN `com.acme.Foo`.
      return fp === fqnPath || fp.endsWith('/' + fqnPath);
    });
    if (chosen) target = chosen;
  }

  return {
    original: ref,
    targetNodeId: target.id,
    confidence,
    resolvedBy,
  };
}
// Keywords and operator-like tokens that may sit directly in front of a
// receiver expression (e.g. `return ptr->m()`); none of them is a type name.
const CPP_NON_TYPE_TOKENS = new Set([
  // control flow
  'return', 'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  'break', 'continue', 'goto', 'throw',
  // allocation and coroutines
  'new', 'delete', 'co_await', 'co_yield', 'co_return',
  // casts
  'static_cast', 'const_cast', 'dynamic_cast', 'reinterpret_cast',
  // operators spelled as words
  'sizeof', 'alignof', 'typeid', 'and', 'or', 'not', 'xor',
]);

/**
 * Reduce a raw C++ type spelling to its bare, unqualified type name.
 *
 * Strips cv/elaborated-type keywords, `&`/`*` markers and `<...>` template
 * argument lists, collapses whitespace, then keeps the last `::` segment.
 *
 * @param typeName - Raw text captured in front of a declarator.
 * @returns The bare type name, or null when nothing is left or the remaining
 *          token is listed in CPP_NON_TYPE_TOKENS.
 */
function normalizeCppTypeName(typeName: string): string | null {
  const withoutKeywords = typeName.replace(
    /\b(const|volatile|mutable|typename|class|struct)\b/g,
    ' ',
  );
  const withoutPointers = withoutKeywords.replace(/[&*]+/g, ' ');
  const withoutTemplates = withoutPointers.replace(/<[^>]*>/g, ' ');
  const cleaned = withoutTemplates.replace(/\s+/g, ' ').trim();
  if (cleaned === '') return null;

  // Keep only the final namespace segment.
  const segments = cleaned.split(/::/).filter(Boolean);
  const bare = segments[segments.length - 1];
  if (!bare || CPP_NON_TYPE_TOKENS.has(bare)) return null;

  return bare;
}
/**
 * Build the regex that recognizes a declaration of `escapedReceiver`.
 *
 * It accepts `Type receiver`, `Type* receiver`, `Type *receiver`,
 * `Type*receiver`, `Type<X> receiver`, etc., and captures the type text in
 * group 1. The receiver must be followed by a declarator terminator
 * (`;`, `=`, `,`, `)`, `[`, `{`, `(`) or end of input, which keeps uses such
 * as `return receiver->m()` from being read as a declaration.
 *
 * @param escapedReceiver - Receiver name, already escaped for use in a regex.
 * @returns A non-global RegExp whose first capture group is the type text.
 */
function buildDeclaratorRegex(escapedReceiver: string): RegExp {
  // Identifier (with optional `::`), optional template args, optional */&.
  const typeGroup = String.raw`([A-Za-z_][\w:]*(?:\s*<[^;=(){}]+>)?(?:\s*[*&]+)?)`;
  const receiver = String.raw`\s*\b` + escapedReceiver + String.raw`\b\s*`;
  const terminator = String.raw`(?=[;=,)\[{(]|$)`;
  return new RegExp(typeGroup + receiver + terminator);
}
/**
 * C++: infer the declared type of a receiver variable from source text.
 *
 * First scans the referencing file upward from the call line looking for a
 * line that declares `receiverName`. If none yields a usable type, it tries
 * sibling headers (`.h`, `.hpp`, `.hxx`) derived from a `.c`/`.cc`/`.cpp`/
 * `.cxx` path and scans them top to bottom.
 *
 * @param receiverName - Variable name in front of the method call.
 * @param ref - The unresolved reference (`filePath`, 1-based `line`).
 * @param context - File access provider (`readFile`, `fileExists`).
 * @returns The bare type name, or null when no declaration is found.
 */
function inferCppReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const source = context.readFile(ref.filePath);
  if (!source) return null;

  const escapedReceiver = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mentionsReceiver = new RegExp(`\\b${escapedReceiver}\\b`);
  const declaratorRegex = buildDeclaratorRegex(escapedReceiver);

  // Extract a normalized type from one line, or null if it declares nothing usable.
  const typeDeclaredOn = (text: string | undefined): string | null => {
    if (!text || !mentionsReceiver.test(text)) return null;
    const declared = text.match(declaratorRegex);
    return declared ? normalizeCppTypeName(declared[1] ?? '') : null;
  };

  // Pass 1: walk backwards from the call line (clamped into the file).
  const sourceLines = source.split(/\r?\n/);
  let cursor = Math.max(0, Math.min(sourceLines.length - 1, ref.line - 1));
  while (cursor >= 0) {
    const found = typeDeclaredOn(sourceLines[cursor]);
    if (found) return found;
    cursor--;
  }

  // Pass 2: sibling headers. A path without a C/C++ source extension maps
  // back to itself and is dropped, as are duplicates.
  const headerPaths = ['.h', '.hpp', '.hxx']
    .map((ext) => ref.filePath.replace(/\.(?:c|cc|cpp|cxx)$/i, ext))
    .filter((path, idx, all) => all.indexOf(path) === idx && path !== ref.filePath);

  for (const headerPath of headerPaths) {
    if (!context.fileExists(headerPath)) continue;
    const headerSource = context.readFile(headerPath);
    if (!headerSource) continue;
    for (const headerLine of headerSource.split(/\r?\n/)) {
      const found = typeDeclaredOn(headerLine);
      if (found) return found;
    }
  }

  return null;
}
/**
 * Java/Kotlin: infer a receiver's declared type by walking field declarations
 * in the class enclosing the call site. The field's `signature` is expected in
 * the form "<TypeName> <fieldName>" (set by tree-sitter.ts extractField), so
 * the type is the text in front of the field name. Handles Spring
 * `@Resource UserBO userbo;` / `@Autowired private UserService userService;`
 * where the receiver field name doesn't match the class name by Java naming
 * convention.
 *
 * @param receiverName - Field name in front of the method call.
 * @param ref - The unresolved reference (`filePath`, `line`, `language`).
 * @param context - Lookup provider (`getNodesInFile`).
 * @returns The bare type name (generics, array/varargs markers and dotted
 *          package stripped), or null when there is no enclosing class or
 *          interface, no matching field, the signature does not contain the
 *          field name, or the type does not start with an uppercase letter.
 */
function inferJavaFieldReceiverType(
  receiverName: string,
  ref: UnresolvedRef,
  context: ResolutionContext,
): string | null {
  const inFile = context.getNodesInFile(ref.filePath);
  if (inFile.length === 0) return null;

  // Find the class enclosing the call line (tightest match by latest start).
  let enclosing: Node | null = null;
  for (const n of inFile) {
    if (n.kind !== 'class' && n.kind !== 'interface') continue;
    if (n.language !== ref.language) continue;
    const end = n.endLine ?? n.startLine;
    if (n.startLine <= ref.line && end >= ref.line) {
      if (!enclosing || n.startLine >= enclosing.startLine) enclosing = n;
    }
  }
  if (!enclosing) return null;

  // Capture the bounds as constants so the callback below does not depend on
  // narrowing of the mutable `enclosing` binding.
  const enclosingStart = enclosing.startLine;
  const enclosingEnd = enclosing.endLine ?? enclosing.startLine;
  const field = inFile.find(
    (n) =>
      n.kind === 'field' &&
      n.name === receiverName &&
      n.language === ref.language &&
      n.startLine >= enclosingStart &&
      (n.endLine ?? n.startLine) <= enclosingEnd,
  );
  if (!field || !field.signature) return null;

  // Signature shape: "<TypeName> <fieldName>". If the name is missing the
  // shape is not the expected one; bail out instead of letting
  // `slice(0, -1)` silently chop the last character off the signature.
  const nameAt = field.signature.lastIndexOf(field.name);
  if (nameAt < 0) return null;

  const typeRaw = field.signature.slice(0, nameAt).trim();
  if (!typeRaw) return null;

  // Strip generics, then array/varargs markers, then modifiers and package.
  const typeNoGenerics = typeRaw.replace(/<[^>]*>/g, '').trim();
  const typeNoArray = typeNoGenerics.replace(/\[\s*\]/g, '').replace(/\.\.\.$/, '').trim();
  const parts = typeNoArray.split(/[.\s]+/).filter(Boolean);
  const lastPart = parts[parts.length - 1];
  if (!lastPart) return null;
  if (!/^[A-Z]/.test(lastPart)) return null; // primitives / lowercase → skip
  return lastPart;
}
/**
 * Try to resolve a `receiver.method` or `Class::method` reference.
 *
 * Steps, first hit wins:
 *  - C++ with dot syntax: infer the receiver's declared type from source and
 *    resolve the method on that type (0.9).
 *  - Java/Kotlin with dot syntax: infer the receiver's type from a field of
 *    the enclosing class, passing the imported FQN for that type (if any) as
 *    a disambiguation hint (0.9).
 *  - Strategy 1: the receiver text names a class/struct/interface of the same
 *    language whose file holds a method with the requested name (0.85).
 *  - Strategy 2: same lookup with the receiver's first letter upper-cased,
 *    e.g. "permissionEngine" → "PermissionEngine" (0.8).
 *  - Strategy 3: search methods by name; accept a unique same-language method
 *    (0.7), or otherwise the method whose qualified name shares the most
 *    words with the receiver, provided the score reaches 2 (0.65).
 *
 * @param ref - The unresolved reference.
 * @param context - Lookup provider.
 * @returns The resolved reference, or null when the name does not have the
 *          `a.b` / `a::b` shape or no strategy matches.
 */
export function matchMethodCall(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const dotForm = ref.referenceName.match(/^(\w+)\.(\w+)$/);
  const parsed = dotForm || ref.referenceName.match(/^(\w+)::(\w+)$/);
  if (!parsed) {
    return null;
  }
  // Both groups are `\w+`, so they are always non-empty strings here.
  const receiver = parsed[1]!;
  const methodName = parsed[2]!;

  if (dotForm && ref.language === 'cpp') {
    const cppType = inferCppReceiverType(receiver, ref, context);
    const viaCppType = cppType
      ? resolveMethodOnType(cppType, methodName, ref, context, 0.9, 'instance-method')
      : null;
    if (viaCppType) {
      return viaCppType;
    }
  }

  // Java/Kotlin: the receiver may be a field whose name does not follow the
  // type's name (`userbo` → class `UserBO`), as with Spring field injection.
  if (dotForm && (ref.language === 'java' || ref.language === 'kotlin')) {
    const fieldType = inferJavaFieldReceiverType(receiver, ref, context);
    if (fieldType) {
      // When two classes share a simple name, the caller file's import is
      // the only signal that names which one is meant (#314).
      const importedFqn = context
        .getImportMappings(ref.filePath, ref.language)
        .find((mapping) => mapping.localName === fieldType)?.source;
      const viaFieldType = resolveMethodOnType(
        fieldType,
        methodName,
        ref,
        context,
        0.9,
        'instance-method',
        importedFqn,
      );
      if (viaFieldType) {
        return viaFieldType;
      }
    }
  }

  // Shared lookup for strategies 1 and 2: find a same-language class-like
  // node called `className` and, in its file, a method with the requested
  // name whose qualified name mentions the class.
  const methodOnClassNamed = (className: string): Node | undefined => {
    for (const owner of context.getNodesByName(className)) {
      const classLike =
        owner.kind === 'class' || owner.kind === 'struct' || owner.kind === 'interface';
      if (!classLike || owner.language !== ref.language) continue;
      const hit = context.getNodesInFile(owner.filePath).find(
        (n) =>
          n.kind === 'method' &&
          n.name === methodName &&
          n.qualifiedName.includes(owner.name)
      );
      if (hit) return hit;
    }
    return undefined;
  };

  // Strategy 1: the receiver text is itself a class name.
  const direct = methodOnClassNamed(receiver);
  if (direct) {
    return {
      original: ref,
      targetNodeId: direct.id,
      confidence: 0.85,
      resolvedBy: 'qualified-name',
    };
  }

  // Strategy 2: instance variable named after its class, lower-cased first letter.
  const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1);
  if (capitalized !== receiver) {
    const viaCapitalized = methodOnClassNamed(capitalized);
    if (viaCapitalized) {
      return {
        original: ref,
        targetNodeId: viaCapitalized.id,
        confidence: 0.8,
        resolvedBy: 'instance-method',
      };
    }
  }

  // Strategy 3: methods by name across the codebase, ranked by how many
  // receiver words appear in the method's qualified name. Handles
  // abbreviated names such as permissionEngine → PermissionRuleEngine.
  const namedMethods = context
    .getNodesByName(methodName)
    .filter((n) => n.kind === 'method' && n.name === methodName);
  const inSameLanguage = namedMethods.filter((m) => m.language === ref.language);
  const pool = inSameLanguage.length > 0 ? inSameLanguage : namedMethods;

  if (pool.length === 1 && pool[0]!.language === ref.language) {
    return {
      original: ref,
      targetNodeId: pool[0]!.id,
      confidence: 0.7,
      resolvedBy: 'instance-method',
    };
  }

  if (pool.length > 1) {
    const receiverWords = splitCamelCase(receiver).map((w) => w.toLowerCase());
    let leader: Node | undefined;
    let leaderScore = 0;
    for (const method of pool) {
      const ownerWords = splitCamelCase(method.qualifiedName).map((w) => w.toLowerCase());
      let overlap = 0;
      for (const word of receiverWords) {
        if (ownerWords.includes(word)) overlap++;
      }
      // Same-language candidates get one extra point.
      const total = method.language === ref.language ? overlap + 1 : overlap;
      if (total > leaderScore) {
        leaderScore = total;
        leader = method;
      }
    }
    if (leader && leaderScore >= 2) {
      return {
        original: ref,
        targetNodeId: leader.id,
        confidence: 0.65,
        resolvedBy: 'instance-method',
      };
    }
  }

  return null;
}
/**
 * Break a camelCase / PascalCase identifier (optionally qualified or
 * path-like) into its words.
 *
 * A space is inserted at every lower→upper transition and between an
 * uppercase run and a following capitalized word; the result is then split on
 * whitespace, `.`, `_`, `:`, `/` and `\`. Pieces of length 0 or 1 are dropped.
 *
 * @param str - Identifier or qualified name.
 * @returns The words, in order of appearance, with their original casing.
 */
function splitCamelCase(str: string): string[] {
  const afterLowerUpper = str.replace(/([a-z])([A-Z])/g, '$1 $2');
  const spaced = afterLowerUpper.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');

  const words: string[] = [];
  for (const piece of spaced.split(/[\s._:\/\\]+/)) {
    if (piece.length > 1) words.push(piece);
  }
  return words;
}
/**
 * Compute directory proximity between two file paths.
 *
 * Counts the leading directory segments the two paths have in common (the
 * final segment, i.e. the filename, is ignored) and awards 15 points per
 * shared segment, capped at 80. Higher score = closer in the directory tree.
 *
 * Backslashes are treated as separators, consistent with the path
 * normalization `resolveMethodOnType` applies, so Windows-style paths no
 * longer always score 0.
 *
 * @param filePath1 - First file path.
 * @param filePath2 - Second file path.
 * @returns A score between 0 and 80.
 */
function computePathProximity(filePath1: string, filePath2: string): number {
  const directoriesOf = (filePath: string): string[] =>
    filePath.replace(/\\/g, '/').split('/').slice(0, -1);

  const dir1 = directoriesOf(filePath1);
  const dir2 = directoriesOf(filePath2);

  // Length of the common leading run of directory segments.
  const limit = Math.min(dir1.length, dir2.length);
  let shared = 0;
  while (shared < limit && dir1[shared] === dir2[shared]) {
    shared++;
  }

  // Each shared directory segment contributes 15 points, capped at 80.
  return Math.min(shared * 15, 80);
}
/**
 * Pick the most plausible target among several same-named candidates.
 *
 * Each candidate is scored additively:
 *  - +100 when it lives in the referencing file;
 *  - plus the directory proximity score (`computePathProximity`);
 *  - +50 for the same language, -80 for a different one;
 *  - a kind bonus that depends on the reference kind:
 *      `calls`        → +25 for function/method,
 *      `instantiates` → +25 for class/struct/interface,
 *      `decorates`    → +25 for function/method, +15 for class/interface;
 *  - +10 when exported;
 *  - within the same file, up to +20 for being close to the reference line
 *    (minus 1 point per 10 lines of distance, never negative).
 *
 * The first candidate with the strictly highest score wins. The running best
 * starts at -1, so a candidate scoring -1 or less is never selected.
 *
 * @param ref - The unresolved reference.
 * @param candidates - Nodes sharing the referenced name.
 * @param _context - Unused.
 * @returns The winning node, or null when no candidate scores above -1.
 */
function findBestMatch(
  ref: UnresolvedRef,
  candidates: Node[],
  _context: ResolutionContext
): Node | null {
  // Bonus for a candidate kind that suits the kind of reference.
  const kindBonus = (kind: Node['kind']): number => {
    const isCallable = kind === 'function' || kind === 'method';
    switch (ref.referenceKind) {
      case 'calls':
        return isCallable ? 25 : 0;
      case 'instantiates':
        // `new Foo()` should prefer the class over a function named `Foo`.
        return kind === 'class' || kind === 'struct' || kind === 'interface' ? 25 : 0;
      case 'decorates':
        // Decorators are usually functions; class decorators and annotation
        // interfaces get a smaller bonus.
        if (isCallable) return 25;
        return kind === 'class' || kind === 'interface' ? 15 : 0;
      default:
        return 0;
    }
  };

  const rate = (candidate: Node): number => {
    const inSameFile = candidate.filePath === ref.filePath;
    let total = inSameFile ? 100 : 0;
    total += computePathProximity(ref.filePath, candidate.filePath);
    total += candidate.language === ref.language ? 50 : -80;
    total += kindBonus(candidate.kind);
    if (candidate.isExported) {
      total += 10;
    }
    if (inSameFile && candidate.startLine) {
      const lineGap = Math.abs(candidate.startLine - ref.line);
      total += Math.max(0, 20 - lineGap / 10);
    }
    return total;
  };

  let winner: Node | null = null;
  let topScore = -1;
  for (const candidate of candidates) {
    const candidateScore = rate(candidate);
    if (candidateScore > topScore) {
      topScore = candidateScore;
      winner = candidate;
    }
  }
  return winner;
}
/**
 * Last-resort, case-insensitive name match with low confidence.
 *
 * Looks up nodes by the lower-cased reference name, keeps only functions,
 * methods and classes, and narrows to same-language nodes when at least one
 * exists. A result is produced only when exactly one node remains:
 * confidence 0.5, or 0.3 if that node is in another language.
 *
 * @param ref - The unresolved reference (`referenceName`, `language`).
 * @param context - Lookup provider (`getNodesByLowerName`).
 * @returns The resolved reference, or null when zero or several nodes remain.
 */
export function matchFuzzy(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const callableKinds = new Set(['function', 'method', 'class']);

  const callables = context
    .getNodesByLowerName(ref.referenceName.toLowerCase())
    .filter((n) => callableKinds.has(n.kind));

  // Same-language nodes take precedence whenever there are any.
  const sameLanguage = callables.filter((n) => n.language === ref.language);
  const shortlist = sameLanguage.length > 0 ? sameLanguage : callables;

  if (shortlist.length !== 1) return null;

  const target = shortlist[0]!;
  return {
    original: ref,
    targetNodeId: target.id,
    confidence: target.language === ref.language ? 0.5 : 0.3,
    resolvedBy: 'fuzzy',
  };
}
/**
 * Run every matching strategy in a fixed order and return the first result.
 *
 * Order:
 *  0. file path match (e.g. "snippets/drawer-menu.liquid" → file node)
 *  1. qualified name match
 *  2. method call pattern
 *  3. exact name match
 *  4. fuzzy match
 *
 * @param ref - The unresolved reference.
 * @param context - Lookup provider handed to each strategy.
 * @returns The first non-null resolution, or null when every strategy fails.
 */
export function matchReference(
  ref: UnresolvedRef,
  context: ResolutionContext
): ResolvedRef | null {
  const strategies = [
    matchByFilePath,
    matchByQualifiedName,
    matchMethodCall,
    matchByExactName,
    matchFuzzy,
  ];

  for (const attempt of strategies) {
    const resolution = attempt(ref, context);
    if (resolution) return resolution;
  }
  return null;
}
|