Просмотр исходного кода

fix: Prevent false cross-module edges in name-based resolution

Name matching was creating false `calls` edges between unrelated modules
in monorepos because `findBestMatch()` had no concept of directory
proximity — functions with common names (e.g. `navigate`) in different
apps scored identically and resolved to whichever came first.

Adds path proximity scoring (shared directory segments) so same-module
candidates strongly win over cross-boundary ones, and lowers confidence
for distant matches so import-based resolution takes precedence.

Closes #67

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Colby McHenry 2 месяца назад
Родитель
Коммит
584bd94ecc
2 изменённых файла: 156 добавлений и 4 удаления
  1. 123 0
      __tests__/resolution.test.ts
  2. 33 4
      src/resolution/name-matcher.ts

+ 123 - 0
__tests__/resolution.test.ts

@@ -82,6 +82,129 @@ describe('Resolution Module', () => {
       expect(result?.resolvedBy).toBe('exact-match');
     });
 
+    it('should prefer same-module candidates over cross-module matches', () => {
+      // Simulates a Python monorepo in which two apps (app_a and app_b) each define navigate()
+      const candidateA: Node = {
+        id: 'func:apps/app_a/src/server.py:navigate:10',
+        kind: 'function',
+        name: 'navigate',
+        qualifiedName: 'apps/app_a/src/server.py::navigate',
+        filePath: 'apps/app_a/src/server.py',
+        language: 'python',
+        startLine: 10,
+        endLine: 20,
+        startColumn: 0,
+        endColumn: 0,
+        updatedAt: Date.now(),
+      };
+
+      const candidateB: Node = {
+        id: 'func:apps/app_b/src/server.py:navigate:15',
+        kind: 'function',
+        name: 'navigate',
+        qualifiedName: 'apps/app_b/src/server.py::navigate',
+        filePath: 'apps/app_b/src/server.py',
+        language: 'python',
+        startLine: 15,
+        endLine: 25,
+        startColumn: 0,
+        endColumn: 0,
+        updatedAt: Date.now(),
+      };
+
+      const context: ResolutionContext = {
+        getNodesInFile: () => [],
+        getNodesByName: (name) => name === 'navigate' ? [candidateA, candidateB] : [],
+        getNodesByQualifiedName: () => [],
+        getNodesByKind: () => [],
+        fileExists: () => true,
+        readFile: () => null,
+        getProjectRoot: () => '/test',
+        getAllFiles: () => [],
+        getNodesByLowerName: () => [],
+        getImportMappings: () => [],
+      };
+
+      // The reference originates in apps/app_a/src, so it should resolve to app_a's navigate, not app_b's
+      const ref = {
+        fromNodeId: 'func:apps/app_a/src/handler.py:handler:5',
+        referenceName: 'navigate',
+        referenceKind: 'calls' as const,
+        line: 5,
+        column: 10,
+        filePath: 'apps/app_a/src/handler.py',
+        language: 'python' as const,
+      };
+
+      const result = matchReference(ref, context);
+
+      expect(result).not.toBeNull();
+      expect(result?.targetNodeId).toBe('func:apps/app_a/src/server.py:navigate:10');
+      expect(result?.resolvedBy).toBe('exact-match');
+    });
+
+    it('should lower confidence for cross-module exact matches', () => {
+      // Two candidates (app_b and app_c), both in a different module than the referencing file
+      const candidates: Node[] = [
+        {
+          id: 'func:apps/app_b/src/server.py:navigate:10',
+          kind: 'function',
+          name: 'navigate',
+          qualifiedName: 'apps/app_b/src/server.py::navigate',
+          filePath: 'apps/app_b/src/server.py',
+          language: 'python',
+          startLine: 10,
+          endLine: 20,
+          startColumn: 0,
+          endColumn: 0,
+          updatedAt: Date.now(),
+        },
+        {
+          id: 'func:apps/app_c/src/server.py:navigate:10',
+          kind: 'function',
+          name: 'navigate',
+          qualifiedName: 'apps/app_c/src/server.py::navigate',
+          filePath: 'apps/app_c/src/server.py',
+          language: 'python',
+          startLine: 10,
+          endLine: 20,
+          startColumn: 0,
+          endColumn: 0,
+          updatedAt: Date.now(),
+        },
+      ];
+
+      const context: ResolutionContext = {
+        getNodesInFile: () => [],
+        getNodesByName: (name) => name === 'navigate' ? candidates : [],
+        getNodesByQualifiedName: () => [],
+        getNodesByKind: () => [],
+        fileExists: () => true,
+        readFile: () => null,
+        getProjectRoot: () => '/test',
+        getAllFiles: () => [],
+        getNodesByLowerName: () => [],
+        getImportMappings: () => [],
+      };
+
+      // Reference from app_a — neither candidate is in the same module
+      const ref = {
+        fromNodeId: 'func:apps/app_a/src/handler.py:handler:5',
+        referenceName: 'navigate',
+        referenceKind: 'calls' as const,
+        line: 5,
+        column: 10,
+        filePath: 'apps/app_a/src/handler.py',
+        language: 'python' as const,
+      };
+
+      const result = matchReference(ref, context);
+
+      // Should still resolve, but with a confidence no higher than 0.4
+      expect(result).not.toBeNull();
+      expect(result?.confidence).toBeLessThanOrEqual(0.4);
+    });
+
     it('should match qualified name references', () => {
       const mockClassNode: Node = {
         id: 'class:user.ts:User:5',

+ 33 - 4
src/resolution/name-matcher.ts

@@ -33,10 +33,13 @@ export function matchByExactName(
   // Multiple matches - try to narrow down
   const bestMatch = findBestMatch(ref, candidates, context);
   if (bestMatch) {
+    // Lower confidence when the match is from a distant/unrelated module
+    const proximity = computePathProximity(ref.filePath, bestMatch.filePath);
+    const confidence = proximity >= 30 ? 0.7 : 0.4;
     return {
       original: ref,
       targetNodeId: bestMatch.id,
-      confidence: 0.7,
+      confidence,
       resolvedBy: 'exact-match',
     };
   }
@@ -133,6 +136,28 @@ export function matchMethodCall(
   return null;
 }
 
+/**
+ * Compute directory proximity between two file paths.
+ *
+ * Both paths are split on '/', the final segment (the file name) is dropped,
+ * and the remaining directory segments are compared position by position from
+ * the start; counting stops at the first mismatch. The score is 15 points per
+ * shared leading segment, capped at 80, so a higher score means the two files
+ * are closer in the directory tree.
+ *
+ * Segments are compared with strict string equality: no case folding or path
+ * normalization is applied, and only '/' is treated as a separator, so a path
+ * written with another separator contributes no directory segments.
+ *
+ * @param filePath1 - Path of the first file.
+ * @param filePath2 - Path of the second file.
+ * @returns 0 when the paths share no leading directory segment, otherwise
+ *   the shared segment count times 15, limited to a maximum of 80.
+ */
+function computePathProximity(filePath1: string, filePath2: string): number {
+  const dir1 = filePath1.split('/').slice(0, -1);
+  const dir2 = filePath2.split('/').slice(0, -1);
+
+  let shared = 0;
+  for (let i = 0; i < Math.min(dir1.length, dir2.length); i++) {
+    if (dir1[i] === dir2[i]) {
+      shared++;
+    } else {
+      break;
+    }
+  }
+
+  // 15 points per shared leading directory segment, capped at 80 (the cap is reached at 6 or more shared segments)
+  return Math.min(shared * 15, 80);
+}
+
 /**
  * Find the best matching node when there are multiple candidates
  */
@@ -143,9 +168,10 @@ function findBestMatch(
 ): Node | null {
   // Prioritization rules:
   // 1. Same file > different file
-  // 2. Same language > different language
-  // 3. Functions/methods > classes/types (for call references)
-  // 4. Exported > non-exported
+  // 2. Directory proximity (same module/package > different module)
+  // 3. Same language > different language
+  // 4. Functions/methods > classes/types (for call references)
+  // 5. Exported > non-exported
 
   let bestScore = -1;
   let bestNode: Node | null = null;
@@ -158,6 +184,9 @@ function findBestMatch(
       score += 100;
     }
 
+    // Directory proximity bonus — strongly prefer same module/package
+    score += computePathProximity(ref.filePath, candidate.filePath);
+
     // Same language bonus
     if (candidate.language === ref.language) {
       score += 50;