Sfoglia il codice sorgente

fix: Handle Kotlin fun interface edge cases with annotated methods and nested interfaces

Addresses two tree-sitter misparse patterns: (1) fun interfaces with @Throws annotations parse as function_declaration > ERROR instead of user_type, (2) parent interface bodies become ERROR nodes when containing nested fun interfaces, causing methods to be skipped. Updates isFunInterfaceNode to check ERROR-nested user_type children and resolveBody to prefer ERROR bodies starting with `{`.
Colby McHenry 2 mesi fa
parent
commit
b872459f19

+ 44 - 0
__tests__/extraction.test.ts

@@ -1076,6 +1076,50 @@ fun regularFunction(): String {
     expect(funcNode).toBeDefined();
     expect(funcNode?.name).toBe('regularFunction');
   });
+
+  it('should extract fun interface with annotation on method (Pattern 2b)', () => {
+    // When the SAM method has annotations like @Throws, tree-sitter produces a different
+    // misparse: function_declaration > ERROR("interface Name {") instead of
+    // function_declaration > user_type("interface"). This is the OkHttp Interceptor pattern.
+    const code = `
+import java.io.IOException
+
+fun interface Interceptor {
+  @Throws(IOException::class)
+  fun intercept(chain: Chain): Response
+}
+`;
+    const result = extractFromSource('interceptor.kt', code);
+
+    const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
+    expect(ifaceNode).toBeDefined();
+    expect(ifaceNode?.name).toBe('Interceptor');
+  });
+
+  it('should extract methods from interface with nested fun interface', () => {
+    // When an interface contains a nested `fun interface`, tree-sitter misparses
+    // the parent body as an ERROR node. Methods inside should still be extracted.
+    const code = `
+interface WebSocket {
+  fun request(): Request
+  fun send(text: String): Boolean
+  fun cancel()
+  fun interface Factory {
+    fun newWebSocket(request: Request): WebSocket
+  }
+}
+`;
+    const result = extractFromSource('websocket.kt', code);
+
+    const wsIface = result.nodes.find((n) => n.kind === 'interface' && n.name === 'WebSocket');
+    expect(wsIface).toBeDefined();
+
+    const methods = result.nodes.filter((n) => n.kind === 'method' && n.qualifiedName?.startsWith('WebSocket::'));
+    const methodNames = methods.map((m) => m.name);
+    expect(methodNames).toContain('request');
+    expect(methodNames).toContain('send');
+    expect(methodNames).toContain('cancel');
+  });
 });
 
 describe('Dart Extraction', () => {

+ 2 - 1
docs/SEARCH_QUALITY_LOOP.md

@@ -453,7 +453,8 @@ test().catch(console.error);
 | Kotlin interfaces/enums extracted as classes | `class_declaration` matches `classTypes` first; `interfaceTypes`/`enumTypes` never fire | `src/extraction/languages/kotlin.ts: classifyClassNode` detects `interface`/`enum` keywords in AST children |
 | Kotlin functions have zero calls extracted | Tree-sitter grammar doesn't use field names, so `getChildByField(node, 'function_body')` returns null | `src/extraction/languages/kotlin.ts: resolveBody` finds body by type (`function_body`, `class_body`, `enum_class_body`) |
 | Kotlin `navigation_expression` calls not resolved cleanly | `navigation_expression` fell through to `getNodeText` producing messy names with parentheses | `src/extraction/tree-sitter.ts: extractCall` — handle `navigation_expression` by extracting method name from `navigation_suffix > simple_identifier` |
-| Kotlin `fun interface` declarations invisible | Tree-sitter-kotlin doesn't support `fun interface` syntax (Kotlin 1.4+), producing ERROR or misparse as `function_declaration` | `src/extraction/languages/kotlin.ts: visitNode` detects both misparse patterns (ERROR node + lambda body, or function_declaration with `user_type("interface")`) and extracts as interface |
+| Kotlin `fun interface` declarations invisible | Tree-sitter-kotlin doesn't support `fun interface` syntax (Kotlin 1.4+), producing ERROR or misparse as `function_declaration` | `src/extraction/languages/kotlin.ts: visitNode` detects three misparse patterns: (1) ERROR node + lambda body, (2) function_declaration with `user_type("interface")` direct child + name in ERROR child, (3) function_declaration with ERROR child containing `user_type("interface")` + name. `isFunInterfaceNode` checks both direct and ERROR-nested `user_type` children |
+| Kotlin class/interface methods missing when nested `fun interface` present | Tree-sitter misparses the parent body as ERROR (starting with `{`) + class_body (nested interface body); `resolveBody` found the wrong body | `src/extraction/languages/kotlin.ts: resolveBody` prefers ERROR bodies starting with `{`; `visitNode` excludes body-like ERROR from `fun interface` detection |
 
 ## After Fixing Issues
 

+ 55 - 6
src/extraction/languages/kotlin.ts

@@ -14,6 +14,16 @@ function isFunInterfaceNode(node: SyntaxNode): boolean {
       const typeId = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
       if (typeId && typeId.text === 'interface') hasInterfaceType = true;
     }
+    // Pattern 2b: user_type("interface") is inside an ERROR child
+    if (child.type === 'ERROR') {
+      for (let j = 0; j < child.childCount; j++) {
+        const gc = child.child(j);
+        if (gc && gc.type === 'user_type') {
+          const typeId = gc.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
+          if (typeId && typeId.text === 'interface') hasInterfaceType = true;
+        }
+      }
+    }
   }
   return hasFun && hasInterfaceType;
 }
@@ -49,15 +59,43 @@ export const kotlinExtractor: LanguageExtractor = {
 
     if (node.type !== 'ERROR' && node.type !== 'function_declaration') return false;
 
+    // Skip ERROR nodes that are class bodies (start with `{`). These contain parent
+    // methods + trailing `fun interface` tokens. The methods are extracted via
+    // resolveBody; handling the ERROR here would consume the whole body.
+    if (node.type === 'ERROR') {
+      const firstChild = node.child(0);
+      if (firstChild && firstChild.type === '{') return false;
+    }
+
     if (!isFunInterfaceNode(node)) return false;
 
-    // Extract the interface name from simple_identifier child
+    // Extract the interface name.
+    // For function_declaration misparses (patterns 2a/2b), the real name is inside
+    // an ERROR child — direct simple_identifier children are the misparsed method name.
     let nameText: string | null = null;
-    for (let i = 0; i < node.childCount; i++) {
-      const child = node.child(i);
-      if (child && child.type === 'simple_identifier') {
-        nameText = child.text;
-        break;
+    if (node.type === 'function_declaration') {
+      for (let i = 0; i < node.childCount; i++) {
+        const child = node.child(i);
+        if (child && child.type === 'ERROR') {
+          for (let j = 0; j < child.childCount; j++) {
+            const gc = child.child(j);
+            if (gc && gc.type === 'simple_identifier') {
+              nameText = gc.text;
+              break;
+            }
+          }
+          if (nameText) break;
+        }
+      }
+    }
+    // Fallback: direct simple_identifier child (Pattern 1: ERROR node at top level)
+    if (!nameText) {
+      for (let i = 0; i < node.childCount; i++) {
+        const child = node.child(i);
+        if (child && child.type === 'simple_identifier') {
+          nameText = child.text;
+          break;
+        }
       }
     }
     if (!nameText) return false;
@@ -96,8 +134,19 @@ export const kotlinExtractor: LanguageExtractor = {
     // Kotlin's tree-sitter grammar doesn't use field names, so getChildByField fails.
     // Find body by type: function_body for functions/methods, class_body for classes,
     // enum_class_body for enums.
+    //
+    // Special case: when a class/interface contains a nested `fun interface`, tree-sitter
+    // misparses the parent's body as an ERROR node (starting with `{`) and creates
+    // a class_body sibling for the nested interface's body. Prefer the ERROR body
+    // so the parent's methods are extracted.
     for (let i = 0; i < node.namedChildCount; i++) {
       const child = node.namedChild(i);
+      if (child && child.type === 'ERROR') {
+        const firstChild = child.child(0);
+        if (firstChild && firstChild.type === '{') {
+          return child;
+        }
+      }
       if (child && (child.type === 'function_body' || child.type === 'class_body' || child.type === 'enum_class_body')) {
         return child;
       }