ソースを参照

feat: Add complete Kotlin language support with fun interface handling

Fixes three Kotlin extraction problems: interfaces and enums being extracted as classes, functions having zero calls extracted, and `fun interface` declarations being missed entirely. Adds `classifyClassNode` to distinguish interfaces and enums from classes, a `resolveBody` hook because tree-sitter-kotlin's grammar does not use field names, `navigation_expression` handling for method-call extraction, `getReceiverType` for extension functions, and a `visitNode` hook that detects the `fun interface` misparse patterns caused by tree-sitter-kotlin's lack of support for this Kotlin 1.4+ syntax. Verified against the Koin and LeakCanary codebases.
Colby McHenry 2 ヶ月 前
コミット
0cad147859

+ 63 - 0
__tests__/extraction.test.ts

@@ -1013,6 +1013,69 @@ suspend fun loadData(): List<String> {
     expect(funcNode).toBeDefined();
     expect(funcNode).toBeDefined();
     expect(funcNode?.isAsync).toBe(true);
     expect(funcNode?.isAsync).toBe(true);
   });
   });
+
+  it('should extract fun interface declarations', () => {
+    const code = `
+fun interface OnObjectRetainedListener {
+  fun onObjectRetained()
+}
+`;
+    const result = extractFromSource('listener.kt', code);
+
+    const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
+    expect(ifaceNode).toBeDefined();
+    expect(ifaceNode?.name).toBe('OnObjectRetainedListener');
+
+    const methodNode = result.nodes.find((n) => n.kind === 'method');
+    expect(methodNode).toBeDefined();
+    expect(methodNode?.name).toBe('onObjectRetained');
+    expect(methodNode?.qualifiedName).toBe('OnObjectRetainedListener::onObjectRetained');
+  });
+
+  it('should extract complex fun interface with nested classes', () => {
+    const code = `
+fun interface EventListener {
+  fun onEvent(event: Event)
+
+  sealed class Event {
+    class DumpingHeap : Event()
+  }
+}
+`;
+    const result = extractFromSource('events.kt', code);
+
+    const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
+    expect(ifaceNode).toBeDefined();
+    expect(ifaceNode?.name).toBe('EventListener');
+
+    // Nested sealed class should still be extracted (as sibling due to grammar limitations)
+    const eventClass = result.nodes.find((n) => n.kind === 'class' && n.name === 'Event');
+    expect(eventClass).toBeDefined();
+
+    const dumpingHeap = result.nodes.find((n) => n.kind === 'class' && n.name === 'DumpingHeap');
+    expect(dumpingHeap).toBeDefined();
+  });
+
+  it('should not affect regular function declarations', () => {
+    const code = `
+fun interface MyCallback {
+  fun invoke(value: Int)
+}
+
+fun regularFunction(): String {
+  return "hello"
+}
+`;
+    const result = extractFromSource('mixed.kt', code);
+
+    const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
+    expect(ifaceNode).toBeDefined();
+    expect(ifaceNode?.name).toBe('MyCallback');
+
+    const funcNode = result.nodes.find((n) => n.kind === 'function');
+    expect(funcNode).toBeDefined();
+    expect(funcNode?.name).toBe('regularFunction');
+  });
 });
 });
 
 
 describe('Dart Extraction', () => {
 describe('Dart Extraction', () => {

+ 6 - 3
docs/SEARCH_QUALITY_LOOP.md

@@ -450,6 +450,10 @@ test().catch(console.error);
 | Ruby methods inside modules missing owner in `qualified_name` | Ruby `module` AST nodes not being extracted | `src/extraction/languages/ruby.ts: visitNode` hook extracts modules; `src/extraction/tree-sitter.ts: isInsideClassLikeNode` includes `module` kind |
 | Ruby methods inside modules missing owner in `qualified_name` | Ruby `module` AST nodes not being extracted | `src/extraction/languages/ruby.ts: visitNode` hook extracts modules; `src/extraction/tree-sitter.ts: isInsideClassLikeNode` includes `module` kind |
 | TypeScript abstract classes missing | `abstract_class_declaration` not in `classTypes` | `src/extraction/languages/typescript.ts: classTypes` — add `abstract_class_declaration` |
 | TypeScript abstract classes missing | `abstract_class_declaration` not in `classTypes` | `src/extraction/languages/typescript.ts: classTypes` — add `abstract_class_declaration` |
 | Single-expression arrow functions silently dropped | `extractName` finds identifier in expression body instead of returning `<anonymous>` | `src/extraction/tree-sitter.ts: extractName` — skip identifier search for `arrow_function`/`function_expression` nodes |
 | Single-expression arrow functions silently dropped | `extractName` finds identifier in expression body instead of returning `<anonymous>` | `src/extraction/tree-sitter.ts: extractName` — skip identifier search for `arrow_function`/`function_expression` nodes |
+| Kotlin interfaces/enums extracted as classes | `class_declaration` matches `classTypes` first; `interfaceTypes`/`enumTypes` never fire | `src/extraction/languages/kotlin.ts: classifyClassNode` detects `interface`/`enum` keywords in AST children |
+| Kotlin functions have zero calls extracted | Tree-sitter grammar doesn't use field names, so `getChildByField(node, 'function_body')` returns null | `src/extraction/languages/kotlin.ts: resolveBody` finds body by type (`function_body`, `class_body`, `enum_class_body`) |
+| Kotlin `navigation_expression` calls not resolved cleanly | `navigation_expression` fell through to `getNodeText` producing messy names with parentheses | `src/extraction/tree-sitter.ts: extractCall` — handle `navigation_expression` by extracting method name from `navigation_suffix > simple_identifier` |
+| Kotlin `fun interface` declarations invisible | Tree-sitter-kotlin doesn't support `fun interface` syntax (Kotlin 1.4+), so it produces either an ERROR node or a misparse as `function_declaration` | `src/extraction/languages/kotlin.ts: visitNode` detects both misparse patterns (ERROR node + lambda body, or function_declaration with `user_type("interface")`) and extracts the declaration as an interface |
 
 
 ## After Fixing Issues
 ## After Fixing Issues
 
 
@@ -535,9 +539,8 @@ if (receiverType) {
 - [x] **Ruby** — NOT needed for `getReceiverType`. Methods nested in class body. Added `visitNode` hook to extract Ruby `module` nodes (concerns, namespaces) with proper containment and qualified names. Methods inside modules get `Module::method` qualified names. Also wired up the `ExtractorContext` with `pushScope`/`popScope` for language hooks. Verified against Discourse
 - [x] **Ruby** — NOT needed for `getReceiverType`. Methods nested in class body. Added `visitNode` hook to extract Ruby `module` nodes (concerns, namespaces) with proper containment and qualified names. Methods inside modules get `Module::method` qualified names. Also wired up the `ExtractorContext` with `pushScope`/`popScope` for language hooks. Verified against Discourse
 - [x] **TypeScript** — NOT needed for `getReceiverType`. Methods nested in class body. Added `abstract_class_declaration` to `classTypes` so abstract classes are properly extracted. Fixed single-expression arrow function extraction (`const fn = () => expr` was silently dropped because `extractName` picked up the body identifier instead of returning `<anonymous>` for parent name resolution). Verified against Grafana
 - [x] **TypeScript** — NOT needed for `getReceiverType`. Methods nested in class body. Added `abstract_class_declaration` to `classTypes` so abstract classes are properly extracted. Fixed single-expression arrow function extraction (`const fn = () => expr` was silently dropped because `extractName` picked up the body identifier instead of returning `<anonymous>` for parent name resolution). Verified against Grafana
 - [x] **Dart** — NOT needed for `getReceiverType`. Methods nested in class body. Added bare call extraction for selector-based method calls (e.g. `object.method()`). Verified against Flutter
 - [x] **Dart** — NOT needed for `getReceiverType`. Methods nested in class body. Added bare call extraction for selector-based method calls (e.g. `object.method()`). Verified against Flutter
+- [x] **Kotlin** — `getReceiverType` extracts receiver from extension functions `fun Type.method()`. Added `classifyClassNode` to distinguish interfaces/enums from classes (all use `class_declaration` AST node). Added `resolveBody` hook since Kotlin's tree-sitter grammar doesn't use field names. Added `navigation_expression` handling for method call extraction. Added `object_declaration` via `extraClassNodeTypes`. Added `delegation_specifier` handling in `extractInheritance` for Kotlin's `: Parent, Interface` syntax. Also fixed `extractInterface` to visit body children (interface methods were not being extracted). Added `visitNode` hook to handle `fun interface` (SAM) declarations — tree-sitter-kotlin doesn't support this Kotlin 1.4+ syntax, producing ERROR or function_declaration misparse; the hook detects both patterns and extracts the interface. Verified against Koin, LeakCanary
 
 
 ### Needs Verification
 ### Needs Verification
 
 
-Check these — may need `getReceiverType` if methods are top-level in the AST:
-
-- [ ] Kotlin — extension functions `fun Type.method()`
+(none currently)

+ 123 - 3
src/extraction/languages/kotlin.ts

@@ -2,22 +2,143 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
 
+/** Check if a node matches the `fun interface` misparse pattern */
+function isFunInterfaceNode(node: SyntaxNode): boolean {
+  let hasFun = false;
+  let hasInterfaceType = false;
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (!child) continue;
+    if (child.type === 'fun' && !child.isNamed) hasFun = true;
+    if (child.type === 'user_type') {
+      const typeId = child.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
+      if (typeId && typeId.text === 'interface') hasInterfaceType = true;
+    }
+  }
+  return hasFun && hasInterfaceType;
+}
+
 export const kotlinExtractor: LanguageExtractor = {
 export const kotlinExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration'],
   functionTypes: ['function_declaration'],
   classTypes: ['class_declaration'],
   classTypes: ['class_declaration'],
   methodTypes: ['function_declaration'], // Methods are functions inside classes
   methodTypes: ['function_declaration'], // Methods are functions inside classes
-  interfaceTypes: ['class_declaration'], // Interfaces use class_declaration with 'interface' modifier
+  interfaceTypes: [], // Handled via classifyClassNode
   structTypes: [], // Kotlin uses data classes
   structTypes: [], // Kotlin uses data classes
-  enumTypes: ['class_declaration'], // Enums use class_declaration with 'enum' modifier
+  enumTypes: [], // Handled via classifyClassNode
   enumMemberTypes: ['enum_entry'],
   enumMemberTypes: ['enum_entry'],
   typeAliasTypes: ['type_alias'],
   typeAliasTypes: ['type_alias'],
   importTypes: ['import_header'],
   importTypes: ['import_header'],
   callTypes: ['call_expression'],
   callTypes: ['call_expression'],
   variableTypes: ['property_declaration'],
   variableTypes: ['property_declaration'],
+  fieldTypes: ['property_declaration'],
+  extraClassNodeTypes: ['object_declaration'],
   nameField: 'simple_identifier',
   nameField: 'simple_identifier',
   bodyField: 'function_body',
   bodyField: 'function_body',
+  visitNode: (node, ctx) => {
+    // Handle Kotlin `fun interface` declarations.
+    // Tree-sitter-kotlin doesn't support `fun interface` syntax (Kotlin 1.4+).
+    // It produces two different misparse patterns:
+    //   Pattern 1 (simple): ERROR node + sibling lambda_literal for body
+    //   Pattern 2 (complex): function_declaration misparse with ERROR child
+    // Skip lambda_literal bodies that were already consumed by a fun interface ERROR node
+    if (node.type === 'lambda_literal') {
+      const prev = node.previousSibling;
+      if (prev && prev.type === 'ERROR' && isFunInterfaceNode(prev)) return true;
+      return false;
+    }
+
+    if (node.type !== 'ERROR' && node.type !== 'function_declaration') return false;
+
+    if (!isFunInterfaceNode(node)) return false;
+
+    // Extract the interface name from simple_identifier child
+    let nameText: string | null = null;
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child && child.type === 'simple_identifier') {
+        nameText = child.text;
+        break;
+      }
+    }
+    if (!nameText) return false;
+
+    // Create the interface node
+    const ifaceNode = ctx.createNode('interface', nameText, node);
+    if (!ifaceNode) return false;
+
+    ctx.pushScope(ifaceNode.id);
+
+    if (node.type === 'ERROR') {
+      // Pattern 1: body is in the next sibling lambda_literal
+      const nextSibling = node.nextSibling;
+      if (nextSibling && nextSibling.type === 'lambda_literal') {
+        for (let i = 0; i < nextSibling.namedChildCount; i++) {
+          const child = nextSibling.namedChild(i);
+          if (child && child.type === 'statements') {
+            for (let j = 0; j < child.namedChildCount; j++) {
+              const stmt = child.namedChild(j);
+              if (stmt) ctx.visitNode(stmt);
+            }
+          }
+        }
+      }
+    }
+    // Pattern 2 (function_declaration): nested classes are siblings at source_file level,
+    // already visited by the normal traversal. The single abstract method is misparsed
+    // and cannot be reliably recovered, but the interface node itself is the key value.
+
+    ctx.popScope();
+    return true;
+  },
   paramsField: 'function_value_parameters',
   paramsField: 'function_value_parameters',
   returnField: 'type',
   returnField: 'type',
+  resolveBody: (node, _bodyField) => {
+    // Kotlin's tree-sitter grammar doesn't use field names, so getChildByField fails.
+    // Find body by type: function_body for functions/methods, class_body for classes,
+    // enum_class_body for enums.
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child && (child.type === 'function_body' || child.type === 'class_body' || child.type === 'enum_class_body')) {
+        return child;
+      }
+    }
+    return null;
+  },
+  classifyClassNode: (node) => {
+    // Kotlin reuses class_declaration for classes, interfaces, and enums.
+    // Detect by checking for keyword children:
+    //   interface Foo { }       → has 'interface' keyword child
+    //   enum class Level { }    → has 'enum' keyword child
+    //   class / data class / abstract class → default 'class'
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (!child) continue;
+      if (child.type === 'interface') return 'interface';
+      if (child.type === 'enum') return 'enum';
+    }
+    return 'class';
+  },
+  getReceiverType: (node, source) => {
+    // Kotlin extension functions: fun Type.method() { }
+    // AST: function_declaration > user_type, ".", simple_identifier
+    // The user_type before the dot is the receiver type.
+    let foundUserType: SyntaxNode | null = null;
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (!child) continue;
+      if (child.type === 'user_type') {
+        foundUserType = child;
+      } else if (child.type === '.' && foundUserType) {
+        // The user_type before the dot is the receiver type
+        const typeId = foundUserType.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
+        return typeId ? getNodeText(typeId, source) : getNodeText(foundUserType, source);
+      } else if (child.type === 'simple_identifier' || child.type === 'function_value_parameters') {
+        // Past the function name — no receiver
+        break;
+      }
+    }
+    return undefined;
+  },
   getSignature: (node, source) => {
   getSignature: (node, source) => {
     // Kotlin function signature: fun name(params): ReturnType
     // Kotlin function signature: fun name(params): ReturnType
     const params = getChildByField(node, 'function_value_parameters');
     const params = getChildByField(node, 'function_value_parameters');
@@ -45,7 +166,6 @@ export const kotlinExtractor: LanguageExtractor = {
   },
   },
   isStatic: (_node) => {
   isStatic: (_node) => {
     // Kotlin doesn't have static, uses companion objects
     // Kotlin doesn't have static, uses companion objects
-    // Check if inside companion object would require more context
     return false;
     return false;
   },
   },
   isAsync: (node) => {
   isAsync: (node) => {

+ 1 - 1
src/extraction/tree-sitter-types.ts

@@ -154,7 +154,7 @@ export interface LanguageExtractor {
    * Classify a class_declaration node when the grammar reuses one node type
    * Classify a class_declaration node when the grammar reuses one node type
    * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
    * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
    */
    */
-  classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum';
+  classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum' | 'interface';
 
 
   /**
   /**
    * Resolve the body node for a function/method/class when it's not a child field.
    * Resolve the body node for a function/method/class when it's not a child field.

+ 51 - 3
src/extraction/tree-sitter.ts

@@ -264,6 +264,8 @@ export class TreeSitterExtractor {
         this.extractStruct(node);
         this.extractStruct(node);
       } else if (classification === 'enum') {
       } else if (classification === 'enum') {
         this.extractEnum(node);
         this.extractEnum(node);
+      } else if (classification === 'interface') {
+        this.extractInterface(node);
       } else {
       } else {
         this.extractClass(node);
         this.extractClass(node);
       }
       }
@@ -675,6 +677,19 @@ export class TreeSitterExtractor {
 
 
     // Extract extends (interface inheritance)
     // Extract extends (interface inheritance)
     this.extractInheritance(node, interfaceNode.id);
     this.extractInheritance(node, interfaceNode.id);
+
+    // Visit body children for interface methods and nested types
+    this.nodeStack.push(interfaceNode.id);
+    let body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
+      ?? getChildByField(node, this.extractor.bodyField);
+    if (!body) body = node;
+    for (let i = 0; i < body.namedChildCount; i++) {
+      const child = body.namedChild(i);
+      if (child) {
+        this.visitNode(child);
+      }
+    }
+    this.nodeStack.pop();
   }
   }
 
 
   /**
   /**
@@ -1338,10 +1353,20 @@ export class TreeSitterExtractor {
       const func = getChildByField(node, 'function') || node.namedChild(0);
       const func = getChildByField(node, 'function') || node.namedChild(0);
 
 
       if (func) {
       if (func) {
-        if (func.type === 'member_expression' || func.type === 'attribute' || func.type === 'selector_expression') {
+        if (func.type === 'member_expression' || func.type === 'attribute' || func.type === 'selector_expression' || func.type === 'navigation_expression') {
           // Method call: obj.method() or obj.field.method()
           // Method call: obj.method() or obj.field.method()
           // Go uses selector_expression with 'field', JS/TS uses member_expression with 'property'
           // Go uses selector_expression with 'field', JS/TS uses member_expression with 'property'
-          const property = getChildByField(func, 'property') || getChildByField(func, 'field') || func.namedChild(1);
+          // Kotlin uses navigation_expression with navigation_suffix > simple_identifier
+          let property = getChildByField(func, 'property') || getChildByField(func, 'field');
+          if (!property) {
+            const child1 = func.namedChild(1);
+            // Kotlin: navigation_suffix wraps the method name — extract simple_identifier from it
+            if (child1?.type === 'navigation_suffix') {
+              property = child1.namedChildren.find((c: SyntaxNode) => c.type === 'simple_identifier') ?? child1;
+            } else {
+              property = child1;
+            }
+          }
           if (property) {
           if (property) {
             const methodName = getNodeText(property, this.source);
             const methodName = getNodeText(property, this.source);
             // Include receiver name for qualified resolution (e.g., console.print → "console.print")
             // Include receiver name for qualified resolution (e.g., console.print → "console.print")
@@ -1350,7 +1375,7 @@ export class TreeSitterExtractor {
             // Skip self/this/cls as they don't aid resolution
             // Skip self/this/cls as they don't aid resolution
             const receiver = getChildByField(func, 'object') || getChildByField(func, 'operand') || func.namedChild(0);
             const receiver = getChildByField(func, 'object') || getChildByField(func, 'operand') || func.namedChild(0);
             const SKIP_RECEIVERS = new Set(['self', 'this', 'cls', 'super']);
             const SKIP_RECEIVERS = new Set(['self', 'this', 'cls', 'super']);
-            if (receiver && receiver.type === 'identifier') {
+            if (receiver && (receiver.type === 'identifier' || receiver.type === 'simple_identifier')) {
               const receiverName = getNodeText(receiver, this.source);
               const receiverName = getNodeText(receiver, this.source);
               if (!SKIP_RECEIVERS.has(receiverName)) {
               if (!SKIP_RECEIVERS.has(receiverName)) {
                 calleeName = `${receiverName}.${methodName}`;
                 calleeName = `${receiverName}.${methodName}`;
@@ -1421,6 +1446,7 @@ export class TreeSitterExtractor {
         const classification = this.extractor!.classifyClassNode?.(node) ?? 'class';
         const classification = this.extractor!.classifyClassNode?.(node) ?? 'class';
         if (classification === 'struct') this.extractStruct(node);
         if (classification === 'struct') this.extractStruct(node);
         else if (classification === 'enum') this.extractEnum(node);
         else if (classification === 'enum') this.extractEnum(node);
+        else if (classification === 'interface') this.extractInterface(node);
         else this.extractClass(node);
         else this.extractClass(node);
         return;
         return;
       }
       }
@@ -1612,6 +1638,28 @@ export class TreeSitterExtractor {
         }
         }
       }
       }
 
 
+      // Kotlin: `class Foo : Bar, Baz` → delegation_specifier > user_type > type_identifier
+      // Also handles `class Foo : Bar()` → delegation_specifier > constructor_invocation > user_type
+      if (child.type === 'delegation_specifier') {
+        const userType = child.namedChildren.find((c: SyntaxNode) => c.type === 'user_type');
+        const constructorInvocation = child.namedChildren.find((c: SyntaxNode) => c.type === 'constructor_invocation');
+        const target = userType ?? constructorInvocation;
+        if (target) {
+          const typeId = target.type === 'user_type'
+            ? target.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier') ?? target
+            : target.namedChildren.find((c: SyntaxNode) => c.type === 'user_type')?.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier')
+              ?? target.namedChildren.find((c: SyntaxNode) => c.type === 'user_type') ?? target;
+          const name = getNodeText(typeId, this.source);
+          this.unresolvedReferences.push({
+            fromNodeId: classId,
+            referenceName: name,
+            referenceKind: 'extends',
+            line: typeId.startPosition.row + 1,
+            column: typeId.startPosition.column,
+          });
+        }
+      }
+
       // Swift: inheritance_specifier > user_type > type_identifier
       // Swift: inheritance_specifier > user_type > type_identifier
       // Used for class inheritance, protocol conformance, and protocol inheritance
       // Used for class inheritance, protocol conformance, and protocol inheritance
       if (child.type === 'inheritance_specifier') {
       if (child.type === 'inheritance_specifier') {