Przeglądaj źródła

feat: Fix Go struct/interface extraction by refactoring type_spec handling through type alias resolver

Addresses Go's tree-sitter parsing where structs and interfaces are wrapped in type_spec nodes rather than appearing as direct node types. Moves struct_type and interface_type detection from direct node type matching to a new resolveTypeAliasKind resolver that examines the inner type field, ensuring proper extraction with field visiting and inheritance detection.
Colby McHenry 2 miesiące temu
rodzic
commit
982d987349

+ 11 - 2
src/extraction/languages/go.ts

@@ -5,8 +5,8 @@ export const goExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration'],
   classTypes: [], // Go doesn't have classes
   methodTypes: ['method_declaration'],
-  interfaceTypes: ['interface_type'],
-  structTypes: ['struct_type'],
+  interfaceTypes: [],  // Handled via type_spec → resolveTypeAliasKind
+  structTypes: [],     // Handled via type_spec → resolveTypeAliasKind
   enumTypes: [],
   typeAliasTypes: ['type_spec'], // Go type declarations
   importTypes: ['import_declaration'],
@@ -27,6 +27,15 @@ export const goExtractor: LanguageExtractor = {
     }
     return sig;
   },
+  resolveTypeAliasKind: (node, _source) => {
+    // Go type_spec: `type Foo struct { ... }` or `type Bar interface { ... }`
+    // The inner type is in the 'type' field of the type_spec node
+    const typeChild = getChildByField(node, 'type');
+    if (!typeChild) return undefined;
+    if (typeChild.type === 'struct_type') return 'struct';
+    if (typeChild.type === 'interface_type') return 'interface';
+    return undefined;
+  },
   getReceiverType: (node, source) => {
     // Go method_declaration has a "receiver" field: func (sl *scrapeLoop) run(...)
     // The receiver is a parameter_list containing a parameter_declaration

+ 9 - 0
src/extraction/tree-sitter-types.ts

@@ -174,4 +174,13 @@ export interface LanguageExtractor {
    * When present, the receiver type is included in the qualified name for better searchability.
    */
   getReceiverType?: (node: SyntaxNode, source: string) => string | undefined;
+
+  /**
+   * Resolve the actual node kind for a type alias declaration.
+   * Used by Go where `type_spec` is the named declaration wrapper for structs/interfaces:
+   *   `type Foo struct { ... }` → type_spec (name: "Foo") → struct_type
+   * Returns 'struct', 'interface', etc. to override the default 'type_alias' kind,
+   * or undefined to keep it as a type alias.
+   */
+  resolveTypeAliasKind?: (node: SyntaxNode, source: string) => NodeKind | undefined;
 }

+ 42 - 5
src/extraction/tree-sitter.ts

@@ -273,8 +273,10 @@ export class TreeSitterExtractor {
       skipChildren = true; // extractEnum visits body children
     }
     // Check for type alias declarations (e.g. `type X = ...` in TypeScript)
+    // For Go, type_spec wraps struct/interface definitions — resolveTypeAliasKind
+    // detects these and extractTypeAlias creates the correct node kind.
     else if (this.extractor.typeAliasTypes.includes(nodeType)) {
-      this.extractTypeAlias(node);
+      skipChildren = this.extractTypeAlias(node);
     }
     // Check for class fields (e.g. Java field_declaration, C# field_declaration)
     else if (this.extractor.fieldTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
@@ -852,16 +854,50 @@ export class TreeSitterExtractor {
   }
 
   /**
-   * Extract a type alias (e.g. `export type X = ...` in TypeScript)
+   * Extract a type alias (e.g. `export type X = ...` in TypeScript).
+   * For languages like Go, resolveTypeAliasKind detects when the type_spec
+   * wraps a struct or interface definition and creates the correct node kind.
+   * Returns true when the node resolved to a struct/interface, in which case the caller must skip its children (this method handles the inner type itself).
    */
-  private extractTypeAlias(node: SyntaxNode): void {
-    if (!this.extractor) return;
+  private extractTypeAlias(node: SyntaxNode): boolean {
+    if (!this.extractor) return false;
 
     const name = extractName(node, this.source, this.extractor);
-    if (name === '<anonymous>') return;
+    if (name === '<anonymous>') return false;
     const docstring = getPrecedingDocstring(node, this.source);
     const isExported = this.extractor.isExported?.(node, this.source);
 
+    // Check if this type alias is actually a struct or interface definition
+    // (e.g. Go: `type Foo struct { ... }` is a type_spec wrapping struct_type)
+    const resolvedKind = this.extractor.resolveTypeAliasKind?.(node, this.source);
+
+    if (resolvedKind === 'struct') {
+      const structNode = this.createNode('struct', name, node, { docstring, isExported });
+      if (!structNode) return true;
+      // Visit body children for field extraction
+      this.nodeStack.push(structNode.id);
+      const typeChild = getChildByField(node, 'type');
+      if (typeChild) {
+        const body = getChildByField(typeChild, this.extractor.bodyField) || typeChild;
+        for (let i = 0; i < body.namedChildCount; i++) {
+          const child = body.namedChild(i);
+          if (child) this.visitNode(child);
+        }
+      }
+      this.nodeStack.pop();
+      return true;
+    }
+
+    if (resolvedKind === 'interface') {
+      const kind: NodeKind = this.extractor.interfaceKind ?? 'interface';
+      const interfaceNode = this.createNode(kind, name, node, { docstring, isExported });
+      if (!interfaceNode) return true;
+      // Extract interface inheritance from the inner type node
+      const typeChild = getChildByField(node, 'type');
+      if (typeChild) this.extractInheritance(typeChild, interfaceNode.id);
+      return true;
+    }
+
     const typeAliasNode = this.createNode('type_alias', name, node, {
       docstring,
       isExported,
@@ -876,6 +912,7 @@ export class TreeSitterExtractor {
         this.extractTypeRefsFromSubtree(value, typeAliasNode.id);
       }
     }
+    return false;
   }
 
   /**