Przeglądaj źródła

feat: Add C/C++ typedef enum and struct extraction with inner type resolution

Handles the C/C++ idiom in which an anonymous enum or struct definition is wrapped in a typedef declaration (e.g. `typedef enum { A, B } MyEnum;`). Adds a `resolveTypeAliasKind` hook to the C and C++ extractors that detects an inner `enum_specifier` or `struct_specifier` with a body inside a typedef. Such typedefs are now extracted as enum/struct nodes named after the typedef, with enum members and struct fields taken from the inner anonymous definition, instead of being treated as simple type aliases.
Colby McHenry 2 miesięcy temu
rodzic
commit
6f34be38aa
2 zmienionych plików z 65 dodań i 2 usunięć
  1. 23 1
      src/extraction/languages/c-cpp.ts
  2. 42 1
      src/extraction/tree-sitter.ts

+ 23 - 1
src/extraction/languages/c-cpp.ts

@@ -1,5 +1,5 @@
 import type { Node as SyntaxNode } from 'web-tree-sitter';
-import { getNodeText } from '../tree-sitter-helpers';
+import { getChildByField, getNodeText } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
 export const cExtractor: LanguageExtractor = {
@@ -17,6 +17,18 @@ export const cExtractor: LanguageExtractor = {
   nameField: 'declarator',
   bodyField: 'body',
   paramsField: 'parameters',
+  resolveTypeAliasKind: (node, _source) => {
+    // C typedef: `typedef enum { ... } name;` or `typedef struct { ... } name;`
+    // The inner enum_specifier/struct_specifier is anonymous, but we want the typedef name
+    // to become the enum/struct node name.
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (!child) continue;
+      if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum';
+      if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct';
+    }
+    return undefined;
+  },
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
     // C includes: #include <stdio.h>, #include "myheader.h"
@@ -66,6 +78,16 @@ export const cppExtractor: LanguageExtractor = {
     }
     return undefined;
   },
+  resolveTypeAliasKind: (node, _source) => {
+    // C++ typedef: `typedef enum { ... } name;` or `typedef struct { ... } name;`
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (!child) continue;
+      if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum';
+      if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct';
+    }
+    return undefined;
+  },
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
     // C++ includes: #include <iostream>, #include "myheader.h"

+ 42 - 1
src/extraction/tree-sitter.ts

@@ -375,6 +375,18 @@ export class TreeSitterExtractor {
     return newNode;
   }
 
+  /**
+   * Find first named child whose type is in the given list.
+   * Used to locate inner type nodes (e.g. enum_specifier inside a typedef).
+   */
+  private findChildByTypes(node: SyntaxNode, types: string[]): SyntaxNode | null {
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child && types.includes(child.type)) return child;
+    }
+    return null;
+  }
+
   /**
    * Build qualified name from node stack
    */
@@ -922,7 +934,9 @@ export class TreeSitterExtractor {
       if (!structNode) return true;
       // Visit body children for field extraction
       this.nodeStack.push(structNode.id);
-      const typeChild = getChildByField(node, 'type');
+      // Try Go-style 'type' field first, then find inner struct child (C typedef struct)
+      const typeChild = getChildByField(node, 'type')
+        || this.findChildByTypes(node, this.extractor.structTypes);
       if (typeChild) {
         // Extract struct embedding (e.g. Go: `type DB struct { *Head; Queryable }`)
         this.extractInheritance(typeChild, structNode.id);
@@ -936,6 +950,33 @@ export class TreeSitterExtractor {
       return true;
     }
 
+    if (resolvedKind === 'enum') {
+      const enumNode = this.createNode('enum', name, node, { docstring, isExported });
+      if (!enumNode) return true;
+      this.nodeStack.push(enumNode.id);
+      // Find the inner enum type child (e.g. C: typedef enum { ... } name)
+      const innerEnum = this.findChildByTypes(node, this.extractor.enumTypes);
+      if (innerEnum) {
+        this.extractInheritance(innerEnum, enumNode.id);
+        const body = this.extractor.resolveBody?.(innerEnum, this.extractor.bodyField)
+          ?? getChildByField(innerEnum, this.extractor.bodyField);
+        if (body) {
+          const memberTypes = this.extractor.enumMemberTypes;
+          for (let i = 0; i < body.namedChildCount; i++) {
+            const child = body.namedChild(i);
+            if (!child) continue;
+            if (memberTypes?.includes(child.type)) {
+              this.extractEnumMembers(child);
+            } else {
+              this.visitNode(child);
+            }
+          }
+        }
+      }
+      this.nodeStack.pop();
+      return true;
+    }
+
     if (resolvedKind === 'interface') {
       const kind: NodeKind = this.extractor.interfaceKind ?? 'interface';
       const interfaceNode = this.createNode(kind, name, node, { docstring, isExported });