Răsfoiți Sursa

feat: Add enum member extraction support across all language extractors

Extends AST parsing to identify and extract individual enum members/cases for better code analysis. Adds enumMemberTypes configuration to each language extractor with language-specific node types (e.g., 'enum_variant' for Rust, 'enum_case' for PHP, 'enum_entry' for Swift/Kotlin). Implements flexible member name resolution supporting both field-based and identifier-based extraction patterns.
Colby McHenry 2 luni în urmă
părinte
comite
c0c8a3bb43

+ 2 - 0
src/extraction/languages/c-cpp.ts

@@ -9,6 +9,7 @@ export const cExtractor: LanguageExtractor = {
   interfaceTypes: [],
   structTypes: ['struct_specifier'],
   enumTypes: ['enum_specifier'],
+  enumMemberTypes: ['enumerator'],
   typeAliasTypes: ['type_definition'], // typedef
   importTypes: ['preproc_include'],
   callTypes: ['call_expression'],
@@ -41,6 +42,7 @@ export const cppExtractor: LanguageExtractor = {
   interfaceTypes: [],
   structTypes: ['struct_specifier'],
   enumTypes: ['enum_specifier'],
+  enumMemberTypes: ['enumerator'],
   typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
   importTypes: ['preproc_include'],
   callTypes: ['call_expression'],

+ 1 - 0
src/extraction/languages/csharp.ts

@@ -9,6 +9,7 @@ export const csharpExtractor: LanguageExtractor = {
   interfaceTypes: ['interface_declaration'],
   structTypes: ['struct_declaration'],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['enum_member_declaration'],
   typeAliasTypes: [],
   importTypes: ['using_directive'],
   callTypes: ['invocation_expression'],

+ 1 - 0
src/extraction/languages/dart.ts

@@ -9,6 +9,7 @@ export const dartExtractor: LanguageExtractor = {
   interfaceTypes: [],
   structTypes: [],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['enum_constant'],
   typeAliasTypes: ['type_alias'],
   importTypes: ['import_or_export'],
   callTypes: [],  // Dart calls use identifier+selector, handled via function body traversal

+ 1 - 0
src/extraction/languages/java.ts

@@ -9,6 +9,7 @@ export const javaExtractor: LanguageExtractor = {
   interfaceTypes: ['interface_declaration'],
   structTypes: [],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['enum_constant'],
   typeAliasTypes: [],
   importTypes: ['import_declaration'],
   callTypes: ['method_invocation'],

+ 1 - 0
src/extraction/languages/kotlin.ts

@@ -9,6 +9,7 @@ export const kotlinExtractor: LanguageExtractor = {
   interfaceTypes: ['class_declaration'], // Interfaces use class_declaration with 'interface' modifier
   structTypes: [], // Kotlin uses data classes
   enumTypes: ['class_declaration'], // Enums use class_declaration with 'enum' modifier
+  enumMemberTypes: ['enum_entry'],
   typeAliasTypes: ['type_alias'],
   importTypes: ['import_header'],
   callTypes: ['call_expression'],

+ 1 - 0
src/extraction/languages/php.ts

@@ -9,6 +9,7 @@ export const phpExtractor: LanguageExtractor = {
   interfaceTypes: ['interface_declaration'],
   structTypes: [],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['enum_case'],
   typeAliasTypes: [],
   importTypes: ['namespace_use_declaration'],
   callTypes: ['function_call_expression', 'member_call_expression', 'scoped_call_expression'],

+ 1 - 0
src/extraction/languages/rust.ts

@@ -9,6 +9,7 @@ export const rustExtractor: LanguageExtractor = {
   interfaceTypes: ['trait_item'],
   structTypes: ['struct_item'],
   enumTypes: ['enum_item'],
+  enumMemberTypes: ['enum_variant'],
   typeAliasTypes: ['type_item'], // Rust type aliases
   importTypes: ['use_declaration'],
   callTypes: ['call_expression'],

+ 1 - 0
src/extraction/languages/swift.ts

@@ -9,6 +9,7 @@ export const swiftExtractor: LanguageExtractor = {
   interfaceTypes: ['protocol_declaration'],
   structTypes: ['struct_declaration'],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['enum_entry'],
   typeAliasTypes: ['typealias_declaration'],
   importTypes: ['import_declaration'],
   callTypes: ['call_expression'],

+ 1 - 0
src/extraction/languages/typescript.ts

@@ -8,6 +8,7 @@ export const typescriptExtractor: LanguageExtractor = {
   interfaceTypes: ['interface_declaration'],
   structTypes: [],
   enumTypes: ['enum_declaration'],
+  enumMemberTypes: ['property_identifier', 'enum_assignment'],
   typeAliasTypes: ['type_alias_declaration'],
   importTypes: ['import_statement'],
   callTypes: ['call_expression'],

+ 2 - 0
src/extraction/tree-sitter-types.ts

@@ -88,6 +88,8 @@ export interface LanguageExtractor {
   structTypes: string[];
   /** Node types that represent enums */
   enumTypes: string[];
+  /** Node types that represent enum members/cases (e.g. Swift: 'enum_entry', Rust: 'enum_variant') */
+  enumMemberTypes?: string[];
   /** Node types that represent type aliases (e.g. `type X = ...`) */
   typeAliasTypes: string[];
   /** Node types that represent imports */

+ 52 - 1
src/extraction/tree-sitter.ts

@@ -598,11 +598,62 @@ export class TreeSitterExtractor {
     const visibility = this.extractor.getVisibility?.(node);
     const isExported = this.extractor.isExported?.(node, this.source);
 
-    this.createNode('enum', name, node, {
+    const enumNode = this.createNode('enum', name, node, {
       docstring,
       visibility,
       isExported,
     });
+    if (!enumNode) return;
+
+    // Extract inheritance (e.g. Swift: enum AFError: Error)
+    this.extractInheritance(node, enumNode.id);
+
+    // Push to stack and visit body children (enum members, nested types, methods)
+    this.nodeStack.push(enumNode.id);
+    const body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
+      ?? getChildByField(node, this.extractor.bodyField)
+      ?? node;
+
+    const memberTypes = this.extractor.enumMemberTypes;
+    for (let i = 0; i < body.namedChildCount; i++) {
+      const child = body.namedChild(i);
+      if (!child) continue;
+
+      if (memberTypes?.includes(child.type)) {
+        this.extractEnumMembers(child);
+      } else {
+        this.visitNode(child);
+      }
+    }
+    this.nodeStack.pop();
+  }
+
+  /**
+   * Create 'enum_member' nodes for one enum member node: the 'name' field if present (single member, early return),
+   * else one per identifier-like named child (multi-case, e.g. Swift: `case put, delete`), else the childless node itself.
+   */
+  private extractEnumMembers(node: SyntaxNode): void {
+    // Prefer the 'name' field (e.g. Rust enum_variant). NOTE(review): returns after ONE member - confirm multi-case nodes never expose 'name' fields
+    const nameNode = getChildByField(node, 'name');
+    if (nameNode) {
+      this.createNode('enum_member', getNodeText(nameNode, this.source), node);
+      return;
+    }
+
+    // No 'name' field: emit one member per simple_identifier / identifier / property_identifier named child, anchored on that child
+    let found = false;
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child && (child.type === 'simple_identifier' || child.type === 'identifier' || child.type === 'property_identifier')) {
+        this.createNode('enum_member', getNodeText(child, this.source), child);
+        found = true;
+      }
+    }
+
+    // Nothing matched and the node has no named children: its own text is the member name (e.g. TS property_identifier directly in enum body)
+    if (!found && node.namedChildCount === 0) {
+      this.createNode('enum_member', getNodeText(node, this.source), node);
+    }
   }
 
   /**