Ver código fonte

feat: Add Ruby module extraction with containment and qualified names

Fixes Ruby methods defined inside modules missing their owner in `qualified_name`, by adding a `visitNode` hook that extracts `module` AST nodes. Methods inside modules now get `Module::method` qualified names and proper containment relationships. Also wires up `ExtractorContext` with `pushScope`/`popScope` so language hooks can manage the scope stack, and updates `isInsideClassLikeNode` to include the `module` kind for nested method handling.
Colby McHenry 2 meses atrás
pai
commit
59ea5a43be

+ 64 - 0
__tests__/extraction.test.ts

@@ -1787,6 +1787,70 @@ require_relative 'helper'
     });
   });
 
+  describe('Ruby modules', () => {
+    it('should extract module as module node with containment', () => {
+      const code = `
+module CachedCounting
+  def self.disable
+    @enabled = false
+  end
+
+  def perform_increment!(key, count)
+    write_cache!(key, count)
+  end
+end
+`;
+      const result = extractFromSource('concerns/cached_counting.rb', code);
+
+      const moduleNode = result.nodes.find((n) => n.kind === 'module' && n.name === 'CachedCounting');
+      expect(moduleNode).toBeDefined();
+      expect(moduleNode?.qualifiedName).toBe('CachedCounting'); // top-level module: qualified name equals its bare name
+
+      // Both the singleton method (def self.disable) and the instance method must be qualified by the module name
+      const disableMethod = result.nodes.find((n) => n.name === 'disable' && n.kind === 'method');
+      expect(disableMethod).toBeDefined();
+      expect(disableMethod?.qualifiedName).toBe('CachedCounting::disable');
+
+      const incrementMethod = result.nodes.find((n) => n.name === 'perform_increment!' && n.kind === 'method');
+      expect(incrementMethod).toBeDefined();
+      expect(incrementMethod?.qualifiedName).toBe('CachedCounting::perform_increment!');
+
+      // The module must be the source of at least two 'contains' edges (the fixture defines two methods in it)
+      const containsEdges = result.edges.filter((e) => e.source === moduleNode?.id && e.kind === 'contains');
+      expect(containsEdges.length).toBeGreaterThanOrEqual(2);
+    });
+
+    it('should handle nested modules with classes', () => {
+      const code = `
+module Discourse
+  module Auth
+    class AuthProvider
+      def authenticate(params)
+        validate(params)
+      end
+    end
+  end
+end
+`;
+      const result = extractFromSource('lib/auth.rb', code);
+
+      const discourseModule = result.nodes.find((n) => n.kind === 'module' && n.name === 'Discourse');
+      expect(discourseModule).toBeDefined();
+
+      const authModule = result.nodes.find((n) => n.kind === 'module' && n.name === 'Auth');
+      expect(authModule).toBeDefined();
+      expect(authModule?.qualifiedName).toBe('Discourse::Auth'); // nested module is prefixed by its enclosing module
+
+      const authProvider = result.nodes.find((n) => n.kind === 'class' && n.name === 'AuthProvider');
+      expect(authProvider).toBeDefined();
+      expect(authProvider?.qualifiedName).toBe('Discourse::Auth::AuthProvider'); // class inherits both module prefixes
+
+      const authMethod = result.nodes.find((n) => n.name === 'authenticate'); // NOTE(review): matched by name only, no kind filter unlike the lookups in the first test
+      expect(authMethod).toBeDefined();
+      expect(authMethod?.qualifiedName).toBe('Discourse::Auth::AuthProvider::authenticate');
+    });
+  });
+
   describe('C/C++ imports', () => {
     it('should extract system include', () => {
       const code = `#include <iostream>`;

+ 2 - 0
docs/SEARCH_QUALITY_LOOP.md

@@ -447,6 +447,7 @@ test().catch(console.error);
 | Import edges missing | `extractImport` returns null for this syntax | `src/extraction/languages/<lang>.ts: extractImport` |
 | C++ classes/structs/enums missing from macro namespaces | Macros like `NLOHMANN_JSON_NAMESPACE_BEGIN` cause tree-sitter to misparse namespace blocks as `function_definition` | `src/extraction/languages/c-cpp.ts: isMisparsedFunction` filters bad names; `src/extraction/tree-sitter.ts: visitFunctionBody` extracts structural nodes |
 | C++ classes missing from `.h` headers | `.h` files default to `c` language which has `classTypes: []` | `src/extraction/grammars.ts: looksLikeCpp()` — content-based heuristic promotes `.h` files to `cpp` when C++ patterns detected |
+| Ruby methods inside modules missing owner in `qualified_name` | Ruby `module` AST nodes not being extracted | `src/extraction/languages/ruby.ts: visitNode` hook extracts modules; `src/extraction/tree-sitter.ts: isInsideClassLikeNode` includes `module` kind |
 
 ## After Fixing Issues
 
@@ -529,6 +530,7 @@ if (receiverType) {
 - [x] **C** — NOT needed. No methods in C. Strong function/struct/enum extraction with excellent call edge density. Verified against Redis
 - [x] **C++** — NOT needed for header-only libs. `isMisparsedFunction` hook filters macro-caused misparse artifacts (e.g. `NLOHMANN_JSON_NAMESPACE_BEGIN`). `visitFunctionBody` now extracts structural nodes (classes/structs/enums) inside macro-confused "function" bodies. Content-based `.h` detection (`looksLikeCpp` in `grammars.ts`) promotes C++ headers to `cpp` language so classes in `.h` files are extracted. Verified against nlohmann/json and gRPC. Note: out-of-class `Type::method()` definitions would need `getReceiverType` but are uncommon in header-only codebases.
 - [x] **C#** — NOT needed. Methods nested in class body. Added `base_list` handling in `extractInheritance` for C#'s `: Parent, IInterface` syntax. Added `propertyTypes` support for C# `property_declaration` nodes. Fixed `extractField` to handle C#'s nested `variable_declaration > variable_declarator` structure. Verified against Jellyfin
+- [x] **Ruby** — NOT needed for `getReceiverType`. Methods nested in class body. Added `visitNode` hook to extract Ruby `module` nodes (concerns, namespaces) with proper containment and qualified names. Methods inside modules get `Module::method` qualified names. Also wired up the `ExtractorContext` with `pushScope`/`popScope` for language hooks. Verified against Discourse
 
 ### Needs Verification
 

+ 2 - 2
package-lock.json

@@ -1,12 +1,12 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.6.8",
+  "version": "0.6.9",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@colbymchenry/codegraph",
-      "version": "0.6.8",
+      "version": "0.6.9",
       "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.6.8",
+  "version": "0.6.9",
   "description": "Supercharge Claude Code with semantic code intelligence. 30% fewer tokens, 25% fewer tool calls, 100% local.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

+ 23 - 1
src/extraction/languages/ruby.ts

@@ -6,7 +6,7 @@ export const rubyExtractor: LanguageExtractor = {
   functionTypes: ['method'],
   classTypes: ['class'],
   methodTypes: ['method', 'singleton_method'],
-  interfaceTypes: [], // Ruby uses modules
+  interfaceTypes: [], // Ruby uses modules (handled via visitNode hook)
   structTypes: [],
   enumTypes: [],
   typeAliasTypes: [],
@@ -16,6 +16,28 @@ export const rubyExtractor: LanguageExtractor = {
   nameField: 'name',
   bodyField: 'body',
   paramsField: 'parameters',
+  visitNode: (node, ctx) => { // custom hook: extracts Ruby `module` nodes; returns true only when the node was fully handled here
+    if (node.type !== 'module') return false; // not a module: report "not handled"
+
+    const nameNode = node.childForFieldName('name');
+    if (!nameNode) return false; // no name field on this module node: report "not handled"
+    const name = nameNode.text;
+
+    const moduleNode = ctx.createNode('module', name, node);
+    if (!moduleNode) return false; // createNode produced nothing: report "not handled"
+
+    // Push the module's ID onto the scope stack before visiting its body, so children get proper qualified names
+    ctx.pushScope(moduleNode.id);
+    const body = node.childForFieldName('body');
+    if (body) { // only iterate when the module node has a body field
+      for (let i = 0; i < body.namedChildCount; i++) {
+        const child = body.namedChild(i);
+        if (child) ctx.visitNode(child); // hand each named child back to the context's visitor while the module is on the stack
+      }
+    }
+    ctx.popScope(); // NOTE(review): not reached if a child visit throws, leaving the module ID on the stack
+    return true; // handled: the module body was already visited above
+  },
   getVisibility: (node) => {
     // Ruby visibility is based on preceding visibility modifiers
     let sibling = node.previousNamedSibling;

+ 4 - 0
src/extraction/tree-sitter-types.ts

@@ -56,6 +56,10 @@ export interface ExtractorContext {
   visitFunctionBody(body: SyntaxNode, functionId: string): void;
   /** Add an unresolved reference */
   addUnresolvedReference(ref: UnresolvedReference): void;
+  /** Push a node ID onto the scope stack (for containment/qualified name building) */
+  pushScope(nodeId: string): void;
+  /** Pop the last node ID from the scope stack */
+  popScope(): void;
   /** Current file path */
   readonly filePath: string;
   /** Current source text */

+ 30 - 2
src/extraction/tree-sitter.ts

@@ -17,7 +17,7 @@ import {
 } from '../types';
 import { getParser, detectLanguage, isLanguageSupported } from './grammars';
 import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers';
-import type { LanguageExtractor } from './tree-sitter-types';
+import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types';
 import { EXTRACTORS } from './languages';
 import { LiquidExtractor } from './liquid-extractor';
 import { SvelteExtractor } from './svelte-extractor';
@@ -223,6 +223,13 @@ export class TreeSitterExtractor {
     const nodeType = node.type;
     let skipChildren = false;
 
+    // Language-specific custom visitor hook
+    if (this.extractor.visitNode) {
+      const ctx = this.makeExtractorContext();
+      const handled = this.extractor.visitNode(node, ctx);
+      if (handled) return;
+    }
+
     // Pascal-specific AST handling
     if (this.language === 'pascal') {
       skipChildren = this.visitPascalNode(node);
@@ -409,6 +416,26 @@ export class TreeSitterExtractor {
     return parts.join('::');
   }
 
+  /**
+   * Build an ExtractorContext for language-specific visitNode hooks: each member forwards to this extractor's own methods and state.
+   */
+  private makeExtractorContext(): ExtractorContext {
+    // eslint-disable-next-line @typescript-eslint/no-this-alias
+    const self = this; // alias needed by the getters below, whose own `this` is the returned context object
+    return {
+      createNode: (kind, name, node, extra) => self.createNode(kind, name, node, extra),
+      visitNode: (node) => self.visitNode(node),
+      visitFunctionBody: (body, functionId) => self.visitFunctionBody(body, functionId),
+      addUnresolvedReference: (ref) => self.unresolvedReferences.push(ref),
+      pushScope: (nodeId) => self.nodeStack.push(nodeId), // enter a scope: nodeId becomes the last entry of nodeStack
+      popScope: () => self.nodeStack.pop(), // leave the innermost scope: drops the last entry of nodeStack
+      get filePath() { return self.filePath; }, // getters read the extractor's current field on every access, not a snapshot
+      get source() { return self.source; },
+      get nodeStack() { return self.nodeStack; }, // exposes the same array that pushScope/popScope mutate
+      get nodes() { return self.nodes; },
+    };
+  }
+
   /**
    * Check if the current node stack indicates we are inside a class-like node
    * (class, struct, interface, trait, enum, module). File nodes do not count as class-like.
@@ -424,7 +451,8 @@ export class TreeSitterExtractor {
       parentNode.kind === 'struct' ||
       parentNode.kind === 'interface' ||
       parentNode.kind === 'trait' ||
-      parentNode.kind === 'enum'
+      parentNode.kind === 'enum' ||
+      parentNode.kind === 'module'
     );
   }