Jelajahi Sumber

Add type alias and exported variable extraction

Extend extraction to index two additional categories of symbols
that were previously invisible:

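1. Type aliases (e.g. `export type X = ...` in TypeScript,
   `type X ...` in Go, `type X = ...` in Rust, `typealias X = ...`
   in Swift and Kotlin, `typedef` / `using X = ...` in C and C++).
   Adds `typeAliasTypes` to the LanguageExtractor interface with a
   value for each of the 13 languages (an empty list where no alias
   node type is configured for the language).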

2. Exported variable declarations that aren't functions, including:
   - Zustand stores: `export const useX = create(...)`
   - XState machines: `export const xMachine = createMachine(...)`
   - Zod schemas: `export const schema = z.object(...)`
   - Config objects: `export const config = { ... }`
   - Constants: `export const MAX = 3`
   - Arrays: `export const NAMES = [...] as const`

   The extractExportedVariables() method is called when visiting
   export_statement nodes. It skips variable_declarator values that
   are already handled by functionTypes (arrow_function,
   function_expression) to avoid duplicate extraction.

Adds 11 new test cases (59 total extraction tests, 215 total).

Tested on production monorepo: nodes increased from 958 to 1,172
(+22%), with 109 new variable nodes and 105 new type_alias nodes.
Only 4 files remain at 0 nodes — all are re-export barrels or
ambient declaration files with no extractable symbols.
Tanner Balluff 4 bulan lalu
induk
melakukan
ea9d0bf549
3 mengubah file dengan 267 tambahan dan 2 penghapusan
  1. 168 0
      __tests__/extraction.test.ts
  2. 3 2
      package-lock.json
  3. 96 0
      src/extraction/tree-sitter.ts

+ 168 - 0
__tests__/extraction.test.ts

@@ -290,6 +290,174 @@ export const fetchData = async () => {
   });
 });
 
+describe('Type Alias Extraction', () => { // type alias declarations should be indexed as 'type_alias' nodes
+  it('should extract exported type aliases in TypeScript', () => {
+    const code = `
+export type AuthContextValue = {
+  user: User | null;
+  login: () => void;
+  logout: () => void;
+};
+`;
+    const result = extractFromSource('types.ts', code);
+
+    expect(result.nodes).toHaveLength(1); // the alias is the only node expected from this fixture
+    expect(result.nodes[0]).toMatchObject({
+      kind: 'type_alias',
+      name: 'AuthContextValue',
+      isExported: true,
+    });
+  });
+
+  it('should extract non-exported type aliases', () => {
+    const code = `
+type InternalState = {
+  loading: boolean;
+  error: string | null;
+};
+`;
+    const result = extractFromSource('internal.ts', code);
+
+    expect(result.nodes).toHaveLength(1);
+    expect(result.nodes[0]).toMatchObject({
+      kind: 'type_alias',
+      name: 'InternalState',
+      isExported: false, // the fixture declares the alias without `export`
+    });
+  });
+
+  it('should extract multiple type aliases from the same file', () => {
+    const code = `
+export type UnitSystem = 'metric' | 'imperial';
+export type DateFormat = 'ISO' | 'US' | 'EU';
+type Internal = string;
+`;
+    const result = extractFromSource('config.ts', code);
+
+    const typeAliases = result.nodes.filter((n) => n.kind === 'type_alias');
+    expect(typeAliases).toHaveLength(3); // two exported aliases plus `Internal`
+
+    const exported = typeAliases.filter((n) => n.isExported);
+    expect(exported).toHaveLength(2);
+    expect(exported.map((n) => n.name).sort()).toEqual(['DateFormat', 'UnitSystem']); // sorted so node order does not matter
+  });
+});
+
+describe('Exported Variable Extraction', () => { // exported non-function declarations should be indexed as 'variable' nodes
+  it('should extract exported const with call expression (Zustand store)', () => {
+    const code = `
+export const useUIStore = create<UIState>((set) => ({
+  isOpen: false,
+  toggle: () => set((s) => ({ isOpen: !s.isOpen })),
+}));
+`;
+    const result = extractFromSource('store.ts', code);
+
+    const varNode = result.nodes.find((n) => n.kind === 'variable' && n.name === 'useUIStore'); // other nodes may exist; only this one is checked
+    expect(varNode).toBeDefined();
+    expect(varNode?.isExported).toBe(true);
+  });
+
+  it('should extract exported const with object literal', () => {
+    const code = `
+export const config = {
+  apiUrl: 'https://api.example.com',
+  timeout: 5000,
+};
+`;
+    const result = extractFromSource('config.ts', code);
+
+    const varNode = result.nodes.find((n) => n.kind === 'variable' && n.name === 'config');
+    expect(varNode).toBeDefined();
+    expect(varNode?.isExported).toBe(true);
+  });
+
+  it('should extract exported const with array literal', () => {
+    const code = `
+export const SCREEN_NAMES = ['home', 'settings', 'profile'] as const;
+`;
+    const result = extractFromSource('constants.ts', code);
+
+    const varNode = result.nodes.find((n) => n.kind === 'variable' && n.name === 'SCREEN_NAMES');
+    expect(varNode).toBeDefined();
+    expect(varNode?.isExported).toBe(true);
+  });
+
+  it('should extract exported const with primitive value', () => {
+    const code = `
+export const MAX_RETRIES = 3;
+export const API_VERSION = "v2";
+`;
+    const result = extractFromSource('constants.ts', code);
+
+    const variables = result.nodes.filter((n) => n.kind === 'variable');
+    expect(variables).toHaveLength(2); // one node per exported constant in the fixture
+    expect(variables.map((n) => n.name).sort()).toEqual(['API_VERSION', 'MAX_RETRIES']);
+  });
+
+  it('should NOT duplicate arrow functions as both function and variable', () => {
+    const code = `
+export const useAuth = () => {
+  return useContext(AuthContext);
+};
+`;
+    const result = extractFromSource('hooks.ts', code);
+
+    // An arrow-function value must yield exactly one 'function' node and no 'variable' node
+    const funcNodes = result.nodes.filter((n) => n.kind === 'function' && n.name === 'useAuth');
+    const varNodes = result.nodes.filter((n) => n.kind === 'variable' && n.name === 'useAuth');
+    expect(funcNodes).toHaveLength(1);
+    expect(varNodes).toHaveLength(0);
+  });
+
+  it('should not extract non-exported const as exported variable', () => {
+    const code = `
+const internalConfig = {
+  debug: true,
+};
+`;
+    const result = extractFromSource('internal.ts', code);
+
+    // A const declared without `export` must not produce a 'variable' node:
+    // extractExportedVariables only runs for export_statement nodes.
+    const varNodes = result.nodes.filter((n) => n.kind === 'variable' && n.name === 'internalConfig');
+    expect(varNodes).toHaveLength(0);
+  });
+
+  it('should extract Zod schema exports', () => {
+    const code = `
+export const userSchema = z.object({
+  id: z.string(),
+  name: z.string(),
+  email: z.string().email(),
+});
+`;
+    const result = extractFromSource('schemas.ts', code);
+
+    const varNode = result.nodes.find((n) => n.kind === 'variable' && n.name === 'userSchema');
+    expect(varNode).toBeDefined();
+    expect(varNode?.isExported).toBe(true);
+  });
+
+  it('should extract XState machine exports', () => {
+    const code = `
+export const authMachine = createMachine({
+  id: "auth",
+  initial: "idle",
+  states: {
+    idle: {},
+    authenticated: {},
+  },
+});
+`;
+    const result = extractFromSource('machine.ts', code);
+
+    const varNode = result.nodes.find((n) => n.kind === 'variable' && n.name === 'authMachine');
+    expect(varNode).toBeDefined();
+    expect(varNode?.isExported).toBe(true);
+  });
+});
+
 describe('Python Extraction', () => {
   it('should extract function definitions', () => {
     const code = `

+ 3 - 2
package-lock.json

@@ -1,12 +1,13 @@
 {
   "name": "@colbymchenry/codegraph",
-  "version": "0.2.6",
+  "version": "0.3.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@colbymchenry/codegraph",
-      "version": "0.2.6",
+      "version": "0.3.1",
+      "hasInstallScript": true,
       "license": "MIT",
       "dependencies": {
         "@xenova/transformers": "^2.17.0",

+ 96 - 0
src/extraction/tree-sitter.ts

@@ -103,6 +103,8 @@ interface LanguageExtractor {
   structTypes: string[];
   /** Node types that represent enums */
   enumTypes: string[];
+  /** Node types that represent type aliases (e.g. `type X = ...`) */
+  typeAliasTypes: string[];
   /** Node types that represent imports */
   importTypes: string[];
   /** Node types that represent function calls */
@@ -138,6 +140,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['interface_declaration'],
     structTypes: [],
     enumTypes: ['enum_declaration'],
+    typeAliasTypes: ['type_alias_declaration'],
     importTypes: ['import_statement'],
     callTypes: ['call_expression'],
     nameField: 'name',
@@ -200,6 +203,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: [],
     structTypes: [],
     enumTypes: [],
+    typeAliasTypes: [],
     importTypes: ['import_statement'],
     callTypes: ['call_expression'],
     nameField: 'name',
@@ -232,6 +236,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: [],
     structTypes: [],
     enumTypes: [],
+    typeAliasTypes: [],
     importTypes: ['import_statement', 'import_from_statement'],
     callTypes: ['call'],
     nameField: 'name',
@@ -269,6 +274,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['interface_type'],
     structTypes: ['struct_type'],
     enumTypes: [],
+    typeAliasTypes: ['type_spec'], // Go type declarations
     importTypes: ['import_declaration'],
     callTypes: ['call_expression'],
     nameField: 'name',
@@ -293,6 +299,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['trait_item'],
     structTypes: ['struct_item'],
     enumTypes: ['enum_item'],
+    typeAliasTypes: ['type_item'], // Rust type aliases
     importTypes: ['use_declaration'],
     callTypes: ['call_expression'],
     nameField: 'name',
@@ -333,6 +340,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['interface_declaration'],
     structTypes: [],
     enumTypes: ['enum_declaration'],
+    typeAliasTypes: [],
     importTypes: ['import_declaration'],
     callTypes: ['method_invocation'],
     nameField: 'name',
@@ -375,6 +383,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: [],
     structTypes: ['struct_specifier'],
     enumTypes: ['enum_specifier'],
+    typeAliasTypes: ['type_definition'], // typedef
     importTypes: ['preproc_include'],
     callTypes: ['call_expression'],
     nameField: 'declarator',
@@ -388,6 +397,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: [],
     structTypes: ['struct_specifier'],
     enumTypes: ['enum_specifier'],
+    typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
     importTypes: ['preproc_include'],
     callTypes: ['call_expression'],
     nameField: 'declarator',
@@ -417,6 +427,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['interface_declaration'],
     structTypes: ['struct_declaration'],
     enumTypes: ['enum_declaration'],
+    typeAliasTypes: [],
     importTypes: ['using_directive'],
     callTypes: ['invocation_expression'],
     nameField: 'name',
@@ -461,6 +472,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['interface_declaration'],
     structTypes: [],
     enumTypes: ['enum_declaration'],
+    typeAliasTypes: [],
     importTypes: ['namespace_use_declaration'],
     callTypes: ['function_call_expression', 'member_call_expression', 'scoped_call_expression'],
     nameField: 'name',
@@ -494,6 +506,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: [], // Ruby uses modules
     structTypes: [],
     enumTypes: [],
+    typeAliasTypes: [],
     importTypes: ['call'], // require/require_relative
     callTypes: ['call', 'method_call'],
     nameField: 'name',
@@ -524,6 +537,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['protocol_declaration'],
     structTypes: ['struct_declaration'],
     enumTypes: ['enum_declaration'],
+    typeAliasTypes: ['typealias_declaration'],
     importTypes: ['import_declaration'],
     callTypes: ['call_expression'],
     nameField: 'name',
@@ -583,6 +597,7 @@ const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
     interfaceTypes: ['class_declaration'], // Interfaces use class_declaration with 'interface' modifier
     structTypes: [], // Kotlin uses data classes
     enumTypes: ['class_declaration'], // Enums use class_declaration with 'enum' modifier
+    typeAliasTypes: ['type_alias'],
     importTypes: ['import_header'],
     callTypes: ['call_expression'],
     nameField: 'simple_identifier',
@@ -800,6 +815,16 @@ export class TreeSitterExtractor {
       this.extractEnum(node);
       skipChildren = true; // extractEnum visits body children
     }
+    // Check for type alias declarations (e.g. `type X = ...` in TypeScript)
+    else if (this.extractor.typeAliasTypes.includes(nodeType)) {
+      this.extractTypeAlias(node);
+    }
+    // Check for export statements containing non-function variable declarations
+    // e.g. `export const X = create(...)`, `export const X = { ... }`
+    else if (nodeType === 'export_statement') {
+      this.extractExportedVariables(node);
+      // Don't skip children — still need to visit inner nodes (functions, calls, etc.)
+    }
     // Check for imports
     else if (this.extractor.importTypes.includes(nodeType)) {
       this.extractImport(node);
@@ -1081,6 +1106,77 @@ export class TreeSitterExtractor {
     });
   }
 
+  /**
+   * Record a `type_alias` node for a type alias declaration
+   * (for instance `export type X = ...` in TypeScript).
+   * Declarations whose name resolves to `<anonymous>` are ignored.
+   */
+  private extractTypeAlias(node: SyntaxNode): void {
+    const extractor = this.extractor;
+    if (!extractor) return;
+
+    const aliasName = extractName(node, this.source, extractor);
+    if (aliasName !== '<anonymous>') {
+      const docstring = getPrecedingDocstring(node, this.source);
+      const isExported = extractor.isExported?.(node, this.source);
+      this.createNode('type_alias', aliasName, node, { docstring, isExported });
+    }
+  }
+
+  /**
+   * Extract exported variable declarations whose value isn't a function.
+   * Handles patterns like:
+   *   export const X = create(...)
+   *   export const X = { ... }
+   *   export const X = [...]
+   *   export const X = "value"
+   *
+   * Called for `export_statement` nodes. Each `variable_declarator` inside
+   * a `lexical_declaration` / `variable_declaration` child becomes a
+   * `variable` node with `isExported: true`, except when:
+   *   - its value type is listed in `functionTypes` (arrow_function,
+   *     function_expression) — the functionTypes dispatch already
+   *     extracts those, so emitting them here would duplicate them;
+   *   - its name is a destructuring pattern (`export const { a, b } = x`)
+   *     — the pattern text is not a usable symbol name.
+   *
+   * @param exportNode The `export_statement` node being visited.
+   */
+  private extractExportedVariables(exportNode: SyntaxNode): void {
+    if (!this.extractor) return;
+
+    // Find the lexical_declaration or variable_declaration child
+    for (let i = 0; i < exportNode.namedChildCount; i++) {
+      const decl = exportNode.namedChild(i);
+      if (!decl || (decl.type !== 'lexical_declaration' && decl.type !== 'variable_declaration')) {
+        continue;
+      }
+
+      // Iterate over each variable_declarator in the declaration
+      for (let j = 0; j < decl.namedChildCount; j++) {
+        const declarator = decl.namedChild(j);
+        if (!declarator || declarator.type !== 'variable_declarator') continue;
+
+        // Only plain identifiers name a single symbol; for object/array
+        // patterns the node text would be the whole pattern (`{ a, b }`).
+        const nameNode = getChildByField(declarator, 'name');
+        if (!nameNode || nameNode.type !== 'identifier') continue;
+        const name = getNodeText(nameNode, this.source);
+
+        // Function values are emitted by the functionTypes dispatch; skipping
+        // them here avoids duplicates. Declarators with no value are kept.
+        const value = getChildByField(declarator, 'value');
+        if (value && this.extractor.functionTypes.includes(value.type)) continue;
+
+        const docstring = getPrecedingDocstring(exportNode, this.source);
+        this.createNode('variable', name, declarator, {
+          docstring,
+          isExported: true,
+        });
+      }
+    }
+  }
+
   /**
    * Extract an import
    */