Ver Fonte

Merge pull request #77 from colbymchenry/refactor/extract-language-configs

refactor: Extract per-language configs from tree-sitter.ts
Colby Mchenry há 2 meses atrás
pai
commit
eefa622965

+ 158 - 0
src/extraction/dfm-extractor.ts

@@ -0,0 +1,158 @@
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+
+/**
+ * Custom extractor for Delphi DFM/FMX form files.
+ *
+ * DFM/FMX files describe the visual component hierarchy and event handler
+ * bindings. They use a simple text format (object/end blocks) that we parse
+ * line by line with regular expressions — no tree-sitter grammar exists for
+ * this format.
+ *
+ * Extracted information:
+ * - Components as NodeKind `component`
+ * - Nesting as EdgeKind `contains`
+ * - Event handlers (OnClick = MethodName) as UnresolvedReference → EdgeKind `references`
+ */
+export class DfmExtractor {
+  private filePath: string;
+  private source: string;
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  /**
+   * @param filePath Path of the form file; used for node ids and qualified names.
+   * @param source Full text content of the form file.
+   */
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+  }
+
+  /**
+   * Extract components and event handler references from DFM/FMX source.
+   *
+   * Any exception raised while parsing is captured as an entry in `errors`
+   * rather than propagated, so whatever was collected before the failure is
+   * still returned.
+   *
+   * @returns The collected nodes, edges, unresolved references and errors,
+   *          plus the elapsed wall-clock time in milliseconds.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      const fileNode = this.createFileNode();
+      this.parseComponents(fileNode.id);
+    } catch (error) {
+      this.errors.push({
+        message: `DFM extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Create the file node for the DFM form file and register it in `nodes`.
+   *
+   * @returns The node spanning the whole file; it is the root container for
+   *          top-level components.
+   */
+  private createFileNode(): Node {
+    // Split on LF or CRLF so a trailing '\r' never inflates the end column.
+    const lines = this.source.split(/\r?\n/);
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+
+    const fileNode: Node = {
+      id,
+      kind: 'file',
+      name: this.filePath.split('/').pop() || this.filePath,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: 'pascal',
+      startLine: 1,
+      endLine: lines.length,
+      startColumn: 0,
+      endColumn: lines[lines.length - 1]?.length || 0,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * Parse object/end blocks and extract components + event handlers.
+   *
+   * Multi-line property values are skipped entirely:
+   * - `Prop = (` … `)` lists end at the first line that ends with `)`.
+   * - `Prop = <` … `>` collections are depth-counted, because an item may
+   *   itself hold another collection; the `item`/`end` lines inside them must
+   *   not be mistaken for component block ends.
+   *
+   * @param fileNodeId Id of the file node, used as the parent of top-level components.
+   */
+  private parseComponents(fileNodeId: string): void {
+    const lines = this.source.split(/\r?\n/);
+    // Components whose closing `end` has not been seen yet, innermost last.
+    const openComponents: Array<{ id: string; nodeIndex: number }> = [];
+
+    const objectPattern = /^\s*(object|inherited|inline)\s+(\w+)\s*:\s*(\w+)/;
+    // The handler must look like an identifier; a leading digit cannot be a method name.
+    const eventPattern = /^\s*(On\w+)\s*=\s*([A-Za-z_]\w*)\s*$/;
+    // Literal values show that an `On…` property is not an event binding.
+    const literalValuePattern = /^(?:true|false|nil)$/i;
+    const endPattern = /^\s*end\s*$/;
+    const listStartPattern = /=\s*\(\s*$/;
+    const collectionStartPattern = /=\s*<\s*$/;
+    const emptyCollectionPattern = /=\s*<\s*>\s*$/;
+
+    let insideList = false;
+    let collectionDepth = 0;
+
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i]!;
+      const lineNum = i + 1;
+      const trimmed = line.trimEnd();
+
+      // Skip the remainder of a parenthesised list value.
+      if (insideList) {
+        if (trimmed.endsWith(')')) insideList = false;
+        continue;
+      }
+
+      // Skip collection contents, tracking nesting so only the matching `>` ends the skip.
+      if (collectionDepth > 0) {
+        if (collectionStartPattern.test(line)) {
+          collectionDepth++;
+        } else if (trimmed.endsWith('>') && !emptyCollectionPattern.test(line)) {
+          // `Prop = <>` opens and closes on one line, so it must not change the depth.
+          collectionDepth--;
+        }
+        continue;
+      }
+
+      if (listStartPattern.test(line)) {
+        insideList = true;
+        continue;
+      }
+      if (collectionStartPattern.test(line)) {
+        collectionDepth = 1;
+        continue;
+      }
+
+      const parentId = openComponents[openComponents.length - 1]?.id ?? fileNodeId;
+
+      // Component declaration
+      const objMatch = objectPattern.exec(line);
+      if (objMatch) {
+        const name = objMatch[2]!;
+        const typeName = objMatch[3];
+        const nodeId = generateNodeId(this.filePath, 'component', name, lineNum);
+        const nodeIndex = this.nodes.length;
+        this.nodes.push({
+          id: nodeId,
+          kind: 'component',
+          name,
+          qualifiedName: `${this.filePath}#${name}`,
+          filePath: this.filePath,
+          language: 'pascal',
+          startLine: lineNum,
+          // Provisional end; moved to the closing `end` line once it is found.
+          endLine: lineNum,
+          startColumn: 0,
+          endColumn: line.length,
+          signature: typeName,
+          updatedAt: Date.now(),
+        });
+        this.edges.push({
+          source: parentId,
+          target: nodeId,
+          kind: 'contains',
+        });
+        openComponents.push({ id: nodeId, nodeIndex });
+        continue;
+      }
+
+      // Event handler
+      const eventMatch = eventPattern.exec(line);
+      if (eventMatch) {
+        const methodName = eventMatch[2]!;
+        if (!literalValuePattern.test(methodName)) {
+          this.unresolvedReferences.push({
+            fromNodeId: parentId,
+            referenceName: methodName,
+            referenceKind: 'references',
+            line: lineNum,
+            column: 0,
+          });
+        }
+        continue;
+      }
+
+      // Block end: close the innermost open component and record where it ends.
+      if (endPattern.test(line)) {
+        const closed = openComponents.pop();
+        if (closed) {
+          const opened = this.nodes[closed.nodeIndex];
+          if (opened) {
+            this.nodes[closed.nodeIndex] = { ...opened, endLine: lineNum, endColumn: line.length };
+          }
+        }
+      }
+    }
+  }
+}

+ 83 - 0
src/extraction/languages/c-cpp.ts

@@ -0,0 +1,83 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for C sources: which tree-sitter node types map to
+ * functions, structs, enums, typedefs, includes, calls and variables, and how
+ * to turn a `preproc_include` node into an import record.
+ */
+export const cExtractor: LanguageExtractor = {
+  functionTypes: ['function_definition'],
+  classTypes: [],
+  methodTypes: [],
+  interfaceTypes: [],
+  structTypes: ['struct_specifier'],
+  enumTypes: ['enum_specifier'],
+  typeAliasTypes: ['type_definition'], // typedef
+  importTypes: ['preproc_include'],
+  callTypes: ['call_expression'],
+  variableTypes: ['declaration'],
+  nameField: 'declarator',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  /**
+   * Resolve the header named by an include directive.
+   * Handles both `#include <stdio.h>` and `#include "myheader.h"`;
+   * returns null when neither form is found on the node.
+   */
+  extractImport: (node, source) => {
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Angle-bracket form: strip the surrounding < and >.
+    const angled = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string');
+    if (angled) {
+      const moduleName = getNodeText(angled, source).replace(/^<|>$/g, '');
+      return { moduleName, signature };
+    }
+
+    // Quoted form: the path is the string_content inside the string_literal.
+    const quoted = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
+    if (!quoted) return null;
+    const content = quoted.namedChildren.find((c: SyntaxNode) => c.type === 'string_content');
+    if (!content) return null;
+    return { moduleName: getNodeText(content, source), signature };
+  },
+};
+
+/**
+ * Extraction config for C++ sources: node-type mapping for functions,
+ * classes, structs, enums, aliases, includes, calls and variables, plus
+ * member visibility and include resolution.
+ */
+export const cppExtractor: LanguageExtractor = {
+  functionTypes: ['function_definition'],
+  classTypes: ['class_specifier'],
+  methodTypes: ['function_definition'],
+  interfaceTypes: [],
+  structTypes: ['struct_specifier'],
+  enumTypes: ['enum_specifier'],
+  typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
+  importTypes: ['preproc_include'],
+  callTypes: ['call_expression'],
+  variableTypes: ['declaration'],
+  nameField: 'declarator',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  /**
+   * Visibility of a member, taken from the closest access specifier that
+   * precedes it among its siblings.
+   *
+   * An access label applies to everything after it until the next label, so
+   * the search walks backwards from the member instead of returning the first
+   * label found anywhere in the parent. Returns undefined when no label
+   * precedes the node.
+   */
+  getVisibility: (node) => {
+    for (let sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
+      if (sibling.type !== 'access_specifier') continue;
+      const text = sibling.text;
+      if (text.includes('public')) return 'public';
+      if (text.includes('private')) return 'private';
+      if (text.includes('protected')) return 'protected';
+    }
+    return undefined;
+  },
+  /**
+   * Resolve the header named by an include directive.
+   * Handles both `#include <iostream>` and `#include "myheader.h"`;
+   * returns null when neither form is found on the node.
+   */
+  extractImport: (node, source) => {
+    const importText = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Angle-bracket form: strip the surrounding < and >.
+    const systemLib = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string');
+    if (systemLib) {
+      return { moduleName: getNodeText(systemLib, source).replace(/^<|>$/g, ''), signature: importText };
+    }
+
+    // Quoted form: the path is the string_content inside the string_literal.
+    const stringLiteral = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
+    const stringContent = stringLiteral?.namedChildren.find((c: SyntaxNode) => c.type === 'string_content');
+    if (stringContent) {
+      return { moduleName: getNodeText(stringContent, source), signature: importText };
+    }
+    return null;
+  },
+};

+ 64 - 0
src/extraction/languages/csharp.ts

@@ -0,0 +1,64 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/** Collect the text of every direct `modifier` child of a declaration node, in source order. */
+const csharpModifierTexts = (node: SyntaxNode): string[] => {
+  const texts: string[] = [];
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (child?.type === 'modifier') texts.push(child.text);
+  }
+  return texts;
+};
+
+/**
+ * Extraction config for C# sources: node-type mapping for classes, methods,
+ * interfaces, structs, enums, using directives, calls and variables, plus
+ * modifier-based visibility/static/async detection and using resolution.
+ */
+export const csharpExtractor: LanguageExtractor = {
+  functionTypes: [],
+  classTypes: ['class_declaration'],
+  methodTypes: ['method_declaration', 'constructor_declaration'],
+  interfaceTypes: ['interface_declaration'],
+  structTypes: ['struct_declaration'],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: [],
+  importTypes: ['using_directive'],
+  callTypes: ['invocation_expression'],
+  variableTypes: ['local_declaration_statement', 'field_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameter_list',
+  /** First access modifier found on the declaration; 'private' when none is present. */
+  getVisibility: (node) => {
+    for (const text of csharpModifierTexts(node)) {
+      if (text === 'public') return 'public';
+      if (text === 'private') return 'private';
+      if (text === 'protected') return 'protected';
+      if (text === 'internal') return 'internal';
+    }
+    return 'private'; // C# defaults to private
+  },
+  /** True when the declaration carries a `static` modifier. */
+  isStatic: (node) => csharpModifierTexts(node).includes('static'),
+  /** True when the declaration carries an `async` modifier. */
+  isAsync: (node) => csharpModifierTexts(node).includes('async'),
+  /**
+   * Resolve the namespace or type named by a using directive.
+   * Covers `using System`, `using System.Collections.Generic`,
+   * `using static X` and `using Alias = X`; returns null otherwise.
+   */
+  extractImport: (node, source) => {
+    const importText = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Dotted names are a single qualified_name child.
+    const qualifiedName = node.namedChildren.find((c: SyntaxNode) => c.type === 'qualified_name');
+    if (qualifiedName) {
+      return { moduleName: getNodeText(qualifiedName, source), signature: importText };
+    }
+
+    // Simple names: in `using Alias = Target;` the alias comes first, so take
+    // the LAST identifier child — it is the target in both the plain and the
+    // aliased form.
+    const identifiers = node.namedChildren.filter((c: SyntaxNode) => c.type === 'identifier');
+    const target = identifiers[identifiers.length - 1];
+    if (target) {
+      return { moduleName: getNodeText(target, source), signature: importText };
+    }
+    return null;
+  },
+};

+ 134 - 0
src/extraction/languages/dart.ts

@@ -0,0 +1,134 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/** Signature node types that a Dart `method_signature` may wrap. */
+const DART_INNER_SIGNATURE_TYPES = ['function_signature', 'getter_signature', 'setter_signature'];
+
+/** Find the wrapped signature of a `method_signature` node, if there is one. */
+const findDartInnerSignature = (node: SyntaxNode): SyntaxNode | undefined =>
+  node.namedChildren.find((c: SyntaxNode) => DART_INNER_SIGNATURE_TYPES.includes(c.type));
+
+/** True when `node` has a direct child (named or anonymous) of the given type. */
+const hasDartChildOfType = (node: SyntaxNode, type: string): boolean => {
+  const total = node.childCount;
+  for (let idx = 0; idx < total; idx++) {
+    if (node.child(idx)?.type === type) return true;
+  }
+  return false;
+};
+
+/**
+ * Extraction config for Dart sources: node-type mapping plus the hooks needed
+ * because Dart signatures and their bodies are sibling nodes rather than
+ * parent/child.
+ */
+export const dartExtractor: LanguageExtractor = {
+  functionTypes: ['function_signature'],
+  classTypes: ['class_definition'],
+  methodTypes: ['method_signature'],
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: ['type_alias'],
+  importTypes: ['import_or_export'],
+  callTypes: [],  // Dart calls use identifier+selector, handled via function body traversal
+  variableTypes: [],
+  extraClassNodeTypes: ['mixin_declaration', 'extension_declaration'],
+  /** Locate the body node that belongs to a signature or a class-like declaration. */
+  resolveBody: (node, bodyField) => {
+    const isSignature = node.type === 'function_signature' || node.type === 'method_signature';
+    if (isSignature) {
+      // Dart: function_body is a next sibling of function_signature/method_signature
+      const following = node.nextNamedSibling;
+      return following?.type === 'function_body' ? following : null;
+    }
+    // For class/mixin/extension: try standard field, then class_body/extension_body
+    const viaField = node.childForFieldName(bodyField);
+    if (viaField) return viaField;
+    const viaType = node.namedChildren.find((c: SyntaxNode) =>
+      c.type === 'class_body' || c.type === 'extension_body'
+    );
+    return viaType || null;
+  },
+  nameField: 'name',
+  bodyField: 'body', // class_definition uses 'body' field
+  paramsField: 'formal_parameter_list',
+  returnField: 'type',
+  /**
+   * Build "<return type> <parameter list>" from a signature node.
+   * A method_signature is unwrapped to its inner signature first. Returns
+   * undefined when neither part is present.
+   */
+  getSignature: (node, source) => {
+    const target = node.type === 'method_signature' ? findDartInnerSignature(node) ?? node : node;
+    const paramList = target.namedChildren.find((c: SyntaxNode) => c.type === 'formal_parameter_list');
+    const returnType = target.namedChildren.find((c: SyntaxNode) =>
+      c.type === 'type_identifier' || c.type === 'void_type'
+    );
+    if (!paramList && !returnType) return undefined;
+
+    const pieces: string[] = [];
+    if (returnType) pieces.push(getNodeText(returnType, source));
+    if (paramList) pieces.push(getNodeText(paramList, source));
+    return pieces.join(' ').trim() || undefined;
+  },
+  /** Dart convention: a leading underscore in the name means private, anything else public. */
+  getVisibility: (node) => {
+    let nameNode: SyntaxNode | null;
+    if (node.type !== 'method_signature') {
+      nameNode = node.childForFieldName('name');
+    } else {
+      const inner = findDartInnerSignature(node);
+      nameNode = inner?.namedChildren.find((c: SyntaxNode) => c.type === 'identifier') || null;
+    }
+    return nameNode && nameNode.text.startsWith('_') ? 'private' : 'public';
+  },
+  /** In Dart, 'async' is on the function_body (next sibling), not the signature. */
+  isAsync: (node) => {
+    const body = node.nextNamedSibling;
+    if (!body || body.type !== 'function_body') return false;
+    return hasDartChildOfType(body, 'async');
+  },
+  /** Only a method_signature can be static; look for a 'static' child on it. */
+  isStatic: (node) => node.type === 'method_signature' && hasDartChildOfType(node, 'static'),
+  /**
+   * Resolve the URI of an import or export directive, e.g.
+   * `import 'dart:async';`, `import 'package:foo/bar.dart' as bar;` or
+   * `export 'src/foo.dart';`. Returns null when no URI string is found.
+   */
+  extractImport: (node, source) => {
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Drill holder → configurable_uri → uri → string_literal and unquote it.
+    const uriTextOf = (holder: SyntaxNode | undefined): string => {
+      const literal = holder
+        ?.namedChildren.find((c: SyntaxNode) => c.type === 'configurable_uri')
+        ?.namedChildren.find((c: SyntaxNode) => c.type === 'uri')
+        ?.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
+      return literal ? getNodeText(literal, source).replace(/['"]/g, '') : '';
+    };
+
+    // Imports keep the URI one level deeper, under import_specification.
+    const importSpec = node.namedChildren
+      .find((c: SyntaxNode) => c.type === 'library_import')
+      ?.namedChildren.find((c: SyntaxNode) => c.type === 'import_specification');
+    let moduleName = uriTextOf(importSpec);
+
+    // Exports hold the configurable_uri directly.
+    if (!moduleName) {
+      moduleName = uriTextOf(node.namedChildren.find((c: SyntaxNode) => c.type === 'library_export'));
+    }
+
+    return moduleName ? { moduleName, signature } : null;
+  },
+};

+ 30 - 0
src/extraction/languages/go.ts

@@ -0,0 +1,30 @@
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for Go sources: node-type mapping for functions,
+ * methods, interfaces, structs, type declarations, imports, calls and
+ * variables, plus signature rendering.
+ */
+export const goExtractor: LanguageExtractor = {
+  functionTypes: ['function_declaration'],
+  classTypes: [], // Go doesn't have classes
+  methodTypes: ['method_declaration'],
+  interfaceTypes: ['interface_type'],
+  structTypes: ['struct_type'],
+  enumTypes: [],
+  typeAliasTypes: ['type_spec'], // Go type declarations
+  importTypes: ['import_declaration'],
+  callTypes: ['call_expression'],
+  variableTypes: ['var_declaration', 'short_var_declaration', 'const_declaration'],
+  methodsAreTopLevel: true,
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'result',
+  /**
+   * Render "<parameters> <result>" for a function or method node.
+   * The result part is omitted when the node has none; undefined is returned
+   * when there is no parameter list at all.
+   */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'parameters');
+    if (!paramList) return undefined;
+
+    const resultNode = getChildByField(node, 'result');
+    const paramText = getNodeText(paramList, source);
+    return resultNode ? `${paramText} ${getNodeText(resultNode, source)}` : paramText;
+  },
+};

+ 44 - 0
src/extraction/languages/index.ts

@@ -0,0 +1,44 @@
+/**
+ * Per-language extraction configurations.
+ *
+ * Each file exports a LanguageExtractor config object.
+ * This barrel builds the EXTRACTORS map consumed by TreeSitterExtractor.
+ */
+
+import { Language } from '../../types';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+import { typescriptExtractor } from './typescript';
+import { javascriptExtractor } from './javascript';
+import { pythonExtractor } from './python';
+import { goExtractor } from './go';
+import { rustExtractor } from './rust';
+import { javaExtractor } from './java';
+import { cExtractor, cppExtractor } from './c-cpp';
+import { csharpExtractor } from './csharp';
+import { phpExtractor } from './php';
+import { rubyExtractor } from './ruby';
+import { swiftExtractor } from './swift';
+import { kotlinExtractor } from './kotlin';
+import { dartExtractor } from './dart';
+import { pascalExtractor } from './pascal';
+
+export const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = { // Partial: a Language is allowed to have no entry
+  typescript: typescriptExtractor,
+  tsx: typescriptExtractor, // same config object as `typescript`
+  javascript: javascriptExtractor,
+  jsx: javascriptExtractor, // same config object as `javascript`
+  python: pythonExtractor,
+  go: goExtractor,
+  rust: rustExtractor,
+  java: javaExtractor,
+  c: cExtractor, // c and cpp configs both come from ./c-cpp
+  cpp: cppExtractor,
+  csharp: csharpExtractor,
+  php: phpExtractor,
+  ruby: rubyExtractor,
+  swift: swiftExtractor,
+  kotlin: kotlinExtractor,
+  dart: dartExtractor,
+  pascal: pascalExtractor,
+};

+ 57 - 0
src/extraction/languages/java.ts

@@ -0,0 +1,57 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for Java sources: node-type mapping for classes,
+ * methods, interfaces, enums, imports, calls and variables, plus signature,
+ * visibility, static detection and import resolution.
+ */
+export const javaExtractor: LanguageExtractor = {
+  functionTypes: [],
+  classTypes: ['class_declaration'],
+  methodTypes: ['method_declaration', 'constructor_declaration'],
+  interfaceTypes: ['interface_declaration'],
+  structTypes: [],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: [],
+  importTypes: ['import_declaration'],
+  callTypes: ['method_invocation'],
+  variableTypes: ['local_variable_declaration', 'field_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'type',
+  /**
+   * Render "<type> <parameters>" for a method node, or just the parameters
+   * when the node has no `type` field. Returns undefined without a parameter list.
+   */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'parameters');
+    if (!paramList) return undefined;
+
+    const declaredType = getChildByField(node, 'type');
+    const paramText = getNodeText(paramList, source);
+    if (!declaredType) return paramText;
+    return getNodeText(declaredType, source) + ' ' + paramText;
+  },
+  /**
+   * Visibility keyword found in the text of a `modifiers` child, checked in
+   * the order public, private, protected; undefined when none is found.
+   */
+  getVisibility: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const candidate = node.child(idx);
+      if (!candidate || candidate.type !== 'modifiers') continue;
+
+      const modifierText = candidate.text;
+      if (modifierText.includes('public')) return 'public';
+      if (modifierText.includes('private')) return 'private';
+      if (modifierText.includes('protected')) return 'protected';
+    }
+    return undefined;
+  },
+  /** True when the text of a `modifiers` child contains "static". */
+  isStatic: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const candidate = node.child(idx);
+      if (!candidate || candidate.type !== 'modifiers') continue;
+      if (candidate.text.includes('static')) return true;
+    }
+    return false;
+  },
+  /** Use the import's `scoped_identifier` text as the module name; null when there is none. */
+  extractImport: (node, source) => {
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+    const scoped = node.namedChildren.find((c: SyntaxNode) => c.type === 'scoped_identifier');
+    if (!scoped) return null;
+    return { moduleName: source.substring(scoped.startIndex, scoped.endIndex), signature };
+  },
+};

+ 56 - 0
src/extraction/languages/javascript.ts

@@ -0,0 +1,56 @@
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for JavaScript sources: node-type mapping for functions,
+ * classes, methods, imports, calls and variables, plus signature, export,
+ * async and const detection and import resolution.
+ */
+export const javascriptExtractor: LanguageExtractor = {
+  functionTypes: ['function_declaration', 'arrow_function', 'function_expression'],
+  classTypes: ['class_declaration'],
+  methodTypes: ['method_definition', 'field_definition'],
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: ['import_statement'],
+  callTypes: ['call_expression'],
+  variableTypes: ['lexical_declaration', 'variable_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  /** The signature is the text of the `parameters` field; undefined when absent. */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'parameters');
+    if (!paramList) return undefined;
+    return getNodeText(paramList, source);
+  },
+  /** True when any ancestor of the node is an `export_statement`. */
+  isExported: (node, _source) => {
+    for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
+      if (ancestor.type === 'export_statement') return true;
+    }
+    return false;
+  },
+  /** True when the node has a direct `async` child token. */
+  isAsync: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      if (node.child(idx)?.type === 'async') return true;
+    }
+    return false;
+  },
+  /** True only for a `lexical_declaration` that has a direct `const` child token. */
+  isConst: (node) => {
+    if (node.type !== 'lexical_declaration') return false;
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      if (node.child(idx)?.type === 'const') return true;
+    }
+    return false;
+  },
+  /**
+   * Take the module specifier from the import's `source` field with every
+   * quote character removed. Returns null when the field is missing or the
+   * unquoted specifier is empty.
+   */
+  extractImport: (node, source) => {
+    const specifier = node.childForFieldName('source');
+    if (!specifier) return null;
+
+    const moduleName = source.substring(specifier.startIndex, specifier.endIndex).replace(/['"]/g, '');
+    if (!moduleName) return null;
+
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+    return { moduleName, signature };
+  },
+};

+ 68 - 0
src/extraction/languages/kotlin.ts

@@ -0,0 +1,68 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for Kotlin sources: node-type mapping plus signature,
+ * visibility, suspend detection and import resolution. Classes, interfaces
+ * and enums all map to `class_declaration` here.
+ */
+export const kotlinExtractor: LanguageExtractor = {
+  functionTypes: ['function_declaration'],
+  classTypes: ['class_declaration'],
+  methodTypes: ['function_declaration'], // Methods are functions inside classes
+  interfaceTypes: ['class_declaration'], // Interfaces use class_declaration with 'interface' modifier
+  structTypes: [], // Kotlin uses data classes
+  enumTypes: ['class_declaration'], // Enums use class_declaration with 'enum' modifier
+  typeAliasTypes: ['type_alias'],
+  importTypes: ['import_header'],
+  callTypes: ['call_expression'],
+  variableTypes: ['property_declaration'],
+  nameField: 'simple_identifier',
+  bodyField: 'function_body',
+  paramsField: 'function_value_parameters',
+  returnField: 'type',
+  /**
+   * Render "<params>: <ReturnType>" (Kotlin shape: fun name(params): ReturnType).
+   * The return part is omitted when absent; undefined without parameters.
+   */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'function_value_parameters');
+    if (!paramList) return undefined;
+
+    const declaredType = getChildByField(node, 'type');
+    const paramText = getNodeText(paramList, source);
+    return declaredType ? paramText + ': ' + getNodeText(declaredType, source) : paramText;
+  },
+  /**
+   * Visibility keyword found in the text of a `modifiers` child, checked in
+   * the order public, private, protected, internal.
+   */
+  getVisibility: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const candidate = node.child(idx);
+      if (!candidate || candidate.type !== 'modifiers') continue;
+
+      const modifierText = candidate.text;
+      if (modifierText.includes('public')) return 'public';
+      if (modifierText.includes('private')) return 'private';
+      if (modifierText.includes('protected')) return 'protected';
+      if (modifierText.includes('internal')) return 'internal';
+    }
+    return 'public'; // Kotlin defaults to public
+  },
+  // Kotlin doesn't have static, uses companion objects.
+  // Detecting "inside a companion object" would require more context.
+  isStatic: (_node) => false,
+  /** Kotlin uses the suspend keyword for coroutines; look for it in a `modifiers` child. */
+  isAsync: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const candidate = node.child(idx);
+      if (!candidate || candidate.type !== 'modifiers') continue;
+      if (candidate.text.includes('suspend')) return true;
+    }
+    return false;
+  },
+  /** Use the import header's `identifier` child text as the module name; null when there is none. */
+  extractImport: (node, source) => {
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+    const target = node.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
+    if (!target) return null;
+    return { moduleName: source.substring(target.startIndex, target.endIndex), signature };
+  },
+};

+ 62 - 0
src/extraction/languages/pascal.ts

@@ -0,0 +1,62 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for Pascal sources: node-type mapping for procedures,
+ * classes, interfaces, enums, type declarations, uses clauses, calls and
+ * fields/constants, plus signature, visibility, static and const detection.
+ */
+export const pascalExtractor: LanguageExtractor = {
+  functionTypes: ['declProc'],
+  classTypes: ['declClass'],
+  methodTypes: ['declProc'],
+  interfaceTypes: ['declIntf'],
+  structTypes: [],
+  enumTypes: ['declEnum'],
+  typeAliasTypes: ['declType'],
+  importTypes: ['declUses'],
+  callTypes: ['exprCall'],
+  variableTypes: ['declField', 'declConst'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'args',
+  returnField: 'type',
+  /**
+   * Render "<args>: <typeref>" from whichever parts exist. Returns undefined
+   * when both are missing or the rendered text is empty.
+   */
+  getSignature: (node, source) => {
+    const argList = getChildByField(node, 'args');
+    const typeRef = node.namedChildren.find((c: SyntaxNode) => c.type === 'typeref');
+    if (!argList && !typeRef) return undefined;
+
+    const argText = argList ? getNodeText(argList, source) : '';
+    const typeText = typeRef ? ': ' + getNodeText(typeRef, source) : '';
+    return (argText + typeText) || undefined;
+  },
+  /**
+   * Walk up the ancestors; inside each `declSection`, the first visibility
+   * keyword child decides the result. `published` is reported as 'public'.
+   * Returns undefined when no enclosing section carries a keyword.
+   */
+  getVisibility: (node) => {
+    for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
+      if (ancestor.type !== 'declSection') continue;
+
+      const total = ancestor.childCount;
+      for (let idx = 0; idx < total; idx++) {
+        const keyword = ancestor.child(idx)?.type;
+        if (keyword === 'kPublic' || keyword === 'kPublished') return 'public';
+        if (keyword === 'kPrivate') return 'private';
+        if (keyword === 'kProtected') return 'protected';
+      }
+    }
+    return undefined;
+  },
+  // In Pascal, symbols declared in the interface section are exported;
+  // this hook does not inspect the node and always reports false.
+  isExported: (_node, _source) => false,
+  /** True when the node has a direct `kClass` child. */
+  isStatic: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const child = node.child(idx);
+      if (child?.type === 'kClass') return true;
+    }
+    return false;
+  },
+  /** A node is const exactly when it is a `declConst`. */
+  isConst: (node) => node.type === 'declConst',
+};

+ 63 - 0
src/extraction/languages/php.ts

@@ -0,0 +1,63 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Extraction config for PHP sources: node-type mapping for functions,
+ * classes, methods, interfaces, enums, use declarations, calls and
+ * properties/constants, plus visibility, static detection and use resolution.
+ */
+export const phpExtractor: LanguageExtractor = {
+  functionTypes: ['function_definition'],
+  classTypes: ['class_declaration'],
+  methodTypes: ['method_declaration'],
+  interfaceTypes: ['interface_declaration'],
+  structTypes: [],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: [],
+  importTypes: ['namespace_use_declaration'],
+  callTypes: ['function_call_expression', 'member_call_expression', 'scoped_call_expression'],
+  variableTypes: ['property_declaration', 'const_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'return_type',
+  /** Text of the first recognised `visibility_modifier` child; 'public' when none is present. */
+  getVisibility: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      const candidate = node.child(idx);
+      if (!candidate || candidate.type !== 'visibility_modifier') continue;
+
+      const keyword = candidate.text;
+      if (keyword === 'public') return 'public';
+      if (keyword === 'private') return 'private';
+      if (keyword === 'protected') return 'protected';
+    }
+    return 'public'; // PHP defaults to public
+  },
+  /** True when the node has a direct `static_modifier` child. */
+  isStatic: (node) => {
+    const total = node.childCount;
+    for (let idx = 0; idx < total; idx++) {
+      if (node.child(idx)?.type === 'static_modifier') return true;
+    }
+    return false;
+  },
+  /**
+   * Resolve the name imported by a single `use` declaration.
+   * Grouped imports (`use X\{A, B}`) return null so the core fallback can
+   * create one node per member.
+   */
+  extractImport: (node, source) => {
+    const signature = source.substring(node.startIndex, node.endIndex).trim();
+
+    // Grouped form: a namespace prefix together with a use group.
+    const prefix = node.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_name');
+    const group = node.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_use_group');
+    if (prefix && group) return null;
+
+    // Single import: prefer the qualified name, fall back to a bare name.
+    const clause = node.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_use_clause');
+    if (!clause) return null;
+
+    const qualified = clause.namedChildren.find((c: SyntaxNode) => c.type === 'qualified_name');
+    const target = qualified ?? clause.namedChildren.find((c: SyntaxNode) => c.type === 'name');
+    return target ? { moduleName: getNodeText(target, source), signature } : null;
+  },
+};

+ 53 - 0
src/extraction/languages/python.ts

@@ -0,0 +1,53 @@
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Tree-sitter extraction settings for Python sources.
+ *
+ * Declares which grammar node types the core extractor treats as functions,
+ * classes, imports, calls and variables, and supplies Python-specific hooks
+ * for signatures, async/static detection and import parsing.
+ */
+export const pythonExtractor: LanguageExtractor = {
+  functionTypes: ['function_definition'],
+  classTypes: ['class_definition'],
+  methodTypes: ['function_definition'], // Methods are functions inside classes
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: ['import_statement', 'import_from_statement'],
+  callTypes: ['call'],
+  variableTypes: ['assignment'], // Python uses assignment for variable declarations
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'return_type',
+  /**
+   * Build `(<params>)` or `(<params>) -> <return type>` from the definition.
+   * Returns undefined when the node has no `parameters` field.
+   */
+  getSignature: (node, source) => {
+    const params = getChildByField(node, 'parameters');
+    if (!params) return undefined;
+    const returnType = getChildByField(node, 'return_type');
+    const paramText = getNodeText(params, source);
+    return returnType ? paramText + ' -> ' + getNodeText(returnType, source) : paramText;
+  },
+  /**
+   * True for `async def`.
+   *
+   * The `async` keyword is looked up among the definition's own children
+   * (where the Python grammar places it) and, for backward compatibility with
+   * the previous check, as the token immediately preceding the node.
+   */
+  isAsync: (node) => {
+    for (let i = 0; i < node.childCount; i++) {
+      if (node.child(i)?.type === 'async') return true;
+    }
+    return node.previousSibling?.type === 'async';
+  },
+  /**
+   * True when any decorator stacked directly above the definition mentions
+   * `staticmethod`.
+   *
+   * Every preceding decorator is inspected (not only the nearest one), so
+   * `@staticmethod` followed by another decorator is still detected.
+   * Comments between decorators are skipped.
+   */
+  isStatic: (node) => {
+    for (let prev = node.previousNamedSibling; prev; prev = prev.previousNamedSibling) {
+      if (prev.type === 'comment') continue;
+      if (prev.type !== 'decorator') break; // left the decorator stack
+      if (prev.text.includes('staticmethod')) return true;
+    }
+    return false;
+  },
+  /**
+   * Resolve `from <module> import ...` to its module name.
+   * Returns null for every other shape so the core extractor can apply its
+   * own fallback (a plain `import a, b` yields several imports).
+   */
+  extractImport: (node, source) => {
+    if (node.type === 'import_from_statement') {
+      const moduleNode = getChildByField(node, 'module_name');
+      if (moduleNode) {
+        return {
+          moduleName: getNodeText(moduleNode, source),
+          signature: getNodeText(node, source).trim(),
+        };
+      }
+    }
+    // import_statement creates multiple imports - return null for core fallback
+    return null;
+  },
+};

+ 60 - 0
src/extraction/languages/ruby.ts

@@ -0,0 +1,60 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Tree-sitter extraction settings for Ruby sources.
+ *
+ * Ruby has no dedicated import syntax, so `call` nodes are offered to
+ * `extractImport`, which accepts only `require` / `require_relative`.
+ */
+export const rubyExtractor: LanguageExtractor = {
+  functionTypes: ['method'],
+  classTypes: ['class'],
+  methodTypes: ['method', 'singleton_method'],
+  interfaceTypes: [], // Ruby uses modules
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: ['call'], // require/require_relative
+  callTypes: ['call', 'method_call'],
+  variableTypes: ['assignment'], // Ruby uses assignment like Python
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  /**
+   * Scan backwards through the preceding named siblings and return the
+   * visibility named by the nearest `call` whose method is `private`,
+   * `protected` or `public`. Falls back to 'public' when none is found.
+   */
+  getVisibility: (node) => {
+    for (let cursor = node.previousNamedSibling; cursor; cursor = cursor.previousNamedSibling) {
+      if (cursor.type !== 'call') continue;
+      switch (getChildByField(cursor, 'method')?.text) {
+        case 'private':
+          return 'private';
+        case 'protected':
+          return 'protected';
+        case 'public':
+          return 'public';
+      }
+    }
+    return 'public';
+  },
+  /**
+   * Turn `require '<path>'` / `require_relative '<path>'` into an import.
+   * Returns null for any other call, or when the first string argument has
+   * no literal content.
+   */
+  extractImport: (node, source) => {
+    const ofType = (wanted: string) => (c: SyntaxNode) => c.type === wanted;
+
+    // The called method must be require/require_relative
+    const callee = node.namedChildren.find(ofType('identifier'));
+    if (!callee) return null;
+    const calleeName = getNodeText(callee, source);
+    if (calleeName !== 'require' && calleeName !== 'require_relative') {
+      return null; // Not an import, skip
+    }
+
+    // Drill down: argument_list -> string -> string_content
+    const pathNode = node.namedChildren
+      .find(ofType('argument_list'))
+      ?.namedChildren.find(ofType('string'))
+      ?.namedChildren.find(ofType('string_content'));
+    if (!pathNode) return null;
+
+    return {
+      moduleName: getNodeText(pathNode, source),
+      signature: source.substring(node.startIndex, node.endIndex).trim(),
+    };
+  },
+};

+ 78 - 0
src/extraction/languages/rust.ts

@@ -0,0 +1,78 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Tree-sitter extraction settings for Rust sources.
+ *
+ * Traits are reported through `interfaceTypes` with `interfaceKind: 'trait'`;
+ * there are no class node types because Rust attaches methods via impl blocks.
+ */
+export const rustExtractor: LanguageExtractor = {
+  functionTypes: ['function_item'],
+  classTypes: [], // Rust has impl blocks
+  methodTypes: ['function_item'], // Methods are functions in impl blocks
+  interfaceTypes: ['trait_item'],
+  structTypes: ['struct_item'],
+  enumTypes: ['enum_item'],
+  typeAliasTypes: ['type_item'], // Rust type aliases
+  importTypes: ['use_declaration'],
+  callTypes: ['call_expression'],
+  variableTypes: ['let_declaration', 'const_item', 'static_item'],
+  interfaceKind: 'trait',
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'return_type',
+  /**
+   * Build `(<params>)` or `(<params>) -> <return type>`.
+   * Returns undefined when the node has no `parameters` field.
+   */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'parameters');
+    const ret = getChildByField(node, 'return_type');
+    if (!paramList) return undefined;
+    const paramText = getNodeText(paramList, source);
+    return ret ? paramText + ' -> ' + getNodeText(ret, source) : paramText;
+  },
+  /** True when the item has a direct `async` child token. */
+  isAsync: (node) => node.children.some((child) => child?.type === 'async'),
+  /**
+   * Visibility from the first `visibility_modifier` child: 'public' when its
+   * text contains `pub`, otherwise 'private'. Items without a modifier are
+   * 'private'.
+   */
+  getVisibility: (node) => {
+    const modifier = node.children.find((child) => child?.type === 'visibility_modifier');
+    if (!modifier) return 'private'; // Rust defaults to private
+    return modifier.text.includes('pub') ? 'public' : 'private';
+  },
+  /**
+   * Map a `use` declaration to the root crate/module of its path
+   * (e.g. the leading segment of a scoped path).
+   * Returns null when the declaration has no recognised argument node.
+   */
+  extractImport: (node, source) => {
+    const textOf = (n: SyntaxNode): string => source.substring(n.startIndex, n.endIndex);
+
+    // Follow the leading edge of nested scoped_identifier nodes down to the
+    // first path segment; any other first child is returned as-is.
+    const rootModuleOf = (start: SyntaxNode): string => {
+      let current = start;
+      for (;;) {
+        const head = current.namedChild(0);
+        if (!head) return textOf(current);
+        if (head.type !== 'scoped_identifier') return textOf(head);
+        current = head;
+      }
+    };
+
+    // Node types that can appear as the argument of `use`
+    const useArgTypes = ['scoped_use_list', 'scoped_identifier', 'use_list', 'identifier'];
+    const useArg = node.namedChildren.find((c: SyntaxNode) => useArgTypes.includes(c.type));
+    if (!useArg) return null;
+
+    return { moduleName: rootModuleOf(useArg), signature: textOf(node).trim() };
+  },
+};

+ 82 - 0
src/extraction/languages/swift.ts

@@ -0,0 +1,82 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Tree-sitter extraction settings for Swift sources.
+ *
+ * Modifier-based hooks (`getVisibility`, `isStatic`, `isAsync`) inspect the
+ * text of the declaration's `modifiers` child.
+ */
+export const swiftExtractor: LanguageExtractor = {
+  functionTypes: ['function_declaration'],
+  classTypes: ['class_declaration'],
+  methodTypes: ['function_declaration'], // Methods are functions inside classes
+  interfaceTypes: ['protocol_declaration'],
+  structTypes: ['struct_declaration'],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: ['typealias_declaration'],
+  importTypes: ['import_declaration'],
+  callTypes: ['call_expression'],
+  variableTypes: ['property_declaration', 'constant_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameter',
+  returnField: 'return_type',
+  /**
+   * Build `<params>` or `<params> -> <return type>` from the `parameter` and
+   * `return_type` fields. Returns undefined when there is no `parameter` field.
+   */
+  getSignature: (node, source) => {
+    // Swift function signature: func name(params) -> ReturnType
+    const params = getChildByField(node, 'parameter');
+    const returnType = getChildByField(node, 'return_type');
+    if (!params) return undefined;
+    let sig = getNodeText(params, source);
+    if (returnType) {
+      sig += ' -> ' + getNodeText(returnType, source);
+    }
+    return sig;
+  },
+  /**
+   * Map the access-level keyword found in `modifiers` to a visibility.
+   *
+   * Keywords are matched as whole words so identifiers that merely contain
+   * one (e.g. inside an attribute argument) are ignored. `open` is reported
+   * as 'public' and `fileprivate` as 'private'. Precedence is
+   * public > private > internal, and 'internal' is the default.
+   */
+  getVisibility: (node) => {
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child?.type !== 'modifiers') continue;
+      const text = child.text;
+      if (/\b(?:open|public)\b/.test(text)) return 'public';
+      if (/\b(?:fileprivate|private)\b/.test(text)) return 'private';
+      if (/\binternal\b/.test(text)) return 'internal';
+    }
+    return 'internal'; // Swift defaults to internal
+  },
+  /** True when `modifiers` contains `static` or `class`. */
+  isStatic: (node) => {
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child?.type === 'modifiers') {
+        if (child.text.includes('static') || child.text.includes('class')) {
+          return true;
+        }
+      }
+    }
+    return false;
+  },
+  /**
+   * Distinguish what a `class_declaration` node actually declares by looking
+   * for a `struct` or `enum` keyword child; anything else is a class.
+   */
+  classifyClassNode: (node) => {
+    // Swift uses class_declaration for classes, structs, and enums
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child?.type === 'struct') return 'struct';
+      if (child?.type === 'enum') return 'enum';
+    }
+    return 'class';
+  },
+  /** True when the text of a `modifiers` child contains `async`. */
+  isAsync: (node) => {
+    for (let i = 0; i < node.childCount; i++) {
+      const child = node.child(i);
+      if (child?.type === 'modifiers' && child.text.includes('async')) {
+        return true;
+      }
+    }
+    return false;
+  },
+  /**
+   * Use the first `identifier` child of the import declaration as the module
+   * name. Returns null when there is none.
+   */
+  extractImport: (node, source) => {
+    const identifier = node.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
+    if (!identifier) return null;
+    return {
+      moduleName: getNodeText(identifier, source),
+      signature: getNodeText(node, source).trim(),
+    };
+  },
+};

+ 88 - 0
src/extraction/languages/typescript.ts

@@ -0,0 +1,88 @@
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor } from '../tree-sitter-types';
+
+/**
+ * Tree-sitter extraction settings for TypeScript sources.
+ *
+ * Keyword hooks (`isAsync`, `isStatic`, `isConst`) look for the matching
+ * keyword token among the node's direct children.
+ */
+export const typescriptExtractor: LanguageExtractor = {
+  functionTypes: ['function_declaration', 'arrow_function', 'function_expression'],
+  classTypes: ['class_declaration'],
+  methodTypes: ['method_definition', 'public_field_definition'],
+  interfaceTypes: ['interface_declaration'],
+  structTypes: [],
+  enumTypes: ['enum_declaration'],
+  typeAliasTypes: ['type_alias_declaration'],
+  importTypes: ['import_statement'],
+  callTypes: ['call_expression'],
+  variableTypes: ['lexical_declaration', 'variable_declaration'],
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'return_type',
+  /**
+   * Build `(<params>)` or `(<params>): <return type>`.
+   * A leading colon on the return-type node's text is dropped before it is
+   * appended. Returns undefined when there is no `parameters` field.
+   */
+  getSignature: (node, source) => {
+    const paramList = getChildByField(node, 'parameters');
+    const ret = getChildByField(node, 'return_type');
+    if (!paramList) return undefined;
+    const paramText = getNodeText(paramList, source);
+    if (!ret) return paramText;
+    return paramText + ': ' + getNodeText(ret, source).replace(/^:\s*/, '');
+  },
+  /**
+   * Visibility from an `accessibility_modifier` child, or undefined when the
+   * declaration carries none.
+   */
+  getVisibility: (node) => {
+    for (const child of node.children) {
+      if (child?.type !== 'accessibility_modifier') continue;
+      switch (child.text) {
+        case 'public':
+          return 'public';
+        case 'private':
+          return 'private';
+        case 'protected':
+          return 'protected';
+      }
+    }
+    return undefined;
+  },
+  /**
+   * True when any ancestor is an `export_statement`.
+   *
+   * The whole parent chain is checked so nested nodes are covered, e.g. the
+   * arrow function in `export const X = () => { ... }`, which sits several
+   * levels below the export statement.
+   */
+  isExported: (node, _source) => {
+    for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
+      if (ancestor.type === 'export_statement') return true;
+    }
+    return false;
+  },
+  /** True when the node has a direct `async` child token. */
+  isAsync: (node) => node.children.some((child) => child?.type === 'async'),
+  /** True when the node has a direct `static` child token. */
+  isStatic: (node) => node.children.some((child) => child?.type === 'static'),
+  /**
+   * True only for a `lexical_declaration` that has a `const` keyword child.
+   * `let` declarations and `variable_declaration` (`var`) yield false.
+   */
+  isConst: (node) =>
+    node.type === 'lexical_declaration' &&
+    node.children.some((child) => child?.type === 'const'),
+  /**
+   * Read the module specifier from the import's `source` field with all
+   * quote characters removed. Returns null when the field is missing or the
+   * specifier is empty.
+   */
+  extractImport: (node, source) => {
+    const specifier = node.childForFieldName('source');
+    if (!specifier) return null;
+
+    const moduleName = source
+      .substring(specifier.startIndex, specifier.endIndex)
+      .replace(/['"]/g, '');
+    if (!moduleName) return null;
+
+    return { moduleName, signature: source.substring(node.startIndex, node.endIndex).trim() };
+  },
+};

+ 348 - 0
src/extraction/liquid-extractor.ts

@@ -0,0 +1,348 @@
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+
+/**
+ * LiquidExtractor - Extracts relationships from Liquid template files
+ *
+ * Liquid is a templating language (used by Shopify, Jekyll, etc.) that doesn't
+ * have traditional functions or classes. Instead, we extract:
+ * - Snippet references ({% render 'name' %} and {% include 'name' %})
+ * - Section references ({% section 'name' %})
+ * - Schema blocks ({% schema %}...{% endschema %})
+ * - Variable assignments ({% assign name = ... %})
+ *
+ * All scanning is regex-based over the raw source text.
+ */
+export class LiquidExtractor {
+  private filePath: string;
+  private source: string;
+  /** Character offset at which each line starts; entry 0 belongs to line 1. */
+  private lineStarts: number[];
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+
+    // Index line starts once so offset -> line/column lookups don't have to
+    // re-scan (or re-split) the whole source for every match.
+    this.lineStarts = [0];
+    for (let nl = source.indexOf('\n'); nl !== -1; nl = source.indexOf('\n', nl + 1)) {
+      this.lineStarts.push(nl + 1);
+    }
+  }
+
+  /**
+   * Extract from Liquid source.
+   *
+   * Never throws: any exception raised while scanning is recorded in the
+   * returned `errors` list, alongside whatever was collected before it.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      // Create file node
+      const fileNode = this.createFileNode();
+
+      // Extract render/include statements (snippet references)
+      this.extractSnippetReferences(fileNode.id);
+
+      // Extract section references
+      this.extractSectionReferences(fileNode.id);
+
+      // Extract schema block
+      this.extractSchema(fileNode.id);
+
+      // Extract assign statements as variables
+      this.extractAssignments(fileNode.id);
+    } catch (error) {
+      this.errors.push({
+        message: `Liquid extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Create a file node for the Liquid template
+   */
+  private createFileNode(): Node {
+    const lineCount = this.lineStarts.length;
+    const lastLineStart = this.lineStarts[lineCount - 1] ?? 0;
+    const id = generateNodeId(this.filePath, 'file', this.filePath, 1);
+
+    const fileNode: Node = {
+      id,
+      kind: 'file',
+      // Accept both separators so Windows-style paths yield the base name too
+      name: this.filePath.split(/[/\\]/).pop() || this.filePath,
+      qualifiedName: this.filePath,
+      filePath: this.filePath,
+      language: 'liquid',
+      startLine: 1,
+      endLine: lineCount,
+      startColumn: 0,
+      endColumn: this.source.length - lastLineStart,
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(fileNode);
+    return fileNode;
+  }
+
+  /**
+   * Extract {% render 'snippet' %} and {% include 'snippet' %} references
+   */
+  private extractSnippetReferences(fileNodeId: string): void {
+    // Match {% render 'name' %} or {% include 'name' %} with optional parameters
+    const renderRegex = /\{%[-]?\s*(render|include)\s+['"]([^'"]+)['"]/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = renderRegex.exec(this.source)) !== null) {
+      const [, tagType, snippetName] = match;
+      if (!tagType || !snippetName) continue; // both groups are mandatory in the pattern
+      this.addTemplateReference(fileNodeId, match, tagType, snippetName, 'snippets');
+    }
+  }
+
+  /**
+   * Extract {% section 'name' %} references
+   */
+  private extractSectionReferences(fileNodeId: string): void {
+    // Match {% section 'name' %}
+    const sectionRegex = /\{%[-]?\s*section\s+['"]([^'"]+)['"]/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = sectionRegex.exec(this.source)) !== null) {
+      const [, sectionName] = match;
+      if (!sectionName) continue; // the group is mandatory in the pattern
+      this.addTemplateReference(fileNodeId, match, 'section', sectionName, 'sections');
+    }
+  }
+
+  /**
+   * Record one template reference (render/include/section).
+   *
+   * Emits, in this order: an `import` node, its containment edge, a
+   * `component` node, its containment edge, and an unresolved reference to
+   * `<targetDir>/<targetName>.liquid`.
+   *
+   * @param fileNodeId - ID of the file node that owns the reference
+   * @param match - Regex match for the opening part of the tag
+   * @param tagType - Tag keyword, used to qualify the component node
+   * @param targetName - Quoted name captured from the tag
+   * @param targetDir - Directory prefix of the referenced template
+   */
+  private addTemplateReference(
+    fileNodeId: string,
+    match: RegExpExecArray,
+    tagType: string,
+    targetName: string,
+    targetDir: string
+  ): void {
+    const fullMatch = match[0];
+    const line = this.getLineNumber(match.index);
+    const startColumn = match.index - this.getLineStart(line);
+    const endColumn = startColumn + fullMatch.length;
+
+    // Create an import node for searchability
+    const importNodeId = generateNodeId(this.filePath, 'import', targetName, line);
+    const importNode: Node = {
+      id: importNodeId,
+      kind: 'import',
+      name: targetName,
+      qualifiedName: `${this.filePath}::import:${targetName}`,
+      filePath: this.filePath,
+      language: 'liquid',
+      signature: fullMatch,
+      startLine: line,
+      endLine: line,
+      startColumn,
+      endColumn,
+      updatedAt: Date.now(),
+    };
+    this.nodes.push(importNode);
+
+    // Add containment edge from file to import
+    this.edges.push({
+      source: fileNodeId,
+      target: importNodeId,
+      kind: 'contains',
+    });
+
+    // Create a component node for the referenced template
+    const componentNodeId = generateNodeId(
+      this.filePath,
+      'component',
+      `${tagType}:${targetName}`,
+      line
+    );
+    const componentNode: Node = {
+      id: componentNodeId,
+      kind: 'component',
+      name: targetName,
+      qualifiedName: `${this.filePath}::${tagType}:${targetName}`,
+      filePath: this.filePath,
+      language: 'liquid',
+      startLine: line,
+      endLine: line,
+      startColumn,
+      endColumn,
+      updatedAt: Date.now(),
+    };
+    this.nodes.push(componentNode);
+
+    // Add containment edge from file
+    this.edges.push({
+      source: fileNodeId,
+      target: componentNodeId,
+      kind: 'contains',
+    });
+
+    // Add unresolved reference to the referenced template file
+    this.unresolvedReferences.push({
+      fromNodeId: fileNodeId,
+      referenceName: `${targetDir}/${targetName}.liquid`,
+      referenceKind: 'references',
+      line,
+      column: startColumn,
+    });
+  }
+
+  /**
+   * Extract {% schema %}...{% endschema %} blocks
+   *
+   * The node is named after the schema JSON's string `name` property, or
+   * 'schema' when the body is not valid JSON or has no usable name.
+   */
+  private extractSchema(fileNodeId: string): void {
+    // Match {% schema %}...{% endschema %}
+    const schemaRegex = /\{%[-]?\s*schema\s*[-]?%\}([\s\S]*?)\{%[-]?\s*endschema\s*[-]?%\}/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = schemaRegex.exec(this.source)) !== null) {
+      const [fullMatch, schemaContent] = match;
+      const endOffset = match.index + fullMatch.length;
+      const startLine = this.getLineNumber(match.index);
+      const endLine = this.getLineNumber(endOffset);
+
+      // Try to parse the schema JSON to get the name
+      let schemaName = 'schema';
+      try {
+        const parsed = JSON.parse(schemaContent ?? '') as { name?: unknown } | null;
+        const candidate = parsed?.name;
+        // Only a non-empty string is usable as a node name
+        if (typeof candidate === 'string' && candidate !== '') {
+          schemaName = candidate;
+        }
+      } catch {
+        // Schema isn't valid JSON, use default name
+      }
+
+      // Create a node for the schema
+      const nodeId = generateNodeId(this.filePath, 'constant', `schema:${schemaName}`, startLine);
+
+      const node: Node = {
+        id: nodeId,
+        kind: 'constant',
+        name: schemaName,
+        qualifiedName: `${this.filePath}::schema:${schemaName}`,
+        filePath: this.filePath,
+        language: 'liquid',
+        startLine,
+        endLine,
+        startColumn: match.index - this.getLineStart(startLine),
+        // Column just past the closing tag, so the range covers the whole block
+        endColumn: endOffset - this.getLineStart(endLine),
+        docstring: schemaContent?.trim().substring(0, 200), // Store first 200 chars as docstring
+        updatedAt: Date.now(),
+      };
+
+      this.nodes.push(node);
+
+      // Add containment edge from file
+      this.edges.push({
+        source: fileNodeId,
+        target: nodeId,
+        kind: 'contains',
+      });
+    }
+  }
+
+  /**
+   * Extract {% assign var = value %} statements
+   */
+  private extractAssignments(fileNodeId: string): void {
+    // Match {% assign variable_name = ... %}
+    const assignRegex = /\{%[-]?\s*assign\s+(\w+)\s*=/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = assignRegex.exec(this.source)) !== null) {
+      const [matchedText, variableName] = match;
+      if (!variableName) continue; // the group is mandatory in the pattern
+      const line = this.getLineNumber(match.index);
+      const startColumn = match.index - this.getLineStart(line);
+
+      // Create a variable node
+      const nodeId = generateNodeId(this.filePath, 'variable', variableName, line);
+
+      const node: Node = {
+        id: nodeId,
+        kind: 'variable',
+        name: variableName,
+        qualifiedName: `${this.filePath}::${variableName}`,
+        filePath: this.filePath,
+        language: 'liquid',
+        startLine: line,
+        endLine: line,
+        startColumn,
+        endColumn: startColumn + matchedText.length,
+        updatedAt: Date.now(),
+      };
+
+      this.nodes.push(node);
+
+      // Add containment edge from file
+      this.edges.push({
+        source: fileNodeId,
+        target: nodeId,
+        kind: 'contains',
+      });
+    }
+  }
+
+  /**
+   * Get the 1-based line number for a character index
+   */
+  private getLineNumber(index: number): number {
+    // Binary search for the number of line starts at or before `index`;
+    // that count is the 1-based line number.
+    let low = 0;
+    let high = this.lineStarts.length;
+    while (low < high) {
+      const mid = (low + high) >>> 1;
+      if (this.lineStarts[mid]! <= index) {
+        low = mid + 1;
+      } else {
+        high = mid;
+      }
+    }
+    return Math.max(low, 1);
+  }
+
+  /**
+   * Get the character index of the start of a (1-based) line
+   */
+  private getLineStart(lineNumber: number): number {
+    const slot = lineNumber - 1;
+    if (slot <= 0) return 0;
+    // Past the last line: one past the end of the source
+    return this.lineStarts[slot] ?? (this.source.length + 1);
+  }
+}

+ 198 - 0
src/extraction/svelte-extractor.ts

@@ -0,0 +1,198 @@
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference, Language } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+import { TreeSitterExtractor } from './tree-sitter';
+import { isLanguageSupported } from './grammars';
+
+/** One `<script>` block lifted out of a Svelte file. */
+interface SvelteScriptBlock {
+  /** Raw text between the opening and closing script tags */
+  content: string;
+  /** Line offset added to positions reported for this block's content */
+  startLine: number;
+  /** True for module-level scripts (`context="module"` or bare `module`) */
+  isModule: boolean;
+  /** True when the tag declares `lang="ts"` / `lang="typescript"` */
+  isTypeScript: boolean;
+}
+
+/**
+ * SvelteExtractor - Extracts code relationships from Svelte component files
+ *
+ * Svelte files are multi-language (script + template + style). Rather than
+ * parsing the full Svelte grammar, we extract the <script> block content
+ * and delegate it to the TypeScript/JavaScript TreeSitterExtractor.
+ *
+ * Every .svelte file produces a component node (Svelte components are always importable).
+ */
+export class SvelteExtractor {
+  private filePath: string;
+  private source: string;
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+  }
+
+  /**
+   * Extract from Svelte source.
+   *
+   * Never throws: any exception raised while extracting is recorded in the
+   * returned `errors` list, alongside whatever was collected before it.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      // Create component node for the .svelte file itself
+      const componentNode = this.createComponentNode();
+
+      // Extract and process script blocks
+      for (const block of this.extractScriptBlocks()) {
+        this.processScriptBlock(block, componentNode.id);
+      }
+    } catch (error) {
+      this.errors.push({
+        message: `Svelte extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Create a component node for the .svelte file.
+   * The component is named after the file with its `.svelte` suffix removed.
+   */
+  private createComponentNode(): Node {
+    const lines = this.source.split('\n');
+    const fileName = this.filePath.split(/[/\\]/).pop() || this.filePath;
+    const componentName = fileName.replace(/\.svelte$/, '');
+    const id = generateNodeId(this.filePath, 'component', componentName, 1);
+
+    const node: Node = {
+      id,
+      kind: 'component',
+      name: componentName,
+      qualifiedName: `${this.filePath}::${componentName}`,
+      filePath: this.filePath,
+      language: 'svelte',
+      startLine: 1,
+      endLine: lines.length,
+      startColumn: 0,
+      endColumn: lines[lines.length - 1]?.length || 0,
+      isExported: true, // Svelte components are always importable
+      updatedAt: Date.now(),
+    };
+
+    this.nodes.push(node);
+    return node;
+  }
+
+  /**
+   * Extract <script> blocks from the Svelte source
+   */
+  private extractScriptBlocks(): SvelteScriptBlock[] {
+    const blocks: SvelteScriptBlock[] = [];
+
+    const scriptRegex = /<script(\s[^>]*)?>(?<content>[\s\S]*?)<\/script>/g;
+    let match: RegExpExecArray | null;
+
+    while ((match = scriptRegex.exec(this.source)) !== null) {
+      const attrs = match[1] || '';
+      const content = match.groups?.content || match[2] || '';
+
+      // Detect TypeScript from lang attribute
+      const isTypeScript = /lang\s*=\s*["'](ts|typescript)["']/.test(attrs);
+
+      // Detect module script: legacy `context="module"` or the bare
+      // `module` attribute used by Svelte 5
+      const isModule =
+        /context\s*=\s*["']module["']/.test(attrs) || /(?:^|\s)module(?:\s|$)/.test(attrs);
+
+      // Number of newlines before the tag = 0-indexed line of `<script`
+      const beforeScript = this.source.substring(0, match.index);
+      const scriptTagLine = (beforeScript.match(/\n/g) || []).length;
+      // An opening tag may itself span several lines
+      const openingTag = match[0].substring(0, match[0].indexOf('>') + 1);
+      const openingTagLines = (openingTag.match(/\n/g) || []).length;
+      // NOTE(review): `content` still starts with the newline that follows the
+      // opening tag, so whether this `+ 1` is correct depends on the line
+      // convention of TreeSitterExtractor (not visible here) - confirm.
+      const contentStartLine = scriptTagLine + openingTagLines + 1;
+
+      blocks.push({
+        content,
+        startLine: contentStartLine,
+        isModule,
+        isTypeScript,
+      });
+    }
+
+    return blocks;
+  }
+
+  /**
+   * Process a script block by delegating to TreeSitterExtractor.
+   *
+   * Everything the delegate returns is shifted by the block's line offset,
+   * re-labelled as Svelte, and merged into this extractor's results. Each
+   * returned node also gets a `contains` edge from the component node.
+   */
+  private processScriptBlock(block: SvelteScriptBlock, componentNodeId: string): void {
+    const scriptLanguage: Language = block.isTypeScript ? 'typescript' : 'javascript';
+
+    // Check if the script language parser is available
+    if (!isLanguageSupported(scriptLanguage)) {
+      this.errors.push({
+        message: `Parser for ${scriptLanguage} not available, cannot parse Svelte script block`,
+        severity: 'warning',
+      });
+      return;
+    }
+
+    // Delegate to TreeSitterExtractor
+    const extractor = new TreeSitterExtractor(this.filePath, block.content, scriptLanguage);
+    const result = extractor.extract();
+    const lineOffset = block.startLine;
+
+    // Offset line numbers from script block back to .svelte file positions
+    for (const node of result.nodes) {
+      node.startLine += lineOffset;
+      node.endLine += lineOffset;
+      node.language = 'svelte'; // Mark as svelte, not TS/JS
+
+      this.nodes.push(node);
+
+      // Add containment edge from component to this node
+      this.edges.push({
+        source: componentNodeId,
+        target: node.id,
+        kind: 'contains',
+      });
+    }
+
+    // Offset edges (only those that carry a line number)
+    for (const edge of result.edges) {
+      if (typeof edge.line === 'number') {
+        edge.line += lineOffset;
+      }
+      this.edges.push(edge);
+    }
+
+    // Offset unresolved references
+    for (const ref of result.unresolvedReferences) {
+      ref.line += lineOffset;
+      ref.filePath = this.filePath;
+      ref.language = 'svelte';
+      this.unresolvedReferences.push(ref);
+    }
+
+    // Carry over errors, shifting those that carry a line number
+    for (const error of result.errors) {
+      if (typeof error.line === 'number') {
+        error.line += lineOffset;
+      }
+      this.errors.push(error);
+    }
+  }
+}

+ 80 - 0
src/extraction/tree-sitter-helpers.ts

@@ -0,0 +1,80 @@
+/**
+ * Tree-sitter Shared Helpers
+ *
+ * Utility functions used by the core TreeSitterExtractor and per-language extractors.
+ * Extracted to a leaf module to avoid circular imports between tree-sitter.ts and languages/.
+ */
+
+import { Node as SyntaxNode } from 'web-tree-sitter';
+import * as crypto from 'crypto';
+import { NodeKind } from '../types';
+
+/**
+ * Build the identifier for a node from its location and identity.
+ *
+ * The result is `<kind>:` followed by the first 32 hex characters (128 bits) of the
+ * SHA-256 digest of `filePath:kind:name:line`. The long prefix keeps collisions
+ * unlikely when indexing large codebases with many similarly named symbols.
+ *
+ * @param filePath - Path of the file the node was found in
+ * @param kind - Node kind; also used as the readable prefix of the ID
+ * @param name - Symbol name
+ * @param line - Line number of the node
+ * @returns The node ID string
+ */
+export function generateNodeId(
+  filePath: string,
+  kind: NodeKind,
+  name: string,
+  line: number
+): string {
+  const fingerprint = `${filePath}:${kind}:${name}:${line}`;
+  const digest = crypto.createHash('sha256').update(fingerprint).digest('hex');
+  return `${kind}:${digest.slice(0, 32)}`;
+}
+
+/**
+ * Return the portion of `source` that a syntax node spans, i.e. the characters
+ * from the node's `startIndex` up to (not including) its `endIndex`.
+ */
+export function getNodeText(node: SyntaxNode, source: string): string {
+  const { startIndex, endIndex } = node;
+  return source.substring(startIndex, endIndex);
+}
+
+/**
+ * Look up the child of `node` stored under the grammar field `fieldName`.
+ * Returns whatever `childForFieldName` yields, which is `null` when there is no such child.
+ */
+export function getChildByField(node: SyntaxNode, fieldName: string): SyntaxNode | null {
+  const child = node.childForFieldName(fieldName);
+  return child;
+}
+
+/**
+ * Get the docstring/comment preceding a node
+ *
+ * Walks backwards over the node's previous named siblings for as long as they are
+ * comment nodes (`comment`, `line_comment`, `block_comment`, `documentation_comment`),
+ * keeps them in source order, strips the comment markers from each one and joins
+ * the results with newlines.
+ *
+ * Markers removed: the block-comment opener at the start and closer at the end of a
+ * comment, line-comment leaders at the start of a line (two or more slashes with an
+ * optional `!`, so `//`, `///` and `//!` are all handled), and the leading `*`
+ * gutter on block-comment continuation lines.
+ *
+ * @param node - Node whose leading comments are collected
+ * @param source - Source text that the sibling nodes' indices point into
+ * @returns The cleaned comment text, or `undefined` when the previous named sibling
+ *          is not a comment node
+ */
+export function getPrecedingDocstring(node: SyntaxNode, source: string): string | undefined {
+  const commentTypes = ['comment', 'line_comment', 'block_comment', 'documentation_comment'];
+  const comments: string[] = [];
+
+  // Collect back-to-front, prepending so the result stays in source order
+  let sibling = node.previousNamedSibling;
+  while (sibling && commentTypes.includes(sibling.type)) {
+    comments.unshift(getNodeText(sibling, source));
+    sibling = sibling.previousNamedSibling;
+  }
+
+  if (comments.length === 0) return undefined;
+
+  // Clean up comment markers
+  return comments
+    .map((c) =>
+      c
+        .replace(/^\/\*\*?|\*\/$/g, '') // block opener (start of text) and closer (end of text)
+        .replace(/^\/\/+!?\s?/gm, '') // line leaders, including doc forms `///` and `//!`
+        .replace(/^\s*\*\s?/gm, '') // `*` gutter on continuation lines
+        .trim()
+    )
+    .join('\n')
+    .trim();
+}

+ 166 - 0
src/extraction/tree-sitter-types.ts

@@ -0,0 +1,166 @@
+/**
+ * Tree-sitter Extraction Types
+ *
+ * Defines the LanguageExtractor interface and related types used by
+ * the core TreeSitterExtractor and per-language extraction configs.
+ * Extracted to a leaf module to avoid circular imports.
+ */
+
+import { Node as SyntaxNode } from 'web-tree-sitter';
+import {
+  Node,
+  NodeKind,
+  UnresolvedReference,
+} from '../types';
+
+/**
+ * Information returned by a language's extractImport hook.
+ *
+ * `moduleName` and `signature` are always present; `handledRefs` may be omitted.
+ */
+export interface ImportInfo {
+  /** The module/package name being imported */
+  moduleName: string;
+  /** Full import statement text for display */
+  signature: string;
+  /**
+   * If true, the hook already created unresolved references itself.
+   * NOTE(review): presumably the core extractor then skips creating its own
+   * reference for this import; confirm against the caller in tree-sitter.ts.
+   */
+  handledRefs?: boolean;
+}
+
+/**
+ * Information about a single variable within a declaration.
+ * Returned by a language's extractVariables hook, which yields an array with
+ * one entry per declared variable. Only `name` and `kind` are required.
+ */
+export interface VariableInfo {
+  /** Variable name */
+  name: string;
+  /** Node kind: 'variable' or 'constant' (typed as the general NodeKind, so this is not enforced by the type) */
+  kind: NodeKind;
+  /** Optional signature string */
+  signature?: string;
+  /** If set, this declarator is actually a function and should be extracted as such */
+  delegateToFunction?: SyntaxNode;
+  /** The AST node to use for positioning (may differ from the declaration node) */
+  positionNode?: SyntaxNode;
+}
+
+/**
+ * Context object passed to language hooks that need to call back into the core extractor.
+ * Provides a controlled API surface — hooks can create nodes, visit children, and add
+ * references without accessing the full TreeSitterExtractor internals.
+ *
+ * The four methods are the only way a hook can change extraction state through this
+ * object; the properties are declared `readonly`, and the two collections are exposed
+ * as readonly arrays.
+ */
+export interface ExtractorContext {
+  /**
+   * Create a node and add it to the extraction result.
+   * May return null. NOTE(review): the conditions under which no node is created
+   * are decided by the implementation and are not visible here.
+   */
+  createNode(kind: NodeKind, name: string, node: SyntaxNode, extra?: Partial<Node>): Node | null;
+  /** Visit a child node (dispatches through the standard visitNode logic) */
+  visitNode(node: SyntaxNode): void;
+  /** Visit a function body to extract calls; `functionId` identifies the function the body belongs to */
+  visitFunctionBody(body: SyntaxNode, functionId: string): void;
+  /** Add an unresolved reference */
+  addUnresolvedReference(ref: UnresolvedReference): void;
+  /** Current file path */
+  readonly filePath: string;
+  /** Current source text */
+  readonly source: string;
+  /** Stack of parent node IDs (current scope) */
+  readonly nodeStack: readonly string[];
+  /** All nodes extracted so far */
+  readonly nodes: readonly Node[];
+}
+
+/**
+ * Language-specific extraction configuration.
+ *
+ * Each supported language provides an implementation of this interface
+ * that configures which AST node types to look for and how to extract
+ * language-specific details like signatures, visibility, and imports.
+ *
+ * The node-type lists and the name/body/params field names are required.
+ * `returnField`, the extra config properties, and every hook are optional.
+ */
+export interface LanguageExtractor {
+  // --- Node type mappings ---
+
+  /** Node types that represent functions */
+  functionTypes: string[];
+  /** Node types that represent classes */
+  classTypes: string[];
+  /** Node types that represent methods */
+  methodTypes: string[];
+  /** Node types that represent interfaces/protocols/traits */
+  interfaceTypes: string[];
+  /** Node types that represent structs */
+  structTypes: string[];
+  /** Node types that represent enums */
+  enumTypes: string[];
+  /** Node types that represent type aliases (e.g. `type X = ...`) */
+  typeAliasTypes: string[];
+  /** Node types that represent imports */
+  importTypes: string[];
+  /** Node types that represent function calls */
+  callTypes: string[];
+  /** Node types that represent variable declarations (const, let, var, etc.) */
+  variableTypes: string[];
+
+  // --- Field name mappings ---
+
+  /** Field name for identifier/name */
+  nameField: string;
+  /** Field name for body */
+  bodyField: string;
+  /** Field name for parameters */
+  paramsField: string;
+  /** Field name for return type (optional) */
+  returnField?: string;
+
+  // --- Existing hooks ---
+
+  /** Extract signature from node; may return undefined */
+  getSignature?: (node: SyntaxNode, source: string) => string | undefined;
+  /** Extract visibility from node; may return undefined */
+  getVisibility?: (node: SyntaxNode) => 'public' | 'private' | 'protected' | 'internal' | undefined;
+  /** Check if node is exported */
+  isExported?: (node: SyntaxNode, source: string) => boolean;
+  /** Check if node is async */
+  isAsync?: (node: SyntaxNode) => boolean;
+  /** Check if node is static */
+  isStatic?: (node: SyntaxNode) => boolean;
+  /** Check if variable declaration is a constant (const vs let/var) */
+  isConst?: (node: SyntaxNode) => boolean;
+
+  // --- New config properties ---
+
+  /** Additional node types to treat as class declarations (e.g. Dart: 'mixin_declaration') */
+  extraClassNodeTypes?: string[];
+  /** Whether methods can be top-level without enclosing class (Go: true) */
+  methodsAreTopLevel?: boolean;
+  /** NodeKind to use for interface-like declarations (Rust: 'trait'). Default: 'interface' */
+  interfaceKind?: NodeKind;
+
+  // --- New hooks ---
+
+  /**
+   * Custom node visitor. Return true if the node was fully handled (skip default dispatch).
+   * Used by languages with fundamentally different AST structures (e.g. Pascal).
+   * Receives an ExtractorContext for calling back into the core extractor.
+   */
+  visitNode?: (node: SyntaxNode, ctx: ExtractorContext) => boolean;
+
+  /**
+   * Classify a class_declaration node when the grammar reuses one node type
+   * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
+   * Returns one of 'class', 'struct' or 'enum'.
+   */
+  classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum';
+
+  /**
+   * Resolve the body node for a function/method/class when it's not a child field.
+   * (e.g. Dart puts function_body as a sibling, not a child.)
+   * Receives the node and a body field name, and may return null.
+   * NOTE(review): presumably null means no body was found; confirm against the caller.
+   */
+  resolveBody?: (node: SyntaxNode, bodyField: string) => SyntaxNode | null;
+
+  /**
+   * Extract import information from an import node.
+   * Return null if the node isn't a recognized import form.
+   */
+  extractImport?: (node: SyntaxNode, source: string) => ImportInfo | null;
+
+  /**
+   * Extract variable declarations from a variable declaration node.
+   * Returns info about each declared variable, allowing the core to create nodes.
+   */
+  extractVariables?: (node: SyntaxNode, source: string) => VariableInfo[];
+}

Diff do ficheiro suprimidas por serem muito extensas
+ 76 - 1015
src/extraction/tree-sitter.ts


Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff