// tree-sitter-types.ts (5.8 KB)
  /**
   * Tree-sitter Extraction Types
   *
   * Defines the LanguageExtractor interface and related types used by
   * the core TreeSitterExtractor and per-language extraction configs.
   * Extracted to a leaf module to avoid circular imports.
   */
  import { Node as SyntaxNode } from 'web-tree-sitter';
  import {
    Node,
    NodeKind,
    UnresolvedReference,
  } from '../types';
  /**
   * Information returned by a language's `extractImport` hook.
   *
   * Describes a single import statement: which module it pulls in and the
   * text to show for it.
   */
  export interface ImportInfo {
    /** The module/package name being imported. */
    moduleName: string;
    /** Full import statement text, for display. */
    signature: string;
    /**
     * If true, the hook already created unresolved references itself.
     * NOTE(review): presumably the core then skips creating its own
     * references for this import — confirm against the core extractor.
     */
    handledRefs?: boolean;
  }
  /**
   * Information about a single variable within a declaration.
   * Returned (one entry per declared variable) by a language's
   * `extractVariables` hook.
   */
  export interface VariableInfo {
    /** Variable name. */
    name: string;
    /** Node kind: 'variable' or 'constant'. */
    kind: NodeKind;
    /** Optional signature string. */
    signature?: string;
    /**
     * If set, this declarator is actually a function and should be
     * extracted as such, using the given syntax node.
     */
    delegateToFunction?: SyntaxNode;
    /**
     * The AST node to use for positioning (may differ from the
     * declaration node).
     */
    positionNode?: SyntaxNode;
  }
  /**
   * Context object passed to language hooks that need to call back into the
   * core extractor.
   *
   * Provides a controlled API surface: hooks can create nodes, visit
   * children, and add references without accessing the full
   * TreeSitterExtractor internals. State is exposed as read-only views.
   */
  export interface ExtractorContext {
    /**
     * Create a node and add it to the extraction result.
     *
     * @param kind  Kind of node to create.
     * @param name  Name for the new node.
     * @param node  Syntax node the new node is created from.
     * @param extra Optional partial set of `Node` fields to apply.
     * @returns The created node, or `null` (the conditions for `null` are
     *          decided by the implementation, not by this module).
     */
    createNode(kind: NodeKind, name: string, node: SyntaxNode, extra?: Partial<Node>): Node | null;

    /** Visit a child node (dispatches through the standard visitNode logic). */
    visitNode(node: SyntaxNode): void;

    /**
     * Visit a function body to extract calls.
     *
     * @param body       Syntax node of the function body.
     * @param functionId ID of the function the body belongs to.
     */
    visitFunctionBody(body: SyntaxNode, functionId: string): void;

    /** Add an unresolved reference. */
    addUnresolvedReference(ref: UnresolvedReference): void;

    /** Current file path. */
    readonly filePath: string;

    /** Current source text. */
    readonly source: string;

    /** Stack of parent node IDs (current scope). */
    readonly nodeStack: readonly string[];

    /** All nodes extracted so far. */
    readonly nodes: readonly Node[];
  }
  /**
   * Language-specific extraction configuration.
   *
   * Each supported language provides an implementation of this interface
   * that configures which AST node types to look for and how to extract
   * language-specific details like signatures, visibility, and imports.
   *
   * The node-type lists and the name/body/params field names are required;
   * every hook and every remaining property is optional.
   */
  export interface LanguageExtractor {
    // --- Node type mappings ---

    /** Node types that represent functions. */
    functionTypes: string[];
    /** Node types that represent classes. */
    classTypes: string[];
    /** Node types that represent methods. */
    methodTypes: string[];
    /** Node types that represent interfaces/protocols/traits. */
    interfaceTypes: string[];
    /** Node types that represent structs. */
    structTypes: string[];
    /** Node types that represent enums. */
    enumTypes: string[];
    /** Node types that represent type aliases (e.g. `type X = ...`). */
    typeAliasTypes: string[];
    /** Node types that represent imports. */
    importTypes: string[];
    /** Node types that represent function calls. */
    callTypes: string[];
    /** Node types that represent variable declarations (const, let, var, etc.). */
    variableTypes: string[];

    // --- Field name mappings ---

    /** Field name for identifier/name. */
    nameField: string;
    /** Field name for body. */
    bodyField: string;
    /** Field name for parameters. */
    paramsField: string;
    /** Field name for return type (optional). */
    returnField?: string;

    // --- Optional hooks: signatures and modifiers ---

    /** Extract signature from node; `undefined` when there is none to report. */
    getSignature?: (node: SyntaxNode, source: string) => string | undefined;
    /** Extract visibility from node; `undefined` when it cannot be determined. */
    getVisibility?: (node: SyntaxNode) => 'public' | 'private' | 'protected' | 'internal' | undefined;
    /** Check if node is exported. */
    isExported?: (node: SyntaxNode, source: string) => boolean;
    /** Check if node is async. */
    isAsync?: (node: SyntaxNode) => boolean;
    /** Check if node is static. */
    isStatic?: (node: SyntaxNode) => boolean;
    /** Check if variable declaration is a constant (const vs let/var). */
    isConst?: (node: SyntaxNode) => boolean;

    // --- Optional config properties ---

    /** Additional node types to treat as class declarations (e.g. Dart: 'mixin_declaration'). */
    extraClassNodeTypes?: string[];
    /** Whether methods can be top-level without enclosing class (Go: true). */
    methodsAreTopLevel?: boolean;
    /** NodeKind to use for interface-like declarations (Rust: 'trait'). Default: 'interface'. */
    interfaceKind?: NodeKind;

    // --- Optional hooks: structure ---

    /**
     * Custom node visitor. Return true if the node was fully handled (skip default dispatch).
     * Used by languages with fundamentally different AST structures (e.g. Pascal).
     */
    visitNode?: (node: SyntaxNode, ctx: ExtractorContext) => boolean;

    /**
     * Classify a class_declaration node when the grammar reuses one node type
     * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
     */
    classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum';

    /**
     * Resolve the body node for a function/method/class when it's not a child field
     * (e.g. Dart puts function_body as a sibling, not a child).
     * Receives the configured body field name; returns null when no body is found.
     */
    resolveBody?: (node: SyntaxNode, bodyField: string) => SyntaxNode | null;

    /**
     * Extract import information from an import node.
     * Return null if the node isn't a recognized import form.
     */
    extractImport?: (node: SyntaxNode, source: string) => ImportInfo | null;

    /**
     * Extract variable declarations from a variable declaration node.
     * Returns info about each declared variable, allowing the core to create nodes.
     */
    extractVariables?: (node: SyntaxNode, source: string) => VariableInfo[];
  }