tree-sitter-types.ts 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /**
  2. * Tree-sitter Extraction Types
  3. *
  4. * Defines the LanguageExtractor interface and related types used by
  5. * the core TreeSitterExtractor and per-language extraction configs.
  6. * Extracted to a leaf module to avoid circular imports.
  7. */
  8. import { Node as SyntaxNode } from 'web-tree-sitter';
  9. import {
  10. Node,
  11. NodeKind,
  12. UnresolvedReference,
  13. } from '../types';
  14. /**
  15. * Information returned by a language's extractImport hook.
  16. */
  17. export interface ImportInfo {
  18. /** The module/package name being imported */
  19. moduleName: string;
  20. /** Full import statement text for display */
  21. signature: string;
  22. /** If true, the hook already created unresolved references itself */
  23. handledRefs?: boolean;
  24. }
  25. /**
  26. * Information about a single variable within a declaration.
  27. * Returned by a language's extractVariables hook.
  28. */
  29. export interface VariableInfo {
  30. /** Variable name */
  31. name: string;
  32. /** Node kind: 'variable' or 'constant' */
  33. kind: NodeKind;
  34. /** Optional signature string */
  35. signature?: string;
  36. /** If set, this declarator is actually a function and should be extracted as such */
  37. delegateToFunction?: SyntaxNode;
  38. /** The AST node to use for positioning (may differ from the declaration node) */
  39. positionNode?: SyntaxNode;
  40. }
  41. /**
  42. * Context object passed to language hooks that need to call back into the core extractor.
  43. * Provides a controlled API surface — hooks can create nodes, visit children, and add
  44. * references without accessing the full TreeSitterExtractor internals.
  45. */
  46. export interface ExtractorContext {
  47. /** Create a node and add it to the extraction result */
  48. createNode(kind: NodeKind, name: string, node: SyntaxNode, extra?: Partial<Node>): Node | null;
  49. /** Visit a child node (dispatches through the standard visitNode logic) */
  50. visitNode(node: SyntaxNode): void;
  51. /** Visit a function body to extract calls */
  52. visitFunctionBody(body: SyntaxNode, functionId: string): void;
  53. /** Add an unresolved reference */
  54. addUnresolvedReference(ref: UnresolvedReference): void;
  55. /** Push a node ID onto the scope stack (for containment/qualified name building) */
  56. pushScope(nodeId: string): void;
  57. /** Pop the last node ID from the scope stack */
  58. popScope(): void;
  59. /** Current file path */
  60. readonly filePath: string;
  61. /** Current source text */
  62. readonly source: string;
  63. /** Stack of parent node IDs (current scope) */
  64. readonly nodeStack: readonly string[];
  65. /** All nodes extracted so far */
  66. readonly nodes: readonly Node[];
  67. }
  68. /**
  69. * Language-specific extraction configuration.
  70. *
  71. * Each supported language provides an implementation of this interface
  72. * that configures which AST node types to look for and how to extract
  73. * language-specific details like signatures, visibility, and imports.
  74. */
  75. export interface LanguageExtractor {
  76. // --- Node type mappings ---
  77. /** Node types that represent functions */
  78. functionTypes: string[];
  79. /** Node types that represent classes */
  80. classTypes: string[];
  81. /** Node types that represent methods */
  82. methodTypes: string[];
  83. /** Node types that represent interfaces/protocols/traits */
  84. interfaceTypes: string[];
  85. /** Node types that represent structs */
  86. structTypes: string[];
  87. /** Node types that represent enums */
  88. enumTypes: string[];
  89. /** Node types that represent enum members/cases (e.g. Swift: 'enum_entry', Rust: 'enum_variant') */
  90. enumMemberTypes?: string[];
  91. /** Node types that represent type aliases (e.g. `type X = ...`) */
  92. typeAliasTypes: string[];
  93. /** Node types that represent imports */
  94. importTypes: string[];
  95. /** Node types that represent function calls */
  96. callTypes: string[];
  97. /** Node types that represent variable declarations (const, let, var, etc.) */
  98. variableTypes: string[];
  99. /** Node types that represent class fields (extracted as 'field' kind inside class bodies) */
  100. fieldTypes?: string[];
  101. /** Node types that represent class properties (extracted as 'property' kind inside class bodies) */
  102. propertyTypes?: string[];
  103. // --- Field name mappings ---
  104. /** Field name for identifier/name */
  105. nameField: string;
  106. /** Field name for body */
  107. bodyField: string;
  108. /** Field name for parameters */
  109. paramsField: string;
  110. /** Field name for return type */
  111. returnField?: string;
  112. // --- Existing hooks ---
  113. /** Extract signature from node */
  114. getSignature?: (node: SyntaxNode, source: string) => string | undefined;
  115. /** Extract visibility from node */
  116. getVisibility?: (node: SyntaxNode) => 'public' | 'private' | 'protected' | 'internal' | undefined;
  117. /** Check if node is exported */
  118. isExported?: (node: SyntaxNode, source: string) => boolean;
  119. /** Check if node is async */
  120. isAsync?: (node: SyntaxNode) => boolean;
  121. /** Check if node is static */
  122. isStatic?: (node: SyntaxNode) => boolean;
  123. /** Check if variable declaration is a constant (const vs let/var) */
  124. isConst?: (node: SyntaxNode) => boolean;
  125. // --- New config properties ---
  126. /** Additional node types to treat as class declarations (e.g. Dart: 'mixin_declaration') */
  127. extraClassNodeTypes?: string[];
  128. /** Whether methods can be top-level without enclosing class (Go: true) */
  129. methodsAreTopLevel?: boolean;
  130. /** NodeKind to use for interface-like declarations (Rust: 'trait'). Default: 'interface' */
  131. interfaceKind?: NodeKind;
  132. // --- New hooks ---
  133. /**
  134. * Custom node visitor. Return true if the node was fully handled (skip default dispatch).
  135. * Used by languages with fundamentally different AST structures (e.g. Pascal).
  136. */
  137. visitNode?: (node: SyntaxNode, ctx: ExtractorContext) => boolean;
  138. /**
  139. * Classify a class_declaration node when the grammar reuses one node type
  140. * for multiple concepts (e.g. Swift uses class_declaration for classes, structs, and enums).
  141. */
  142. classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum' | 'interface' | 'trait';
  143. /**
  144. * Resolve the body node for a function/method/class when it's not a child field.
  145. * (e.g. Dart puts function_body as a sibling, not a child.)
  146. */
  147. resolveBody?: (node: SyntaxNode, bodyField: string) => SyntaxNode | null;
  148. /**
  149. * Extract import information from an import node.
  150. * Return null if the node isn't a recognized import form.
  151. */
  152. extractImport?: (node: SyntaxNode, source: string) => ImportInfo | null;
  153. /**
  154. * Extract variable declarations from a variable declaration node.
  155. * Returns info about each declared variable, allowing the core to create nodes.
  156. */
  157. extractVariables?: (node: SyntaxNode, source: string) => VariableInfo[];
  158. /**
  159. * Extract receiver/owner type name from a method declaration.
  160. * Used by Go to get the struct receiver (e.g., "scrapeLoop" from "func (sl *scrapeLoop) run()").
  161. * When present, the receiver type is included in the qualified name for better searchability.
  162. */
  163. getReceiverType?: (node: SyntaxNode, source: string) => string | undefined;
  164. /**
  165. * Resolve the actual node kind for a type alias declaration.
  166. * Used by Go where `type_spec` is the named declaration wrapper for structs/interfaces:
  167. * `type Foo struct { ... }` → type_spec (name: "Foo") → struct_type
  168. * Returns 'struct', 'interface', etc. to override the default 'type_alias' kind,
  169. * or undefined to keep it as a type alias.
  170. */
  171. resolveTypeAliasKind?: (node: SyntaxNode, source: string) => NodeKind | undefined;
  172. /**
  173. * Check if a function/method name is a misparse artifact that should be skipped.
  174. * Used by C/C++ where macros (e.g. NLOHMANN_JSON_NAMESPACE_BEGIN) cause tree-sitter
  175. * to misparse namespace blocks as function_definitions. When this returns true,
  176. * the function node is NOT created, but the body is still visited for calls and
  177. * structural nodes (classes, structs, enums).
  178. */
  179. isMisparsedFunction?: (name: string, node: SyntaxNode) => boolean;
  180. /**
  181. * Detect bare method calls that don't use call expression syntax.
  182. * Used by Ruby where `reset` (no parens, no receiver) is a method call but
  183. * tree-sitter parses it as a plain `identifier` node instead of `call`/`method_call`.
  184. * Returns the callee name if this node is a bare call, or undefined if not.
  185. */
  186. extractBareCall?: (node: SyntaxNode, source: string) => string | undefined;
  187. }