// tree-sitter-helpers.ts

  /**
   * Tree-sitter Shared Helpers
   *
   * Utility functions used by the core TreeSitterExtractor and the per-language extractors.
   * They live in a leaf module to avoid circular imports between tree-sitter.ts and languages/.
   */
  7. import { Node as SyntaxNode } from 'web-tree-sitter';
  8. import * as crypto from 'crypto';
  9. import { NodeKind } from '../types';
  /**
   * Generate a unique node ID.
   *
   * The ID has the form `<kind>:<hash>`, where `<hash>` is the first 32 hex
   * characters (128 bits) of the SHA-256 digest of the string
   * `filePath:kind:name:line`. The 128-bit length is used to avoid collisions
   * when indexing large codebases with many files containing similar symbols.
   *
   * The result depends only on the four arguments, so the same arguments
   * always yield the same ID.
   *
   * @param filePath - Path of the file the node was found in.
   * @param kind - Node kind; it is both hashed and used as the ID prefix.
   * @param name - Name of the node.
   * @param line - Line number of the node.
   * @returns The ID string `<kind>:<32 hex chars>`.
   */
  export function generateNodeId(
    filePath: string,
    kind: NodeKind,
    name: string,
    line: number
  ): string {
    const hash = crypto
      .createHash('sha256')
      .update(`${filePath}:${kind}:${name}:${line}`)
      .digest('hex')
      // 32 hex characters = 128 bits of the 256-bit digest.
      .substring(0, 32);
    return `${kind}:${hash}`;
  }
  /**
   * Extract the text covered by a syntax node.
   *
   * @param node - Node whose `startIndex` and `endIndex` delimit the span.
   * @param source - String the node's indices are applied to; presumably the
   *   same text the tree was parsed from (not checked here).
   * @returns The portion of `source` from `node.startIndex` up to, but not
   *   including, `node.endIndex`.
   */
  export function getNodeText(node: SyntaxNode, source: string): string {
    return source.substring(node.startIndex, node.endIndex);
  }
  /**
   * Find a child node by field name.
   *
   * Thin wrapper around `node.childForFieldName`, so extractors can use a
   * single shared helper.
   *
   * @param node - Parent node to look in.
   * @param fieldName - Grammar field name of the wanted child.
   * @returns Whatever `node.childForFieldName(fieldName)` returns: the child
   *   node, or `null` when the lookup yields none.
   */
  export function getChildByField(node: SyntaxNode, fieldName: string): SyntaxNode | null {
    return node.childForFieldName(fieldName);
  }
  /** Node types that are treated as comments when collecting a docstring. */
  const DOCSTRING_COMMENT_TYPES: ReadonlySet<string> = new Set([
    'comment',
    'line_comment',
    'block_comment',
    'documentation_comment',
  ]);

  /**
   * Strip comment delimiters and per-line decoration from one comment's raw text.
   *
   * Block comments (starting with `/*`) lose the opening and closing
   * delimiter and the leading `*` of each line. Everything else only loses a
   * leading run of two or more slashes per line, so `///` is handled like `//`.
   */
  function stripCommentMarkers(raw: string): string {
    if (raw.startsWith('/*')) {
      // Slicing (instead of a regex) keeps degenerate comments such as `/**/`
      // from leaving a stray `/` behind.
      const isClosed = raw.length >= 4 && raw.endsWith('*/');
      const inner = isClosed ? raw.slice(2, -2) : raw.slice(2);
      // Only horizontal whitespace is matched around `*`, so an empty ` *`
      // line remains an empty line and paragraph breaks are preserved.
      return inner.replace(/^[ \t]*\*[ \t]?/gm, '').trim();
    }
    // Line comments are not run through the `*` stripping, so Markdown
    // bullets such as `// * item` keep their marker.
    return raw.replace(/^[ \t]*\/\/+[ \t]?/gm, '').trim();
  }

  /**
   * Get the docstring/comment preceding a node.
   *
   * Walks backwards over `previousNamedSibling` for as long as the sibling's
   * type is `comment`, `line_comment`, `block_comment` or
   * `documentation_comment`, and stops at the first sibling of any other type.
   * The collected comments are cleaned of their comment markers and joined,
   * in source order, with `\n`.
   *
   * NOTE(review): adjacency is not checked, so comments separated from the
   * node by blank lines are included as well; confirm this is intended.
   *
   * @param node - Node whose leading comments are wanted.
   * @param source - Source text the comment nodes' indices refer to.
   * @returns The cleaned comment text, or `undefined` when the node is not
   *   directly preceded by a comment sibling.
   */
  export function getPrecedingDocstring(node: SyntaxNode, source: string): string | undefined {
    const comments: string[] = [];

    for (
      let sibling = node.previousNamedSibling;
      sibling && DOCSTRING_COMMENT_TYPES.has(sibling.type);
      sibling = sibling.previousNamedSibling
    ) {
      // Walking backwards, so prepend to keep source order.
      comments.unshift(getNodeText(sibling, source));
    }

    if (comments.length === 0) return undefined;

    return comments.map(stripCommentMarkers).join('\n').trim();
  }