grammars.ts 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /**
  2. * Grammar Loading and Caching
  3. *
  4. * Manages tree-sitter language grammars.
  5. */
  6. import Parser from 'tree-sitter';
  7. import { Language } from '../types';
  8. // Grammar module imports
  9. // eslint-disable-next-line @typescript-eslint/no-require-imports
  10. const TypeScript = require('tree-sitter-typescript').typescript;
  11. // eslint-disable-next-line @typescript-eslint/no-require-imports
  12. const TSX = require('tree-sitter-typescript').tsx;
  13. // eslint-disable-next-line @typescript-eslint/no-require-imports
  14. const JavaScript = require('tree-sitter-javascript');
  15. // eslint-disable-next-line @typescript-eslint/no-require-imports
  16. const Python = require('tree-sitter-python');
  17. // eslint-disable-next-line @typescript-eslint/no-require-imports
  18. const Go = require('tree-sitter-go');
  19. // eslint-disable-next-line @typescript-eslint/no-require-imports
  20. const Rust = require('tree-sitter-rust');
  21. // eslint-disable-next-line @typescript-eslint/no-require-imports
  22. const Java = require('tree-sitter-java');
  23. // eslint-disable-next-line @typescript-eslint/no-require-imports
  24. const C = require('tree-sitter-c');
  25. // eslint-disable-next-line @typescript-eslint/no-require-imports
  26. const Cpp = require('tree-sitter-cpp');
  27. // eslint-disable-next-line @typescript-eslint/no-require-imports
  28. const CSharp = require('tree-sitter-c-sharp');
  29. // eslint-disable-next-line @typescript-eslint/no-require-imports
  30. const PHP = require('tree-sitter-php').php;
  31. // eslint-disable-next-line @typescript-eslint/no-require-imports
  32. const Ruby = require('tree-sitter-ruby');
  33. // eslint-disable-next-line @typescript-eslint/no-require-imports
  34. const Swift = require('tree-sitter-swift');
  35. // eslint-disable-next-line @typescript-eslint/no-require-imports
  36. const Kotlin = require('tree-sitter-kotlin');
  37. // Note: tree-sitter-liquid has ABI compatibility issues with tree-sitter 0.22+
  38. // Liquid extraction is handled separately via regex in tree-sitter.ts
  /**
   * Tree-sitter grammar objects, keyed by language identifier.
   *
   * A language with no entry in this table has no tree-sitter parser;
   * Liquid is deliberately absent because it is extracted with regexes instead.
   */
  const GRAMMAR_MAP: Record<string, unknown> = {
    // TypeScript / JavaScript family
    typescript: TypeScript,
    tsx: TSX,
    javascript: JavaScript,
    jsx: JavaScript, // JSX is parsed with the JavaScript grammar

    // Other general-purpose languages
    python: Python,
    go: Go,
    rust: Rust,
    java: Java,

    // C family
    c: C,
    cpp: Cpp,
    csharp: CSharp,

    // Scripting and mobile languages
    php: PHP,
    ruby: Ruby,
    swift: Swift,
    kotlin: Kotlin,

    // liquid: intentionally omitted (custom regex-based extraction, not tree-sitter)
  };
  /**
   * Lookup table from a lower-case file extension (leading dot included)
   * to the language it is treated as.
   */
  export const EXTENSION_MAP: Record<string, Language> = {
    // TypeScript / JavaScript
    '.ts': 'typescript',
    '.tsx': 'tsx',
    '.js': 'javascript',
    '.mjs': 'javascript',
    '.cjs': 'javascript',
    '.jsx': 'jsx',

    // Python
    '.py': 'python',
    '.pyw': 'python',

    // Go, Rust, Java
    '.go': 'go',
    '.rs': 'rust',
    '.java': 'java',

    // C
    '.c': 'c',
    '.h': 'c', // ambiguous: a .h header may also be C++, C is the default

    // C++
    '.cpp': 'cpp',
    '.cc': 'cpp',
    '.cxx': 'cpp',
    '.hpp': 'cpp',
    '.hxx': 'cpp',

    // C#, PHP
    '.cs': 'csharp',
    '.php': 'php',

    // Ruby
    '.rb': 'ruby',
    '.rake': 'ruby',

    // Swift, Kotlin
    '.swift': 'swift',
    '.kt': 'kotlin',
    '.kts': 'kotlin',

    // Liquid templates
    '.liquid': 'liquid',
  };
  /**
   * Parsers that have already been created, keyed by language,
   * so each language's parser is built at most once until the cache is cleared.
   */
  const parserCache: Map<Language, Parser> = new Map();
  /**
   * Return a tree-sitter parser configured for `language`.
   *
   * Parsers are created on first request and stored in `parserCache`, so
   * repeated calls for the same language hand back the same instance.
   *
   * @param language - Language whose grammar should be loaded.
   * @returns The parser, or `null` when `GRAMMAR_MAP` holds no grammar for
   *   the language.
   */
  export function getParser(language: Language): Parser | null {
    // Fast path: reuse a parser built by an earlier call.
    const cached = parserCache.get(language);
    if (cached !== undefined) {
      return cached;
    }

    const grammar = GRAMMAR_MAP[language];
    if (grammar) {
      const created = new Parser();
      // Configure before caching: if setLanguage throws, nothing is stored.
      created.setLanguage(grammar as Parameters<Parser['setLanguage']>[0]);
      parserCache.set(language, created);
      return created;
    }

    // No grammar registered for this language.
    return null;
  }
  /**
   * Detect the language of a file from its extension.
   *
   * The extension is everything from the last `.` in `filePath` to the end of
   * the string, lower-cased and looked up in `EXTENSION_MAP`.
   *
   * @param filePath - File name or path to inspect.
   * @returns The mapped language, or `'unknown'` when the path contains no `.`
   *   or the extension is not registered.
   */
  export function detectLanguage(filePath: string): Language {
    const dotIndex = filePath.lastIndexOf('.');
    // Without a dot there is no extension. Bail out explicitly: substring(-1)
    // would yield the whole path, which could match inherited object keys
    // such as 'constructor'.
    if (dotIndex === -1) {
      return 'unknown';
    }

    const ext = filePath.substring(dotIndex).toLowerCase();
    // Only own entries of the table count; Object.prototype members never do.
    const mapped = Object.prototype.hasOwnProperty.call(EXTENSION_MAP, ext)
      ? EXTENSION_MAP[ext]
      : undefined;
    return mapped || 'unknown';
  }
  /**
   * Check whether a language is supported.
   *
   * A language is supported when it is `'liquid'` (handled by custom
   * regex-based extraction rather than tree-sitter) or when `GRAMMAR_MAP`
   * has its own entry for it. `'unknown'` is never supported.
   *
   * @param language - Language identifier to test.
   * @returns `true` when the language is supported, otherwise `false`.
   */
  export function isLanguageSupported(language: Language): boolean {
    // Liquid uses custom regex-based extraction, not tree-sitter
    if (language === 'liquid') return true;
    if (language === 'unknown') return false;
    // Own-property check: the `in` operator would also accept names inherited
    // from Object.prototype (e.g. 'constructor') when given an untyped string.
    return Object.prototype.hasOwnProperty.call(GRAMMAR_MAP, language);
  }
  /**
   * List every supported language.
   *
   * @returns A new array holding the keys of `GRAMMAR_MAP` in declaration
   *   order, followed by `'liquid'`.
   */
  export function getSupportedLanguages(): Language[] {
    // Liquid has no grammar entry (custom extraction), so it is appended explicitly.
    return [...(Object.keys(GRAMMAR_MAP) as Language[]), 'liquid'];
  }
  /**
   * Drop every cached parser so the next `getParser` call for a language
   * builds a new instance. Mainly useful for testing.
   */
  export function clearParserCache(): void {
    parserCache.clear();
  }
  /**
   * Convert a language identifier into a human-readable label.
   *
   * @param language - Language identifier to describe.
   * @returns The display label, or the identifier itself when the table has
   *   no label for it.
   */
  export function getLanguageDisplayName(language: Language): string {
    const displayNames: Record<Language, string> = {
      // TypeScript / JavaScript family
      typescript: 'TypeScript',
      tsx: 'TypeScript (TSX)',
      javascript: 'JavaScript',
      jsx: 'JavaScript (JSX)',
      // Other tree-sitter backed languages
      python: 'Python',
      go: 'Go',
      rust: 'Rust',
      java: 'Java',
      c: 'C',
      cpp: 'C++',
      csharp: 'C#',
      php: 'PHP',
      ruby: 'Ruby',
      swift: 'Swift',
      kotlin: 'Kotlin',
      // Special cases
      liquid: 'Liquid',
      unknown: 'Unknown',
    };

    const label = displayNames[language];
    return label ? label : language;
  }