|
|
@@ -1,73 +1,88 @@
|
|
|
/**
|
|
|
* Grammar Loading and Caching
|
|
|
*
|
|
|
- * Manages tree-sitter language grammars.
|
|
|
+ * Uses lazy per-language loading so one missing native grammar does not
|
|
|
+ * break extraction for all other languages.
|
|
|
*/
|
|
|
|
|
|
import Parser from 'tree-sitter';
|
|
|
import { Language } from '../types';
|
|
|
|
|
|
-// Grammar module imports — wrapped in tryRequire so a missing native binding
|
|
|
-// (e.g. tree-sitter-kotlin on Windows) degrades gracefully instead of crashing.
|
|
|
-// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
-function tryRequire(id: string, prop?: string): unknown | null {
|
|
|
- try {
|
|
|
- // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
- const mod = require(id);
|
|
|
- return prop ? mod[prop] : mod;
|
|
|
- } catch {
|
|
|
- console.warn(`[CodeGraph] Failed to load ${id} — ${prop ?? id} parsing will be unavailable on this platform.`);
|
|
|
- return null;
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-const TypeScript = tryRequire('tree-sitter-typescript', 'typescript');
|
|
|
-const TSX = tryRequire('tree-sitter-typescript', 'tsx');
|
|
|
-const JavaScript = tryRequire('tree-sitter-javascript');
|
|
|
-const Python = tryRequire('tree-sitter-python');
|
|
|
-const Go = tryRequire('tree-sitter-go');
|
|
|
-const Rust = tryRequire('tree-sitter-rust');
|
|
|
-const Java = tryRequire('tree-sitter-java');
|
|
|
-const C = tryRequire('tree-sitter-c');
|
|
|
-const Cpp = tryRequire('tree-sitter-cpp');
|
|
|
-const CSharp = tryRequire('tree-sitter-c-sharp');
|
|
|
-const PHP = tryRequire('tree-sitter-php', 'php');
|
|
|
-const Ruby = tryRequire('tree-sitter-ruby');
|
|
|
-const Swift = tryRequire('tree-sitter-swift');
|
|
|
-const Kotlin = tryRequire('tree-sitter-kotlin');
|
|
|
-const Dart = tryRequire('@sengac/tree-sitter-dart');
|
|
|
-// Note: tree-sitter-liquid has ABI compatibility issues with tree-sitter 0.22+
|
|
|
-// Liquid extraction is handled separately via regex in tree-sitter.ts
|
|
|
+type GrammarLoader = () => unknown;
|
|
|
+type GrammarLanguage = Exclude<Language, 'liquid' | 'unknown'>;
|
|
|
|
|
|
/**
|
|
|
- * Mapping of Language to tree-sitter grammar.
|
|
|
- * Parsers that failed to load are excluded.
|
|
|
+ * Lazy grammar loaders — each language's native binding is only loaded
|
|
|
+ * on first use, so a failure in one grammar doesn't affect others.
|
|
|
*/
|
|
|
-const GRAMMAR_MAP: Record<string, unknown> = {};
|
|
|
-
|
|
|
-const grammarEntries: [string, unknown][] = [
|
|
|
- ['typescript', TypeScript],
|
|
|
- ['tsx', TSX],
|
|
|
- ['javascript', JavaScript],
|
|
|
- ['jsx', JavaScript], // JSX uses the JavaScript grammar
|
|
|
- ['python', Python],
|
|
|
- ['go', Go],
|
|
|
- ['rust', Rust],
|
|
|
- ['java', Java],
|
|
|
- ['c', C],
|
|
|
- ['cpp', Cpp],
|
|
|
- ['csharp', CSharp],
|
|
|
- ['php', PHP],
|
|
|
- ['ruby', Ruby],
|
|
|
- ['swift', Swift],
|
|
|
- ['kotlin', Kotlin],
|
|
|
- ['dart', Dart],
|
|
|
- // liquid: uses custom regex-based extraction, not tree-sitter
|
|
|
-];
|
|
|
-
|
|
|
-for (const [lang, grammar] of grammarEntries) {
|
|
|
- if (grammar) GRAMMAR_MAP[lang] = grammar;
|
|
|
-}
|
|
|
+const grammarLoaders: Record<GrammarLanguage, GrammarLoader> = {
|
|
|
+ typescript: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-typescript').typescript;
|
|
|
+ },
|
|
|
+ tsx: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-typescript').tsx;
|
|
|
+ },
|
|
|
+ javascript: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-javascript');
|
|
|
+ },
|
|
|
+ jsx: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-javascript');
|
|
|
+ },
|
|
|
+ python: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-python');
|
|
|
+ },
|
|
|
+ go: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-go');
|
|
|
+ },
|
|
|
+ rust: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-rust');
|
|
|
+ },
|
|
|
+ java: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-java');
|
|
|
+ },
|
|
|
+ c: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-c');
|
|
|
+ },
|
|
|
+ cpp: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-cpp');
|
|
|
+ },
|
|
|
+ csharp: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-c-sharp');
|
|
|
+ },
|
|
|
+ php: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-php').php;
|
|
|
+ },
|
|
|
+ ruby: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-ruby');
|
|
|
+ },
|
|
|
+ swift: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-swift');
|
|
|
+ },
|
|
|
+ kotlin: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('tree-sitter-kotlin');
|
|
|
+ },
|
|
|
+ dart: () => {
|
|
|
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
|
+ return require('@sengac/tree-sitter-dart');
|
|
|
+ },
|
|
|
+ // Note: tree-sitter-liquid has ABI compatibility issues with tree-sitter 0.22+
|
|
|
+ // Liquid extraction is handled separately via regex in tree-sitter.ts
|
|
|
+};
|
|
|
|
|
|
/**
|
|
|
* File extension to Language mapping
|
|
|
@@ -103,30 +118,59 @@ export const EXTENSION_MAP: Record<string, Language> = {
|
|
|
};
|
|
|
|
|
|
/**
|
|
|
- * Cache for initialized parsers
|
|
|
+ * Caches for loaded grammars and parsers
|
|
|
*/
|
|
|
const parserCache = new Map<Language, Parser>();
|
|
|
+const grammarCache = new Map<Language, unknown | null>();
|
|
|
+const unavailableGrammarErrors = new Map<Language, string>();
|
|
|
+
|
|
|
+/**
|
|
|
+ * Load a grammar on demand, caching the result.
|
|
|
+ * Returns null if the grammar is not available on this platform.
|
|
|
+ */
|
|
|
+function loadGrammar(language: Language): unknown | null {
|
|
|
+ if (grammarCache.has(language)) {
|
|
|
+ return grammarCache.get(language) ?? null;
|
|
|
+ }
|
|
|
+
|
|
|
+ const loader = grammarLoaders[language as GrammarLanguage];
|
|
|
+ if (!loader) {
|
|
|
+ grammarCache.set(language, null);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ const grammar = loader();
|
|
|
+ if (!grammar) {
|
|
|
+ throw new Error(`Grammar loader returned empty value for ${language}`);
|
|
|
+ }
|
|
|
+ grammarCache.set(language, grammar);
|
|
|
+ return grammar;
|
|
|
+ } catch (error) {
|
|
|
+ const message = error instanceof Error ? error.message : String(error);
|
|
|
+ console.warn(`[CodeGraph] Failed to load ${language} grammar — parsing will be unavailable: ${message}`);
|
|
|
+ unavailableGrammarErrors.set(language, message);
|
|
|
+ grammarCache.set(language, null);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
/**
|
|
|
* Get a parser for the specified language
|
|
|
*/
|
|
|
export function getParser(language: Language): Parser | null {
|
|
|
- // Check cache first
|
|
|
if (parserCache.has(language)) {
|
|
|
return parserCache.get(language)!;
|
|
|
}
|
|
|
|
|
|
- // Get grammar for language
|
|
|
- const grammar = GRAMMAR_MAP[language];
|
|
|
+ const grammar = loadGrammar(language);
|
|
|
if (!grammar) {
|
|
|
return null;
|
|
|
}
|
|
|
|
|
|
- // Create and cache parser
|
|
|
const parser = new Parser();
|
|
|
parser.setLanguage(grammar as Parameters<typeof parser.setLanguage>[0]);
|
|
|
parserCache.set(language, parser);
|
|
|
-
|
|
|
return parser;
|
|
|
}
|
|
|
|
|
|
@@ -139,29 +183,41 @@ export function detectLanguage(filePath: string): Language {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Check if a language is supported
|
|
|
+ * Check if a language is supported by currently available parsers.
|
|
|
*/
|
|
|
export function isLanguageSupported(language: Language): boolean {
|
|
|
- // Liquid uses custom regex-based extraction, not tree-sitter
|
|
|
- if (language === 'liquid') return true;
|
|
|
- return language !== 'unknown' && language in GRAMMAR_MAP;
|
|
|
+ if (language === 'liquid') return true; // custom regex extractor
|
|
|
+ if (language === 'unknown') return false;
|
|
|
+ return loadGrammar(language) !== null;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Get all supported languages
|
|
|
+ * Get all currently supported languages.
|
|
|
*/
|
|
|
export function getSupportedLanguages(): Language[] {
|
|
|
- const languages = Object.keys(GRAMMAR_MAP) as Language[];
|
|
|
- // Add Liquid which uses custom extraction
|
|
|
- languages.push('liquid');
|
|
|
- return languages;
|
|
|
+ const available = (Object.keys(grammarLoaders) as GrammarLanguage[])
|
|
|
+ .filter((language) => loadGrammar(language) !== null);
|
|
|
+ return [...available, 'liquid'];
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Clear the parser cache (useful for testing)
|
|
|
+ * Clear parser/grammar caches (useful for testing)
|
|
|
*/
|
|
|
export function clearParserCache(): void {
|
|
|
parserCache.clear();
|
|
|
+ grammarCache.clear();
|
|
|
+ unavailableGrammarErrors.clear();
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Report grammars that failed to load.
|
|
|
+ */
|
|
|
+export function getUnavailableGrammarErrors(): Partial<Record<Language, string>> {
|
|
|
+ const out: Partial<Record<Language, string>> = {};
|
|
|
+ for (const [language, message] of unavailableGrammarErrors.entries()) {
|
|
|
+ out[language] = message;
|
|
|
+ }
|
|
|
+ return out;
|
|
|
}
|
|
|
|
|
|
/**
|