/**
 * Grammar Loading and Caching
 *
 * Uses web-tree-sitter (WASM) for universal cross-platform support.
 * Grammars are loaded lazily — only languages actually present in the project
 * are compiled, keeping V8 WASM memory pressure low on large codebases.
 */

import * as path from 'path';
import { Parser, Language as WasmLanguage } from 'web-tree-sitter';
import { Language } from '../types';

/**
 * Languages that have a tree-sitter WASM grammar: exactly the keys of
 * WASM_GRAMMAR_FILES.
 *
 * NOTE(review): this alias was written as an `Exclude` over `Language`, but its
 * type arguments were not recoverable. It is derived from the grammar table
 * instead, which yields the same key set the table's annotation enforced —
 * confirm against the canonical source.
 */
export type GrammarLanguage = keyof typeof WASM_GRAMMAR_FILES;

/**
 * WASM filename map — maps each language to its .wasm grammar file
 * in the tree-sitter-wasms package.
 *
 * `jsx` deliberately reuses the JavaScript grammar file.
 */
const WASM_GRAMMAR_FILES = {
  typescript: 'tree-sitter-typescript.wasm',
  tsx: 'tree-sitter-tsx.wasm',
  javascript: 'tree-sitter-javascript.wasm',
  jsx: 'tree-sitter-javascript.wasm',
  python: 'tree-sitter-python.wasm',
  go: 'tree-sitter-go.wasm',
  rust: 'tree-sitter-rust.wasm',
  java: 'tree-sitter-java.wasm',
  c: 'tree-sitter-c.wasm',
  cpp: 'tree-sitter-cpp.wasm',
  csharp: 'tree-sitter-c_sharp.wasm',
  php: 'tree-sitter-php.wasm',
  ruby: 'tree-sitter-ruby.wasm',
  swift: 'tree-sitter-swift.wasm',
  kotlin: 'tree-sitter-kotlin.wasm',
  dart: 'tree-sitter-dart.wasm',
  pascal: 'tree-sitter-pascal.wasm',
  scala: 'tree-sitter-scala.wasm',
  lua: 'tree-sitter-lua.wasm',
  r: 'tree-sitter-r.wasm',
  luau: 'tree-sitter-luau.wasm',
  objc: 'tree-sitter-objc.wasm',
};

/**
 * File extension to Language mapping.
 *
 * Keys are lower-case extensions including the leading dot; isSourceFile()
 * lower-cases the extension before looking it up here.
 */
export const EXTENSION_MAP: Record<string, Language> = {
  '.ts': 'typescript',
  '.tsx': 'tsx',
  // ESM/CJS TypeScript module extensions — parsed as TS (no JSX). (#366)
  '.mts': 'typescript',
  '.cts': 'typescript',
  '.js': 'javascript',
  '.mjs': 'javascript',
  '.cjs': 'javascript',
  // SAP HANA XS Classic server-side JavaScript. (#556)
  '.xsjs': 'javascript',
  '.xsjslib': 'javascript',
  '.jsx': 'jsx',
  '.py': 'python',
  '.pyw': 'python',
  '.go': 'go',
  '.rs': 'rust',
  '.java': 'java',
  '.c': 'c',
  '.h': 'c', // Could also be C++, defaulting to C
  '.cpp': 'cpp',
  '.cc': 'cpp',
  '.cxx': 'cpp',
  '.hpp': 'cpp',
  '.hxx': 'cpp',
  '.cs': 'csharp',
  // ASP.NET Razor / Blazor markup — custom RazorExtractor (links @model/@inject/
  // component tags to their C# types; markup isn't a tree-sitter grammar).
  '.cshtml': 'razor',
  '.razor': 'razor',
  '.php': 'php',
  // Drupal-specific PHP file extensions
  '.module': 'php',
  '.install': 'php',
  '.theme': 'php',
  '.inc': 'php',
  // YAML (used for Drupal routing files; no symbol extraction, file-level tracking only)
  '.yml': 'yaml',
  '.yaml': 'yaml',
  // Twig templates (file-level tracking only, no symbol extraction)
  '.twig': 'twig',
  '.rb': 'ruby',
  '.rake': 'ruby',
  '.swift': 'swift',
  '.kt': 'kotlin',
  '.kts': 'kotlin',
  '.dart': 'dart',
  '.liquid': 'liquid',
  '.svelte': 'svelte',
  '.vue': 'vue',
  '.astro': 'astro',
  '.r': 'r',
  '.pas': 'pascal',
  '.dpr': 'pascal',
  '.dpk': 'pascal',
  '.lpr': 'pascal',
  '.dfm': 'pascal',
  '.fmx': 'pascal',
  '.scala': 'scala',
  '.sc': 'scala',
  '.lua': 'lua',
  '.luau': 'luau',
  '.m': 'objc',
  '.mm': 'objc',
  // XML: file-level tracking; the MyBatis extractor matches mapper-shaped
  // documents and emits SQL-statement nodes (other XML returns empty).
  // NOTE(review): the exact tag named in the original comment was lost.
  '.xml': 'xml',
  // Spring config: `application.properties` / `application-*.properties`. Same
  // shape as the `.yml` variants — the YAML/properties extractor emits one node
  // per leaf key, and the Spring resolver links `@Value("${k}")` references.
  '.properties': 'properties',
};

/**
 * Whether a file is one CodeGraph can parse, based purely on its path.
 * This is the single source of truth for "should we index this file" — derived
 * from EXTENSION_MAP so parser support and indexing selection never drift.
 *
 * @param filePath - Path of the candidate file. The extension is everything
 *   from the last `.` in the whole string, compared case-insensitively.
 * @param overrides - The project's validated custom extension → language map
 *   (from `codegraph.json`). Only its keys are consulted here; when present,
 *   its extensions count as indexable in addition to the built-ins. Omitting
 *   it gives the zero-config behavior.
 * @returns `true` for Play routes files, Shopify JSON templates / section
 *   groups, and any path whose extension is a key of EXTENSION_MAP or of
 *   `overrides`; `false` otherwise, including paths that contain no `.`.
 */
export function isSourceFile(
  filePath: string,
  overrides?: Readonly<Record<string, unknown>>
): boolean {
  if (isPlayRoutesFile(filePath)) return true; // Play `conf/routes` is extensionless
  if (isShopifyLiquidJson(filePath)) return true; // Shopify OS 2.0 JSON templates / section groups

  const dot = filePath.lastIndexOf('.');
  if (dot < 0) return false;

  const ext = filePath.slice(dot).toLowerCase();
  return ext in EXTENSION_MAP || (!!overrides && ext in overrides);
}

/**
 * Shopify OS 2.0 JSON template (`templates/*.json`) or section group
 * (`sections/*.json`) — these reference sections by `"type"`, so the Liquid
 * extractor links them. (config/ + locales/ JSON have no section refs.)
 *
 * @param filePath - Path using `/` as the separator; the match is
 *   case-insensitive.
 * @returns `true` when a `.json` file sits anywhere below a `templates/` or
 *   `sections/` path segment.
 */
export function isShopifyLiquidJson(filePath: string): boolean {
  // Allow nested template dirs (`templates/customers/login.json`), not just
  // top-level (`templates/product.json`).
  return /(^|\/)(templates|sections)\/.+\.json$/i.test(filePath);
}

/**
 * Play Framework routes file: the extensionless `conf/routes` (and included
 * `conf/*.routes`). No grammar — route extraction is done by the Play framework
 * resolver, so it's processed through the no-grammar (`yaml`-style) path.
 *
 * @param filePath - Path using `/` as the separator; the comparison is
 *   case-sensitive.
 * @returns `true` for `conf/routes` (bare or as a path suffix) and for any
 *   path ending in `.routes`, whichever directory it is in.
 */
export function isPlayRoutesFile(filePath: string): boolean {
  return (
    filePath === 'conf/routes' ||
    filePath.endsWith('/conf/routes') ||
    filePath.endsWith('.routes')
  );
}

/**
 * Caches for loaded grammars and parsers
 *
 * NOTE(review): the type arguments of these maps were not recoverable, so the
 * constructors are left exactly as written — restore the key/value types from
 * the canonical source.
 */
const parserCache = new Map();
const languageCache = new Map();
const unavailableGrammarErrors = new Map();

/** Set once Parser.init() has resolved; guards initGrammars() against re-running. */
let parserInitialized = false;

/**
 * Initialize the tree-sitter WASM runtime. Must be called before loading grammars.
 * Does NOT load any grammar WASM files — use loadGrammarsForLanguages() for that.
 * Idempotent — safe to call multiple times.
 *
 * @returns A promise that resolves once the runtime is ready.
 */
export async function initGrammars(): Promise<void> {
  if (parserInitialized) return;
  await Parser.init();
  parserInitialized = true;
}

/**
 * Load grammar WASM files for specific languages only.
 * Skips languages that are already loaded or have no WASM grammar.
 * Must be called after initGrammars().
 *
 * NOTE(review): the return type's argument was lost; `void` is assumed —
 * confirm against the canonical source.
 */
export async function loadGrammarsForLanguages(languages: Language[]): Promise<void> {
  if (!parserInitialized) {
    await initGrammars();
  }

  // SFC languages (svelte/vue/astro) have no grammar of their own — their
  // extractors delegate