project-config.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /**
  2. * Project-scoped configuration: a committed `codegraph.json` at the project
  3. * root that a team shares through version control.
  4. *
  5. * Today it carries one thing — `extensions`, an opt-in map from a custom file
  6. * extension to one of CodeGraph's supported languages. The built-in
  7. * extension → language table (`EXTENSION_MAP` in `extraction/grammars.ts`) is
  8. * otherwise hardcoded, so a codebase that uses a non-standard extension for a
  9. * supported language (e.g. `.dota_lua` for Lua) sees those files silently
  10. * skipped. This lets the project map them once, in a version-controlled file:
  11. *
  12. * {
  13. * "extensions": {
  14. * ".dota_lua": "lua",
  15. * ".tpl": "php"
  16. * }
  17. * }
  18. *
  19. * User mappings merge on TOP of the built-ins and win on conflict, so a project
  20. * can also re-point a built-in extension (e.g. force `.h` → `cpp`). Absent or
  21. * malformed config is the zero-config default — no overrides, no error. Invalid
  22. * individual entries are warned-and-skipped (never fatal): an unparseable
  23. * project file must not break indexing.
  24. */
  25. import * as fs from 'fs';
  26. import * as path from 'path';
  27. import { Language } from './types';
  28. import { isLanguageSupported } from './extraction/grammars';
  29. import { logWarn } from './errors';
  30. /** Filename of the project-scoped config, resolved relative to the project root. */
  31. export const PROJECT_CONFIG_FILENAME = 'codegraph.json';
  32. export interface ProjectConfig {
  33. /** Map of custom file extension (`.foo`) to a supported language id. */
  34. extensions?: Record<string, string>;
  35. /**
  36. * Gitignore-style patterns naming gitignored directories whose embedded git
  37. * repositories should be indexed anyway — the explicit opt-in to override
  38. * `.gitignore` for nested-repo discovery (#622, #699). Absent/empty (the
  39. * default) means `.gitignore` is fully respected: gitignored embedded repos
  40. * are never discovered or indexed (#970, #976).
  41. */
  42. includeIgnored?: string[];
  43. /**
  44. * Gitignore-style patterns for paths to keep OUT of the index — even when
  45. * they are git-TRACKED, which `.gitignore` cannot do (#999). The escape hatch
  46. * for a committed vendor/theme/SDK directory (e.g. a checked-in Metronic theme
  47. * under `static/`) that bloats the graph and slows indexing but isn't really
  48. * your code. Matched against project-root-relative paths, so a directory like
  49. * `"static/"`, a double-star vendor glob, or `"assets/theme"` all work.
  50. * Absent/empty (the default) excludes nothing beyond the built-in defaults
  51. * and your `.gitignore`.
  52. */
  53. exclude?: string[];
  54. }
  55. /** Parsed, validated view of a project's `codegraph.json`. */
  56. interface ParsedConfig {
  57. extensions: Record<string, Language>;
  58. includeIgnored: string[];
  59. exclude: string[];
  60. }
  61. interface CacheEntry {
  62. mtimeMs: number;
  63. config: ParsedConfig;
  64. }
  65. /**
  66. * Cache keyed by project root. The loader is called once per indexing/scan/sync
  67. * operation (and per watch event), so the mtime guard keeps repeat calls to one
  68. * `stat` while a single `codegraph.json` is in force. Keying by root keeps two
  69. * projects in the same process (the daemon / multi-project MCP server) isolated.
  70. */
  71. const cache = new Map<string, CacheEntry>();
  72. /** Shared frozen empties so the no-config path allocates nothing. */
  73. const EMPTY_EXTENSIONS: Record<string, Language> = Object.freeze({});
  74. const EMPTY_CONFIG: ParsedConfig = Object.freeze({
  75. extensions: EMPTY_EXTENSIONS,
  76. includeIgnored: Object.freeze([]) as unknown as string[],
  77. exclude: Object.freeze([]) as unknown as string[],
  78. });
  79. /**
  80. * Normalize a user-provided extension key to the `.ext` lowercase form used by
  81. * the built-in map. Returns null for keys that can never match a real file
  82. * extension (so the caller warns and skips):
  83. * - empty / just "."
  84. * - multi-part (".d.ts") — language detection keys off the FINAL extension
  85. * only (`lastIndexOf('.')`), so a multi-dot key would never be consulted.
  86. * - anything containing a path separator.
  87. */
  88. function normalizeExtKey(raw: string): string | null {
  89. if (typeof raw !== 'string') return null;
  90. let ext = raw.trim().toLowerCase();
  91. if (!ext) return null;
  92. if (!ext.startsWith('.')) ext = '.' + ext;
  93. const body = ext.slice(1);
  94. if (!body) return null;
  95. if (body.includes('.') || body.includes('/') || body.includes('\\')) return null;
  96. return ext;
  97. }
  98. /**
  99. * Read + JSON-parse a `codegraph.json` once and return its validated view.
  100. * Every failure mode degrades to the zero-config default — a missing file, bad
  101. * JSON, or a typo'd value never throws.
  102. */
  103. function parseConfig(file: string): ParsedConfig {
  104. let raw: string;
  105. try {
  106. raw = fs.readFileSync(file, 'utf-8');
  107. } catch {
  108. return EMPTY_CONFIG;
  109. }
  110. let parsed: unknown;
  111. try {
  112. parsed = JSON.parse(raw);
  113. } catch (err) {
  114. logWarn(`Ignoring ${PROJECT_CONFIG_FILENAME}: not valid JSON`, {
  115. file,
  116. error: err instanceof Error ? err.message : String(err),
  117. });
  118. return EMPTY_CONFIG;
  119. }
  120. if (!parsed || typeof parsed !== 'object') return EMPTY_CONFIG;
  121. const extensions = extractExtensions(parsed, file);
  122. const includeIgnored = extractIncludeIgnored(parsed, file);
  123. const exclude = extractExclude(parsed, file);
  124. if (extensions === EMPTY_EXTENSIONS && includeIgnored.length === 0 && exclude.length === 0) {
  125. return EMPTY_CONFIG;
  126. }
  127. return { extensions, includeIgnored, exclude };
  128. }
  129. /**
  130. * Validate the `extensions` map. Every failure mode degrades to "no overrides
  131. * from this entry" — a bad value or a typo'd language never throws.
  132. */
  133. function extractExtensions(parsed: object, file: string): Record<string, Language> {
  134. const exts = (parsed as ProjectConfig).extensions;
  135. if (!exts || typeof exts !== 'object' || Array.isArray(exts)) return EMPTY_EXTENSIONS;
  136. const out: Record<string, Language> = {};
  137. for (const [rawKey, rawVal] of Object.entries(exts)) {
  138. const key = normalizeExtKey(rawKey);
  139. if (!key) {
  140. logWarn(`Ignoring extension mapping in ${PROJECT_CONFIG_FILENAME}: "${rawKey}" is not a valid file extension`, { file });
  141. continue;
  142. }
  143. if (typeof rawVal !== 'string' || !isLanguageSupported(rawVal as Language)) {
  144. logWarn(`Ignoring extension "${rawKey}" in ${PROJECT_CONFIG_FILENAME}: "${String(rawVal)}" is not a supported language`, { file });
  145. continue;
  146. }
  147. out[key] = rawVal as Language;
  148. }
  149. return Object.keys(out).length > 0 ? out : EMPTY_EXTENSIONS;
  150. }
  151. /**
  152. * Validate the `includeIgnored` patterns: an array of non-empty gitignore-style
  153. * strings. A non-array value or a non-string/blank entry warns-and-skips; never
  154. * throws. Patterns are kept verbatim (trimmed) so they match exactly as a
  155. * `.gitignore` line would.
  156. */
  157. function extractIncludeIgnored(parsed: object, file: string): string[] {
  158. const raw = (parsed as ProjectConfig).includeIgnored;
  159. if (raw === undefined) return [];
  160. if (!Array.isArray(raw)) {
  161. logWarn(`Ignoring "includeIgnored" in ${PROJECT_CONFIG_FILENAME}: must be an array of gitignore-style patterns`, { file });
  162. return [];
  163. }
  164. const out: string[] = [];
  165. for (const entry of raw) {
  166. if (typeof entry !== 'string' || !entry.trim()) {
  167. logWarn(`Ignoring an "includeIgnored" entry in ${PROJECT_CONFIG_FILENAME}: every pattern must be a non-empty string`, { file });
  168. continue;
  169. }
  170. out.push(entry.trim());
  171. }
  172. return out;
  173. }
  174. /**
  175. * Validate the `exclude` patterns: an array of non-empty gitignore-style
  176. * strings naming paths to keep out of the index even when git-tracked (#999). A
  177. * non-array value or a non-string/blank entry warns-and-skips; never throws.
  178. * Patterns are kept verbatim (trimmed) so they match exactly as a `.gitignore`
  179. * line would, against project-root-relative paths.
  180. */
  181. function extractExclude(parsed: object, file: string): string[] {
  182. const raw = (parsed as ProjectConfig).exclude;
  183. if (raw === undefined) return [];
  184. if (!Array.isArray(raw)) {
  185. logWarn(`Ignoring "exclude" in ${PROJECT_CONFIG_FILENAME}: must be an array of gitignore-style patterns`, { file });
  186. return [];
  187. }
  188. const out: string[] = [];
  189. for (const entry of raw) {
  190. if (typeof entry !== 'string' || !entry.trim()) {
  191. logWarn(`Ignoring an "exclude" entry in ${PROJECT_CONFIG_FILENAME}: every pattern must be a non-empty string`, { file });
  192. continue;
  193. }
  194. out.push(entry.trim());
  195. }
  196. return out;
  197. }
  198. /**
  199. * Load the parsed `codegraph.json` for a project, mtime-cached. A missing or
  200. * malformed file yields the zero-config default. One `stat` (and at most one
  201. * read/parse) while a single config file is in force, shared across every field.
  202. */
  203. function loadParsedConfig(rootDir: string): ParsedConfig {
  204. const file = path.join(rootDir, PROJECT_CONFIG_FILENAME);
  205. let mtimeMs: number;
  206. try {
  207. mtimeMs = fs.statSync(file).mtimeMs;
  208. } catch {
  209. // No config file — drop any stale cache entry and return the default.
  210. cache.delete(rootDir);
  211. return EMPTY_CONFIG;
  212. }
  213. const entry = cache.get(rootDir);
  214. if (entry && entry.mtimeMs === mtimeMs) return entry.config;
  215. const config = parseConfig(file);
  216. cache.set(rootDir, { mtimeMs, config });
  217. return config;
  218. }
  219. /**
  220. * Load the validated extension overrides for a project, mtime-cached.
  221. *
  222. * Returns a map of `.ext` → supported language id. The result merges on top of
  223. * the built-in extension map at the point of use (see `detectLanguage` /
  224. * `isSourceFile`), with these user mappings taking precedence. Returns an empty
  225. * map when there is no `codegraph.json` (the zero-config default).
  226. */
  227. export function loadExtensionOverrides(rootDir: string): Record<string, Language> {
  228. return loadParsedConfig(rootDir).extensions;
  229. }
  230. /**
  231. * Load the validated `includeIgnored` patterns for a project, mtime-cached.
  232. *
  233. * These name gitignored directories whose embedded git repositories should be
  234. * indexed despite `.gitignore` (#622, #699). An empty result — the zero-config
  235. * default — means `.gitignore` is fully respected: gitignored embedded repos
  236. * are never discovered or indexed (#970, #976).
  237. */
  238. export function loadIncludeIgnoredPatterns(rootDir: string): string[] {
  239. return loadParsedConfig(rootDir).includeIgnored;
  240. }
  241. /**
  242. * Load the validated `exclude` patterns for a project, mtime-cached.
  243. *
  244. * These name paths to keep OUT of the index even when git-tracked — the escape
  245. * hatch for a committed vendor/theme/SDK directory `.gitignore` can't drop
  246. * (#999). An empty result — the zero-config default — excludes nothing beyond
  247. * the built-in defaults and the project's `.gitignore`.
  248. */
  249. export function loadExcludePatterns(rootDir: string): string[] {
  250. return loadParsedConfig(rootDir).exclude;
  251. }
  252. /** Test/maintenance hook: forget cached config (e.g. after rewriting it in a test). */
  253. export function clearProjectConfigCache(): void {
  254. cache.clear();
  255. }