project-config.ts 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
/**
 * Project-scoped configuration: a committed `codegraph.json` at the project
 * root that a team shares through version control.
 *
 * Today it carries two things. The first is `extensions`, an opt-in map from a
 * custom file extension to one of CodeGraph's supported languages. The built-in
 * extension → language table (`EXTENSION_MAP` in `extraction/grammars.ts`) is
 * otherwise hardcoded, so a codebase that uses a non-standard extension for a
 * supported language (e.g. `.dota_lua` for Lua) sees those files silently
 * skipped. This lets the project map them once, in a version-controlled file:
 *
 *   {
 *     "extensions": {
 *       ".dota_lua": "lua",
 *       ".tpl": "php"
 *     }
 *   }
 *
 * User mappings merge on TOP of the built-ins and win on conflict, so a project
 * can also re-point a built-in extension (e.g. force `.h` → `cpp`). The second
 * is `includeIgnored`, a list of gitignore-style patterns (see `ProjectConfig`).
 *
 * Absent or malformed config is the zero-config default — no overrides, no
 * error. Invalid individual entries are warned-and-skipped (never fatal): an
 * unparseable project file must not break indexing.
 */
  25. import * as fs from 'fs';
  26. import * as path from 'path';
  27. import { Language } from './types';
  28. import { isLanguageSupported } from './extraction/grammars';
  29. import { logWarn } from './errors';
  30. /** Filename of the project-scoped config, resolved relative to the project root. */
  31. export const PROJECT_CONFIG_FILENAME = 'codegraph.json';
  32. export interface ProjectConfig {
  33. /** Map of custom file extension (`.foo`) to a supported language id. */
  34. extensions?: Record<string, string>;
  35. /**
  36. * Gitignore-style patterns naming gitignored directories whose embedded git
  37. * repositories should be indexed anyway — the explicit opt-in to override
  38. * `.gitignore` for nested-repo discovery (#622, #699). Absent/empty (the
  39. * default) means `.gitignore` is fully respected: gitignored embedded repos
  40. * are never discovered or indexed (#970, #976).
  41. */
  42. includeIgnored?: string[];
  43. }
  44. /** Parsed, validated view of a project's `codegraph.json`. */
  45. interface ParsedConfig {
  46. extensions: Record<string, Language>;
  47. includeIgnored: string[];
  48. }
  49. interface CacheEntry {
  50. mtimeMs: number;
  51. config: ParsedConfig;
  52. }
  53. /**
  54. * Cache keyed by project root. The loader is called once per indexing/scan/sync
  55. * operation (and per watch event), so the mtime guard keeps repeat calls to one
  56. * `stat` while a single `codegraph.json` is in force. Keying by root keeps two
  57. * projects in the same process (the daemon / multi-project MCP server) isolated.
  58. */
  59. const cache = new Map<string, CacheEntry>();
  60. /** Shared frozen empties so the no-config path allocates nothing. */
  61. const EMPTY_EXTENSIONS: Record<string, Language> = Object.freeze({});
  62. const EMPTY_CONFIG: ParsedConfig = Object.freeze({
  63. extensions: EMPTY_EXTENSIONS,
  64. includeIgnored: Object.freeze([]) as unknown as string[],
  65. });
  66. /**
  67. * Normalize a user-provided extension key to the `.ext` lowercase form used by
  68. * the built-in map. Returns null for keys that can never match a real file
  69. * extension (so the caller warns and skips):
  70. * - empty / just "."
  71. * - multi-part (".d.ts") — language detection keys off the FINAL extension
  72. * only (`lastIndexOf('.')`), so a multi-dot key would never be consulted.
  73. * - anything containing a path separator.
  74. */
  75. function normalizeExtKey(raw: string): string | null {
  76. if (typeof raw !== 'string') return null;
  77. let ext = raw.trim().toLowerCase();
  78. if (!ext) return null;
  79. if (!ext.startsWith('.')) ext = '.' + ext;
  80. const body = ext.slice(1);
  81. if (!body) return null;
  82. if (body.includes('.') || body.includes('/') || body.includes('\\')) return null;
  83. return ext;
  84. }
  85. /**
  86. * Read + JSON-parse a `codegraph.json` once and return its validated view.
  87. * Every failure mode degrades to the zero-config default — a missing file, bad
  88. * JSON, or a typo'd value never throws.
  89. */
  90. function parseConfig(file: string): ParsedConfig {
  91. let raw: string;
  92. try {
  93. raw = fs.readFileSync(file, 'utf-8');
  94. } catch {
  95. return EMPTY_CONFIG;
  96. }
  97. let parsed: unknown;
  98. try {
  99. parsed = JSON.parse(raw);
  100. } catch (err) {
  101. logWarn(`Ignoring ${PROJECT_CONFIG_FILENAME}: not valid JSON`, {
  102. file,
  103. error: err instanceof Error ? err.message : String(err),
  104. });
  105. return EMPTY_CONFIG;
  106. }
  107. if (!parsed || typeof parsed !== 'object') return EMPTY_CONFIG;
  108. const extensions = extractExtensions(parsed, file);
  109. const includeIgnored = extractIncludeIgnored(parsed, file);
  110. if (extensions === EMPTY_EXTENSIONS && includeIgnored.length === 0) return EMPTY_CONFIG;
  111. return { extensions, includeIgnored };
  112. }
  113. /**
  114. * Validate the `extensions` map. Every failure mode degrades to "no overrides
  115. * from this entry" — a bad value or a typo'd language never throws.
  116. */
  117. function extractExtensions(parsed: object, file: string): Record<string, Language> {
  118. const exts = (parsed as ProjectConfig).extensions;
  119. if (!exts || typeof exts !== 'object' || Array.isArray(exts)) return EMPTY_EXTENSIONS;
  120. const out: Record<string, Language> = {};
  121. for (const [rawKey, rawVal] of Object.entries(exts)) {
  122. const key = normalizeExtKey(rawKey);
  123. if (!key) {
  124. logWarn(`Ignoring extension mapping in ${PROJECT_CONFIG_FILENAME}: "${rawKey}" is not a valid file extension`, { file });
  125. continue;
  126. }
  127. if (typeof rawVal !== 'string' || !isLanguageSupported(rawVal as Language)) {
  128. logWarn(`Ignoring extension "${rawKey}" in ${PROJECT_CONFIG_FILENAME}: "${String(rawVal)}" is not a supported language`, { file });
  129. continue;
  130. }
  131. out[key] = rawVal as Language;
  132. }
  133. return Object.keys(out).length > 0 ? out : EMPTY_EXTENSIONS;
  134. }
  135. /**
  136. * Validate the `includeIgnored` patterns: an array of non-empty gitignore-style
  137. * strings. A non-array value or a non-string/blank entry warns-and-skips; never
  138. * throws. Patterns are kept verbatim (trimmed) so they match exactly as a
  139. * `.gitignore` line would.
  140. */
  141. function extractIncludeIgnored(parsed: object, file: string): string[] {
  142. const raw = (parsed as ProjectConfig).includeIgnored;
  143. if (raw === undefined) return [];
  144. if (!Array.isArray(raw)) {
  145. logWarn(`Ignoring "includeIgnored" in ${PROJECT_CONFIG_FILENAME}: must be an array of gitignore-style patterns`, { file });
  146. return [];
  147. }
  148. const out: string[] = [];
  149. for (const entry of raw) {
  150. if (typeof entry !== 'string' || !entry.trim()) {
  151. logWarn(`Ignoring an "includeIgnored" entry in ${PROJECT_CONFIG_FILENAME}: every pattern must be a non-empty string`, { file });
  152. continue;
  153. }
  154. out.push(entry.trim());
  155. }
  156. return out;
  157. }
  158. /**
  159. * Load the parsed `codegraph.json` for a project, mtime-cached. A missing or
  160. * malformed file yields the zero-config default. One `stat` (and at most one
  161. * read/parse) while a single config file is in force, shared across every field.
  162. */
  163. function loadParsedConfig(rootDir: string): ParsedConfig {
  164. const file = path.join(rootDir, PROJECT_CONFIG_FILENAME);
  165. let mtimeMs: number;
  166. try {
  167. mtimeMs = fs.statSync(file).mtimeMs;
  168. } catch {
  169. // No config file — drop any stale cache entry and return the default.
  170. cache.delete(rootDir);
  171. return EMPTY_CONFIG;
  172. }
  173. const entry = cache.get(rootDir);
  174. if (entry && entry.mtimeMs === mtimeMs) return entry.config;
  175. const config = parseConfig(file);
  176. cache.set(rootDir, { mtimeMs, config });
  177. return config;
  178. }
  179. /**
  180. * Load the validated extension overrides for a project, mtime-cached.
  181. *
  182. * Returns a map of `.ext` → supported language id. The result merges on top of
  183. * the built-in extension map at the point of use (see `detectLanguage` /
  184. * `isSourceFile`), with these user mappings taking precedence. Returns an empty
  185. * map when there is no `codegraph.json` (the zero-config default).
  186. */
  187. export function loadExtensionOverrides(rootDir: string): Record<string, Language> {
  188. return loadParsedConfig(rootDir).extensions;
  189. }
  190. /**
  191. * Load the validated `includeIgnored` patterns for a project, mtime-cached.
  192. *
  193. * These name gitignored directories whose embedded git repositories should be
  194. * indexed despite `.gitignore` (#622, #699). An empty result — the zero-config
  195. * default — means `.gitignore` is fully respected: gitignored embedded repos
  196. * are never discovered or indexed (#970, #976).
  197. */
  198. export function loadIncludeIgnoredPatterns(rootDir: string): string[] {
  199. return loadParsedConfig(rootDir).includeIgnored;
  200. }
  201. /** Test/maintenance hook: forget cached config (e.g. after rewriting it in a test). */
  202. export function clearProjectConfigCache(): void {
  203. cache.clear();
  204. }