drupal.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /**
  2. * Drupal Framework Resolver
  3. *
  4. * Supports Drupal 8/9/10/11 (Composer-based projects). Drupal 7 is not supported.
  5. *
  6. * ## What this resolver does
  7. *
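 * 1. **Detection** — reads composer.json and accepts a `drupal/*` package name, a
 *    `drupal-*` package type, or any `drupal/*` dependency in `require` / `require-dev`.
 *    If none of those match, falls back to the file list: a `*.info.yml` manifest together
 *    with at least one `.routing.yml`, `.module`, `.install`, or `.theme` file.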
  10. *
  11. * 2. **Route extraction** — parses `*.routing.yml` files and emits `route` nodes for each
  12. * Drupal route, with `references` edges to the `_controller`, `_form`, or entity handler
  13. * class/method.
  14. *
  15. * 3. **Hook detection** — scans `.module`, `.install`, `.theme`, and `.inc` files for Drupal
  16. * hook implementations. Two strategies are used:
  17. * a. Docblock: `@Implements hook_X()` → precise, no false positives.
  18. * b. Name pattern: function `{moduleName}_{hookSuffix}()` → catches hooks without
  19. * docblocks but may produce false positives on helper functions.
  20. * Detected hooks emit an `UnresolvedRef` from the implementing function node to the
  21. * canonical `hook_X` name, linking implementations to the hook when `codegraph_callers`
  22. * is invoked.
  23. *
  24. * ## Design decisions (review in future iterations)
  25. *
  26. * - Hook graph resolution (v1): hook references are stored as UnresolvedRef pointing to the
  27. * canonical `hook_X` name. If Drupal core is indexed, these will resolve to core hook
  28. * definitions. Without core, they remain unresolved but are still searchable via
  29. * `codegraph_search("form_alter")`. Full hook-node creation (virtual nodes for every hook)
  30. * is deferred to a future iteration.
  31. *
  32. * - Services / plugins (out of scope for v1): `*.services.yml` service definitions and plugin
  33. * annotations (`@Block`, `@FormElement`, etc.) are not extracted. Add a TODO below when
  34. * ready to implement.
  35. *
  36. * - Twig templates (out of scope for v1): `.twig` files are tracked as file nodes but no
  37. * symbol extraction is performed (no tree-sitter Twig grammar). Implement when a Twig
  38. * grammar WASM is available.
  39. *
  40. * ## TODOs for future iterations
  41. *
  42. * - TODO: Extract service definitions from `*.services.yml` files (class → service-id edges).
  43. * - TODO: Extract plugin annotations (`@Block`, `@FormElement`, `@Field`, etc.) from PHP
  44. * docblocks and emit plugin nodes with references to the annotated class.
  45. * - TODO: Add Twig symbol extraction when a tree-sitter Twig grammar becomes available.
  46. * - TODO: Improve hook resolution: create virtual `hook_*` nodes so `codegraph_callers`
  47. * returns all implementations even when Drupal core is not indexed.
  48. */
  49. import { generateNodeId } from '../../extraction/tree-sitter-helpers';
  50. import { Node } from '../../types';
  51. import { FrameworkResolver, ResolutionContext, ResolvedRef, UnresolvedRef } from '../types';
  52. // ---------------------------------------------------------------------------
  53. // Helpers
  54. // ---------------------------------------------------------------------------
  55. /**
  56. * Parse the last PHP namespace segment from a FQCN like `\Drupal\mymodule\Controller\Foo`.
  57. * Returns `null` for strings that don't look like a FQCN.
  58. */
  59. function lastSegment(fqcn: string): string | null {
  60. const clean = fqcn.replace(/^\\+/, '').trim();
  61. if (!clean.includes('\\')) return null;
  62. const parts = clean.split('\\');
  63. return parts[parts.length - 1] ?? null;
  64. }
  65. /**
  66. * Derive the Drupal module name from a file path.
  67. * e.g. `web/modules/custom/my_module/my_module.module` → `my_module`
  68. */
  69. function moduleNameFromPath(filePath: string): string | null {
  70. const match = filePath.match(/\/([^/]+)\.[^./]+$/);
  71. return match ? match[1]! : null;
  72. }
  73. // ---------------------------------------------------------------------------
  74. // Route extraction helpers
  75. // ---------------------------------------------------------------------------
  76. /**
  77. * Extract route nodes and handler references from a Drupal `*.routing.yml` file.
  78. *
  79. * Drupal routing YAML format:
  80. *
  81. * route.name:
  82. * path: '/some/path'
  83. * defaults:
  84. * _controller: '\Drupal\module\Controller\MyController::method'
  85. * _form: '\Drupal\module\Form\MyForm'
  86. * _title: 'Page title'
  87. * requirements:
  88. * _permission: 'access content'
  89. * methods: [GET, POST] # optional
  90. */
  91. function extractDrupalRoutes(
  92. filePath: string,
  93. content: string
  94. ): { nodes: Node[]; references: UnresolvedRef[] } {
  95. const nodes: Node[] = [];
  96. const references: UnresolvedRef[] = [];
  97. const now = Date.now();
  98. const lines = content.split('\n');
  99. type PendingRoute = { name: string; lineNum: number };
  100. let pending: PendingRoute | null = null;
  101. let currentPath: string | null = null;
  102. let handlerRefs: string[] = [];
  103. let methods: string[] = [];
  104. const flushRoute = () => {
  105. if (!pending || !currentPath) return;
  106. const methodTag = methods.length > 0 ? ` [${methods.join(',')}]` : '';
  107. const routeNode: Node = {
  108. id: `route:${filePath}:${pending.lineNum}:${currentPath}`,
  109. kind: 'route',
  110. name: `${currentPath}${methodTag}`,
  111. qualifiedName: `${filePath}::${pending.name}`,
  112. filePath,
  113. startLine: pending.lineNum,
  114. endLine: pending.lineNum,
  115. startColumn: 0,
  116. endColumn: 0,
  117. language: 'yaml',
  118. updatedAt: now,
  119. };
  120. nodes.push(routeNode);
  121. for (const handler of handlerRefs) {
  122. references.push({
  123. fromNodeId: routeNode.id,
  124. referenceName: handler,
  125. referenceKind: 'references',
  126. line: pending.lineNum,
  127. column: 0,
  128. filePath,
  129. language: 'yaml',
  130. });
  131. }
  132. };
  133. for (let i = 0; i < lines.length; i++) {
  134. const line = lines[i]!;
  135. const trimmed = line.trim();
  136. if (!trimmed || trimmed.startsWith('#')) continue;
  137. // Top-level route name: no leading whitespace, ends with a colon (no value after)
  138. if (/^\S.*:\s*$/.test(line) && !/^\s/.test(line)) {
  139. flushRoute();
  140. pending = { name: trimmed.slice(0, -1).trim(), lineNum: i + 1 };
  141. currentPath = null;
  142. handlerRefs = [];
  143. methods = [];
  144. continue;
  145. }
  146. // path: '/some/path'
  147. const pathMatch = trimmed.match(/^path:\s*['"]?([^'"#\n]+?)['"]?\s*(?:#.*)?$/);
  148. if (pathMatch) {
  149. currentPath = pathMatch[1]!.trim();
  150. continue;
  151. }
  152. // _controller: '\Drupal\...\Class::method'
  153. const controllerMatch = trimmed.match(/^_controller:\s*['"]?([^'"#\n]+?)['"]?\s*(?:#.*)?$/);
  154. if (controllerMatch) {
  155. handlerRefs.push(controllerMatch[1]!.trim());
  156. continue;
  157. }
  158. // _form: '\Drupal\...\Form\MyForm'
  159. const formMatch = trimmed.match(/^_form:\s*['"]?([^'"#\n]+?)['"]?\s*(?:#.*)?$/);
  160. if (formMatch) {
  161. handlerRefs.push(formMatch[1]!.trim());
  162. continue;
  163. }
  164. // _entity_form / _entity_list / _entity_view: entity.type
  165. const entityMatch = trimmed.match(/^_(entity_form|entity_list|entity_view):\s*['"]?([^'"#\n]+?)['"]?\s*(?:#.*)?$/);
  166. if (entityMatch) {
  167. handlerRefs.push(entityMatch[2]!.trim());
  168. continue;
  169. }
  170. // methods: [GET, POST] or methods: [GET]
  171. const methodsMatch = trimmed.match(/^methods:\s*\[([^\]]+)\]/);
  172. if (methodsMatch) {
  173. methods = methodsMatch[1]!.split(',').map((m) => m.trim().toUpperCase()).filter(Boolean);
  174. continue;
  175. }
  176. }
  177. flushRoute();
  178. return { nodes, references };
  179. }
  180. // ---------------------------------------------------------------------------
  181. // Hook detection helpers
  182. // ---------------------------------------------------------------------------
  183. const HOOK_FILE_EXTENSIONS = ['.module', '.install', '.theme', '.inc'];
  184. function isDrupalHookFile(filePath: string): boolean {
  185. return HOOK_FILE_EXTENSIONS.some((ext) => filePath.endsWith(ext));
  186. }
  187. /**
  188. * Extract hook implementation references from a Drupal PHP file.
  189. *
  190. * Strategy A (primary): look for docblocks containing `Implements hook_X().`
  191. * followed immediately by the function definition. This is the Drupal coding
  192. * standard and is precise.
  193. *
  194. * Strategy B (fallback): for functions whose name starts with `{moduleName}_`,
  195. * treat the suffix as the hook name. Catches hooks without docblocks but may
  196. * produce false positives on non-hook helper functions.
  197. *
  198. * Each detected hook emits an UnresolvedRef from the implementing function node
  199. * (identified by computing the same ID tree-sitter would generate) to the
  200. * canonical hook name, e.g. `hook_form_alter`.
  201. */
  202. function extractDrupalHooks(
  203. filePath: string,
  204. content: string
  205. ): { nodes: Node[]; references: UnresolvedRef[] } {
  206. const references: UnresolvedRef[] = [];
  207. // Build a map of function name → 1-indexed line number for all top-level functions.
  208. // This mirrors tree-sitter's line numbering so we can reconstruct node IDs.
  209. const funcLineMap = new Map<string, number>();
  210. const funcDef = /^function\s+(\w+)\s*\(/gm;
  211. let fm: RegExpExecArray | null;
  212. while ((fm = funcDef.exec(content)) !== null) {
  213. const name = fm[1]!;
  214. if (!funcLineMap.has(name)) {
  215. // line = number of newlines before match start + 1
  216. funcLineMap.set(name, content.slice(0, fm.index).split('\n').length);
  217. }
  218. }
  219. const emitHookRef = (hookName: string, funcName: string) => {
  220. const lineNum = funcLineMap.get(funcName);
  221. if (lineNum === undefined) return;
  222. const nodeId = generateNodeId(filePath, 'function', funcName, lineNum);
  223. references.push({
  224. fromNodeId: nodeId,
  225. referenceName: hookName,
  226. referenceKind: 'references',
  227. line: lineNum,
  228. column: 0,
  229. filePath,
  230. language: 'php',
  231. });
  232. };
  233. // Strategy A: docblock `Implements hook_X().` followed by function definition.
  234. // The docblock and function may be separated by blank lines.
  235. const docblockPattern =
  236. /\/\*\*[\s\S]*?(?:@|\*\s+)Implements\s+(hook_\w+)\s*\(\)[\s\S]*?\*\/\s*\n(?:\s*\n)*function\s+(\w+)\s*\(/g;
  237. const docblockMatched = new Set<string>();
  238. let match: RegExpExecArray | null;
  239. while ((match = docblockPattern.exec(content)) !== null) {
  240. const [, hookName, funcName] = match;
  241. emitHookRef(hookName!, funcName!);
  242. docblockMatched.add(funcName!);
  243. }
  244. // Strategy B: fallback name-pattern matching for functions without docblocks.
  245. // Only applies to functions whose name starts with {moduleName}_ and that were
  246. // not already matched by Strategy A.
  247. const moduleName = moduleNameFromPath(filePath);
  248. if (moduleName) {
  249. const prefix = moduleName + '_';
  250. for (const [funcName] of funcLineMap) {
  251. if (docblockMatched.has(funcName)) continue;
  252. if (!funcName.startsWith(prefix)) continue;
  253. const hookSuffix = funcName.slice(prefix.length);
  254. if (!hookSuffix) continue;
  255. // Emit a reference to hook_{suffix} — the resolver will link it if the
  256. // hook is defined somewhere in the indexed graph (e.g. Drupal core).
  257. emitHookRef(`hook_${hookSuffix}`, funcName);
  258. }
  259. }
  260. return { nodes: [], references };
  261. }
  262. // ---------------------------------------------------------------------------
  263. // Resolver
  264. // ---------------------------------------------------------------------------
  265. export const drupalResolver: FrameworkResolver = {
  266. name: 'drupal',
  267. languages: ['php', 'yaml'],
  268. // Drupal route handlers are FQCNs (`\Drupal\…\Class::method`, the single-colon
  269. // controller-service form `\Drupal\…\Class:method`, or a bare `\…\FormClass`)
  270. // and hook refs are canonical `hook_*` names — none match a declared symbol, so
  271. // resolveOne's pre-filter would drop them before resolve() runs. Claim the
  272. // shapes resolve() handles (mirrors the Rails `controller#action` claim).
  273. claimsReference(name: string): boolean {
  274. return (
  275. name.startsWith('hook_') ||
  276. name.includes('\\') ||
  277. /^[A-Za-z_]\w*::?\w+$/.test(name)
  278. );
  279. },
  280. detect(context: ResolutionContext): boolean {
  281. // Primary: composer.json identifies a Drupal project/module/theme/profile.
  282. // A contrib module often has an EMPTY `require` (no `drupal/*` dep) but still
  283. // declares `"name": "drupal/<module>"` and `"type": "drupal-module"`, so check
  284. // those too — checking deps alone misses every standalone contrib module.
  285. const composer = context.readFile('composer.json');
  286. if (composer) {
  287. try {
  288. const json = JSON.parse(composer) as {
  289. name?: string;
  290. type?: string;
  291. require?: Record<string, string>;
  292. 'require-dev'?: Record<string, string>;
  293. };
  294. if (typeof json.name === 'string' && json.name.startsWith('drupal/')) return true;
  295. if (typeof json.type === 'string' && json.type.startsWith('drupal-')) return true;
  296. const deps = { ...json.require, ...(json['require-dev'] ?? {}) };
  297. if (Object.keys(deps).some((k) => k.startsWith('drupal/'))) return true;
  298. } catch {
  299. // malformed composer.json — fall through to file-based detection
  300. }
  301. }
  302. // Fallback (composer-less module, or a non-Drupal composer.json): the
  303. // unmistakable Drupal signature is a `*.info.yml` manifest alongside a
  304. // Drupal PHP/route file. Require both so a stray `.info.yml` elsewhere
  305. // doesn't trigger a false positive.
  306. const files = context.getAllFiles();
  307. const hasInfoYml = files.some((f) => f.endsWith('.info.yml'));
  308. if (!hasInfoYml) return false;
  309. return files.some(
  310. (f) =>
  311. f.endsWith('.routing.yml') ||
  312. f.endsWith('.module') ||
  313. f.endsWith('.install') ||
  314. f.endsWith('.theme')
  315. );
  316. },
  317. resolve(ref: UnresolvedRef, context: ResolutionContext): ResolvedRef | null {
  318. const name = ref.referenceName;
  319. // _controller: '\Drupal\module\...\ClassName::methodName' (double colon) or the
  320. // single-colon controller-service form '\Drupal\...\ClassName:methodName'.
  321. const controllerMatch = name.match(/^\\?(?:Drupal\\[^:]+\\)?([^\\:]+):{1,2}(\w+)$/);
  322. if (controllerMatch) {
  323. const [, className, methodName] = controllerMatch;
  324. const classNodes = context.getNodesByName(className!);
  325. for (const cls of classNodes) {
  326. if (cls.kind !== 'class') continue;
  327. const fileNodes = context.getNodesInFile(cls.filePath);
  328. const method = fileNodes.find((n) => n.kind === 'method' && n.name === methodName);
  329. if (method) {
  330. return { original: ref, targetNodeId: method.id, confidence: 0.9, resolvedBy: 'framework' };
  331. }
  332. return { original: ref, targetNodeId: cls.id, confidence: 0.7, resolvedBy: 'framework' };
  333. }
  334. }
  335. // _form / _entity_form: '\Drupal\module\...\ClassName' (bare FQCN, no method)
  336. if (name.includes('\\') && !name.includes(':')) {
  337. const className = lastSegment(name);
  338. if (className) {
  339. const classNodes = context.getNodesByName(className);
  340. const cls = classNodes.find((n) => n.kind === 'class');
  341. if (cls) {
  342. return { original: ref, targetNodeId: cls.id, confidence: 0.85, resolvedBy: 'framework' };
  343. }
  344. }
  345. }
  346. // hook_X — find any function whose name ends in _{hookSuffix} in a hook file
  347. if (name.startsWith('hook_')) {
  348. const hookSuffix = name.slice(5); // strip 'hook_'
  349. const candidates = context.getNodesByKind('function').filter(
  350. (n) => n.name.endsWith(`_${hookSuffix}`) && isDrupalHookFile(n.filePath)
  351. );
  352. if (candidates.length > 0) {
  353. return {
  354. original: ref,
  355. targetNodeId: candidates[0]!.id,
  356. confidence: 0.75,
  357. resolvedBy: 'framework',
  358. };
  359. }
  360. }
  361. return null;
  362. },
  363. extract(filePath: string, content: string): { nodes: Node[]; references: UnresolvedRef[] } {
  364. if (filePath.endsWith('.routing.yml')) {
  365. return extractDrupalRoutes(filePath, content);
  366. }
  367. if (isDrupalHookFile(filePath) || filePath.endsWith('.php')) {
  368. return extractDrupalHooks(filePath, content);
  369. }
  370. return { nodes: [], references: [] };
  371. },
  372. };