// java.ts — tree-sitter extractor configuration for Java, including Lombok member synthesis.
  1. import type { Node as SyntaxNode } from 'web-tree-sitter';
  2. import { getNodeText, getChildByField } from '../tree-sitter-helpers';
  3. import type { ExtractorContext, LanguageExtractor } from '../tree-sitter-types';
  /**
   * Node types tree-sitter-java can put in a method's `type` (return) field that
   * name no class. Nothing can be chained as `.method()` on such a value, so no
   * `returnType` is recorded for them.
   */
  const JAVA_NON_CLASS_RETURN_NODES: ReadonlySet<string> = new Set<string>([
    'void_type',
    'integral_type', // int, long, short, byte, char
    'floating_point_type', // float, double
    'boolean_type',
  ]);
  /**
   * Normalize a Java type node to the bare class name a chained
   * `foo.getThing().bar()` could be called on (the #645/#608 mechanism).
   *
   * - primitives, `void` and arrays yield undefined (no class to chain on);
   * - type arguments are removed, including nested ones
   *   (`Map<String, List<Foo>>` → `Map`);
   * - a dotted package / outer-class qualifier is reduced to the simple name
   *   (`java.util.List` → `List`).
   *
   * @param typeNode the type node to normalize, or null when there is none
   * @param source   the file text the node was parsed from
   * @returns the simple class name, or undefined when the type names no class
   *          or the remaining text is not a plain identifier
   */
  function normalizeJavaType(typeNode: SyntaxNode | null, source: string): string | undefined {
    if (!typeNode) return undefined;
    if (JAVA_NON_CLASS_RETURN_NODES.has(typeNode.type)) return undefined;
    // An array (`Foo[]`) isn't a receiver you call instance methods on.
    if (typeNode.type === 'array_type') return undefined;

    // Strip type arguments — the chain resolves on the base type. Innermost
    // `<…>` groups are removed first and the pass repeats until nothing changes,
    // so nested arguments (`Map<K, List<V>>`) collapse fully instead of leaving
    // a stray `>` behind.
    let bare = getNodeText(typeNode, source).trim();
    let previous: string;
    do {
      previous = bare;
      bare = bare.replace(/<[^<>]*>/g, '');
    } while (bare !== previous);

    // Strip a dotted package / outer-class qualifier (`java.util.List` → `List`).
    const simple = bare.split('.').pop()?.trim();
    // Anything that is not a plain identifier (unbalanced brackets, annotations,
    // wildcards, …) is rejected rather than guessed at.
    if (!simple || !/^[A-Za-z_]\w*$/.test(simple)) return undefined;
    return simple;
  }
  /**
   * Declared return type of a Java method, normalized to a bare class name.
   * The value is taken from the declaration's `type` field; a constructor has
   * no `type` field, so it yields undefined.
   */
  function extractJavaReturnType(node: SyntaxNode, source: string): string | undefined {
    const declaredType = getChildByField(node, 'type');
    return normalizeJavaType(declaredType, source);
  }
  41. // ---------------------------------------------------------------------------
  42. // Lombok-generated member synthesis (#912)
  43. // ---------------------------------------------------------------------------
  44. // Lombok generates methods at compile time, so they never appear in the source
  45. // AST and static extraction misses them — `bean.getX()`, `bean.setX()`,
  46. // `Bean.builder()`, and `log.info(...)` calls then resolve to nothing and call
  47. // chains break silently. We synthesize the mechanical, well-documented ones.
  /** Lombok logging annotations — all generate a field named `log` by default. */
  const LOMBOK_LOG_ANNOTATIONS: ReadonlySet<string> = new Set<string>([
    'Slf4j',
    'Log4j',
    'Log4j2',
    'Log',
    'CommonsLog',
    'JBossLog',
    'Flogger',
    'XSlf4j',
    'CustomLog',
  ]);
  /**
   * Simple names of every annotation in a node's `modifiers` child. A qualified
   * annotation is reduced to its last segment (`@lombok.Getter` → `Getter`). The
   * set is empty when the node has no `modifiers` child.
   */
  function lombokAnnotationNames(node: SyntaxNode): Set<string> {
    const found = new Set<string>();
    const modifiers = node.namedChildren.find((child: SyntaxNode) => child.type === 'modifiers');
    for (const candidate of modifiers?.namedChildren ?? []) {
      const isAnnotation = candidate.type === 'marker_annotation' || candidate.type === 'annotation';
      if (!isAnnotation) continue;
      const nameNode = getChildByField(candidate, 'name');
      if (!nameNode) continue;
      // Keep only the segment after the last dot of a qualified name.
      const segments = nameNode.text.trim().split('.');
      const simple = segments[segments.length - 1];
      if (simple) found.add(simple);
    }
    return found;
  }
  /**
   * Raw text of a declaration's first `modifiers` child, or the empty string when
   * it has none. Keyword modifiers are anonymous nodes, so callers match on this
   * text instead of on named children.
   */
  function modifierTextOf(node: SyntaxNode): string {
    for (const child of node.namedChildren) {
      if (child.type === 'modifiers') return child.text;
    }
    return '';
  }
  /** Upper-case the first character of `name`; an empty string is returned unchanged. */
  function capitalizeJava(name: string): string {
    if (!name) return name;
    const head = name.charAt(0).toUpperCase();
    const tail = name.slice(1);
    return head + tail;
  }
  /**
   * Name of the getter Lombok generates for a field: `getX` in general, `isX`
   * for a primitive boolean. A primitive boolean already named `isFoo` keeps
   * that name as its getter.
   */
  function lombokGetterName(fieldName: string, isBooleanPrimitive: boolean): string {
    if (!isBooleanPrimitive) return 'get' + capitalizeJava(fieldName);
    if (/^is[A-Z]/.test(fieldName)) return fieldName;
    return 'is' + capitalizeJava(fieldName);
  }
  /**
   * Name of the setter Lombok generates for a field: `setX`. For a primitive
   * boolean named `isFoo` the `is` prefix is dropped first, giving `setFoo`.
   */
  function lombokSetterName(fieldName: string, isBooleanPrimitive: boolean): string {
    let stem = fieldName;
    if (isBooleanPrimitive && /^is[A-Z]/.test(fieldName)) {
      stem = fieldName.slice(2);
    }
    return 'set' + capitalizeJava(stem);
  }
  /**
   * Synthesize the members Lombok generates at compile time. Covers the common,
   * mechanical annotations:
   *
   *   @Getter / @Setter (class- or field-level) → getX()/isX(), setX()
   *   @Data                                     → getters + setters (non-final)
   *                                               + equals/hashCode/toString
   *   @Value                                    → getters + equals/hashCode/toString
   *                                               (immutable, no setters)
   *   @Builder / @SuperBuilder                  → static builder()
   *   @ToString / @EqualsAndHashCode            → those methods
   *   @Slf4j and the other @Log* annotations    → the `log` field
   *
   * Each node is anchored on the field's (or class's) name token — a leaf, so it
   * pulls in no spurious value-reference scope — carries a `lombok` decorator and
   * a docstring naming the generating annotation, so it reads as generated rather
   * than hand-written. Deliberately NOT synthesized: constructors (`new X()`
   * already links to the class via `instantiates`, and overloaded
   * @NoArgs/@AllArgs/@RequiredArgs ctors share a name → would collide on a
   * synthetic node id), the fluent builder setters, and `@Accessors(fluent=true)`
   * naming. A member the source already declares is never overridden.
   *
   * @param classNode the class declaration being extracted
   * @param ctx       extractor context; the class's own node id is read from the
   *                  top of `ctx.nodeStack`, and new members are added through
   *                  `ctx.createNode`
   */
  function synthesizeLombokMembers(classNode: SyntaxNode, ctx: ExtractorContext): void {
    const classAnns = lombokAnnotationNames(classNode);
    const classGetter = classAnns.has('Getter');
    const classSetter = classAnns.has('Setter');
    const isData = classAnns.has('Data');
    const isValue = classAnns.has('Value');
    const isSuperBuilder = classAnns.has('SuperBuilder');
    const hasBuilder = classAnns.has('Builder') || isSuperBuilder;
    const hasToString = isData || isValue || classAnns.has('ToString');
    const hasEquals = isData || isValue || classAnns.has('EqualsAndHashCode');
    const logAnn = [...classAnns].find((a) => LOMBOK_LOG_ANNOTATIONS.has(a));

    const body = getChildByField(classNode, 'body');
    if (!body) return;
    const fields = body.namedChildren.filter((c: SyntaxNode) => c.type === 'field_declaration');

    // Leave immediately when nothing Lombok is present, so a non-Lombok class
    // pays nothing beyond one scan of its direct field declarations (and an
    // annotated class skips even that — this hook runs for every Java class).
    // Only a field-level @Getter/@Setter can produce a member without a class
    // annotation, so other field annotations (`@Autowired`, `@Column`, …) must
    // not keep us here: they would cost two passes over `ctx.nodes` for nothing.
    const classHasLombok =
      classGetter || classSetter || isData || isValue || hasBuilder || hasToString || hasEquals || !!logAnn;
    if (!classHasLombok) {
      const anyFieldAccessor = fields.some((f: SyntaxNode) => {
        const anns = lombokAnnotationNames(f);
        return anns.has('Getter') || anns.has('Setter');
      });
      if (!anyFieldAccessor) return;
    }

    // Members already declared directly in this class. Lombok never overrides an
    // explicit member, so we skip a name the source already has. Methods and
    // fields are tracked separately: they're distinct namespaces in Java (a
    // boolean field `isRunning` and its generated getter `isRunning()` coexist),
    // and the node id is keyed by kind so they never actually collide.
    const classId = ctx.nodeStack[ctx.nodeStack.length - 1];
    const classRec = ctx.nodes.find((n) => n.id === classId);
    const classQN = classRec?.qualifiedName;
    const takenMethods = new Set<string>();
    const takenFields = new Set<string>();
    if (classQN) {
      for (const n of ctx.nodes) {
        // A direct member's qualified name is exactly `<class>::<member>`.
        if (n.filePath !== ctx.filePath || n.qualifiedName !== `${classQN}::${n.name}`) continue;
        if (n.kind === 'method' || n.kind === 'function') {
          takenMethods.add(n.name);
        } else if (n.kind === 'field' || n.kind === 'variable' || n.kind === 'constant' || n.kind === 'property') {
          takenFields.add(n.name);
        }
      }
    }

    const classNameNode = getChildByField(classNode, 'name') ?? classNode;
    const className = classRec?.name ?? getNodeText(classNameNode, ctx.source).trim();

    // Create one synthetic public method unless the name is already taken, and
    // reserve the name so a later field cannot emit a duplicate.
    const emitMethod = (
      name: string,
      anchor: SyntaxNode,
      signature: string,
      fromAnnotation: string,
      extra: { returnType?: string; isStatic?: boolean } = {}
    ): void => {
      if (!name || takenMethods.has(name)) return;
      takenMethods.add(name);
      ctx.createNode('method', name, anchor, {
        visibility: 'public',
        signature,
        docstring: `Lombok-generated (${fromAnnotation})`,
        decorators: ['lombok'],
        isStatic: extra.isStatic,
        returnType: extra.returnType,
      });
    };

    // Per-field getters/setters.
    for (const fd of fields) {
      const mods = modifierTextOf(fd);
      if (/\bstatic\b/.test(mods)) continue; // Lombok skips static fields.
      const isFinal = /\bfinal\b/.test(mods);
      const fieldAnns = lombokAnnotationNames(fd);
      const fieldGetter = fieldAnns.has('Getter');
      const fieldSetter = fieldAnns.has('Setter');
      const wantGetter = classGetter || isData || isValue || fieldGetter;
      // A final field never gets a setter, whichever annotation asked for one.
      const wantSetter = (classSetter || isData || fieldSetter) && !isFinal;
      if (!wantGetter && !wantSetter) continue;

      const typeNode = getChildByField(fd, 'type');
      const typeText = typeNode ? getNodeText(typeNode, ctx.source).trim() : 'Object';
      const isBooleanPrimitive = typeNode?.type === 'boolean_type';
      const returnType = normalizeJavaType(typeNode, ctx.source);

      // One declaration can introduce several variables (`int a, b;`).
      for (const vd of fd.namedChildren) {
        if (vd.type !== 'variable_declarator') continue;
        const nameNode = getChildByField(vd, 'name');
        if (!nameNode) continue;
        const fieldName = getNodeText(nameNode, ctx.source).trim();
        if (!fieldName) continue;

        if (wantGetter) {
          const g = lombokGetterName(fieldName, isBooleanPrimitive);
          emitMethod(g, nameNode, `${typeText} ${g}()`,
            fieldGetter ? '@Getter' : isData ? '@Data' : isValue ? '@Value' : '@Getter',
            { returnType });
        }
        if (wantSetter) {
          const s = lombokSetterName(fieldName, isBooleanPrimitive);
          emitMethod(s, nameNode, `void ${s}(${typeText} ${fieldName})`,
            fieldSetter ? '@Setter' : isData ? '@Data' : '@Setter');
        }
      }
    }

    // Class-level synthesized methods.
    if (hasBuilder) {
      emitMethod('builder', classNameNode, `static ${className}.${className}Builder builder()`,
        isSuperBuilder ? '@SuperBuilder' : '@Builder',
        { isStatic: true, returnType: `${className}Builder` });
    }
    if (hasToString) {
      emitMethod('toString', classNameNode, 'String toString()',
        isData ? '@Data' : isValue ? '@Value' : '@ToString');
    }
    if (hasEquals) {
      const from = isData ? '@Data' : isValue ? '@Value' : '@EqualsAndHashCode';
      emitMethod('equals', classNameNode, 'boolean equals(Object o)', from);
      emitMethod('hashCode', classNameNode, 'int hashCode()', from);
    }

    // Logger field (@Slf4j and friends).
    if (logAnn && !takenFields.has('log')) {
      takenFields.add('log');
      ctx.createNode('field', 'log', classNameNode, {
        visibility: 'private',
        isStatic: true,
        signature: 'Logger log',
        docstring: `Lombok-generated (@${logAnn})`,
        decorators: ['lombok'],
      });
    }
  }
  /**
   * Token types found inside a declaration's `modifiers` child(ren).
   *
   * Keyword modifiers (`public`, `static`, `final`, …) are anonymous tokens whose
   * node type is the keyword itself, while annotations are separate nodes with
   * their own types. Looking at token types instead of the raw modifier text
   * keeps annotation arguments out of the match — e.g.
   * `@SuppressWarnings("static-access")` or `@JsonProperty("public_key")`.
   */
  function javaModifierTokenTypes(node: SyntaxNode): Set<string> {
    const tokenTypes = new Set<string>();
    for (let i = 0; i < node.childCount; i++) {
      const child = node.child(i);
      if (child?.type !== 'modifiers') continue;
      for (let j = 0; j < child.childCount; j++) {
        const token = child.child(j);
        if (token) tokenTypes.add(token.type);
      }
    }
    return tokenTypes;
  }

  /**
   * Extraction rules for Java: which tree-sitter-java node types map to which
   * symbol kinds, which fields hold names/bodies/parameters/return types, and
   * the callbacks that derive signatures, modifiers, imports and packages.
   */
  export const javaExtractor: LanguageExtractor = {
    functionTypes: [],
    classTypes: ['class_declaration'],
    methodTypes: ['method_declaration', 'constructor_declaration'],
    // `annotation_type_declaration` is `@interface Foo { … }` — an annotation
    // definition. Without it, annotation types (`@SerializedName`, `@GetMapping`,
    // JPA/Spring annotations) aren't nodes, so the `@Foo` usages that DO get
    // extracted can't resolve and the annotation file shows zero dependents.
    interfaceTypes: ['interface_declaration', 'annotation_type_declaration'],
    structTypes: [],
    enumTypes: ['enum_declaration'],
    enumMemberTypes: ['enum_constant'],
    typeAliasTypes: [],
    importTypes: ['import_declaration'],
    callTypes: ['method_invocation'],
    variableTypes: ['local_variable_declaration'],
    fieldTypes: ['field_declaration'],
    nameField: 'name',
    bodyField: 'body',
    paramsField: 'parameters',
    returnField: 'type',
    getReturnType: extractJavaReturnType,
    synthesizeMembers: synthesizeLombokMembers,
    // `<return type> <parameter list>` as written in the source; just the
    // parameter list when there is no `type` field (constructors); undefined when
    // the node has no parameter list at all.
    getSignature: (node, source) => {
      const params = getChildByField(node, 'parameters');
      if (!params) return undefined;
      const paramsText = getNodeText(params, source);
      const returnType = getChildByField(node, 'type');
      return returnType ? getNodeText(returnType, source) + ' ' + paramsText : paramsText;
    },
    // Explicit access modifier, or undefined when none is written
    // (package-private).
    getVisibility: (node) => {
      const modifiers = javaModifierTokenTypes(node);
      if (modifiers.has('public')) return 'public';
      if (modifiers.has('private')) return 'private';
      if (modifiers.has('protected')) return 'protected';
      return undefined;
    },
    isStatic: (node) => javaModifierTokenTypes(node).has('static'),
    // A `static final` field is a Java constant (`MAX_ITEMS`, lookup tables,
    // shared config). Drives `constant` kind so value-reference edges target it;
    // instance / `final`-only / `static`-only fields stay mutable `field`s.
    isConst: (node) => {
      const modifiers = javaModifierTokenTypes(node);
      return modifiers.has('static') && modifiers.has('final');
    },
    // The module name is the text of the import's `scoped_identifier` child and
    // the signature is the whole trimmed import statement; an import without a
    // `scoped_identifier` child yields null.
    extractImport: (node, source) => {
      const importText = source.substring(node.startIndex, node.endIndex).trim();
      const scopedId = node.namedChildren.find((c: SyntaxNode) => c.type === 'scoped_identifier');
      if (scopedId) {
        const moduleName = source.substring(scopedId.startIndex, scopedId.endIndex);
        return { moduleName, signature: importText };
      }
      return null;
    },
    packageTypes: ['package_declaration'],
    extractPackage: (node, source) => {
      // package_declaration → scoped_identifier or identifier (single-segment)
      const id = node.namedChildren.find(
        (c: SyntaxNode) => c.type === 'scoped_identifier' || c.type === 'identifier'
      );
      return id ? source.substring(id.startIndex, id.endIndex).trim() : null;
    },
  };