// c-cpp.ts — tree-sitter language extractors for C and C++ sources.
  1. import type { Node as SyntaxNode } from 'web-tree-sitter';
  2. import { getChildByField, getNodeText } from '../tree-sitter-helpers';
  3. import type { LanguageExtractor } from '../tree-sitter-types';
  /**
   * Declarator subtrees that may mention qualified names which are NOT the name
   * of the function being declared (node type names as used by the
   * tree-sitter-cpp grammar).
   */
  const NON_NAME_DECLARATOR_SUBTREES: ReadonlySet<string> = new Set([
    'parameter_list', // `const std::string& x`
    'trailing_return_type', // `-> std::string`
    'template_argument_list', // `foo<std::string>(...)`
    'noexcept', // `noexcept(std::is_nothrow_move_constructible<T>::value)`
    'throw_specifier', // `throw(std::bad_alloc)`
    'requires_clause', // `requires std::integral<T>`
  ]);

  /**
   * Find the function NAME's `qualified_identifier` (`Foo::bar`) inside a
   * declarator.
   *
   * The search is breadth-first, so the shallowest qualified identifier wins,
   * and it never enters the subtrees listed in `NON_NAME_DECLARATOR_SUBTREES`.
   * Without that skip, a plain free function
   * `std::string TableFileName(const std::string&...)` was named `string` (from
   * the parameter type), so calls to it never resolved and its file looked like
   * nothing depended on it. Template arguments and exception/constraint
   * specifications are skipped for the same reason.
   *
   * @param declarator - Root of the declarator subtree to search.
   * @returns The first `qualified_identifier` reached, or `undefined` when the
   *   declarator contains none outside the skipped subtrees (i.e. the function
   *   name is unqualified).
   */
  function findDeclaratorQualifiedId(declarator: SyntaxNode): SyntaxNode | undefined {
    const queue: SyntaxNode[] = [declarator];
    // Advance a cursor instead of calling `shift()`: each dequeue stays O(1) and
    // no non-null assertion is needed.
    for (let head = 0; head < queue.length; head++) {
      const current = queue[head];
      if (!current) continue;
      if (current.type === 'qualified_identifier') return current;
      for (let i = 0; i < current.namedChildCount; i++) {
        const child = current.namedChild(i);
        if (child && !NON_NAME_DECLARATOR_SUBTREES.has(child.type)) {
          queue.push(child);
        }
      }
    }
    return undefined;
  }
  /**
   * Split the qualified name of a function definition into its `::`-separated
   * segments, e.g. `ns::Foo::bar` -> `['ns', 'Foo', 'bar']`.
   *
   * Each segment is trimmed individually so that spelling variants such as
   * `Foo :: bar` yield the same segments as `Foo::bar`; empty segments (for
   * example the one before a leading `::`) are dropped.
   *
   * @param node - Node whose `declarator` field holds the function declarator.
   * @param source - Source text the node was parsed from.
   * @returns The segments, or `undefined` when the node has no `declarator`
   *   field or the declarator contains no qualified identifier.
   */
  function cppQualifiedNameSegments(node: SyntaxNode, source: string): string[] | undefined {
    const declarator = getChildByField(node, 'declarator');
    if (!declarator) return undefined;
    const qid = findDeclaratorQualifiedId(declarator);
    if (!qid) return undefined;
    return getNodeText(qid, source)
      .split('::')
      .map((segment) => segment.trim())
      .filter(Boolean);
  }

  /**
   * Resolve the unqualified name of an out-of-line definition such as
   * `Foo::bar` (-> `bar`).
   *
   * @param node - Function definition node.
   * @param source - Source text the node was parsed from.
   * @returns The last segment of the qualified name, or `undefined` when the
   *   definition's name is not qualified.
   */
  function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined {
    const segments = cppQualifiedNameSegments(node, source);
    return segments ? segments[segments.length - 1] : undefined;
  }

  /**
   * Resolve the qualifying scope of an out-of-line definition such as
   * `ns::Foo::bar` (-> `ns::Foo`).
   *
   * @param node - Function definition node.
   * @param source - Source text the node was parsed from.
   * @returns Every segment except the last, re-joined with `::`, or `undefined`
   *   when the name has no qualifying segment.
   */
  function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined {
    const segments = cppQualifiedNameSegments(node, source);
    if (!segments || segments.length < 2) return undefined;
    return segments.slice(0, -1).join('::');
  }
  /**
   * Extraction settings for C sources: which syntax-node types count as
   * functions, structs, enums, typedefs, includes, calls and variables, which
   * fields hold a definition's name, body and parameters, plus two hooks for
   * typedef classification and `#include` parsing. C has no classes, methods or
   * interfaces, so those lists are empty.
   */
  export const cExtractor: LanguageExtractor = {
    functionTypes: ['function_definition'],
    classTypes: [],
    methodTypes: [],
    interfaceTypes: [],
    structTypes: ['struct_specifier'],
    enumTypes: ['enum_specifier'],
    enumMemberTypes: ['enumerator'],
    typeAliasTypes: ['type_definition'], // typedef
    importTypes: ['preproc_include'],
    callTypes: ['call_expression'],
    variableTypes: ['declaration'],
    nameField: 'declarator',
    bodyField: 'body',
    paramsField: 'parameters',

    /**
     * Classify `typedef enum { ... } name;` / `typedef struct { ... } name;`.
     * The inner specifier is anonymous, so the typedef name should become the
     * enum/struct name. Returns `'enum'` or `'struct'` for the first named child
     * that is such a specifier AND has a `body` field; otherwise `undefined`.
     */
    resolveTypeAliasKind: (node, _source) => {
      let index = 0;
      while (index < node.namedChildCount) {
        const inner = node.namedChild(index);
        index += 1;
        if (!inner) continue;
        const kind =
          inner.type === 'enum_specifier'
            ? 'enum'
            : inner.type === 'struct_specifier'
              ? 'struct'
              : undefined;
        // Only specifiers that carry a body define a new type here.
        if (kind && getChildByField(inner, 'body')) return kind;
      }
      return undefined;
    },

    /**
     * Parse `#include <stdio.h>` or `#include "myheader.h"`.
     * `moduleName` is the path without its delimiters and `signature` is the
     * trimmed text of the whole directive. Returns `null` when the node has
     * neither a `system_lib_string` nor a `string_literal` with `string_content`.
     */
    extractImport: (node, source) => {
      const signature = source.substring(node.startIndex, node.endIndex).trim();
      const childOfType = (parent: SyntaxNode, type: string) =>
        parent.namedChildren.find((c: SyntaxNode) => c.type === type);

      // Angle-bracket form: strip the surrounding `<` and `>`.
      const angled = childOfType(node, 'system_lib_string');
      if (angled) {
        const moduleName = getNodeText(angled, source).replace(/^<|>$/g, '');
        return { moduleName, signature };
      }

      // Quoted form: the path is the literal's `string_content` child.
      const quoted = childOfType(node, 'string_literal');
      const content = quoted ? childOfType(quoted, 'string_content') : undefined;
      return content ? { moduleName: getNodeText(content, source), signature } : null;
    },
  };
  /**
   * C++ statement keywords that tree-sitter occasionally reports as
   * function/method names inside macro-confused scopes.
   */
  const CPP_STATEMENT_KEYWORDS: ReadonlySet<string> = new Set([
    'switch',
    'if',
    'for',
    'while',
    'do',
    'case',
    'return',
  ]);

  /**
   * Extraction settings for C++ sources: which syntax-node types count as
   * functions, classes, methods, structs, enums, type aliases, includes, calls
   * and variables, which fields hold a definition's name, body and parameters,
   * plus hooks for qualified names, member visibility, typedef classification,
   * misparse filtering and `#include` parsing.
   */
  export const cppExtractor: LanguageExtractor = {
    functionTypes: ['function_definition'],
    classTypes: ['class_specifier'],
    methodTypes: ['function_definition'],
    interfaceTypes: [],
    structTypes: ['struct_specifier'],
    enumTypes: ['enum_specifier'],
    enumMemberTypes: ['enumerator'],
    typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
    importTypes: ['preproc_include'],
    callTypes: ['call_expression'],
    variableTypes: ['declaration'],
    nameField: 'declarator',
    bodyField: 'body',
    paramsField: 'parameters',
    resolveName: extractCppQualifiedMethodName,
    getReceiverType: extractCppReceiverType,

    /**
     * Visibility of a class/struct member.
     *
     * The access specifier in force is the nearest one written BEFORE the member
     * among its siblings, not the first one anywhere in the class. A class with
     * `public:` followed by `private:` sections must not report every member as
     * public. When no specifier precedes the member and its parent is a
     * `field_declaration_list`, the C++ default applies: `private` for a
     * `class`, `public` for a `struct`/`union`. Anything else (for example a
     * free function) yields `undefined`.
     */
    getVisibility: (node) => {
      const parent = node.parent;
      if (!parent) return undefined;

      let inForce: 'public' | 'private' | 'protected' | undefined;
      for (let i = 0; i < parent.childCount; i++) {
        const sibling = parent.child(i);
        if (!sibling) continue;
        // Children are visited in source order; stop once we reach the member
        // itself so later sections cannot influence the result. Positions are
        // compared instead of object identity, which is not assumed here.
        if (sibling.startIndex >= node.startIndex) break;
        if (sibling.type !== 'access_specifier') continue;
        const text = sibling.text;
        if (text.includes('public')) inForce = 'public';
        else if (text.includes('private')) inForce = 'private';
        else if (text.includes('protected')) inForce = 'protected';
      }
      if (inForce) return inForce;

      // No explicit specifier before the member: fall back to the language default.
      if (parent.type === 'field_declaration_list') {
        const ownerType = parent.parent?.type;
        if (ownerType === 'class_specifier') return 'private';
        if (ownerType === 'struct_specifier' || ownerType === 'union_specifier') return 'public';
      }
      return undefined;
    },

    /**
     * Classify `typedef enum { ... } name;` / `typedef struct { ... } name;`.
     * Returns `'enum'` or `'struct'` for the first named child that is such a
     * specifier AND has a `body` field; otherwise `undefined`.
     */
    resolveTypeAliasKind: (node, _source) => {
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (!child) continue;
        if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum';
        if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct';
      }
      return undefined;
    },

    /**
     * Reject names that come from misparsed code.
     *
     * C++ macros like NLOHMANN_JSON_NAMESPACE_BEGIN cause tree-sitter to misparse
     * namespace blocks as function_definitions (e.g. name = "namespace detail").
     * `namespace` is matched as a whole word so that genuine identifiers that
     * merely start with it (`namespaces`, `namespace_for`) are kept. Statement
     * keywords that tree-sitter occasionally misinterprets as function/method
     * names are rejected as well.
     */
    isMisparsedFunction: (name) => {
      if (/^namespace\b/.test(name)) return true;
      return CPP_STATEMENT_KEYWORDS.has(name);
    },

    /**
     * Parse `#include <iostream>` or `#include "myheader.h"`.
     * `moduleName` is the path without its delimiters and `signature` is the
     * trimmed text of the whole directive. Returns `null` when the node has
     * neither a `system_lib_string` nor a `string_literal` with `string_content`.
     */
    extractImport: (node, source) => {
      const importText = source.substring(node.startIndex, node.endIndex).trim();
      const systemLib = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string');
      if (systemLib) {
        return { moduleName: getNodeText(systemLib, source).replace(/^<|>$/g, ''), signature: importText };
      }
      const stringLiteral = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
      if (stringLiteral) {
        const stringContent = stringLiteral.namedChildren.find((c: SyntaxNode) => c.type === 'string_content');
        if (stringContent) {
          return { moduleName: getNodeText(stringContent, source), signature: importText };
        }
      }
      return null;
    },
  };