import type { Node as SyntaxNode } from 'web-tree-sitter';
import { getChildByField, getNodeText } from '../tree-sitter-helpers';
import type { LanguageExtractor } from '../tree-sitter-types';

/**
 * Find the function NAME's `qualified_identifier` (`Foo::bar`) inside a
 * declarator, skipping the `parameter_list` — a parameter with a qualified type
 * (`const std::string& x`) must NOT be mistaken for the method name. Without the
 * skip, a plain free function `std::string TableFileName(const std::string&...)`
 * was named `string` (from the parameter type), so calls to it never resolved
 * and its file looked like nothing depended on it.
 *
 * @param declarator - The `declarator` child of a function definition.
 * @returns The first `qualified_identifier` found in breadth-first order, or
 *   `undefined` when the declarator contains none outside the skipped subtrees.
 */
function findDeclaratorQualifiedId(declarator: SyntaxNode): SyntaxNode | undefined {
  // Breadth-first walk. A read cursor is used instead of `shift()` so that
  // dequeuing does not re-index the array; the visiting order is unchanged.
  const queue: SyntaxNode[] = [declarator];
  for (let head = 0; head < queue.length; head++) {
    const current = queue[head];
    if (!current) continue;
    if (current.type === 'qualified_identifier') return current;
    for (let i = 0; i < current.namedChildCount; i++) {
      const child = current.namedChild(i);
      // Don't descend into parameters or the trailing return type — their types
      // (`const std::string&`, `-> std::string`) aren't the function name.
      if (child && child.type !== 'parameter_list' && child.type !== 'trailing_return_type') {
        queue.push(child);
      }
    }
  }
  return undefined;
}

/**
 * Split the qualified name of a definition's declarator on `::`.
 *
 * @param node - A node that has a `declarator` field.
 * @param source - Full source text the node was parsed from.
 * @returns The non-empty `::`-separated segments of the qualified identifier
 *   (a leading `::` yields no empty segment), or `undefined` when the node has
 *   no declarator or the declarator has no qualified identifier.
 */
function declaratorQualifiedParts(node: SyntaxNode, source: string): string[] | undefined {
  const declarator = getChildByField(node, 'declarator');
  if (!declarator) return undefined;
  const qid = findDeclaratorQualifiedId(declarator);
  if (!qid) return undefined;
  return getNodeText(qid, source).trim().split('::').filter(Boolean);
}

/**
 * Resolve the unqualified name of an out-of-line definition such as
 * `void Foo::bar() {}` (→ `bar`).
 *
 * @returns The last `::` segment, or `undefined` when the declarator carries
 *   no qualified identifier.
 */
function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined {
  const parts = declaratorQualifiedParts(node, source);
  return parts ? parts[parts.length - 1] : undefined;
}

/**
 * Resolve the qualifying scope of an out-of-line definition such as
 * `void ns::Foo::bar() {}` (→ `ns::Foo`).
 *
 * @returns Every segment but the last, re-joined with `::`, or `undefined`
 *   when the name has fewer than two segments.
 */
function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined {
  const parts = declaratorQualifiedParts(node, source);
  return parts && parts.length > 1 ? parts.slice(0, -1).join('::') : undefined;
}

/**
 * Shared by the C and C++ extractors.
 *
 * `typedef enum { ... } name;` or `typedef struct { ... } name;`
 * The inner enum_specifier/struct_specifier is anonymous, but we want the
 * typedef name to become the enum/struct node name. Only specifiers that have
 * a `body` count; a typedef of a bare `struct foo` reference stays an alias.
 */
const resolveTypedefKind: NonNullable<LanguageExtractor['resolveTypeAliasKind']> = (node, _source) => {
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child) continue;
    if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum';
    if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct';
  }
  return undefined;
};

/**
 * Shared by the C and C++ extractors.
 *
 * Handles both include forms: `#include <stdio.h>` and `#include "myheader.h"`.
 * The module name is the path without its delimiters; the signature is the
 * trimmed text of the whole directive. Returns `null` when neither a
 * `system_lib_string` nor a `string_literal` with `string_content` is present.
 */
const extractIncludeImport: NonNullable<LanguageExtractor['extractImport']> = (node, source) => {
  const importText = source.substring(node.startIndex, node.endIndex).trim();

  const systemLib = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string');
  if (systemLib) {
    return {
      // Strip the surrounding angle brackets.
      moduleName: getNodeText(systemLib, source).replace(/^<|>$/g, ''),
      signature: importText,
    };
  }

  const stringLiteral = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
  if (stringLiteral) {
    const stringContent = stringLiteral.namedChildren.find((c: SyntaxNode) => c.type === 'string_content');
    if (stringContent) {
      return { moduleName: getNodeText(stringContent, source), signature: importText };
    }
  }

  return null;
};

/** C++ keywords that tree-sitter occasionally reports as function/method names. */
const CPP_KEYWORDS_MISPARSED_AS_NAMES: ReadonlySet<string> = new Set([
  'switch',
  'if',
  'for',
  'while',
  'do',
  'case',
  'return',
]);

/** Extraction rules for C sources. */
export const cExtractor: LanguageExtractor = {
  functionTypes: ['function_definition'],
  classTypes: [],
  methodTypes: [],
  interfaceTypes: [],
  structTypes: ['struct_specifier'],
  enumTypes: ['enum_specifier'],
  enumMemberTypes: ['enumerator'],
  typeAliasTypes: ['type_definition'], // typedef
  importTypes: ['preproc_include'],
  callTypes: ['call_expression'],
  variableTypes: ['declaration'],
  nameField: 'declarator',
  bodyField: 'body',
  paramsField: 'parameters',
  resolveTypeAliasKind: resolveTypedefKind,
  extractImport: extractIncludeImport,
};

/** Extraction rules for C++ sources. */
export const cppExtractor: LanguageExtractor = {
  functionTypes: ['function_definition'],
  classTypes: ['class_specifier'],
  methodTypes: ['function_definition'],
  interfaceTypes: [],
  structTypes: ['struct_specifier'],
  enumTypes: ['enum_specifier'],
  enumMemberTypes: ['enumerator'],
  typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
  importTypes: ['preproc_include'],
  callTypes: ['call_expression'],
  variableTypes: ['declaration'],
  nameField: 'declarator',
  bodyField: 'body',
  paramsField: 'parameters',
  resolveName: extractCppQualifiedMethodName,
  getReceiverType: extractCppReceiverType,
  getVisibility: (node) => {
    // An access label (`public:` / `private:` / `protected:`) governs the
    // members that FOLLOW it, so the label that applies to `node` is the
    // nearest one among its preceding siblings. Scanning the parent from the
    // top would report the class's first label for every member.
    for (let sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
      if (sibling.type !== 'access_specifier') continue;
      const text = sibling.text;
      if (text.includes('public')) return 'public';
      if (text.includes('private')) return 'private';
      if (text.includes('protected')) return 'protected';
    }
    // No label precedes the member; the class/struct default is not inferred here.
    return undefined;
  },
  resolveTypeAliasKind: resolveTypedefKind,
  isMisparsedFunction: (name) => {
    // C++ macros like NLOHMANN_JSON_NAMESPACE_BEGIN cause tree-sitter to misparse
    // namespace blocks as function_definitions (e.g. name = "namespace detail").
    // Match the keyword as a whole word so that legitimate identifiers which
    // merely begin with it (`namespace_lookup`, `namespaceURI`) are kept.
    if (/^namespace\b/.test(name)) return true;
    // Also filter C++ keywords that tree-sitter occasionally misinterprets as
    // function/method names (e.g. switch statements inside macro-confused scopes).
    return CPP_KEYWORDS_MISPARSED_AS_NAMES.has(name);
  },
  extractImport: extractIncludeImport,
};