| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- import type { Node as SyntaxNode } from 'web-tree-sitter';
- import { getChildByField, getNodeText } from '../tree-sitter-helpers';
- import type { LanguageExtractor } from '../tree-sitter-types';
/**
 * Node types below a function declarator whose subtrees never contain the
 * function's own name. Qualified identifiers found inside them are parameter
 * types, return types, template arguments, exception specifications,
 * constraints or attributes — e.g. `const std::string&`, `-> std::string`,
 * `foo<std::string>`, `noexcept(std::is_nothrow_move_constructible<T>::value)`.
 */
const NON_NAME_SUBTREE_TYPES: ReadonlySet<string> = new Set([
  'parameter_list',
  'trailing_return_type',
  'template_argument_list',
  'noexcept',
  'throw_specifier',
  'requires_clause',
  'attribute_specifier',
  'attribute_declaration',
]);

/**
 * Find the function NAME's `qualified_identifier` (`Foo::bar`) inside a
 * declarator.
 *
 * The search is breadth-first, so the shallowest `qualified_identifier` wins,
 * and it never descends into the subtrees listed in `NON_NAME_SUBTREE_TYPES`.
 * Without that skip, a plain free function such as
 * `std::string TableFileName(const std::string&...)` was named `string` (from
 * the parameter type), so calls to it never resolved and its file looked like
 * nothing depended on it. The same applies to qualified names that appear in
 * `noexcept(...)`, `throw(...)`, `requires` clauses, attributes and template
 * argument lists.
 *
 * @param declarator - The declarator node to search (the node itself is
 *   tested first).
 * @returns The first matching `qualified_identifier`, or `undefined` when the
 *   declarator does not name a qualified function.
 */
function findDeclaratorQualifiedId(declarator: SyntaxNode): SyntaxNode | undefined {
  const queue: SyntaxNode[] = [declarator];
  // Walk the queue with a cursor instead of `shift()`: same visiting order,
  // but no O(n) re-indexing per dequeue and no non-null assertion.
  for (let head = 0; head < queue.length; head++) {
    const current = queue[head];
    if (current === undefined) continue;
    if (current.type === 'qualified_identifier') return current;
    for (let i = 0; i < current.namedChildCount; i++) {
      const child = current.namedChild(i);
      if (child && !NON_NAME_SUBTREE_TYPES.has(child.type)) {
        queue.push(child);
      }
    }
  }
  return undefined;
}
/**
 * Split a C++ qualified name on the `::` separators that sit OUTSIDE template
 * angle brackets, trimming each component and dropping empty ones (such as the
 * one produced by a leading global-scope `::`).
 *
 * A plain `split('::')` cuts inside template arguments, turning
 * `Foo::bar<std::string>` into `['Foo', 'bar<std', 'string>']`.
 *
 * @param text - Source text of a qualified identifier.
 * @returns The scope components followed by the final name component.
 */
function splitQualifiedName(text: string): string[] {
  const parts: string[] = [];
  let depth = 0;
  let start = 0;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (ch === '<') {
      depth++;
    } else if (ch === '>') {
      // Clamp at zero so a trailing `operator>` / `operator->` cannot drive
      // the depth negative.
      if (depth > 0) depth--;
    } else if (ch === ':' && text[i + 1] === ':' && depth === 0) {
      parts.push(text.slice(start, i));
      i++; // consume the second ':'
      start = i + 1;
    }
  }
  parts.push(text.slice(start));
  return parts.map((part) => part.trim()).filter((part) => part.length > 0);
}

/**
 * Locate the qualified function name in `node`'s `declarator` field and return
 * its components, or `undefined` when there is no declarator or it contains no
 * qualified identifier for the name.
 */
function cppQualifiedNameParts(node: SyntaxNode, source: string): string[] | undefined {
  const declarator = getChildByField(node, 'declarator');
  if (!declarator) return undefined;
  const qid = findDeclaratorQualifiedId(declarator);
  if (!qid) return undefined;
  return splitQualifiedName(getNodeText(qid, source));
}

/**
 * Resolve the unqualified name of an out-of-class definition such as
 * `void Foo::bar() {}` — the last component of the qualified identifier
 * (`bar`). Template arguments on the name are kept as written.
 *
 * @returns The final name component, or `undefined` when the definition is not
 *   written with a qualified name.
 */
function extractCppQualifiedMethodName(node: SyntaxNode, source: string): string | undefined {
  const parts = cppQualifiedNameParts(node, source);
  if (!parts) return undefined;
  return parts[parts.length - 1];
}

/**
 * Resolve the scope a qualified definition belongs to — everything before the
 * final component, re-joined with `::` (`ns::Foo` for `ns::Foo::bar`).
 *
 * @returns The scope text, or `undefined` when the name has no scope part or
 *   the definition is not written with a qualified name.
 */
function extractCppReceiverType(node: SyntaxNode, source: string): string | undefined {
  const parts = cppQualifiedNameParts(node, source);
  if (!parts || parts.length < 2) return undefined;
  return parts.slice(0, -1).join('::');
}
/**
 * Extraction settings for C sources: which node types count as functions,
 * structs, enums, typedefs, includes, calls and variables, which fields hold
 * the name/body/parameters, and how typedefs and `#include` lines are resolved.
 */
export const cExtractor: LanguageExtractor = {
  functionTypes: ['function_definition'],
  classTypes: [],
  methodTypes: [],
  interfaceTypes: [],
  structTypes: ['struct_specifier'],
  enumTypes: ['enum_specifier'],
  enumMemberTypes: ['enumerator'],
  typeAliasTypes: ['type_definition'], // typedef
  importTypes: ['preproc_include'],
  callTypes: ['call_expression'],
  variableTypes: ['declaration'],
  nameField: 'declarator',
  bodyField: 'body',
  paramsField: 'parameters',
  /**
   * Classify `typedef enum { ... } name;` / `typedef struct { ... } name;`.
   * The inner specifier is anonymous, so the typedef name should become the
   * enum/struct name. Returns 'enum' or 'struct' for the first named child
   * that is such a specifier with a `body` field, otherwise `undefined`.
   */
  resolveTypeAliasKind: (node, _source) => {
    const total = node.namedChildCount;
    let index = 0;
    while (index < total) {
      const candidate = node.namedChild(index);
      index += 1;
      if (!candidate) continue;
      // Only specifiers that carry a body count; a bare `struct foo` reference
      // inside a typedef is skipped.
      if (candidate.type === 'enum_specifier') {
        if (getChildByField(candidate, 'body')) return 'enum';
      } else if (candidate.type === 'struct_specifier') {
        if (getChildByField(candidate, 'body')) return 'struct';
      }
    }
    return undefined;
  },
  /**
   * Turn an include directive into `{ moduleName, signature }`.
   * `#include <stdio.h>` yields `stdio.h`; `#include "myheader.h"` yields
   * `myheader.h`. The signature is the trimmed directive text. Returns `null`
   * when neither form is found.
   */
  extractImport: (node, source) => {
    const signature = source.substring(node.startIndex, node.endIndex).trim();
    const hasType = (wanted: string) => (c: SyntaxNode) => c.type === wanted;

    // Angle-bracket form takes precedence: strip the surrounding < and >.
    const angled = node.namedChildren.find(hasType('system_lib_string'));
    if (systemLibPresent(angled)) {
      const moduleName = getNodeText(angled, source).replace(/^<|>$/g, '');
      return { moduleName, signature };
    }

    // Quoted form: the path is the string_content inside the string_literal.
    const quoted = node.namedChildren.find(hasType('string_literal'));
    const inner = quoted ? quoted.namedChildren.find(hasType('string_content')) : undefined;
    if (!inner) return null;
    return { moduleName: getNodeText(inner, source), signature };

    /** Truthiness check written as a guard so `angled` narrows to a node. */
    function systemLibPresent(candidate: SyntaxNode | undefined | null): candidate is SyntaxNode {
      return Boolean(candidate);
    }
  },
};
/**
 * Extraction settings for C++ sources: which node types count as functions,
 * classes, methods, structs, enums, aliases, includes, calls and variables,
 * which fields hold the name/body/parameters, and the C++-specific hooks for
 * qualified names, member visibility, typedef classification, misparse
 * filtering and `#include` resolution.
 */
export const cppExtractor: LanguageExtractor = {
  functionTypes: ['function_definition'],
  classTypes: ['class_specifier'],
  methodTypes: ['function_definition'],
  interfaceTypes: [],
  structTypes: ['struct_specifier'],
  enumTypes: ['enum_specifier'],
  enumMemberTypes: ['enumerator'],
  typeAliasTypes: ['type_definition', 'alias_declaration'], // typedef and using
  importTypes: ['preproc_include'],
  callTypes: ['call_expression'],
  variableTypes: ['declaration'],
  nameField: 'declarator',
  bodyField: 'body',
  paramsField: 'parameters',
  resolveName: extractCppQualifiedMethodName,
  getReceiverType: extractCppReceiverType,
  /**
   * Visibility of a member, taken from the nearest `access_specifier` sibling
   * that PRECEDES the node in its parent. A specifier that comes after the
   * node does not apply to it, so the scan stops once it reaches the node's
   * position. Returns `undefined` when the node has no parent or no specifier
   * precedes it; the language's implicit default (private for `class`, public
   * for `struct`) is not applied here.
   */
  getVisibility: (node) => {
    const parent = node.parent;
    if (!parent) return undefined;
    let visibility: 'public' | 'private' | 'protected' | undefined;
    for (let i = 0; i < parent.childCount; i++) {
      const sibling = parent.child(i);
      if (!sibling) continue;
      // Children are in source order: anything at or past the node's start
      // cannot govern the node's access level.
      if (sibling.startIndex >= node.startIndex) break;
      if (sibling.type !== 'access_specifier') continue;
      const text = sibling.text;
      if (text.includes('public')) visibility = 'public';
      else if (text.includes('private')) visibility = 'private';
      else if (text.includes('protected')) visibility = 'protected';
    }
    return visibility;
  },
  /**
   * Classify `typedef enum { ... } name;` / `typedef struct { ... } name;`.
   * Returns 'enum' or 'struct' for the first named child that is such a
   * specifier with a `body` field, otherwise `undefined`.
   */
  resolveTypeAliasKind: (node, _source) => {
    // C++ typedef: `typedef enum { ... } name;` or `typedef struct { ... } name;`
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (!child) continue;
      if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum';
      if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct';
    }
    return undefined;
  },
  /**
   * Report names that cannot be real functions and therefore come from a
   * misparse.
   */
  isMisparsedFunction: (name) => {
    // C++ macros like NLOHMANN_JSON_NAMESPACE_BEGIN cause tree-sitter to misparse
    // namespace blocks as function_definitions (e.g. name = "namespace detail").
    // Also filter C++ keywords that tree-sitter occasionally misinterprets as
    // function/method names (e.g. switch statements inside macro-confused scopes).
    // The word boundary keeps legitimate identifiers that merely start with
    // "namespace" (e.g. `namespace_of`, `namespaces`) from being discarded.
    if (/^namespace\b/.test(name)) return true;
    const cppKeywords = ['switch', 'if', 'for', 'while', 'do', 'case', 'return'];
    return cppKeywords.includes(name);
  },
  /**
   * Turn an include directive into `{ moduleName, signature }`.
   * `#include <iostream>` yields `iostream`; `#include "myheader.h"` yields
   * `myheader.h`. The signature is the trimmed directive text. Returns `null`
   * when neither form is found.
   */
  extractImport: (node, source) => {
    const importText = source.substring(node.startIndex, node.endIndex).trim();
    // C++ includes: #include <iostream>, #include "myheader.h"
    const systemLib = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string');
    if (systemLib) {
      return { moduleName: getNodeText(systemLib, source).replace(/^<|>$/g, ''), signature: importText };
    }
    const stringLiteral = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal');
    if (stringLiteral) {
      const stringContent = stringLiteral.namedChildren.find((c: SyntaxNode) => c.type === 'string_content');
      if (stringContent) {
        return { moduleName: getNodeText(stringContent, source), signature: importText };
      }
    }
    return null;
  },
};
|