/**
 * Generated-file detection for symbol-disambiguation down-ranking.
 *
 * When a query like "Send" matches 17 symbols across protobuf scaffolding,
 * test mocks, and the hand-written implementation, the FTS ranker often
 * surfaces the generated stubs first because their names are identical
 * to the implementation's name (validated empirically on cosmos-sdk —
 * see project_go_multi_module_audit memory). Generated stubs frequently
 * have no body to trace from, so the agent ends up reading source anyway.
 *
 * This helper is a pure path-based classifier consulted at disambiguation
 * time (findSymbol / findAllSymbols / codegraph_search formatting), NOT
 * a hard filter — generated nodes are still in the graph and remain
 * reachable; they just rank LAST when there's a real implementation
 * with the same name.
 *
 * Scope: filename patterns only. Most generated files carry a
 * tool-specific marker right before the extension (`.pb.go`,
 * `_grpc.pb.go`, `.g.dart`, `_pb2.py`), and that covers ~all of what we
 * saw in the Go audit. A future addition would be scanning for the
 * canonical `// Code generated by` header during extraction, for the rare
 * files that defy the naming convention.
 */
const GENERATED_PATTERNS: ReadonlyArray<RegExp> = [
  // Go — protobuf / gRPC / pulsar
  /\.pb\.go$/,
  /\.pulsar\.go$/,
  /_grpc\.pb\.go$/,

  // Go — mockgen output. The conventional form is a `mock_` filename
  // prefix; many projects (cosmos-sdk uses `expected_*_mocks.go`) rename
  // to `*_mock.go` / `*_mocks.go`. Matching either suffix catches both
  // conventions without false-positive risk on hand-written sources.
  /_mock\.go$/,
  /_mocks\.go$/,
  // The `mock_` prefix must be anchored at the start of the *basename*,
  // not the start of the whole string, otherwise any path containing a
  // directory (`x/bank/testutil/mock_keeper.go`) would never match.
  // Both `/` and `\` are accepted as separators so Windows-style paths
  // classify the same way.
  /(?:^|[\\\/])mock_[^\\\/]+\.go$/,

  // TypeScript / JavaScript — common codegen suffixes (Apollo / GraphQL
  // codegen, Prisma, Hasura, ts-proto, gRPC-web, swagger-codegen).
  /\.generated\.[jt]sx?$/,
  /\.gen\.[jt]sx?$/,
  /\.pb\.[jt]s$/,
  /_pb\.[jt]s$/,
  /_grpc_pb\.[jt]s$/,

  // Python — protobuf / gRPC / openapi-codegen
  /_pb2(_grpc)?\.py$/,
  /_pb2\.pyi$/,

  // C++ — protobuf
  /\.pb\.(cc|h)$/,

  // C# — protobuf / gRPC (protoc-gen-csharp puts output under obj/ but
  // many projects also commit *.g.cs and *Grpc.cs siblings)
  /\.g\.cs$/,
  /Grpc\.cs$/,

  // Java — protobuf / gRPC: protoc-gen-java emits `*OuterClass.java`,
  // protoc-gen-grpc-java emits `*Grpc.java`. The XxxImplBase abstract
  // class lives inside Xxx*Grpc.java.
  /OuterClass\.java$/,
  /Grpc\.java$/,

  // Swift — protobuf
  /\.pb\.swift$/,

  // Dart — build_runner / freezed / json_serializable / chopper
  /\.g\.dart$/,
  /\.freezed\.dart$/,
  /\.pb\.dart$/,
  /\.pbgrpc\.dart$/,
  /\.chopper\.dart$/,

  // Rust — common build.rs OUT_DIR outputs are usually outside the source
  // tree, but in-tree generated files often use `*.generated.rs`.
  /\.generated\.rs$/,
];

/**
 * Whether `filePath` looks like a tool-generated source file based on
 * its filename. Path-only — does not read content. The result is a
 * relevance hint for disambiguation, not a hard claim.
 *
 * @param filePath - Relative or absolute path (or bare filename) of the
 *   file to classify. Either `/` or `\` may be used as the separator.
 * @returns `true` when the path matches at least one entry of
 *   `GENERATED_PATTERNS`; `false` otherwise (including for an empty string).
 */
export function isGeneratedFile(filePath: string): boolean {
  // None of the patterns use the `g` or `y` flag, so `test` is stateless
  // and the shared RegExp objects are safe to reuse across calls.
  return GENERATED_PATTERNS.some((pattern) => pattern.test(filePath));
}