| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- /**
- * Generated-file detection for symbol-disambiguation down-ranking.
- *
- * When a query like "Send" matches 17 symbols across protobuf scaffolding,
- * test mocks, and the hand-written implementation, the FTS ranker often
- * surfaces the generated stubs first because their names are identical
- * to the implementation's name (validated empirically on cosmos-sdk —
- * see project_go_multi_module_audit memory). Generated stubs frequently
- * have no body to trace from, so the agent ends up reading source anyway.
- *
- * This helper is a pure path-based classifier consulted at disambiguation
- * time (findSymbol / findAllSymbols / codegraph_search formatting), NOT
- * a hard filter — generated nodes are still in the graph and remain
- * reachable; they just rank LAST when there's a real implementation
- * with the same name.
- *
- * Scope: suffix patterns only. Most generated files follow the
- * `<basename>.<tool>.<ext>` convention (`.pb.go`, `_grpc.pb.go`,
- * `.g.dart`, `_pb2.py`), and that covers ~all of what we saw in the
- * Go audit. A future addition would be scanning for the canonical
- * `// Code generated by` header during extraction, for the rare files
- * that defy the suffix convention.
- */
/**
 * Filename patterns that mark a path as tool-generated output.
 *
 * Every pattern is tested against the FULL path string, not just the
 * basename. Suffix patterns are anchored with `$` and therefore work on
 * any path shape. The single prefix pattern (`mock_<src>.go`) is anchored
 * to "start of string or a path separator" so that it matches the
 * basename wherever the file lives in the tree.
 *
 * None of the patterns use the `g` or `y` flag, so `RegExp.prototype.test`
 * is stateless and the shared instances are safe to reuse across calls.
 */
const GENERATED_PATTERNS: ReadonlyArray<RegExp> = [
  // Go — protobuf / gRPC / pulsar.
  // `_grpc.pb.go` is already covered by `.pb.go`; it is listed separately
  // only to document the convention.
  /\.pb\.go$/,
  /\.pulsar\.go$/,
  /_grpc\.pb\.go$/,
  // Go — mockgen output. Default emits `mock_<src>.go`; many projects
  // (cosmos-sdk uses `expected_*_mocks.go`) rename to `*_mock.go` /
  // `*_mocks.go`. Matching either suffix catches both conventions
  // without false-positive risk on hand-written sources.
  /_mock\.go$/,
  /_mocks\.go$/,
  // Basename starts with `mock_`. The leading group accepts start-of-string
  // (bare filename) or a `/` / `\` separator (file inside a directory);
  // a plain `^` anchor would miss every path that includes a directory.
  // The character class excludes both separators so the match cannot span
  // a directory named `mock_*`.
  /(?:^|[\\/])mock_[^\\/]+\.go$/,
  // TypeScript / JavaScript — common codegen suffixes (Apollo / GraphQL
  // codegen, Prisma, Hasura, ts-proto, gRPC-web, swagger-codegen).
  // `_grpc_pb.[jt]s` is already covered by `_pb.[jt]s`; kept for clarity.
  /\.generated\.[jt]sx?$/,
  /\.gen\.[jt]sx?$/,
  /\.pb\.[jt]s$/,
  /_pb\.[jt]s$/,
  /_grpc_pb\.[jt]s$/,
  // Python — protobuf / gRPC / openapi-codegen
  /_pb2(_grpc)?\.py$/,
  /_pb2\.pyi$/,
  // C++ — protobuf
  /\.pb\.(cc|h)$/,
  // C# — protobuf / gRPC (protoc-gen-csharp puts output under obj/ but
  // many projects also commit *.g.cs and *Grpc.cs siblings)
  /\.g\.cs$/,
  /Grpc\.cs$/,
  // Java — protobuf / gRPC: protoc-gen-java emits `*OuterClass.java`,
  // protoc-gen-grpc-java emits `*Grpc.java`. The XxxImplBase abstract
  // class lives inside Xxx*Grpc.java.
  /OuterClass\.java$/,
  /Grpc\.java$/,
  // Swift — protobuf
  /\.pb\.swift$/,
  // Dart — build_runner / freezed / json_serializable / chopper
  /\.g\.dart$/,
  /\.freezed\.dart$/,
  /\.pb\.dart$/,
  /\.pbgrpc\.dart$/,
  /\.chopper\.dart$/,
  // Rust — common build.rs OUT_DIR outputs are usually outside the source
  // tree, but in-tree generated files often use `*.generated.rs`.
  /\.generated\.rs$/,
];
/**
 * Reports whether `filePath` matches any known generated-file naming
 * pattern.
 *
 * The decision is made from the path string alone; the file is never
 * opened. Callers should treat a `true` result as a ranking hint for
 * disambiguation rather than proof that the file is generated.
 *
 * @param filePath - Path (or bare filename) of the source file to classify.
 * @returns `true` when at least one entry of `GENERATED_PATTERNS` matches,
 *   `false` otherwise.
 */
export function isGeneratedFile(filePath: string): boolean {
  for (const pattern of GENERATED_PATTERNS) {
    // First hit is enough — the patterns are alternatives, not a pipeline.
    if (pattern.test(filePath)) {
      return true;
    }
  }
  return false;
}
|