|
@@ -14,14 +14,13 @@ import {
|
|
|
FileRecord,
|
|
FileRecord,
|
|
|
ExtractionResult,
|
|
ExtractionResult,
|
|
|
ExtractionError,
|
|
ExtractionError,
|
|
|
- CodeGraphConfig,
|
|
|
|
|
} from '../types';
|
|
} from '../types';
|
|
|
import { QueryBuilder } from '../db/queries';
|
|
import { QueryBuilder } from '../db/queries';
|
|
|
import { extractFromSource } from './tree-sitter';
|
|
import { extractFromSource } from './tree-sitter';
|
|
|
-import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
|
|
|
|
|
|
|
+import { detectLanguage, isSourceFile, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
|
|
|
import { logDebug, logWarn } from '../errors';
|
|
import { logDebug, logWarn } from '../errors';
|
|
|
import { validatePathWithinRoot, normalizePath } from '../utils';
|
|
import { validatePathWithinRoot, normalizePath } from '../utils';
|
|
|
-import picomatch from 'picomatch';
|
|
|
|
|
|
|
+import ignore, { Ignore } from 'ignore';
|
|
|
import { detectFrameworks } from '../resolution/frameworks';
|
|
import { detectFrameworks } from '../resolution/frameworks';
|
|
|
import type { ResolutionContext } from '../resolution/types';
|
|
import type { ResolutionContext } from '../resolution/types';
|
|
|
|
|
|
|
@@ -94,36 +93,11 @@ export function hashContent(content: string): string {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
- * Check if a path matches any glob pattern (simplified)
|
|
|
|
|
|
|
+ * Skip files larger than this (bytes). Generated bundles, minified JS, and
|
|
|
|
|
+ * vendored blobs blow the WASM heap and the worker-recycle budget for no useful
|
|
|
|
|
+ * symbols. 1 MB covers essentially all hand-written source.
|
|
|
*/
|
|
*/
|
|
|
-function matchesGlob(filePath: string, pattern: string): boolean {
|
|
|
|
|
- filePath = normalizePath(filePath);
|
|
|
|
|
- return picomatch.isMatch(filePath, pattern, { dot: true });
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-/**
|
|
|
|
|
- * Check if a file should be included based on config
|
|
|
|
|
- */
|
|
|
|
|
-export function shouldIncludeFile(
|
|
|
|
|
- filePath: string,
|
|
|
|
|
- config: CodeGraphConfig
|
|
|
|
|
-): boolean {
|
|
|
|
|
- // Check exclude patterns first
|
|
|
|
|
- for (const pattern of config.exclude) {
|
|
|
|
|
- if (matchesGlob(filePath, pattern)) {
|
|
|
|
|
- return false;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // Check include patterns
|
|
|
|
|
- for (const pattern of config.include) {
|
|
|
|
|
- if (matchesGlob(filePath, pattern)) {
|
|
|
|
|
- return true;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return false;
|
|
|
|
|
-}
|
|
|
|
|
|
|
+const MAX_FILE_SIZE = 1024 * 1024;
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
* Collect git-visible files (tracked + untracked, .gitignore-respected) from the
|
|
* Collect git-visible files (tracked + untracked, .gitignore-respected) from the
|
|
@@ -230,7 +204,7 @@ interface GitChanges {
|
|
|
* Use `git status` to detect changed files instead of scanning every file.
|
|
* Use `git status` to detect changed files instead of scanning every file.
|
|
|
* Returns null on failure so callers fall back to full scan.
|
|
* Returns null on failure so callers fall back to full scan.
|
|
|
*/
|
|
*/
|
|
|
-function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChanges | null {
|
|
|
|
|
|
|
+function getGitChangedFiles(rootDir: string): GitChanges | null {
|
|
|
try {
|
|
try {
|
|
|
const output = execFileSync(
|
|
const output = execFileSync(
|
|
|
'git',
|
|
'git',
|
|
@@ -248,8 +222,8 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange
|
|
|
const statusCode = line.substring(0, 2);
|
|
const statusCode = line.substring(0, 2);
|
|
|
const filePath = normalizePath(line.substring(3));
|
|
const filePath = normalizePath(line.substring(3));
|
|
|
|
|
|
|
|
- // Skip files that don't match include/exclude config
|
|
|
|
|
- if (!shouldIncludeFile(filePath, config)) continue;
|
|
|
|
|
|
|
+ // Skip non-source files (git status already omits .gitignored paths).
|
|
|
|
|
+ if (!isSourceFile(filePath)) continue;
|
|
|
|
|
|
|
|
if (statusCode === '??') {
|
|
if (statusCode === '??') {
|
|
|
added.push(filePath);
|
|
added.push(filePath);
|
|
@@ -268,20 +242,14 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
- * Marker file name that indicates a directory (and all children) should be skipped
|
|
|
|
|
- */
|
|
|
|
|
-const CODEGRAPH_IGNORE_MARKER = '.codegraphignore';
|
|
|
|
|
-
|
|
|
|
|
-/**
|
|
|
|
|
- * Recursively scan directory for source files.
|
|
|
|
|
|
|
+ * Recursively scan a directory for source files.
|
|
|
*
|
|
*
|
|
|
- * In git repos, uses `git ls-files` to get the file list (inherently
|
|
|
|
|
- * respects .gitignore at all levels), then filters by config include patterns.
|
|
|
|
|
- * Falls back to filesystem walk for non-git projects.
|
|
|
|
|
|
|
+ * In git repos, uses `git ls-files` (inherently respects .gitignore at all
|
|
|
|
|
+ * levels), then keeps files with a supported source extension. For non-git
|
|
|
|
|
+ * projects, falls back to a filesystem walk that parses .gitignore itself.
|
|
|
*/
|
|
*/
|
|
|
export function scanDirectory(
|
|
export function scanDirectory(
|
|
|
rootDir: string,
|
|
rootDir: string,
|
|
|
- config: CodeGraphConfig,
|
|
|
|
|
onProgress?: (current: number, file: string) => void
|
|
onProgress?: (current: number, file: string) => void
|
|
|
): string[] {
|
|
): string[] {
|
|
|
// Fast path: use git to get all visible files (respects .gitignore everywhere)
|
|
// Fast path: use git to get all visible files (respects .gitignore everywhere)
|
|
@@ -290,7 +258,7 @@ export function scanDirectory(
|
|
|
const files: string[] = [];
|
|
const files: string[] = [];
|
|
|
let count = 0;
|
|
let count = 0;
|
|
|
for (const filePath of gitFiles) {
|
|
for (const filePath of gitFiles) {
|
|
|
- if (shouldIncludeFile(filePath, config)) {
|
|
|
|
|
|
|
+ if (isSourceFile(filePath)) {
|
|
|
files.push(filePath);
|
|
files.push(filePath);
|
|
|
count++;
|
|
count++;
|
|
|
onProgress?.(count, filePath);
|
|
onProgress?.(count, filePath);
|
|
@@ -300,7 +268,7 @@ export function scanDirectory(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Fallback: walk filesystem for non-git projects
|
|
// Fallback: walk filesystem for non-git projects
|
|
|
- return scanDirectoryWalk(rootDir, config, onProgress);
|
|
|
|
|
|
|
+ return scanDirectoryWalk(rootDir, onProgress);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -309,7 +277,6 @@ export function scanDirectory(
|
|
|
*/
|
|
*/
|
|
|
export async function scanDirectoryAsync(
|
|
export async function scanDirectoryAsync(
|
|
|
rootDir: string,
|
|
rootDir: string,
|
|
|
- config: CodeGraphConfig,
|
|
|
|
|
onProgress?: (current: number, file: string) => void
|
|
onProgress?: (current: number, file: string) => void
|
|
|
): Promise<string[]> {
|
|
): Promise<string[]> {
|
|
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
@@ -317,7 +284,7 @@ export async function scanDirectoryAsync(
|
|
|
const files: string[] = [];
|
|
const files: string[] = [];
|
|
|
let count = 0;
|
|
let count = 0;
|
|
|
for (const filePath of gitFiles) {
|
|
for (const filePath of gitFiles) {
|
|
|
- if (shouldIncludeFile(filePath, config)) {
|
|
|
|
|
|
|
+ if (isSourceFile(filePath)) {
|
|
|
files.push(filePath);
|
|
files.push(filePath);
|
|
|
count++;
|
|
count++;
|
|
|
onProgress?.(count, filePath);
|
|
onProgress?.(count, filePath);
|
|
@@ -330,7 +297,7 @@ export async function scanDirectoryAsync(
|
|
|
return files;
|
|
return files;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- return scanDirectoryWalk(rootDir, config, onProgress);
|
|
|
|
|
|
|
+ return scanDirectoryWalk(rootDir, onProgress);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -338,14 +305,44 @@ export async function scanDirectoryAsync(
|
|
|
*/
|
|
*/
|
|
|
function scanDirectoryWalk(
|
|
function scanDirectoryWalk(
|
|
|
rootDir: string,
|
|
rootDir: string,
|
|
|
- config: CodeGraphConfig,
|
|
|
|
|
onProgress?: (current: number, file: string) => void
|
|
onProgress?: (current: number, file: string) => void
|
|
|
): string[] {
|
|
): string[] {
|
|
|
const files: string[] = [];
|
|
const files: string[] = [];
|
|
|
let count = 0;
|
|
let count = 0;
|
|
|
const visitedDirs = new Set<string>();
|
|
const visitedDirs = new Set<string>();
|
|
|
|
|
|
|
|
- function walk(dir: string): void {
|
|
|
|
|
|
|
+ // A .gitignore matcher scoped to the directory that declared it. Patterns in
|
|
|
|
|
+ // a nested .gitignore are relative to that directory, so we keep the dir
|
|
|
|
|
+ // alongside the matcher and test paths relative to it — mirroring how git
|
|
|
|
|
+ // applies .gitignore files at every level.
|
|
|
|
|
+ interface ScopedIgnore {
|
|
|
|
|
+ dir: string;
|
|
|
|
|
+ ig: Ignore;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const loadIgnore = (dir: string): ScopedIgnore | null => {
|
|
|
|
|
+ try {
|
|
|
|
|
+ const giPath = path.join(dir, '.gitignore');
|
|
|
|
|
+ if (fs.existsSync(giPath)) {
|
|
|
|
|
+ return { dir, ig: ignore().add(fs.readFileSync(giPath, 'utf-8')) };
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch {
|
|
|
|
|
+ // Unreadable .gitignore — treat as absent.
|
|
|
|
|
+ }
|
|
|
|
|
+ return null;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ const isIgnored = (fullPath: string, isDir: boolean, matchers: ScopedIgnore[]): boolean => {
|
|
|
|
|
+ for (const { dir, ig } of matchers) {
|
|
|
|
|
+ let rel = normalizePath(path.relative(dir, fullPath));
|
|
|
|
|
+ if (!rel || rel.startsWith('..')) continue; // not under this matcher's dir
|
|
|
|
|
+ if (isDir) rel += '/'; // dir-only rules (e.g. `build/`) only match with the slash
|
|
|
|
|
+ if (ig.ignores(rel)) return true;
|
|
|
|
|
+ }
|
|
|
|
|
+ return false;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ function walk(dir: string, matchers: ScopedIgnore[]): void {
|
|
|
let realDir: string;
|
|
let realDir: string;
|
|
|
try {
|
|
try {
|
|
|
realDir = fs.realpathSync(dir);
|
|
realDir = fs.realpathSync(dir);
|
|
@@ -360,12 +357,9 @@ function scanDirectoryWalk(
|
|
|
}
|
|
}
|
|
|
visitedDirs.add(realDir);
|
|
visitedDirs.add(realDir);
|
|
|
|
|
|
|
|
- // Check for .codegraphignore marker file
|
|
|
|
|
- const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER);
|
|
|
|
|
- if (fs.existsSync(ignoreMarker)) {
|
|
|
|
|
- logDebug('Skipping directory due to .codegraphignore marker', { dir });
|
|
|
|
|
- return;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // This directory's own .gitignore (if present) applies to everything below it.
|
|
|
|
|
+ const own = loadIgnore(dir);
|
|
|
|
|
+ const active = own ? [...matchers, own] : matchers;
|
|
|
|
|
|
|
|
let entries: fs.Dirent[];
|
|
let entries: fs.Dirent[];
|
|
|
try {
|
|
try {
|
|
@@ -376,6 +370,9 @@ function scanDirectoryWalk(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
for (const entry of entries) {
|
|
for (const entry of entries) {
|
|
|
|
|
+ // Never descend into git internals or our own data directory.
|
|
|
|
|
+ if (entry.name === '.git' || entry.name === '.codegraph') continue;
|
|
|
|
|
+
|
|
|
const fullPath = path.join(dir, entry.name);
|
|
const fullPath = path.join(dir, entry.name);
|
|
|
const relativePath = normalizePath(path.relative(rootDir, fullPath));
|
|
const relativePath = normalizePath(path.relative(rootDir, fullPath));
|
|
|
|
|
|
|
@@ -384,19 +381,11 @@ function scanDirectoryWalk(
|
|
|
const realTarget = fs.realpathSync(fullPath);
|
|
const realTarget = fs.realpathSync(fullPath);
|
|
|
const stat = fs.statSync(realTarget);
|
|
const stat = fs.statSync(realTarget);
|
|
|
if (stat.isDirectory()) {
|
|
if (stat.isDirectory()) {
|
|
|
- const dirPattern = relativePath + '/';
|
|
|
|
|
- let excluded = false;
|
|
|
|
|
- for (const pattern of config.exclude) {
|
|
|
|
|
- if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
|
|
|
|
|
- excluded = true;
|
|
|
|
|
- break;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- if (!excluded) {
|
|
|
|
|
- walk(fullPath);
|
|
|
|
|
|
|
+ if (!isIgnored(fullPath, true, active)) {
|
|
|
|
|
+ walk(fullPath, active);
|
|
|
}
|
|
}
|
|
|
} else if (stat.isFile()) {
|
|
} else if (stat.isFile()) {
|
|
|
- if (shouldIncludeFile(relativePath, config)) {
|
|
|
|
|
|
|
+ if (!isIgnored(fullPath, false, active) && isSourceFile(relativePath)) {
|
|
|
files.push(relativePath);
|
|
files.push(relativePath);
|
|
|
count++;
|
|
count++;
|
|
|
onProgress?.(count, relativePath);
|
|
onProgress?.(count, relativePath);
|
|
@@ -409,19 +398,11 @@ function scanDirectoryWalk(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (entry.isDirectory()) {
|
|
if (entry.isDirectory()) {
|
|
|
- const dirPattern = relativePath + '/';
|
|
|
|
|
- let excluded = false;
|
|
|
|
|
- for (const pattern of config.exclude) {
|
|
|
|
|
- if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
|
|
|
|
|
- excluded = true;
|
|
|
|
|
- break;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- if (!excluded) {
|
|
|
|
|
- walk(fullPath);
|
|
|
|
|
|
|
+ if (!isIgnored(fullPath, true, active)) {
|
|
|
|
|
+ walk(fullPath, active);
|
|
|
}
|
|
}
|
|
|
} else if (entry.isFile()) {
|
|
} else if (entry.isFile()) {
|
|
|
- if (shouldIncludeFile(relativePath, config)) {
|
|
|
|
|
|
|
+ if (!isIgnored(fullPath, false, active) && isSourceFile(relativePath)) {
|
|
|
files.push(relativePath);
|
|
files.push(relativePath);
|
|
|
count++;
|
|
count++;
|
|
|
onProgress?.(count, relativePath);
|
|
onProgress?.(count, relativePath);
|
|
@@ -430,7 +411,7 @@ function scanDirectoryWalk(
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- walk(rootDir);
|
|
|
|
|
|
|
+ walk(rootDir, []);
|
|
|
return files;
|
|
return files;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -439,7 +420,6 @@ function scanDirectoryWalk(
|
|
|
*/
|
|
*/
|
|
|
export class ExtractionOrchestrator {
|
|
export class ExtractionOrchestrator {
|
|
|
private rootDir: string;
|
|
private rootDir: string;
|
|
|
- private config: CodeGraphConfig;
|
|
|
|
|
private queries: QueryBuilder;
|
|
private queries: QueryBuilder;
|
|
|
/**
|
|
/**
|
|
|
* Names of frameworks detected for this project, populated by indexAll().
|
|
* Names of frameworks detected for this project, populated by indexAll().
|
|
@@ -449,9 +429,8 @@ export class ExtractionOrchestrator {
|
|
|
*/
|
|
*/
|
|
|
private detectedFrameworkNames: string[] | null = null;
|
|
private detectedFrameworkNames: string[] | null = null;
|
|
|
|
|
|
|
|
- constructor(rootDir: string, config: CodeGraphConfig, queries: QueryBuilder) {
|
|
|
|
|
|
|
+ constructor(rootDir: string, queries: QueryBuilder) {
|
|
|
this.rootDir = rootDir;
|
|
this.rootDir = rootDir;
|
|
|
- this.config = config;
|
|
|
|
|
this.queries = queries;
|
|
this.queries = queries;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -500,7 +479,7 @@ export class ExtractionOrchestrator {
|
|
|
*/
|
|
*/
|
|
|
private ensureDetectedFrameworks(files?: string[]): string[] {
|
|
private ensureDetectedFrameworks(files?: string[]): string[] {
|
|
|
if (this.detectedFrameworkNames !== null) return this.detectedFrameworkNames;
|
|
if (this.detectedFrameworkNames !== null) return this.detectedFrameworkNames;
|
|
|
- const fileList = files ?? scanDirectory(this.rootDir, this.config);
|
|
|
|
|
|
|
+ const fileList = files ?? scanDirectory(this.rootDir);
|
|
|
const context = this.buildDetectionContext(fileList);
|
|
const context = this.buildDetectionContext(fileList);
|
|
|
this.detectedFrameworkNames = detectFrameworks(context).map((r) => r.name);
|
|
this.detectedFrameworkNames = detectFrameworks(context).map((r) => r.name);
|
|
|
return this.detectedFrameworkNames;
|
|
return this.detectedFrameworkNames;
|
|
@@ -534,7 +513,7 @@ export class ExtractionOrchestrator {
|
|
|
total: 0,
|
|
total: 0,
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
- const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
|
|
|
|
|
|
|
+ const files = await scanDirectoryAsync(this.rootDir, (current, file) => {
|
|
|
onProgress?.({
|
|
onProgress?.({
|
|
|
phase: 'scanning',
|
|
phase: 'scanning',
|
|
|
current,
|
|
current,
|
|
@@ -802,18 +781,16 @@ export class ExtractionOrchestrator {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Honour config.maxFileSize. Without this check, vendored
|
|
|
|
|
- // generated headers, minified bundles, and other multi-MB
|
|
|
|
|
- // files get indexed despite the user setting a size cap —
|
|
|
|
|
- // wasting WASM heap and the worker recycle budget on inputs
|
|
|
|
|
- // the user explicitly opted out of. The single-file extractFile
|
|
|
|
|
- // path already enforces this; the bulk path used to silently
|
|
|
|
|
- // skip the check.
|
|
|
|
|
- if (stats.size > this.config.maxFileSize) {
|
|
|
|
|
|
|
+ // Honour MAX_FILE_SIZE. Without this check, vendored generated
|
|
|
|
|
+ // headers, minified bundles, and other multi-MB files get indexed,
|
|
|
|
|
+ // wasting WASM heap and the worker recycle budget on inputs with no
|
|
|
|
|
+ // useful symbols. The single-file extractFile path already enforces
|
|
|
|
|
+ // this; the bulk path used to silently skip the check.
|
|
|
|
|
+ if (stats.size > MAX_FILE_SIZE) {
|
|
|
processed++;
|
|
processed++;
|
|
|
filesSkipped++;
|
|
filesSkipped++;
|
|
|
errors.push({
|
|
errors.push({
|
|
|
- message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
|
|
|
|
|
|
|
+ message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
|
|
|
filePath,
|
|
filePath,
|
|
|
severity: 'warning',
|
|
severity: 'warning',
|
|
|
code: 'size_exceeded',
|
|
code: 'size_exceeded',
|
|
@@ -1108,14 +1085,14 @@ export class ExtractionOrchestrator {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Check file size
|
|
// Check file size
|
|
|
- if (stats.size > this.config.maxFileSize) {
|
|
|
|
|
|
|
+ if (stats.size > MAX_FILE_SIZE) {
|
|
|
return {
|
|
return {
|
|
|
nodes: [],
|
|
nodes: [],
|
|
|
edges: [],
|
|
edges: [],
|
|
|
unresolvedReferences: [],
|
|
unresolvedReferences: [],
|
|
|
errors: [
|
|
errors: [
|
|
|
{
|
|
{
|
|
|
- message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
|
|
|
|
|
|
|
+ message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
|
|
|
filePath: relativePath,
|
|
filePath: relativePath,
|
|
|
severity: 'warning',
|
|
severity: 'warning',
|
|
|
code: 'size_exceeded',
|
|
code: 'size_exceeded',
|
|
@@ -1245,7 +1222,7 @@ export class ExtractionOrchestrator {
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
const filesToIndex: string[] = [];
|
|
const filesToIndex: string[] = [];
|
|
|
- const gitChanges = getGitChangedFiles(this.rootDir, this.config);
|
|
|
|
|
|
|
+ const gitChanges = getGitChangedFiles(this.rootDir);
|
|
|
|
|
|
|
|
if (gitChanges) {
|
|
if (gitChanges) {
|
|
|
// === Git fast path ===
|
|
// === Git fast path ===
|
|
@@ -1291,7 +1268,7 @@ export class ExtractionOrchestrator {
|
|
|
}
|
|
}
|
|
|
} else {
|
|
} else {
|
|
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
|
- const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
|
|
|
|
|
|
|
+ const currentFiles = new Set(scanDirectory(this.rootDir));
|
|
|
filesChecked = currentFiles.size;
|
|
filesChecked = currentFiles.size;
|
|
|
|
|
|
|
|
// Build Map for O(1) lookups instead of .find() per file
|
|
// Build Map for O(1) lookups instead of .find() per file
|
|
@@ -1376,7 +1353,7 @@ export class ExtractionOrchestrator {
|
|
|
* Uses git status as a fast path when available, falling back to full scan.
|
|
* Uses git status as a fast path when available, falling back to full scan.
|
|
|
*/
|
|
*/
|
|
|
getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } {
|
|
getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } {
|
|
|
- const gitChanges = getGitChangedFiles(this.rootDir, this.config);
|
|
|
|
|
|
|
+ const gitChanges = getGitChangedFiles(this.rootDir);
|
|
|
|
|
|
|
|
if (gitChanges) {
|
|
if (gitChanges) {
|
|
|
// === Git fast path ===
|
|
// === Git fast path ===
|
|
@@ -1420,7 +1397,7 @@ export class ExtractionOrchestrator {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
|
- const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
|
|
|
|
|
|
|
+ const currentFiles = new Set(scanDirectory(this.rootDir));
|
|
|
const trackedFiles = this.queries.getAllFiles();
|
|
const trackedFiles = this.queries.getAllFiles();
|
|
|
|
|
|
|
|
// Build Map for O(1) lookups
|
|
// Build Map for O(1) lookups
|
|
@@ -1467,4 +1444,4 @@ export class ExtractionOrchestrator {
|
|
|
|
|
|
|
|
// Re-export useful types and functions
|
|
// Re-export useful types and functions
|
|
|
export { extractFromSource } from './tree-sitter';
|
|
export { extractFromSource } from './tree-sitter';
|
|
|
-export { detectLanguage, isLanguageSupported, isGrammarLoaded, getSupportedLanguages, initGrammars, loadGrammarsForLanguages, loadAllGrammars } from './grammars';
|
|
|
|
|
|
|
+export { detectLanguage, isSourceFile, isLanguageSupported, isGrammarLoaded, getSupportedLanguages, initGrammars, loadGrammarsForLanguages, loadAllGrammars } from './grammars';
|