haiany
/
codegraph
огледало од https://github.com/colbymchenry/codegraph.git


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
							#!/usr/bin/env node
// Sanity-check that codegraph extracted REAL symbols (not just file/import nodes)
// from a repo for a given language. Exits non-zero on a critical failure so it
// can drive a write-extractor -> build -> re-check loop.
//
// Usage: node scripts/add-lang/verify-extraction.mjs <repo-path> <lang>
// Reads `codegraph status <repo> --json` using whatever codegraph is on PATH,
// so it reflects the binary that built the index.
//
// Exit codes: 0 = pass or soft-warn, 1 = critical fail, 2 = could not run.

import { execFileSync } from 'node:child_process';

// Positional CLI arguments: <repo-path> first, <lang> second. Both are
// mandatory; if either is missing, print the usage line and exit with 2
// ("could not run").
const cliArgs = process.argv.slice(2);
const repo = cliArgs[0];
const lang = cliArgs[1];
if (!(repo && lang)) {
  console.error('usage: verify-extraction.mjs <repo-path> <lang>');
  process.exit(2);
}

// Ask the `codegraph` binary found on PATH for the index status of `repo` and
// parse its JSON output into `status`.
//
// Any failure here — the binary is missing, it exits non-zero, it prints
// invalid JSON, or it prints JSON that is not an object — is reported on
// stderr and ends the process with exit code 2 ("could not run").
let status;
try {
  const out = execFileSync('codegraph', ['status', repo, '--json'], { encoding: 'utf8' });
  status = JSON.parse(out);
  // JSON.parse also accepts `null`, numbers, strings and arrays. Reject those
  // here: a `null` payload would otherwise throw an uncaught TypeError on the
  // first property read below, and Node's exit code for an uncaught exception
  // is 1 — indistinguishable from a genuine "critical fail".
  if (status === null || typeof status !== 'object' || Array.isArray(status)) {
    throw new Error('expected a JSON object from `codegraph status --json`');
  }
} catch (e) {
  console.error(`[verify] could not read codegraph status for ${repo}: ${e.message}`);
  process.exit(2);
}

// Node kinds whose presence shows the extractor actually mapped AST node
// types. 'file' and 'import' are deliberately absent: codegraph creates those
// structurally for any language, so they prove nothing about the extractor.
const SYMBOL_KINDS = new Set([
  'module',
  'class',
  'struct',
  'interface',
  'trait',
  'protocol',
  'function',
  'method',
  'property',
  'field',
  'variable',
  'constant',
  'enum',
  'enum_member',
  'type_alias',
  'namespace',
  'route',
  'component',
]);

// Fields read from the status payload; a missing (or otherwise falsy) field
// falls back to an empty/zero value so the checks below can run regardless.
const byKind = status.nodesByKind || {};
const langs = status.languages || [];
const files = status.fileCount || 0;
const edges = status.edgeCount || 0;

// Single pass over the reported kinds: remember which ones are in
// SYMBOL_KINDS and add up their node counts.
const symbolKinds = [];
let symbolCount = 0;
for (const kind of Object.keys(byKind)) {
  if (!SYMBOL_KINDS.has(kind)) continue;
  symbolKinds.push(kind);
  symbolCount += byKind[kind];
}

// Every check is recorded as { severity, ok, label, detail }. A failed
// 'critical' check fails the run; a failed 'soft' check only produces a warning.
const checks = [];
function add(severity, ok, label, detail) {
  return checks.push({ severity, ok, label, detail });
}

// Shown in the "symbols extracted" detail; shouts when no symbol kind was found.
const kindSummary = symbolKinds.join(', ') || 'NONE — only file/import nodes!';

add(
  'critical',
  status.initialized === true,
  'index initialized',
  `initialized=${status.initialized}`,
);
add(
  'critical',
  langs.includes(lang),
  `language "${lang}" detected`,
  `languages=[${langs.join(', ')}]`,
);
add(
  'critical',
  symbolCount > 0,
  'structural symbols extracted',
  `${symbolCount} symbols (${kindSummary})`,
);
add(
  'soft',
  symbolCount >= files,
  'symbol density >= 1/file',
  `${symbolCount} symbols across ${files} files`,
);
add(
  'soft',
  edges > files,
  'edges resolved',
  `${edges} edges across ${files} files`,
);

// Human-readable report: a three-line summary of the index, followed by one
// line per check marked with ✓ (passed) or ✗ (failed).
const summaryLines = [
  `\n# Extraction check — ${repo}  (lang=${lang}, backend=${status.backend})`,
  `  files=${files} nodes=${status.nodeCount} edges=${edges}`,
  `  nodesByKind: ${JSON.stringify(byKind)}\n`,
];
summaryLines.forEach((line) => {
  console.log(line);
});
checks.forEach(({ ok, label, detail }) => {
  const mark = ok ? '✓' : '✗';
  console.log(`  ${mark} ${label} — ${detail}`);
});

// Final verdict. Failed checks are split by severity; any critical failure
// wins and exits 1, soft failures alone only warn and still exit 0, and a
// clean run exits 0.
const failedWith = (severity) => checks.filter((c) => c.severity === severity && !c.ok);
const critical = failedWith('critical');
const soft = failedWith('soft');

console.log();
if (critical.length > 0) {
  console.log(`RESULT: FAIL (${critical.length} critical) — extractor or grammar wiring is broken. Re-run dump-ast.mjs and fix the node-type mappings.`);
  process.exit(1);
} else if (soft.length > 0) {
  console.log(`RESULT: WARN (${soft.length} soft) — extraction works but looks thin; inspect the counts above.`);
  process.exit(0);
} else {
  console.log('RESULT: PASS — extraction looks healthy.');
  process.exit(0);
}