haiany
/
codegraph
mirror de https://github.com/colbymchenry/codegraph.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
							#!/usr/bin/env bash
# Add-lang benchmark for ONE repo:
#   clone -> wipe+index (with the codegraph on PATH) -> verify extraction ->
#   with/without retrieval A/B (reuses scripts/agent-eval/run-all.sh).
#
# Assumes the codegraph dev build is already built + linked on PATH — the skill
# runs `npm run build && ./scripts/local-install.sh` ONCE before looping repos.
# The A/B is skipped if extraction fails its critical checks (don't burn $ on a
# broken extractor); set FORCE_AB=1 to run it anyway.
#
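# Usage: bench.sh <lang> <repo-name> <repo-url> "<question>" [headless|tmux|all]
#        (mode defaults to headless)
# Env:   CORPUS    corpus dir (default /tmp/codegraph-corpus, shared with agent-eval)
#        FORCE_AB  set to 1 to run the A/B even if extraction fails its critical checks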
# Shell options: error on unset variables, and fail a pipeline if any stage fails.
# `-e` is deliberately left off: the verifier's non-zero exit status is captured
# further down and becomes this script's exit code instead of aborting the run.
set -uo pipefail

# Print a fatal error to stderr and exit 1. Using stderr keeps these messages on
# the same stream as the `${N:?}` usage errors produced by the shell itself.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Positional arguments; `${N:?msg}` aborts with msg when one is missing or empty.
LANG_TOKEN="${1:?usage: bench.sh <lang> <repo-name> <repo-url> \"<question>\" [mode]}"
NAME="${2:?repo-name required}"
URL="${3:?repo-url required}"
Q="${4:?question required}"
MODE="${5:-headless}"

# Absolute directory of this script, and of the sibling agent-eval harness.
# CDPATH is cleared for the first cd so a relative $0 cannot be resolved against
# it (which would also make cd echo the directory into the captured output).
# Both lookups fail fast: an empty path here would otherwise only surface after
# the clone + index, as a confusing `bash /run-all.sh` error.
HARNESS="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)" \
  || die "cannot resolve script directory from $0"
AGENT_EVAL="$(cd -- "$HARNESS/../agent-eval" && pwd)" \
  || die "agent-eval harness not found at $HARNESS/../agent-eval"
CORPUS="${CORPUS:-/tmp/codegraph-corpus}"
REPO="$CORPUS/$NAME"

command -v codegraph >/dev/null \
  || die "no codegraph on PATH (build + ./scripts/local-install.sh first)"

echo "==================== add-lang bench: $NAME ($LANG_TOKEN) ===================="
echo "codegraph: $(command -v codegraph) -> $(codegraph --version 2>/dev/null || echo '?')"

# 1. Ensure the repo (shallow clone, reuse if present).
mkdir -p -- "$CORPUS" || die "cannot create corpus dir: $CORPUS"
if [[ -d "$REPO/.git" ]]; then
  echo "→ reusing checkout: $REPO"
else
  echo "→ cloning $URL"
  # `--` ends option parsing so a URL starting with '-' is not read as a flag.
  git clone --depth 1 -- "$URL" "$REPO" || die "git clone failed"
fi

# 2. Wipe + index with the binary under test.
echo "→ wiping .codegraph and indexing"
# `${REPO:?}` is a last-ditch guard so an empty REPO can never widen the rm.
rm -rf -- "${REPO:?}/.codegraph"
# Subshell keeps the cd from changing this script's working directory.
( cd -- "$REPO" && codegraph init -i ) || die "indexing failed"

# 3. Verify extraction (cheap guard before the paid A/B).
echo "→ verifying extraction"
VERIFY=0
node "$HARNESS/verify-extraction.mjs" "$REPO" "$LANG_TOKEN" || VERIFY=$?

# 4. Retrieval A/B (skipped if extraction is broken, unless FORCE_AB=1).
if (( VERIFY != 0 )) && [[ "${FORCE_AB:-0}" != "1" ]]; then
  echo "→ SKIPPING A/B — extraction failed critical checks (set FORCE_AB=1 to override)"
else
  echo "→ retrieval A/B (mode=$MODE)"
  ab_status=0
  bash "$AGENT_EVAL/run-all.sh" "$REPO" "$Q" "$MODE" || ab_status=$?
  if (( ab_status != 0 )); then
    # Surface the failure, but keep the exit code tied to extraction only.
    echo "warning: retrieval A/B exited with status $ab_status" >&2
  fi
fi

echo "==================== bench complete: $NAME (verify exit=$VERIFY) ===================="
# Exit reflects extraction: 0 = pass/warn, 1 = critical fail, 2 = couldn't read status.
exit "$VERIFY"