#!/usr/bin/env bash
# Add-lang benchmark for ONE repo:
#   clone -> wipe+index (with the codegraph on PATH) -> verify extraction ->
#   with/without retrieval A/B (reuses scripts/agent-eval/run-all.sh).
#
# Assumes the codegraph dev build is already built + linked on PATH — the skill
# runs `npm run build && ./scripts/local-install.sh` ONCE before looping repos.
# The A/B is skipped if extraction fails its critical checks (don't burn $ on a
# broken extractor); set FORCE_AB=1 to run it anyway.
#
# Usage: bench.sh <lang> <repo-name> <repo-url> "<question>" [headless|tmux|all]
# Env:   CORPUS    corpus dir (default /tmp/codegraph-corpus, shared with agent-eval)
#        FORCE_AB  set to 1 to run the A/B even when extraction verification fails
# Exit:  status of verify-extraction.mjs (0 = pass/warn, 1 = critical fail,
#        2 = couldn't read status); 1 when setup, clone or indexing fails.

# `-e` is intentionally not set: the verifier's non-zero status has to be
# captured and acted on below instead of aborting the script.
set -uo pipefail

# Print a fatal diagnostic on stderr and stop with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

LANG_TOKEN="${1:?usage: bench.sh <lang> <repo-name> <repo-url> \"<question>\" [mode]}"
NAME="${2:?repo-name required}"
URL="${3:?repo-url required}"
Q="${4:?question required}"
MODE="${5:-headless}"

# Resolve the harness directories up front and fail fast if either is missing,
# so a broken checkout is reported before any clone/index work is done.
# `CDPATH=` keeps `cd` from echoing a CDPATH-resolved directory into the
# captured output; `--` protects against paths that start with a dash.
HARNESS="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" \
  || die "cannot resolve harness directory from $0"
AGENT_EVAL="$(CDPATH= cd -- "$HARNESS/../agent-eval" && pwd)" \
  || die "agent-eval directory not found at $HARNESS/../agent-eval"

CORPUS="${CORPUS:-/tmp/codegraph-corpus}"
REPO="$CORPUS/$NAME"

command -v codegraph >/dev/null \
  || die "no codegraph on PATH (build + ./scripts/local-install.sh first)"

echo "==================== add-lang bench: $NAME ($LANG_TOKEN) ===================="
echo "codegraph: $(command -v codegraph) -> $(codegraph --version 2>/dev/null || echo '?')"

# 1. Ensure the repo (shallow clone, reuse if present).
mkdir -p -- "$CORPUS" || die "cannot create corpus dir: $CORPUS"
if [[ -d "$REPO/.git" ]]; then
  echo "→ reusing checkout: $REPO"
else
  echo "→ cloning $URL"
  # `--` stops a URL beginning with '-' from being parsed as a git option.
  git clone --depth 1 -- "$URL" "$REPO" || die "git clone failed"
fi

# 2. Wipe + index with the binary under test.
echo "→ wiping .codegraph and indexing"
# `${REPO:?}` aborts instead of expanding to "/.codegraph" should REPO be empty.
rm -rf -- "${REPO:?}/.codegraph"
# Subshell so the working directory change does not leak into later steps.
( cd -- "$REPO" && codegraph init -i ) || die "indexing failed"

# 3. Verify extraction (cheap guard before the paid A/B).
echo "→ verifying extraction"
node "$HARNESS/verify-extraction.mjs" "$REPO" "$LANG_TOKEN"
VERIFY=$?

# 4. Retrieval A/B (skipped if extraction is broken, unless FORCE_AB=1).
if [[ "$VERIFY" -ne 0 && "${FORCE_AB:-0}" != "1" ]]; then
  echo "→ SKIPPING A/B — extraction failed critical checks (set FORCE_AB=1 to override)"
else
  echo "→ retrieval A/B (mode=$MODE)"
  # The A/B status does not change this script's exit code (which reflects
  # extraction only), but a failure is surfaced rather than silently dropped.
  bash "$AGENT_EVAL/run-all.sh" "$REPO" "$Q" "$MODE" \
    || echo "warning: retrieval A/B exited with status $? (bench exit still reflects extraction)" >&2
fi

echo "==================== bench complete: $NAME (verify exit=$VERIFY) ===================="
# Exit reflects extraction: 0 = pass/warn, 1 = critical fail, 2 = couldn't read status.
exit "$VERIFY"