#!/usr/bin/env bash
# bench-readme.sh
#
# Re-run the README "Benchmark Results" A/B (with vs without codegraph) on the
# current build: the 7 README repos, same queries, RUNS per arm (default 4).
# Output → /tmp/ab-readme/<repo>/run<n>/run-headless-{with,without}.jsonl
# Aggregate with parse-bench-readme.mjs. Repos must be cloned + indexed under
# $CORPUS (default /tmp/codegraph-corpus) by the build under test.
#
# Environment:
#   CORPUS  directory containing one sub-directory per repo
#           (default /tmp/codegraph-corpus)
#   RUNS    number of run-all.sh invocations per repo; non-negative integer
#           (default 4)
#
# Exit status: 0 after all runs were attempted, 1 if run-all.sh cannot be
# located next to this script, 2 if RUNS is not a non-negative integer.
#
# -e is left off: a failing run does not abort the remaining runs.
set -uo pipefail

# Directory containing this script; run-all.sh is looked up next to it.
H="$(cd "$(dirname "$0")" && pwd)" || {
  printf 'bench-readme: cannot resolve script directory\n' >&2
  exit 1
}
C="${CORPUS:-/tmp/codegraph-corpus}"
RUNS="${RUNS:-4}"

# Each row is "<repo>|<query>"; the repo name is everything before the first '|'.
readonly -a ROWS=(
  "vscode|How does the extension host communicate with the main process?"
  "excalidraw|How does Excalidraw render and update canvas elements?"
  "django|How does Django's ORM build and execute a query from a QuerySet?"
  "tokio|How does tokio schedule and run async tasks on its runtime?"
  "okhttp|How does OkHttp process a request through its interceptor chain?"
  "gin|How does gin route requests through its middleware chain?"
  "alamofire|How does Alamofire build, send, and validate a request?"
)

#######################################
# Run every (repo, query) row RUNS times through run-all.sh in headless mode
# and print only the "exit N" lines of each run.
# Globals:   H, C, RUNS, ROWS (all read)
# Arguments: none
# Outputs:   progress banners and per-run exit lines on stdout;
#            validation errors on stderr
# Returns:   0 once all runs were attempted; exits 1/2 on setup errors
#######################################
main() {
  local row repo q run

  # Reject anything that is not a plain decimal count up front; otherwise the
  # loop below would silently do nothing (or misparse a value such as "08").
  if ! [[ "$RUNS" =~ ^(0|[1-9][0-9]*)$ ]]; then
    printf 'bench-readme: RUNS must be a non-negative integer, got %q\n' \
      "$RUNS" >&2
    exit 2
  fi

  # The per-run output is filtered through grep, so a missing driver script
  # would otherwise only show up as a wall of "(no exit line)" messages.
  if [[ ! -f "$H/run-all.sh" ]]; then
    printf 'bench-readme: %s not found\n' "$H/run-all.sh" >&2
    exit 1
  fi

  echo "### README A/B START $(date) RUNS=$RUNS"
  for row in "${ROWS[@]}"; do
    repo="${row%%|*}"
    q="${row#*|}"
    echo "===== $repo ====="
    # Arithmetic loop instead of `seq`: no external process, and RUNS=0 is
    # reliably zero iterations (BSD seq counts 1 down to 0 instead).
    for ((run = 1; run <= RUNS; run++)); do
      # `|| true` pins the left stage's status to 0, so under pipefail the
      # pipeline status is grep's alone: the fallback line is printed only
      # when no "exit N" line was seen, not merely because run-all.sh itself
      # exited non-zero after printing one.
      {
        AGENT_EVAL_OUT="/tmp/ab-readme/$repo/run$run" \
          bash "$H/run-all.sh" "$C/$repo" "$q" headless 2>&1 || true
      } \
        | grep -E "exit [0-9]" \
        || echo " run$run: (no exit line)"
    done
  done
  echo "### README A/B DONE $(date)"
}

main "$@"