offload-eval-frontload.sh 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. #!/usr/bin/env bash
  2. # FRONTLOAD arm (approach 1): codegraph attached (offload-disabled) + the front-load
  3. # UserPromptSubmit hook (offload-eval-hook.mjs), n reps, appended to $RESULTS. Compare against
  4. # the matrix's raw/nocg baselines. Usage: offload-eval-frontload.sh <indexed-repo> <tier> <reps> "<Q>"
  5. # Env: MODEL=sonnet EFFORT=high RESULTS=<file> AGENT_EVAL_OUT=<scratch dir>
  # Unset variables are errors and a pipeline fails if any stage fails. `-e` is not enabled,
  # so every step below whose failure would invalidate the run is checked explicitly.
  set -uo pipefail

  # Print a message on stderr and abort the run.
  die() { printf '%s\n' "$*" >&2; exit 1; }

  HERE="$(cd "$(dirname "$0")" && pwd)" || die "cannot resolve script directory"
  ENGINE="$(cd "$HERE/../.." && pwd)" || die "cannot resolve engine root from $HERE"
  BIN="$ENGINE/dist/bin/codegraph.js"
  OUT="${AGENT_EVAL_OUT:-/tmp/cg-offload-eval}"
  # Positional arguments: <indexed-repo> <tier> <reps> "<question>"; each is mandatory.
  TARGET="${1:?repo}"; TIER="${2:?tier}"; REPS="${3:?reps}"; Q="${4:?question}"
  RUNS="$OUT/runs"
  EXTRACT="$HERE/offload-eval-metrics.mjs"
  RESULTS="${RESULTS:-$OUT/results-fl.jsonl}"

  [ -d "$TARGET/.codegraph" ] || die "not indexed: $TARGET"
  # Without the built CLI no MCP server can start; stop here rather than record useless reps.
  [ -e "$BIN" ] || die "codegraph build not found: $BIN"
  mkdir -p "$RUNS" || die "cannot create $RUNS"

  # Name the repo after its logical directory: "." and ".." resolve to a real name, while a
  # symlinked checkout keeps the name the caller passed. TARGET itself is then canonicalised
  # (symlinks resolved) for use in the server command line and the config file.
  REPO=$(cd "$TARGET" && pwd) || die "cannot enter $TARGET"
  REPO=$(basename "$REPO")
  TARGET=$(cd "$TARGET" && pwd -P) || die "cannot resolve $TARGET"

  # MCP config: codegraph server with offload disabled. JSON.stringify does the escaping, so
  # quotes or backslashes in either path cannot corrupt the file.
  CFG="$RUNS/mcp-fl-$REPO.json"
  node -e '
    const [bin, repo] = process.argv.slice(1);
    process.stdout.write(JSON.stringify({
      mcpServers: { codegraph: { command: "env", args: [
        "CODEGRAPH_WASM_RELAUNCHED=1", "CODEGRAPH_OFFLOAD_DISABLE=1",
        "node", bin, "serve", "--mcp", "--path", repo,
      ] } },
    }));
  ' "$BIN" "$TARGET" > "$CFG" || die "cannot write $CFG"

  # Generate the hook settings pointing at the persisted hook; enable its debug log so we can
  # count injections (claude passes this env down to the spawned hook process).
  # The hook path is shell-quoted with %q so a checkout path containing spaces still yields a
  # single argument; for plain paths the command string is unchanged.
  HOOKCFG="$RUNS/frontload-settings.json"
  printf -v HOOK_CMD 'node %q' "$HERE/offload-eval-hook.mjs"
  node -e '
    process.stdout.write(JSON.stringify({
      hooks: { UserPromptSubmit: [ { hooks: [ { type: "command", command: process.argv[1] } ] } ] },
    }));
  ' "$HOOK_CMD" > "$HOOKCFG" || die "cannot write $HOOKCFG"
  export CG_FRONTLOAD_DEBUG="$RUNS/hook-debug.log"
  #######################################
  # Restart the codegraph MCP server for one repo and wait for its daemon socket.
  # Kills any server already running for exactly this path, removes the stale socket,
  # starts a fresh server in the background (offload disabled, 30 min idle timeout) and
  # polls for <repo>/.codegraph/daemon.sock for up to ~15 s.
  # Globals:   BIN (read) - path to the codegraph CLI entry point
  # Arguments: $1 - absolute path of the indexed repo
  # Outputs:   " daemon warm" or " WARN no daemon" on stdout
  # Returns:   0 when the socket appeared, 1 on timeout, 2 when the path is missing
  #######################################
  prewarm() {
    local repo="${1:-}"
    if [[ -z "$repo" ]]; then
      # An empty path would make the pkill pattern match every codegraph MCP server.
      echo "prewarm: missing repo path" >&2
      return 2
    fi
    local sock="$repo/.codegraph/daemon.sock"

    # pkill -f takes an unanchored extended regex: escape metacharacters in the path and
    # require end-of-argument after it, so /x/foo does not also kill the server of /x/foobar.
    local repo_re
    repo_re=$(printf '%s' "$repo" | sed 's/[][(){}.*+?^$|\\]/\\&/g')
    pkill -9 -f "serve --mcp --path ${repo_re}( |\$)" 2>/dev/null
    rm -f -- "$sock" 2>/dev/null
    sleep 0.6

    env CODEGRAPH_OFFLOAD_DISABLE=1 CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 \
      node "$BIN" serve --mcp --path "$repo" </dev/null >/dev/null 2>&1 &

    # Poll every 100 ms; the socket file appearing is the readiness signal.
    local tries
    for (( tries = 0; tries < 150; tries++ )); do
      if [[ -e "$sock" ]]; then
        echo " daemon warm"
        return 0
      fi
      sleep 0.1
    done
    echo " WARN no daemon"
    return 1
  }
  # Reject a bad rep count up front: `seq 1 "$REPS"` would silently run zero reps on a
  # non-numeric value and counts DOWN (1, 0) on BSD/macOS when REPS is 0.
  case "$REPS" in
    ''|*[!0-9]*) echo "reps must be a non-negative integer, got: $REPS" >&2; exit 2 ;;
  esac
  REP_COUNT=$((10#$REPS))   # force base 10 so "08" is not parsed as octal

  # Stop the MCP server started for $TARGET and remove its socket. The pkill pattern is an
  # unanchored extended regex, so the path is escaped and must be followed by end-of-argument;
  # otherwise /x/foo would also kill the server of /x/foobar.
  stop_daemon() {
    local repo_re
    repo_re=$(printf '%s' "$TARGET" | sed 's/[][(){}.*+?^$|\\]/\\&/g')
    pkill -9 -f "serve --mcp --path ${repo_re}( |\$)" 2>/dev/null
    rm -f -- "$TARGET/.codegraph/daemon.sock" 2>/dev/null
  }
  # Tear the daemon down on every exit path, including Ctrl-C, instead of leaving it to
  # linger until its idle timeout.
  trap stop_daemon EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  echo "###### FRONTLOAD repo=$REPO tier=$TIER reps=$REPS"
  prewarm "$TARGET"

  for (( r = 1; r <= REP_COUNT; r++ )); do
    tag="$REPO-frontload-$r"
    # One headless claude run inside the repo: stream-json transcript to $tag.jsonl,
    # stderr to $tag.err, only the codegraph MCP config and the front-load hook settings.
    ( cd "$TARGET" && claude -p "$Q" --output-format stream-json --verbose --permission-mode bypassPermissions \
        --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
        --strict-mcp-config --mcp-config "$CFG" --settings "$HOOKCFG" \
        </dev/null > "$RUNS/$tag.jsonl" 2>"$RUNS/$tag.err" ) \
      || echo " WARN claude exited non-zero for $tag (see $RUNS/$tag.err)" >&2

    # Capture the metrics row before appending it, so that a rep which produced no row never
    # makes the summary below re-print the previous rep's line (or crash on a missing file).
    row=$(node "$EXTRACT" --run "$RUNS/$tag.jsonl" --usage "-" --arm frontload --rep "$r" --repo "$REPO" --tier "$TIER" --q "$Q")
    if [[ -n "$row" ]]; then
      printf '%s\n' "$row" >> "$RESULTS"
      node -e 'const o=JSON.parse(require("fs").readFileSync(process.argv[1],"utf8").trim().split("\n").pop());console.log(` [frontload #${o.rep}] ${o.durationSec}s | main $${o.costUsdMain} ${o.tokBillable}tok | read=${o.read} grep=${o.grep} agentExplore=${o.explore} | ok=${o.ok}`)' "$RESULTS"
    else
      echo " WARN [frontload #$r] no metrics row produced for $tag" >&2
    fi
  done

  stop_daemon
  trap - EXIT INT TERM

  # grep -c prints nothing when the debug log does not exist; report 0 in that case.
  injections=$(grep -c INJECTED "$CG_FRONTLOAD_DEBUG" 2>/dev/null)
  echo "###### FRONTLOAD DONE $REPO (cumulative hook injections: ${injections:-0})"