3 dní pred · 4f8782cbe5
--- a/scripts/agent-eval/offload-eval-3arm.sh
+++ b/scripts/agent-eval/offload-eval-3arm.sh
@@ -36,9 +36,13 @@ prewarm() { # path  extra-env (e.g. "FOO=bar")
 
				 run() { # arm rep mcp-config usage-log-or-dash
			
 
				   local arm="$1" rep="$2" cfg="$3" usage="$4" tag="$REPO-$1-$2"
			
 
				   [ "$usage" != "-" ] && : > "$usage"
			
 
				+  # DISALLOW (optional): block sub-agent delegation across all arms so the A/B
			
 
				+  # measures the retrieval mode, not whether Sonnet decides to spawn a codegraph-blind
			
 
				+  # Explore subagent (which thrashes regardless and adds huge variance).
			
 
				   ( cd "$TARGET" && claude -p "$Q" \
			
 
				       --output-format stream-json --verbose --permission-mode bypassPermissions \
			
 
				       --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
			
 
				+      ${DISALLOW:+--disallowedTools "$DISALLOW"} \
			
 
				       --strict-mcp-config --mcp-config "$cfg" \
			
 
				       </dev/null > "$RUNS/$tag.jsonl" 2>"$RUNS/$tag.err" )
			
 
				   node "$EXTRACT" --run "$RUNS/$tag.jsonl" --usage "$usage" --arm "$arm" --rep "$rep" \
			
@@ -52,14 +56,17 @@ printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELA
 
				 printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","CODEGRAPH_OFFLOAD_DISABLE=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$TARGET" > "$CFG_RAW"
			
 
				 printf '{"mcpServers":{}}' > "$CFG_NOCG"
			
 
				 
			
 
				-echo "###### repo=$REPO tier=$TIER reps=$REPS model=${MODEL:-sonnet}/${EFFORT:-high}"
			
 
				+# REP_START lets a later batch ADD reps without clobbering earlier jsonls
			
 
				+# (e.g. REP_START=4 REPS=3 -> reps 4,5,6; default starts at 1).
			
 
				+START="${REP_START:-1}"; END=$((START + REPS - 1))
			
 
				+echo "###### repo=$REPO tier=$TIER reps=$START..$END model=${MODEL:-sonnet}/${EFFORT:-high}"
			
 
				 echo "###### Q=$Q"
			
 
				 echo "== ARM offload =="; prewarm "$TARGET" "CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE"
			
 
				-for r in $(seq 1 "$REPS"); do run offload "$r" "$CFG_OFF" "$USAGE"; done
			
 
				+for r in $(seq "$START" "$END"); do run offload "$r" "$CFG_OFF" "$USAGE"; done
			
 
				 pkill -9 -f "serve --mcp --path $TARGET" 2>/dev/null; rm -f "$TARGET/.codegraph/daemon.sock" 2>/dev/null; sleep 1
			
 
				 echo "== ARM raw =="; prewarm "$TARGET" "CODEGRAPH_OFFLOAD_DISABLE=1"
			
 
				-for r in $(seq 1 "$REPS"); do run raw "$r" "$CFG_RAW" "-"; done
			
 
				+for r in $(seq "$START" "$END"); do run raw "$r" "$CFG_RAW" "-"; done
			
 
				 pkill -9 -f "serve --mcp --path $TARGET" 2>/dev/null; rm -f "$TARGET/.codegraph/daemon.sock" 2>/dev/null; sleep 1
			
 
				 echo "== ARM nocg =="
			
 
				-for r in $(seq 1 "$REPS"); do run nocg "$r" "$CFG_NOCG" "-"; done
			
 
				+for r in $(seq "$START" "$END"); do run nocg "$r" "$CFG_NOCG" "-"; done
			
 
				 echo "###### DONE $REPO"
			
--- a/scripts/agent-eval/offload-eval-cost.mjs
+++ b/scripts/agent-eval/offload-eval-cost.mjs
@@ -0,0 +1,133 @@
 
				+#!/usr/bin/env node
			
 
				+// Cost/token analysis for the 3-arm offload eval, with a MAIN-vs-SUBAGENT split.
			
 
				+//
			
 
				+// The explore-subagent question. With delegation ALLOWED, the nocg arm spawns a
			
 
				+// Claude Code Explore subagent; the codegraph arms do all work in the main agent.
			
 
				+// Two facts make naive accounting wrong:
			
 
				+//   1. The Explore subagent runs on HAIKU 4.5; the main agent on SONNET 4.6.
			
 
				+//      So per-token cost differs ~3x between them — you cannot price both the same.
			
 
				+//   2. The subagent's consumption is ~95% cache-reads. At Haiku's $0.10/MTok
			
 
				+//      cache-read rate, a huge TOKEN volume is a small DOLLAR cost.
			
 
				+//
			
 
				+// Rather than re-derive cost from raw token counts (and guess the cache TTL —
			
 
				+// Claude Code uses 1-hour ephemeral cache here, 2x write, not 5-min), we read
			
 
				+// Claude Code's OWN authoritative accounting from the `result` event:
			
 
				+//   result.modelUsage[model].costUSD  — per-model cost CC itself billed
			
 
				+//   result.total_cost_usd             — their sum (INCLUDES the Haiku subagent;
			
 
				+//                                       the handoff's "excludes subagent" was wrong)
			
 
				+// The model split IS the agent split here: sonnet => main, haiku => Explore subagent
			
 
				+// (only nocg spawns one, and only nocg shows haiku usage). Token volume is still
			
 
				+// summed per-model from modelUsage for the separate "tokens" story.
			
 
				+//
			
 
				+// Usage: [ARMS=nocg,raw,offload] offload-eval-cost.mjs <runs-dir> <repo> [reps]   (scans reps 1..reps, default 3; missing run files are skipped)
			
 
				+//   e.g. offload-eval-cost.mjs /tmp/cg-offload-eval/runs trezor 3
			
 
				+import { readFileSync, existsSync } from 'fs';
			
 
				+
			
 
				+const MAIN_TIER = /sonnet/;   // main agent (reference only: analyzeRun never tests this; every non-haiku model is bucketed as main)
			
 
				+const SUB_TIER  = /haiku/;    // Claude Code Explore subagent (the only pattern analyzeRun actually tests)
			
 
				+
			
 
				+const [,, runsDir, repo, repsArg] = process.argv;
			
 
				+if (!runsDir || !repo) { console.error('usage: offload-eval-cost.mjs <runs-dir> <repo> [reps]   (env ARMS=nocg,raw,offload)'); process.exit(1); }
			
 
				+const REPS = Number(repsArg || 3);
			
 
				+// Arms to analyze (file stems `<repo>-<arm>-<rep>.jsonl`). Override for the style A/B:
			
 
				+// ARMS=raw,refs,map,src. nocg's Haiku subagent is the only sub-tier; the rest are main-only.
			
 
				+const ARMS = (process.env.ARMS || 'nocg,raw,offload').split(',').map((s) => s.trim()).filter(Boolean);
			
 
				+
			
 
				+const toks = (u) => (u.inputTokens||0)+(u.outputTokens||0)+(u.cacheReadInputTokens||0)+(u.cacheCreationInputTokens||0);
			
 
				+
			
 
				+function analyzeRun(file) {
			
 
				+  let result = null, agentCalls = 0;
			
 
				+  const tools = {}, subPids = new Set();
			
 
				+  for (const line of readFileSync(file, 'utf8').split('\n')) {
			
 
				+    if (!line) continue;
			
 
				+    let e; try { e = JSON.parse(line); } catch { continue; }
			
 
				+    if (e.parent_tool_use_id && e.message?.usage) subPids.add(e.parent_tool_use_id);
			
 
				+    if (e.type === 'assistant' && Array.isArray(e.message?.content))
			
 
				+      for (const b of e.message.content)
			
 
				+        if (b.type === 'tool_use') { tools[b.name] = (tools[b.name]||0)+1; if (b.name === 'Agent') agentCalls++; }
			
 
				+    if (e.type === 'result') result = e;
			
 
				+  }
			
 
				+  // Authoritative cost + tokens from Claude Code's per-model accounting.
			
 
				+  const mu = result?.modelUsage || {};
			
 
				+  const main = { cost: 0, tok: 0 }, sub = { cost: 0, tok: 0 };
			
 
				+  for (const [model, u] of Object.entries(mu)) {
			
 
				+    const bucket = SUB_TIER.test(model) ? sub : main; // sonnet/anything-else => main
			
 
				+    bucket.cost += u.costUSD || 0;
			
 
				+    bucket.tok  += toks(u);
			
 
				+  }
			
 
				+  return {
			
 
				+    main, sub, subagents: subPids.size, agentCalls,
			
 
				+    ccTotal: result?.total_cost_usd ?? null,
			
 
				+    ok: result?.subtype === 'success',
			
 
				+    durationSec: result?.duration_ms ? +(result.duration_ms/1000).toFixed(1) : null,
			
 
				+    models: Object.keys(mu), tools,
			
 
				+  };
			
 
				+}
			
 
				+
			
 
				+const k = (n) => (n/1000).toFixed(0).padStart(5) + 'K';
			
 
				+const d = (n) => '$' + n.toFixed(3);
			
 
				+const cost = (b) => b.cost;
			
 
				+const tot  = (b) => b.tok;
			
 
				+
			
 
				+const byArm = {};
			
 
				+for (const arm of ARMS) {
			
 
				+  const runs = [];
			
 
				+  for (let r = 1; r <= REPS; r++) {
			
 
				+    const f = `${runsDir}/${repo}-${arm}-${r}.jsonl`;
			
 
				+    if (existsSync(f)) runs.push({ rep: r, ...analyzeRun(f) });
			
 
				+  }
			
 
				+  byArm[arm] = runs;
			
 
				+}
			
 
				+
			
 
				+// Per-run detail. Cost is Claude Code's own modelUsage.costUSD (authoritative,
			
 
				+// per-model pricing + correct cache TTL). MAIN=Sonnet, SUB=Haiku Explore subagent.
			
 
				+// cc-check: main$+sub$ must equal result.total_cost_usd (delta should be ~0).
			
 
				+console.log(`\n=== ${repo}: per-run main(Sonnet)/sub(Haiku) split — Claude Code's own cost accounting ===`);
			
 
				+console.log('arm      rep | subAg | MAIN(sonnet) tok / $ | SUB(haiku) tok / $   | TOTAL tok / $   | cc_total Δ | dur  reads');
			
 
				+for (const arm of ARMS) for (const r of byArm[arm]) {
			
 
				+  const mC = cost(r.main), sC = cost(r.sub), mT = tot(r.main), sT = tot(r.sub);
			
 
				+  const reads = r.tools['Read'] || 0, grep = (r.tools['Grep']||0)+(r.tools['Bash']||0)+(r.tools['Glob']||0);
			
 
				+  const explore = r.tools['mcp__codegraph__codegraph_explore'] || 0;
			
 
				+  const delta = (mC + sC) - (r.ccTotal || 0); // should be ~0
			
 
				+  console.log(
			
 
				+    `${arm.padEnd(8)} #${r.rep} | ${String(r.subagents).padStart(2)}    | ${k(mT)} ${d(mC).padStart(7)}     | ${k(sT)} ${d(sC).padStart(7)}     | ${k(mT+sT)} ${d(mC+sC).padStart(7)} | ${d(r.ccTotal||0).padStart(7)} ${(delta>=0?'+':'')+delta.toFixed(4)} | ${String(r.durationSec).padStart(5)} r=${reads} g=${grep} x=${explore}`
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Per-arm means
			
 
				+const mean = (arr, f) => arr.length ? arr.reduce((s,x)=>s+f(x),0)/arr.length : 0;
			
 
				+console.log(`\n=== ${repo}: per-arm MEANS (n per arm) ===`);
			
 
				+console.log('arm      n | main $   sub $    TOTAL $  | main tok   sub tok    TOTAL tok | %$ in sub | %tok in sub');
			
 
				+for (const arm of ARMS) {
			
 
				+  const runs = byArm[arm]; if (!runs.length) continue;
			
 
				+  const mC = mean(runs, r=>cost(r.main)), sC = mean(runs, r=>cost(r.sub));
			
 
				+  const mT = mean(runs, r=>tot(r.main)),  sT = mean(runs, r=>tot(r.sub));
			
 
				+  const pctSubC = (mC+sC) ? (100*sC/(mC+sC)) : 0;
			
 
				+  const pctSubT = (mT+sT) ? (100*sT/(mT+sT)) : 0;
			
 
				+  console.log(
			
 
				+    `${arm.padEnd(8)} ${runs.length} | ${d(mC).padStart(7)} ${d(sC).padStart(7)} ${d(mC+sC).padStart(7)} | ${k(mT)} ${k(sT)} ${k(mT+sT)} | ${pctSubC.toFixed(0).padStart(3)}%      | ${pctSubT.toFixed(0).padStart(3)}%`
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Headline ladders — cost, tokens, duration, all vs a baseline (nocg if it has runs, else the first arm that does).
			
 
				+console.log(`\n=== Ladders (mean, incl. subagent) ===`);
			
 
				+const totals = ARMS.map(a => ({ a, c: mean(byArm[a], r=>cost(r.main)+cost(r.sub)), t: mean(byArm[a], r=>tot(r.main)+tot(r.sub)) })).filter(x=>byArm[x.a].length);
			
 
				+const base = totals.find(x=>x.a==='nocg') ?? totals[0];
			
 
				+const bn = base?.a ?? '?';
			
 
				+console.log(`  COST (vs ${bn}):`);
			
 
				+for (const x of totals) {
			
 
				+  const vs = base && base.c ? ` (${((x.c/base.c-1)*100>=0?'+':'')}${((x.c/base.c-1)*100).toFixed(0)}%)` : '';
			
 
				+  console.log(`    ${x.a.padEnd(8)} ${d(x.c)}${vs}`);
			
 
				+}
			
 
				+console.log(`  TOKENS (vs ${bn}):`);
			
 
				+for (const x of totals) {
			
 
				+  const vs = base && base.t ? ` (${((x.t/base.t-1)*100>=0?'+':'')}${((x.t/base.t-1)*100).toFixed(0)}%)` : '';
			
 
				+  console.log(`    ${x.a.padEnd(8)} ${k(x.t)}${vs}`);
			
 
				+}
			
 
				+console.log(`  DURATION (wall-clock, vs ${bn}):`);
			
 
				+const durs = ARMS.map(a => ({ a, s: mean(byArm[a].filter(r=>r.durationSec!=null), r=>r.durationSec) })).filter(x=>byArm[x.a].length);
			
 
				+const dbase = durs.find(x=>x.a==='nocg') ?? durs[0];
			
 
				+for (const x of durs) {
			
 
				+  const vs = dbase && dbase.s ? ` (${((x.s/dbase.s-1)*100>=0?'+':'')}${((x.s/dbase.s-1)*100).toFixed(0)}%)` : '';
			
 
				+  console.log(`    ${x.a.padEnd(8)} ${x.s.toFixed(0)}s${vs}`);
			
 
				+}
			
--- a/scripts/agent-eval/offload-eval-styles.sh
+++ b/scripts/agent-eval/offload-eval-styles.sh
@@ -0,0 +1,72 @@
 
				+#!/usr/bin/env bash
			
 
				+# Offload reasoning-OUTPUT-STYLE A/B — all codegraph-on, isolating the Worker's
			
 
				+# output shape's effect on main-session tokens / latency / accuracy:
			
 
				+#   raw  : CODEGRAPH_OFFLOAD_DISABLE=1            (verbatim explore source, the floor)
			
 
				+#   refs : managed offload, default              (Cerebras map re-expanded to verbatim, ~24K)
			
 
				+#   map  : managed offload, STYLE=map            (compact reasoned map + file:line anchors, ~1-3K)
			
 
				+#   src  : managed offload, STYLE=src            (map + cited line ranges only, ~1-5K)
			
 
				+# Delegation BLOCKED by default (DISALLOW=Agent) so we measure the offload payload's
			
 
				+# effect on the main Sonnet agent, not whether it spawns a Haiku Explore subagent.
			
 
				+#
			
 
				+# Usage: offload-eval-styles.sh <indexed-repo> <reps> "<question>"
			
 
				+# Env:   RESULTS=<file>  AGENT_EVAL_OUT=<dir>  REP_START=1  DISALLOW=Agent  MODEL/EFFORT
			
 
				+set -uo pipefail
			
 
				+HERE="$(cd "$(dirname "$0")" && pwd)"
			
 
				+ENGINE="$(cd "$HERE/../.." && pwd)"
			
 
				+BIN="$ENGINE/dist/bin/codegraph.js"
			
 
				+OUT="${AGENT_EVAL_OUT:-/tmp/cg-offload-eval}"
			
 
				+TARGET="${1:?usage: offload-eval-styles.sh <indexed-repo> <reps> \"<question>\"}"
			
 
				+REPS="${2:?reps}"; Q="${3:?question}"
			
 
				+RUNS="$OUT/runs"; EXTRACT="$HERE/offload-eval-metrics.mjs"
			
 
				+RESULTS="${RESULTS:-$OUT/results-styles.jsonl}"
			
 
				+REPO=$(basename "$TARGET")
			
 
				+DISALLOW="${DISALLOW-Agent}"   # unset => Agent (delegation blocked); set DISALLOW to the empty string to allow delegation.
			
 
				+START="${REP_START:-1}"; END=$((START + REPS - 1))
			
 
				+mkdir -p "$RUNS"
			
 
				+command -v claude >/dev/null || { echo "no claude on PATH"; exit 1; }
			
 
				+[ -d "$TARGET/.codegraph" ] || { echo "not indexed: $TARGET"; exit 1; }
			
 
				+TARGET=$(cd "$TARGET" && pwd -P)
			
 
				+
			
 
				+prewarm() { # path  extra-env
			
 
				+  pkill -9 -f "serve --mcp --path $1" 2>/dev/null; rm -f "$1/.codegraph/daemon.sock" 2>/dev/null; sleep 0.6
			
 
				+  env ${2:-} CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$1" </dev/null >/dev/null 2>&1 &
			
 
				+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$1" \
			
 
				+    && echo "  daemon warm" || echo "  WARN daemon never bound"
			
 
				+}
			
 
				+kill_daemon() { pkill -9 -f "serve --mcp --path $TARGET" 2>/dev/null; rm -f "$TARGET/.codegraph/daemon.sock" 2>/dev/null; sleep 1; }
			
 
				+
			
 
				+run() { # arm rep mcp-config usage-log-or-dash
			
 
				+  local arm="$1" rep="$2" cfg="$3" usage="$4" tag="$REPO-$1-$2"
			
 
				+  [ "$usage" != "-" ] && : > "$usage"
			
 
				+  ( cd "$TARGET" && claude -p "$Q" \
			
 
				+      --output-format stream-json --verbose --permission-mode bypassPermissions \
			
 
				+      --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
			
 
				+      ${DISALLOW:+--disallowedTools "$DISALLOW"} \
			
 
				+      --strict-mcp-config --mcp-config "$cfg" \
			
 
				+      </dev/null > "$RUNS/$tag.jsonl" 2>"$RUNS/$tag.err" )
			
 
				+  node "$EXTRACT" --run "$RUNS/$tag.jsonl" --usage "$usage" --arm "$arm" --rep "$rep" \
			
 
				+      --repo "$REPO" --tier styles --q "$Q" >> "$RESULTS"
			
 
				+  node -e 'const o=JSON.parse(require("fs").readFileSync(process.argv[1],"utf8").trim().split("\n").pop());console.log(`  [${o.arm} #${o.rep}] ${o.durationSec}s | ${o.tokBillable} billable tok | read=${o.read} grep=${o.grep} explore=${o.explore} offload=${o.offloadFired} | AI ${o.ai.calls}c/${o.ai.totalTokens}t | ok=${o.ok}`)' "$RESULTS"
			
 
				+}
			
 
				+
			
 
				+# MCP configs: env baked into the daemon-spawn command claude uses.
			
 
				+USAGE="$RUNS/$REPO-usage.jsonl"
			
 
				+mkcfg() { # extra-env-pairs (JSON array entries, comma-led or empty); prints the MCP config JSON to stdout
			
 
				+  printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1"%s,"node","%s","serve","--mcp","--path","%s"]}}}' "$1" "$BIN" "$TARGET"
			
 
				+}
			
 
				+CFG_RAW="$RUNS/mcp-sty-raw-$REPO.json";   mkcfg ',"CODEGRAPH_OFFLOAD_DISABLE=1"' > "$CFG_RAW"
			
 
				+CFG_REFS="$RUNS/mcp-sty-refs-$REPO.json"; mkcfg ",\"CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE\"" > "$CFG_REFS"
			
 
				+CFG_MAP="$RUNS/mcp-sty-map-$REPO.json";   mkcfg ",\"CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE\",\"CODEGRAPH_OFFLOAD_STYLE=map\"" > "$CFG_MAP"
			
 
				+CFG_SRC="$RUNS/mcp-sty-src-$REPO.json";   mkcfg ",\"CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE\",\"CODEGRAPH_OFFLOAD_STYLE=src\"" > "$CFG_SRC"
			
 
				+
			
 
				+echo "###### repo=$REPO reps=$START..$END model=${MODEL:-sonnet}/${EFFORT:-high} disallow=${DISALLOW:-<none>}"
			
 
				+echo "###### Q=$Q"
			
 
				+echo "== ARM raw ==";  prewarm "$TARGET" "CODEGRAPH_OFFLOAD_DISABLE=1"
			
 
				+for r in $(seq "$START" "$END"); do run raw  "$r" "$CFG_RAW"  "-"; done; kill_daemon
			
 
				+echo "== ARM refs =="; prewarm "$TARGET" "CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE"
			
 
				+for r in $(seq "$START" "$END"); do run refs "$r" "$CFG_REFS" "$USAGE"; done; kill_daemon
			
 
				+echo "== ARM map ==";  prewarm "$TARGET" "CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE CODEGRAPH_OFFLOAD_STYLE=map"
			
 
				+for r in $(seq "$START" "$END"); do run map  "$r" "$CFG_MAP"  "$USAGE"; done; kill_daemon
			
 
				+echo "== ARM src ==";  prewarm "$TARGET" "CODEGRAPH_OFFLOAD_USAGE_LOG=$USAGE CODEGRAPH_OFFLOAD_STYLE=src"
			
 
				+for r in $(seq "$START" "$END"); do run src  "$r" "$CFG_SRC"  "$USAGE"; done; kill_daemon
			
 
				+echo "###### DONE $REPO — judge: node $HERE/offload-eval-judge.mjs --results $RESULTS --truth $HERE/offload-eval-ground-truth.json --out $OUT/judged-styles.jsonl"
			
--- a/src/reasoning/reasoner.ts
+++ b/src/reasoning/reasoner.ts
@@ -200,6 +200,9 @@ export async function synthesizeOffload({ query, context }: SynthArgs): Promise<
 
				   const url = cfg.url.replace(/\/+$/, '') + '/chat/completions';
			
 
				   const { system, footer } = promptFor(cfg.style);
			
 
				   const ctx = cfg.strip ? stripAgentDirectives(context) : context;
			
 
				+  // Optional operator/eval flag, whitespace-trimmed and otherwise forwarded as-is to the managed Worker (see body below);
			
 
				+  // the Worker validates it and falls back to its default for anything it doesn't recognize.
			
 
				+  const workerStyle = (process.env.CODEGRAPH_OFFLOAD_STYLE || '').trim();
			
 
				 
			
 
				   const controller = new AbortController();
			
 
				   const timer = setTimeout(() => controller.abort(), cfg.timeoutMs);
			
@@ -217,6 +220,10 @@ export async function synthesizeOffload({ query, context }: SynthArgs): Promise<
 
				         max_tokens: cfg.maxTokens,
			
 
				         temperature: 0.2,
			
 
				         reasoning_effort: cfg.effort,
			
 
				+        // Optional managed-tier flag, forwarded ONLY to the managed gateway (which strips it
			
 
				+        // before the upstream model call) and ONLY when an operator/eval sets it — so BYO
			
 
				+        // endpoints, which may reject unknown fields, never see it.
			
 
				+        ...(cfg.managed && workerStyle ? { offload_style: workerStyle } : {}),
			
 
				         messages: [
			
 
				           { role: 'system', content: system },
			
 
				           {