Pārlūkot izejas kodu

feat(reasoning): add `CODEGRAPH_OFFLOAD_DISABLE` kill-switch and per-call usage log

`CODEGRAPH_OFFLOAD_DISABLE=1` immediately disables the offload for the current
process without touching the persisted config or stored login — useful for A/B
arms or sessions where raw source is preferred.

`CODEGRAPH_OFFLOAD_USAGE_LOG=<path>` appends one JSONL entry per call with token
counts, charged credits, and derived cost (`creditsCharged / 100_000`) so a
harness can attribute CodeGraph AI spend to a single run independently of the
server's cumulative totals. Both features are best-effort and never disrupt the
degradable offload path.

Also fixes the `login` credit display to check `unlimited` before the numeric
balance, so comped/internal accounts don't incorrectly show "0 remaining".
Colby McHenry 4 dienas atpakaļ
vecāks
revīzija
6d5cb6b25c
4 mainītis faili ar 122 papildinājumiem un 4 dzēšanām
  1. 59 1
      __tests__/offload.test.ts
  2. 8 2
      src/bin/codegraph.ts
  3. 11 0
      src/reasoning/config.ts
  4. 44 1
      src/reasoning/reasoner.ts

+ 59 - 1
__tests__/offload.test.ts

@@ -34,7 +34,7 @@ describe('reasoning offload', () => {
     'CODEGRAPH_OFFLOAD_URL', 'CODEGRAPH_OFFLOAD_MODEL', 'CODEGRAPH_OFFLOAD_KEY',
     'CODEGRAPH_OFFLOAD_EFFORT', 'CODEGRAPH_OFFLOAD_STYLE', 'CODEGRAPH_OFFLOAD_TIMEOUT_MS',
     'CODEGRAPH_OFFLOAD_MAXTOKENS', 'CODEGRAPH_OFFLOAD_STRIP', 'CODEGRAPH_OFFLOAD_DEBUG',
-    'CEREBRAS_API_KEY',
+    'CODEGRAPH_OFFLOAD_DISABLE', 'CODEGRAPH_OFFLOAD_USAGE_LOG', 'CEREBRAS_API_KEY',
   ];
   let saved: Record<string, string | undefined>;
 
@@ -118,6 +118,64 @@ describe('reasoning offload', () => {
     });
   });
 
+  describe('CODEGRAPH_OFFLOAD_DISABLE kill-switch', () => {
+    it('forces the offload off even when managed + signed in', () => {
+      writeOffloadConfig({ managed: true });
+      writeOffloadToken('cgai_live');
+      expect(resolveOffload().enabled).toBe(true); // sanity: on without the flag
+      process.env.CODEGRAPH_OFFLOAD_DISABLE = '1';
+      const c = resolveOffload();
+      expect(c.enabled).toBe(false);
+      expect(c.managed).toBe(false);
+      expect(c.origin).toBe('none');
+      expect(isOffloadEnabled()).toBe(false);
+    });
+
+    it('forces the offload off even with a BYO endpoint + key', () => {
+      process.env.CODEGRAPH_OFFLOAD_URL = 'https://env.example/v1';
+      process.env.CODEGRAPH_OFFLOAD_KEY = 'sk-direct';
+      expect(resolveOffload().enabled).toBe(true);
+      process.env.CODEGRAPH_OFFLOAD_DISABLE = '1';
+      expect(resolveOffload().enabled).toBe(false);
+    });
+  });
+
+  describe('per-call usage log (CODEGRAPH_OFFLOAD_USAGE_LOG)', () => {
+    const okResponse = () => ({
+      ok: true, status: 200,
+      headers: { get: (h: string) => (h === 'x-cg-credits-charged' ? '127' : null) },
+      json: async () => ({
+        choices: [{ message: { content: 'Coverage: full.\nThe answer.' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 700, completion_tokens: 80, total_tokens: 780 },
+      }),
+    });
+
+    it('appends one JSON line with tokens + charged credits when the log path is set', async () => {
+      writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', keyEnv: 'CEREBRAS_API_KEY' });
+      process.env.CEREBRAS_API_KEY = 'sk-live';
+      vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse()));
+      const logPath = path.join(home, 'usage.jsonl');
+      process.env.CODEGRAPH_OFFLOAD_USAGE_LOG = logPath;
+
+      await synthesizeOffload({ query: 'q', context: 'src' });
+      const line = JSON.parse(fs.readFileSync(logPath, 'utf8').trim());
+      expect(line.totalTokens).toBe(780);
+      expect(line.promptTokens).toBe(700);
+      expect(line.creditsCharged).toBe(127);
+      expect(line.costUsd).toBeCloseTo(0.00127, 6); // 100k credits = $1
+      expect(line.answerLen).toBeGreaterThan(0);
+    });
+
+    it('is a no-op (and never throws) when the log path is unset', async () => {
+      writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', keyEnv: 'CEREBRAS_API_KEY' });
+      process.env.CEREBRAS_API_KEY = 'sk-live';
+      vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse()));
+      // no CODEGRAPH_OFFLOAD_USAGE_LOG set → answer still returns fine
+      const out = await synthesizeOffload({ query: 'q', context: 'src' });
+      expect(out).toContain('Coverage: full.');
+    });
+  });
+
   describe('strict degradation (never throws, returns null to fall back)', () => {
     it('returns null when no endpoint is configured', async () => {
       expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull();

+ 8 - 2
src/bin/codegraph.ts

@@ -1382,8 +1382,14 @@ program
       success('Signed in to CodeGraph AI — managed reasoning is on.');
       try {
         const usage = await fetchUsage();
-        if (usage && typeof usage.remaining === 'number') {
-          info(`  credits: ${usage.remaining.toLocaleString()} remaining`);
+        if (usage) {
+          // Mirror `codegraph usage`'s precedence: a comped/internal account is
+          // flagged `unlimited` (often with remaining:0 when no allowance is set),
+          // so check that before the numeric balance or it reads "0 remaining".
+          if (usage.banned) warn('  Account suspended — contact support.');
+          else if (usage.unlimited) info('  credits: unlimited');
+          else if (typeof usage.remaining === 'number')
+            info(`  credits: ${usage.remaining.toLocaleString()} remaining`);
         }
       } catch {
         /* balance is best-effort */

+ 11 - 0
src/reasoning/config.ts

@@ -102,6 +102,17 @@ const trimmed = (v: string | undefined): string | undefined => {
 
 /** Merge the persisted config with `CODEGRAPH_OFFLOAD_*` env overrides (env wins). */
 export function resolveOffload(env: NodeJS.ProcessEnv = process.env): ResolvedOffload {
+  // Hard kill-switch: disable the offload for this process/session without touching
+  // the persisted config or the stored login — e.g. one A/B arm, or a user who wants
+  // codegraph_explore to return raw source for a session. Env-only by design.
+  if (env.CODEGRAPH_OFFLOAD_DISABLE === '1') {
+    return {
+      enabled: false, managed: false, url: undefined, model: MANAGED_DEFAULT_MODEL,
+      apiKey: undefined, keySource: undefined, effort: 'low', style: 'plain',
+      timeoutMs: 20000, maxTokens: 12000, strip: false,
+      debug: env.CODEGRAPH_OFFLOAD_DEBUG === '1', origin: 'none',
+    };
+  }
   const c = readOffloadConfig();
   const managed = !!c.managed;
   const envUrl = trimmed(env.CODEGRAPH_OFFLOAD_URL);

+ 44 - 1
src/reasoning/reasoner.ts

@@ -28,6 +28,7 @@
  * result — a broken offload must be invisible to the agent (one isError early in a
  * session and an agent can abandon the tool entirely).
  */
+import * as fs from 'fs';
 import { resolveOffload } from './config';
 
 interface SynthArgs {
@@ -87,6 +88,23 @@ function debug(...args: unknown[]): void {
   }
 }
 
+/**
+ * Append one JSON line of per-call offload usage to `CODEGRAPH_OFFLOAD_USAGE_LOG`
+ * when that env var is set (no-op otherwise), so a harness can attribute tokens and
+ * cost to a single run without relying on the server's cumulative totals.
+ * Best-effort: a serialization or write failure goes to `debug` and is swallowed.
+ * @param entry - usage record; serialized with `JSON.stringify`, one line per call.
+ */
+function recordUsage(entry: Record<string, unknown>): void {
+  const logPath = process.env.CODEGRAPH_OFFLOAD_USAGE_LOG;
+  if (!logPath) return;
+  try {
+    fs.appendFileSync(logPath, JSON.stringify(entry) + '\n');
+  } catch (err: unknown) {
+    debug('usage-log write failed', err instanceof Error ? err.message : String(err));
+  }
+}
+
 // Shared preamble: the model is a pure analysis function, never an agent.
 // CORRECTNESS-FIRST — a synthesized answer is only useful if it is never wrong,
 // and NEVER confidently wrong. The calibration below is the load-bearing part.
@@ -215,14 +233,39 @@ export async function synthesizeOffload({ query, context }: SynthArgs): Promise<
     }
     const data = (await res.json()) as {
       choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
+      usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
     };
+    // Per-call usage/cost capture. The managed gateway returns the spend in the
+    // `x-cg-credits-charged` header (100k credits = $1) and the token counts in the
+    // standard OpenAI `usage` block; a BYO endpoint typically returns `usage` only.
+    // This is the source of truth for "CodeGraph AI tokens + cost" per run.
+    // Optional chaining: usage bookkeeping must NEVER break the degradable path,
+    // even if a response/mock lacks a standard headers object.
+    const creditsCharged = Number(res.headers?.get?.('x-cg-credits-charged'));
     const answer = data.choices?.[0]?.message?.content?.trim();
+    recordUsage({
+      ts: new Date().toISOString(),
+      ms: Date.now() - started,
+      model: cfg.model,
+      style: cfg.style,
+      managed: cfg.managed,
+      promptTokens: data.usage?.prompt_tokens ?? null,
+      completionTokens: data.usage?.completion_tokens ?? null,
+      totalTokens: data.usage?.total_tokens ?? null,
+      creditsCharged: Number.isFinite(creditsCharged) ? creditsCharged : null,
+      costUsd: Number.isFinite(creditsCharged) ? creditsCharged / 100_000 : null,
+      queryLen: query.length,
+      ctxLen: ctx.length,
+      rawCtxLen: context.length,
+      answerLen: answer?.length ?? 0,
+      finishReason: data.choices?.[0]?.finish_reason ?? null,
+    });
     if (!answer) {
       debug('empty answer', JSON.stringify(data).slice(0, 200));
       return null;
     }
     debug(
-      `ok in ${Date.now() - started}ms [${cfg.style}] — answer ${answer.length} chars (ctx ${ctx.length} of ${context.length}, finish=${data.choices?.[0]?.finish_reason})`
+      `ok in ${Date.now() - started}ms [${cfg.style}] — answer ${answer.length} chars (ctx ${ctx.length} of ${context.length}, finish=${data.choices?.[0]?.finish_reason}), ${data.usage?.total_tokens ?? '?'} tok, ${Number.isFinite(creditsCharged) ? creditsCharged + ' cr' : 'no-charge-hdr'}`
     );
     return answer + footer;
   } catch (err) {