4 dienas atpakaļ · 6d5cb6b25c
--- a/__tests__/offload.test.ts
+++ b/__tests__/offload.test.ts
@@ -34,7 +34,7 @@ describe('reasoning offload', () => {
 
				     'CODEGRAPH_OFFLOAD_URL', 'CODEGRAPH_OFFLOAD_MODEL', 'CODEGRAPH_OFFLOAD_KEY',
			
 
				     'CODEGRAPH_OFFLOAD_EFFORT', 'CODEGRAPH_OFFLOAD_STYLE', 'CODEGRAPH_OFFLOAD_TIMEOUT_MS',
			
 
				     'CODEGRAPH_OFFLOAD_MAXTOKENS', 'CODEGRAPH_OFFLOAD_STRIP', 'CODEGRAPH_OFFLOAD_DEBUG',
			
 
				-    'CEREBRAS_API_KEY',
			
 
				+    'CODEGRAPH_OFFLOAD_DISABLE', 'CODEGRAPH_OFFLOAD_USAGE_LOG', 'CEREBRAS_API_KEY',
			
 
				   ];
			
 
				   let saved: Record<string, string | undefined>;
			
 
				 
			
@@ -118,6 +118,64 @@ describe('reasoning offload', () => {
 
				     });
			
 
				   });
			
 
				 
			
 
				+  describe('CODEGRAPH_OFFLOAD_DISABLE kill-switch', () => {
			
 
				+    it('forces the offload off even when managed + signed in', () => {
			
 
				+      writeOffloadConfig({ managed: true });
			
 
				+      writeOffloadToken('cgai_live');
			
 
				+      expect(resolveOffload().enabled).toBe(true); // sanity: on without the flag
			
 
				+      process.env.CODEGRAPH_OFFLOAD_DISABLE = '1';
			
 
				+      const c = resolveOffload();
			
 
				+      expect(c.enabled).toBe(false);
			
 
				+      expect(c.managed).toBe(false);
			
 
				+      expect(c.origin).toBe('none');
			
 
				+      expect(isOffloadEnabled()).toBe(false);
			
 
				+    });
			
 
				+
			
 
				+    it('forces the offload off even with a BYO endpoint + key', () => {
			
 
				+      process.env.CODEGRAPH_OFFLOAD_URL = 'https://env.example/v1';
			
 
				+      process.env.CODEGRAPH_OFFLOAD_KEY = 'sk-direct';
			
 
				+      expect(resolveOffload().enabled).toBe(true);
			
 
				+      process.env.CODEGRAPH_OFFLOAD_DISABLE = '1';
			
 
				+      expect(resolveOffload().enabled).toBe(false);
			
 
				+    });
			
 
				+  });
			
 
				+
			
 
				+  describe('per-call usage log (CODEGRAPH_OFFLOAD_USAGE_LOG)', () => {
			
 
				+    const okResponse = () => ({
			
 
				+      ok: true, status: 200,
			
 
				+      headers: { get: (h: string) => (h === 'x-cg-credits-charged' ? '127' : null) },
			
 
				+      json: async () => ({
			
 
				+        choices: [{ message: { content: 'Coverage: full.\nThe answer.' }, finish_reason: 'stop' }],
			
 
				+        usage: { prompt_tokens: 700, completion_tokens: 80, total_tokens: 780 },
			
 
				+      }),
			
 
				+    });
			
 
				+
			
 
				+    it('appends one JSON line with tokens + charged credits when the log path is set', async () => {
			
 
				+      writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', keyEnv: 'CEREBRAS_API_KEY' });
			
 
				+      process.env.CEREBRAS_API_KEY = 'sk-live';
			
 
				+      vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse()));
			
 
				+      const logPath = path.join(home, 'usage.jsonl');
			
 
				+      process.env.CODEGRAPH_OFFLOAD_USAGE_LOG = logPath;
			
 
				+
			
 
				+      await synthesizeOffload({ query: 'q', context: 'src' });
			
 
				+      const line = JSON.parse(fs.readFileSync(logPath, 'utf8').trim());
			
 
				+      expect(line.totalTokens).toBe(780);
			
 
				+      expect(line.promptTokens).toBe(700);
			
 
				+      expect(line.creditsCharged).toBe(127);
			
 
				+      expect(line.costUsd).toBeCloseTo(0.00127, 6); // 100k credits = $1
			
 
				+      expect(line.answerLen).toBeGreaterThan(0);
			
 
				+    });
			
 
				+
			
 
				+    it('is a no-op (and never throws) when the log path is unset', async () => {
			
 
				+      writeOffloadConfig({ url: 'https://api.cerebras.ai/v1', keyEnv: 'CEREBRAS_API_KEY' });
			
 
				+      process.env.CEREBRAS_API_KEY = 'sk-live';
			
 
				+      vi.stubGlobal('fetch', vi.fn().mockResolvedValue(okResponse()));
			
 
				+      // no CODEGRAPH_OFFLOAD_USAGE_LOG set → answer still returns fine
			
 
				+      const out = await synthesizeOffload({ query: 'q', context: 'src' });
			
 
				+      expect(out).toContain('Coverage: full.');
			
 
				+    });
			
 
				+  });
			
 
				+
			
 
				   describe('strict degradation (never throws, returns null to fall back)', () => {
			
 
				     it('returns null when no endpoint is configured', async () => {
			
 
				       expect(await synthesizeOffload({ query: 'q', context: 'ctx' })).toBeNull();
			
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -1382,8 +1382,14 @@ program
 
				       success('Signed in to CodeGraph AI — managed reasoning is on.');
			
 
				       try {
			
 
				         const usage = await fetchUsage();
			
 
				-        if (usage && typeof usage.remaining === 'number') {
			
 
				-          info(`  credits: ${usage.remaining.toLocaleString()} remaining`);
			
 
				+        if (usage) {
			
 
				+          // Mirror `codegraph usage`'s precedence: a comped/internal account is
			
 
				+          // flagged `unlimited` (often with remaining:0 when no allowance is set),
			
 
				+          // so check that before the numeric balance or it reads "0 remaining".
			
 
				+          if (usage.banned) warn('  Account suspended — contact support.');
			
 
				+          else if (usage.unlimited) info('  credits: unlimited');
			
 
				+          else if (typeof usage.remaining === 'number')
			
 
				+            info(`  credits: ${usage.remaining.toLocaleString()} remaining`);
			
 
				         }
			
 
				       } catch {
			
 
				         /* balance is best-effort */
			
--- a/src/reasoning/config.ts
+++ b/src/reasoning/config.ts
@@ -102,6 +102,17 @@ const trimmed = (v: string | undefined): string | undefined => {
 
				 
			
 
				 /** Merge the persisted config with `CODEGRAPH_OFFLOAD_*` env overrides (env wins). */
			
 
				 export function resolveOffload(env: NodeJS.ProcessEnv = process.env): ResolvedOffload {
			
 
				+  // Hard kill-switch: disable the offload for this process/session without touching
			
 
				+  // the persisted config or the stored login — e.g. one A/B arm, or a user who wants
			
 
				+  // codegraph_explore to return raw source for a session. Env-only by design.
			
 
				+  if (env.CODEGRAPH_OFFLOAD_DISABLE === '1') {
			
 
				+    return {
			
 
				+      enabled: false, managed: false, url: undefined, model: MANAGED_DEFAULT_MODEL,
			
 
				+      apiKey: undefined, keySource: undefined, effort: 'low', style: 'plain',
			
 
				+      timeoutMs: 20000, maxTokens: 12000, strip: false,
			
 
				+      debug: env.CODEGRAPH_OFFLOAD_DEBUG === '1', origin: 'none',
			
 
				+    };
			
 
				+  }
			
 
				   const c = readOffloadConfig();
			
 
				   const managed = !!c.managed;
			
 
				   const envUrl = trimmed(env.CODEGRAPH_OFFLOAD_URL);
			
--- a/src/reasoning/reasoner.ts
+++ b/src/reasoning/reasoner.ts
@@ -28,6 +28,7 @@
 
				  * result — a broken offload must be invisible to the agent (one isError early in a
			
 
				  * session and an agent can abandon the tool entirely).
			
 
				  */
			
 
				+import * as fs from 'fs';
			
 
				 import { resolveOffload } from './config';
			
 
				 
			
 
				 interface SynthArgs {
			
@@ -87,6 +88,23 @@ function debug(...args: unknown[]): void {
 
				   }
			
 
				 }
			
 
				 
			
 
				/**
				 * Append one JSON line of per-call offload usage to the file named by the
				 * `CODEGRAPH_OFFLOAD_USAGE_LOG` environment variable.
				 *
				 * When the variable is unset or empty this is a no-op. It lets a harness
				 * attribute CodeGraph AI tokens + cost to a single run without depending on the
				 * metered server's cumulative totals.
				 *
				 * Best-effort by design: serialisation and the append both happen inside the
				 * `try`, so any failure is reported through `debug` and never propagates to the
				 * caller (the offload is strictly degradable, and so is its bookkeeping).
				 *
				 * @param entry - Fields describing the call; written as a single JSON object
				 *   followed by a newline.
				 */
				function recordUsage(entry: Record<string, unknown>): void {
				  const logPath = process.env.CODEGRAPH_OFFLOAD_USAGE_LOG;
				  if (!logPath) return;
				  try {
				    fs.appendFileSync(logPath, JSON.stringify(entry) + '\n');
				  } catch (err) {
				    // Narrow instead of casting: a thrown non-Error value would otherwise be
				    // logged as `undefined` and the actual reason lost.
				    const reason = err instanceof Error ? err.message : String(err);
				    debug('usage-log write failed', reason);
				  }
				}
			
 
				+
			
 
				 // Shared preamble: the model is a pure analysis function, never an agent.
			
 
				 // CORRECTNESS-FIRST — a synthesized answer is only useful if it is never wrong,
			
 
				 // and NEVER confidently wrong. The calibration below is the load-bearing part.
			
@@ -215,14 +233,39 @@ export async function synthesizeOffload({ query, context }: SynthArgs): Promise<
 
				     }
			
 
				     const data = (await res.json()) as {
			
 
				       choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
			
 
				+      usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
			
 
				     };
			
 
				+    // Per-call usage/cost capture. The managed gateway returns the spend in the
			
 
				+    // `x-cg-credits-charged` header (100k credits = $1) and the token counts in the
			
 
				+    // standard OpenAI `usage` block; a BYO endpoint typically returns `usage` only.
			
 
				+    // This is the source of truth for "CodeGraph AI tokens + cost" per run.
			
 
				+    // Optional chaining: usage bookkeeping must NEVER break the degradable path,
			
 
				+    // even if a response/mock lacks a standard headers object.
			
 
				+    const creditsCharged = Number(res.headers?.get?.('x-cg-credits-charged'));
			
 
				     const answer = data.choices?.[0]?.message?.content?.trim();
			
 
				+    recordUsage({
			
 
				+      ts: new Date().toISOString(),
			
 
				+      ms: Date.now() - started,
			
 
				+      model: cfg.model,
			
 
				+      style: cfg.style,
			
 
				+      managed: cfg.managed,
			
 
				+      promptTokens: data.usage?.prompt_tokens ?? null,
			
 
				+      completionTokens: data.usage?.completion_tokens ?? null,
			
 
				+      totalTokens: data.usage?.total_tokens ?? null,
			
 
				+      creditsCharged: Number.isFinite(creditsCharged) ? creditsCharged : null,
			
 
				+      costUsd: Number.isFinite(creditsCharged) ? creditsCharged / 100_000 : null,
			
 
				+      queryLen: query.length,
			
 
				+      ctxLen: ctx.length,
			
 
				+      rawCtxLen: context.length,
			
 
				+      answerLen: answer?.length ?? 0,
			
 
				+      finishReason: data.choices?.[0]?.finish_reason ?? null,
			
 
				+    });
			
 
				     if (!answer) {
			
 
				       debug('empty answer', JSON.stringify(data).slice(0, 200));
			
 
				       return null;
			
 
				     }
			
 
				     debug(
			
 
				-      `ok in ${Date.now() - started}ms [${cfg.style}] — answer ${answer.length} chars (ctx ${ctx.length} of ${context.length}, finish=${data.choices?.[0]?.finish_reason})`
			
 
				+      `ok in ${Date.now() - started}ms [${cfg.style}] — answer ${answer.length} chars (ctx ${ctx.length} of ${context.length}, finish=${data.choices?.[0]?.finish_reason}), ${data.usage?.total_tokens ?? '?'} tok, ${Number.isFinite(creditsCharged) ? creditsCharged + ' cr' : 'no-charge-hdr'}`
			
 
				     );
			
 
				     return answer + footer;
			
 
				   } catch (err) {