mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-22 04:38:24 +08:00
feat: add eval:trend CLI for per-test pass rate tracking
computeTrends() classifies tests as stable-pass/stable-fail/flaky/improving/degrading based on pass rate, flip count, and recent streak. `gstack eval trend` shows a sparkline table with --limit, --tier, and --test filters. Guard the CLI main block with import.meta.main to prevent execution on import. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
192
lib/cli-eval.ts
192
lib/cli-eval.ts
@@ -258,6 +258,7 @@ async function cmdSummary(args: string[]): Promise<void> {
|
|||||||
if (flakyTests.length > 0) {
|
if (flakyTests.length > 0) {
|
||||||
console.log(` Flaky tests (${flakyTests.length}):`);
|
console.log(` Flaky tests (${flakyTests.length}):`);
|
||||||
for (const name of flakyTests) console.log(` - ${name}`);
|
for (const name of flakyTests) console.log(` - ${name}`);
|
||||||
|
console.log(` Run 'bun run eval:trend' for detailed time series.`);
|
||||||
console.log('─'.repeat(60));
|
console.log('─'.repeat(60));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -429,6 +430,191 @@ async function cmdWatch(): Promise<void> {
|
|||||||
process.exit(exitCode);
|
process.exit(exitCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Trend tracking ---
|
||||||
|
|
||||||
|
/**
 * Pass/fail history and derived statistics for one test within one tier,
 * as produced by computeTrends().
 */
export interface TestTrend {
  /** Test name as recorded in the eval run. */
  name: string;
  /** Tier of the eval runs this series was collected from. */
  tier: string;
  /** One entry per run that contained the test, oldest first. */
  results: Array<{ timestamp: string; passed: boolean }>;
  /** Fraction of entries in `results` that passed, in the range 0..1. */
  passRate: number;
  /** Length and kind of the run of identical outcomes ending at the newest result. */
  streak: { type: 'pass' | 'fail'; count: number };
  /** Number of adjacent result pairs whose outcome differs (pass→fail or fail→pass). */
  flipCount: number;
  /** Classification derived from the last three results, the flip count and the pass rate. */
  status: 'stable-pass' | 'stable-fail' | 'flaky' | 'improving' | 'degrading';
}
|
||||||
|
|
||||||
|
/**
 * Compute per-test pass rate trends from eval results.
 *
 * Pure function — no I/O. The input is expected newest-first (the order this
 * file receives from loadEvalResults); it is copied and reversed, so the
 * `results` array of every returned trend is chronological (oldest first).
 *
 * Status rules, first match wins:
 *  1. improving   — the last three results all pass and an earlier one failed
 *  2. degrading   — one of the last three results fails and every earlier one passed
 *  3. flaky       — three or more flips, or a pass rate strictly between 0.3 and 0.7
 *  4. stable-pass — pass rate of at least 0.5
 *  5. stable-fail — everything else
 *
 * @param results    Eval runs, newest first. The array is not mutated.
 * @param filterTier When set, only runs whose `tier` equals this value are counted.
 * @param filterTest When set, only tests whose `name` equals this value are counted.
 * @returns One trend per (tier, test name) pair, sorted by status
 *          (flaky, degrading, improving, stable-fail, stable-pass), then by
 *          flip count descending, then by name.
 */
export function computeTrends(
  results: EvalResult[],
  filterTier?: string,
  filterTest?: string,
): TestTrend[] {
  type Sample = { timestamp: string; passed: boolean };
  type Series = { tier: string; name: string; samples: Sample[] };

  // Tier and name are kept in the map value instead of being parsed back out
  // of the key, so values containing ':' (or any other character) survive
  // intact and distinct (tier, name) pairs can never collide.
  const seriesByKey = new Map<string, Series>();

  // Input is newest-first; walk it oldest-first so each series is chronological.
  const chronological = [...results].reverse();

  for (const run of chronological) {
    if (filterTier && run.tier !== filterTier) continue;
    for (const t of run.tests) {
      if (filterTest && t.name !== filterTest) continue;
      const key = JSON.stringify([run.tier, t.name]);
      let series = seriesByKey.get(key);
      if (!series) {
        series = { tier: run.tier, name: t.name, samples: [] };
        seriesByKey.set(key, series);
      }
      series.samples.push({ timestamp: run.timestamp, passed: t.passed });
    }
  }

  const trends: TestTrend[] = [];

  for (const { tier, name, samples } of seriesByKey.values()) {
    // A series is only created together with its first sample, so total >= 1.
    const total = samples.length;
    const passCount = samples.filter(s => s.passed).length;
    const passRate = total > 0 ? passCount / total : 0;

    // Streak: count identical outcomes walking backward from the newest sample.
    const newestPassed = samples[total - 1].passed;
    let streakCount = 0;
    for (let i = total - 1; i >= 0 && samples[i].passed === newestPassed; i--) {
      streakCount++;
    }

    // Flip count: transitions between pass and fail.
    let flipCount = 0;
    for (let i = 1; i < total; i++) {
      if (samples[i].passed !== samples[i - 1].passed) flipCount++;
    }

    const last3 = samples.slice(-3);
    const earlier = samples.slice(0, -3);
    const last3AllPass = last3.length >= 3 && last3.every(s => s.passed);
    const last3HasFail = last3.some(s => !s.passed);
    const earlierHadFailures = earlier.some(s => !s.passed);
    const earlierWasPassing = earlier.length > 0 && earlier.every(s => s.passed);

    // A clear recent trend outranks the raw pass rate, so check it first.
    let status: TestTrend['status'];
    if (last3AllPass && earlierHadFailures) {
      status = 'improving';
    } else if (last3HasFail && earlierWasPassing) {
      status = 'degrading';
    } else if (flipCount >= 3 || (passRate > 0.3 && passRate < 0.7)) {
      status = 'flaky';
    } else {
      // Once flaky is ruled out, the 0.9 / 0.1 "confident" thresholds give the
      // same answer as a plain 0.5 split, so a single comparison is enough.
      status = passRate >= 0.5 ? 'stable-pass' : 'stable-fail';
    }

    trends.push({
      name,
      tier,
      results: samples,
      passRate,
      streak: { type: newestPassed ? 'pass' : 'fail', count: streakCount },
      flipCount,
      status,
    });
  }

  // Sort: flaky first, then flipCount desc, then name. Built once, not per comparison.
  const statusOrder: Record<TestTrend['status'], number> = {
    flaky: 0,
    degrading: 1,
    improving: 2,
    'stable-fail': 3,
    'stable-pass': 4,
  };
  trends.sort(
    (a, b) =>
      statusOrder[a.status] - statusOrder[b.status] ||
      b.flipCount - a.flipCount ||
      a.name.localeCompare(b.name),
  );

  return trends;
}
|
||||||
|
|
||||||
|
/**
 * `trend` subcommand: print a table of per-test pass/fail trends.
 *
 * Recognised arguments:
 *   --limit N   number of runs to load (positive integer, default 10)
 *   --tier X    only count runs whose tier equals X
 *   --test X    only count tests whose name equals X
 *
 * Loads up to `limit` runs via loadEvalResults, classifies them with
 * computeTrends, and writes one row per test (name, pass rate, sparkline of
 * recent results, current streak, status) followed by a summary line.
 *
 * @param args Command-line arguments following the `trend` subcommand.
 */
async function cmdTrend(args: string[]): Promise<void> {
  let limit = 10;
  let filterTier: string | undefined;
  let filterTest: string | undefined;

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];
    // A flag without a following value is ignored, as is any unknown argument.
    if (!value) continue;

    if (flag === '--limit') {
      i++;
      const parsed = Number.parseInt(value, 10);
      // Reject NaN, zero and negatives: they would otherwise flow into
      // loadEvalResults, Math.min and padEnd and silently corrupt the table.
      if (Number.isInteger(parsed) && parsed > 0) {
        limit = parsed;
      } else {
        console.error(`Ignoring invalid --limit "${value}"; using ${limit}.`);
      }
    } else if (flag === '--tier') {
      i++;
      filterTier = value;
    } else if (flag === '--test') {
      i++;
      filterTest = value;
    }
  }

  const results = loadEvalResults<EvalResult>(undefined, limit);
  if (results.length === 0) {
    console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
    return;
  }

  const trends = computeTrends(results, filterTier, filterTest);

  if (trends.length === 0) {
    console.log('No test data matching filters.');
    return;
  }

  // Number of sparkline cells: the longest series, capped at the requested limit.
  const maxResults = Math.min(limit, Math.max(...trends.map(t => t.results.length)));

  // The history column must fit both the sparkline and its "Last N" header;
  // with three or fewer runs the header is the wider of the two.
  const historyHeader = `Last ${maxResults}`;
  const historyWidth = Math.max(maxResults + 3, historyHeader.length + 1);

  console.log('');
  console.log(`Test Trends (last ${results.length} runs)`);
  console.log('═'.repeat(80));
  console.log(
    ' ' +
    'Test Name'.padEnd(36) +
    'Rate'.padEnd(7) +
    historyHeader.padEnd(historyWidth) +
    'Streak'.padEnd(8) +
    'Status'
  );
  console.log('─'.repeat(80));

  let flakyCount = 0;
  let degradingCount = 0;

  for (const t of trends) {
    if (t.status === 'flaky') flakyCount++;
    if (t.status === 'degrading') degradingCount++;

    // Truncate long names to 34 characters, then pad every name to the same
    // 36-character column so the following columns stay aligned.
    const fullName = `${t.tier}:${t.name}`;
    const shortName = fullName.length > 34 ? fullName.slice(0, 31) + '...' : fullName;
    const displayName = shortName.padEnd(36);
    const rate = `${Math.round(t.passRate * 100)}%`.padEnd(7);

    // Build sparkline of last N results
    const sparkline = t.results
      .slice(-maxResults)
      .map(r => r.passed ? '\u2713' : '\u2717')
      .join('');

    const streak = `${t.streak.count}${t.streak.type === 'pass' ? '\u2713' : '\u2717'}`.padEnd(8);

    // Color status (typed as string because the colour helpers wrap the label).
    let statusStr: string = t.status;
    if (isTTY) {
      if (t.status === 'flaky' || t.status === 'degrading') statusStr = red(t.status);
      else if (t.status === 'stable-pass' || t.status === 'improving') statusStr = green(t.status);
      else statusStr = dim(t.status);
    }

    console.log(` ${displayName}${rate}${sparkline.padEnd(historyWidth)}${streak}${statusStr}`);
  }

  console.log('─'.repeat(80));
  const parts: string[] = [`${trends.length} tests tracked`];
  if (flakyCount > 0) parts.push(`${flakyCount} flaky`);
  if (degradingCount > 0) parts.push(`${degradingCount} degrading`);
  console.log(` ${parts.join(' | ')}`);
  console.log('');
}
|
||||||
|
|
||||||
function printUsage(): void {
|
function printUsage(): void {
|
||||||
console.log(`
|
console.log(`
|
||||||
gstack eval — eval management CLI
|
gstack eval — eval management CLI
|
||||||
@@ -441,13 +627,15 @@ Commands:
|
|||||||
summary [--limit N] Aggregate stats across all runs
|
summary [--limit N] Aggregate stats across all runs
|
||||||
push <file> Validate + save + sync an eval result
|
push <file> Validate + save + sync an eval result
|
||||||
cost <file> Show per-model cost breakdown
|
cost <file> Show per-model cost breakdown
|
||||||
|
trend [--limit N] [--tier X] [--test X] Per-test pass rate trends
|
||||||
cache read|write|stats|clear|verify Manage eval cache
|
cache read|write|stats|clear|verify Manage eval cache
|
||||||
watch Live E2E test dashboard
|
watch Live E2E test dashboard
|
||||||
`);
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Main ---
|
// --- Main (only when run directly, not imported) ---
|
||||||
|
|
||||||
|
if (import.meta.main) {
|
||||||
const command = process.argv[2];
|
const command = process.argv[2];
|
||||||
const cmdArgs = process.argv.slice(3);
|
const cmdArgs = process.argv.slice(3);
|
||||||
|
|
||||||
@@ -457,6 +645,7 @@ switch (command) {
|
|||||||
case 'summary': cmdSummary(cmdArgs); break;
|
case 'summary': cmdSummary(cmdArgs); break;
|
||||||
case 'push': cmdPush(cmdArgs); break;
|
case 'push': cmdPush(cmdArgs); break;
|
||||||
case 'cost': cmdCost(cmdArgs); break;
|
case 'cost': cmdCost(cmdArgs); break;
|
||||||
|
case 'trend': cmdTrend(cmdArgs); break;
|
||||||
case 'cache': cmdCache(cmdArgs); break;
|
case 'cache': cmdCache(cmdArgs); break;
|
||||||
case 'watch': cmdWatch(); break;
|
case 'watch': cmdWatch(); break;
|
||||||
case '--help': case '-h': case 'help': case undefined:
|
case '--help': case '-h': case 'help': case undefined:
|
||||||
@@ -467,3 +656,4 @@ switch (command) {
|
|||||||
printUsage();
|
printUsage();
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@
|
|||||||
"eval:list": "bun run lib/cli-eval.ts list",
|
"eval:list": "bun run lib/cli-eval.ts list",
|
||||||
"eval:compare": "bun run lib/cli-eval.ts compare",
|
"eval:compare": "bun run lib/cli-eval.ts compare",
|
||||||
"eval:summary": "bun run lib/cli-eval.ts summary",
|
"eval:summary": "bun run lib/cli-eval.ts summary",
|
||||||
|
"eval:trend": "bun run lib/cli-eval.ts trend",
|
||||||
"eval:watch": "bun run lib/cli-eval.ts watch"
|
"eval:watch": "bun run lib/cli-eval.ts watch"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
193
test/lib-eval-trend.test.ts
Normal file
193
test/lib-eval-trend.test.ts
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
/**
|
||||||
|
* Tests for computeTrends() — per-test pass rate trend tracking.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, test, expect } from 'bun:test';
|
||||||
|
import { computeTrends } from '../lib/cli-eval';
|
||||||
|
import type { EvalResult } from './helpers/eval-store';
|
||||||
|
|
||||||
|
/**
 * Build a minimal EvalResult containing the given tests.
 *
 * Every field other than the timestamp, tier and test outcomes is filled with
 * a fixed placeholder; the pass/fail totals are derived from `opts.tests`.
 *
 * @param opts.timestamp Timestamp string stored on the run.
 * @param opts.tier      Tier stored on the run and on each test (default 'e2e').
 * @param opts.tests     Name and outcome of each test in the run.
 */
function makeRun(opts: {
  timestamp: string;
  tier?: 'e2e' | 'llm-judge';
  tests: Array<{ name: string; passed: boolean }>;
}): EvalResult {
  // Resolve the default once so the run and its tests always agree on the tier.
  const tier = opts.tier ?? 'e2e';
  const passedCount = opts.tests.filter(t => t.passed).length;

  return {
    schema_version: 1,
    version: '0.3.3',
    branch: 'main',
    git_sha: 'abc',
    timestamp: opts.timestamp,
    hostname: 'test',
    tier,
    total_tests: opts.tests.length,
    passed: passedCount,
    failed: opts.tests.length - passedCount,
    total_cost_usd: 0,
    total_duration_ms: 0,
    tests: opts.tests.map(t => ({
      name: t.name,
      suite: 'test',
      tier,
      passed: t.passed,
      duration_ms: 0,
      cost_usd: 0,
    })),
  };
}
|
||||||
|
|
||||||
|
// Every fixture below is built oldest-first and then reversed, because
// computeTrends expects its input newest-first (loadEvalResults order).
describe('computeTrends', () => {
  test('classifies stable-pass test correctly', () => {
    // 10 runs all passing — results are newest-first (loadEvalResults order)
    const results = Array.from({ length: 10 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'always-pass', passed: true }],
    })).reverse(); // newest first

    const trends = computeTrends(results);
    expect(trends).toHaveLength(1);
    expect(trends[0].status).toBe('stable-pass');
    expect(trends[0].passRate).toBe(1);
    expect(trends[0].streak).toEqual({ type: 'pass', count: 10 });
    expect(trends[0].flipCount).toBe(0);
  });

  test('classifies stable-fail test correctly', () => {
    const results = Array.from({ length: 10 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'always-fail', passed: false }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].status).toBe('stable-fail');
    expect(trends[0].passRate).toBe(0);
    expect(trends[0].streak).toEqual({ type: 'fail', count: 10 });
  });

  test('classifies flaky test correctly — alternating pass/fail', () => {
    const results = Array.from({ length: 10 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'flaky', passed: i % 2 === 0 }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].status).toBe('flaky');
    expect(trends[0].flipCount).toBe(9);
    expect(trends[0].passRate).toBe(0.5);
  });

  test('classifies improving test correctly', () => {
    // First 5 fail, last 5 pass
    const results = Array.from({ length: 10 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'improving', passed: i >= 5 }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].status).toBe('improving');
    expect(trends[0].streak).toEqual({ type: 'pass', count: 5 });
  });

  test('classifies degrading test correctly', () => {
    // First 7 pass, last 3 fail
    const results = Array.from({ length: 10 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'degrading', passed: i < 7 }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].status).toBe('degrading');
    expect(trends[0].streak).toEqual({ type: 'fail', count: 3 });
  });

  test('computes streak correctly with mixed ending', () => {
    // pass, pass, fail, pass, pass, pass (newest)
    const passed = [true, true, false, true, true, true];
    const results = passed.map((p, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'test', passed: p }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].streak).toEqual({ type: 'pass', count: 3 });
  });

  test('computes flipCount correctly', () => {
    // pass, fail, pass, pass, fail, pass → 4 flips
    const passed = [true, false, true, true, false, true];
    const results = passed.map((p, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [{ name: 'test', passed: p }],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].flipCount).toBe(4);
  });

  test('handles single run', () => {
    const results = [makeRun({
      timestamp: '2026-03-15T00:00:00Z',
      tests: [{ name: 'single', passed: true }],
    })];

    const trends = computeTrends(results);
    expect(trends).toHaveLength(1);
    expect(trends[0].passRate).toBe(1);
    expect(trends[0].streak).toEqual({ type: 'pass', count: 1 });
    expect(trends[0].flipCount).toBe(0);
    expect(trends[0].status).toBe('stable-pass');
  });

  test('handles single failing run', () => {
    const results = [makeRun({
      timestamp: '2026-03-15T00:00:00Z',
      tests: [{ name: 'single-fail', passed: false }],
    })];

    const trends = computeTrends(results);
    expect(trends[0].status).toBe('stable-fail');
  });

  test('filters by tier', () => {
    const results = [
      makeRun({ timestamp: '2026-03-15T00:00:00Z', tier: 'e2e', tests: [{ name: 'e2e-test', passed: true }] }),
      makeRun({ timestamp: '2026-03-15T00:00:00Z', tier: 'llm-judge', tests: [{ name: 'judge-test', passed: true }] }),
    ];

    const e2eOnly = computeTrends(results, 'e2e');
    expect(e2eOnly).toHaveLength(1);
    expect(e2eOnly[0].name).toBe('e2e-test');

    const judgeOnly = computeTrends(results, 'llm-judge');
    expect(judgeOnly).toHaveLength(1);
    expect(judgeOnly[0].name).toBe('judge-test');
  });

  test('filters by test name', () => {
    const results = Array.from({ length: 3 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [
        { name: 'test-a', passed: true },
        { name: 'test-b', passed: false },
      ],
    })).reverse();

    const filtered = computeTrends(results, undefined, 'test-a');
    expect(filtered).toHaveLength(1);
    expect(filtered[0].name).toBe('test-a');
    expect(filtered[0].passRate).toBe(1);
  });

  test('sorts flaky tests first', () => {
    // Create runs where test-a is flaky and test-b is stable
    const results = Array.from({ length: 6 }, (_, i) => makeRun({
      timestamp: `2026-03-${String(10 + i).padStart(2, '0')}T00:00:00Z`,
      tests: [
        { name: 'test-a', passed: i % 2 === 0 }, // flaky: alternating
        { name: 'test-b', passed: true }, // stable-pass
      ],
    })).reverse();

    const trends = computeTrends(results);
    expect(trends[0].name).toBe('test-a');
    expect(trends[0].status).toBe('flaky');
    expect(trends[1].name).toBe('test-b');
    expect(trends[1].status).toBe('stable-pass');
  });
});
|
||||||
Reference in New Issue
Block a user