Merge remote-tracking branch 'origin/main' into v0.3.6-qa-upgrades

# Conflicts:
#	test/skill-e2e.test.ts
This commit is contained in:
Garry Tan
2026-03-14 02:35:48 -05:00
27 changed files with 1154 additions and 294 deletions

View File

@@ -65,6 +65,19 @@ describeEval('LLM-as-judge quality evals', () => {
expect(scores.actionability).toBeGreaterThanOrEqual(4);
}, 30_000);
test('setup block scores >= 4 on actionability and clarity', async () => {
const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
const setupStart = content.indexOf('## SETUP');
const setupEnd = content.indexOf('## IMPORTANT');
const section = content.slice(setupStart, setupEnd);
const scores = await judge('setup/binary discovery instructions', section);
console.log('Setup block scores:', JSON.stringify(scores, null, 2));
expect(scores.actionability).toBeGreaterThanOrEqual(4);
expect(scores.clarity).toBeGreaterThanOrEqual(4);
}, 30_000);
test('regression check: compare branch vs baseline quality', async () => {
// This test compares the generated output against the hand-maintained
// baseline from main. The generated version should score equal or higher.