diff --git a/.agents/skills/mle-workflow/SKILL.md b/.agents/skills/mle-workflow/SKILL.md index 979179af..19223378 100644 --- a/.agents/skills/mle-workflow/SKILL.md +++ b/.agents/skills/mle-workflow/SKILL.md @@ -37,6 +37,8 @@ Use only the lanes that fit the system in front of you. This skill is useful for Do not treat MLE as separate from software engineering. Most ECC SWE workflows apply directly to ML systems, often with stricter failure modes: +The recommended `minimal --with capability:machine-learning` install keeps the core agent surface available alongside this skill. For skill-only or agent-limited harnesses, pair `skill:mle-workflow` with `agent:mle-reviewer` where the target supports agents. + | SWE surface | MLE use | |-------------|---------| | `product-capability` / `architecture-decision-records` | Turn model work into explicit product contracts and record irreversible data, model, and rollout choices | diff --git a/manifests/install-modules.json b/manifests/install-modules.json index c7cfa83f..0178a873 100644 --- a/manifests/install-modules.json +++ b/manifests/install-modules.json @@ -587,7 +587,9 @@ "antigravity", "codex", "opencode", - "codebuddy" + "codebuddy", + "joycode", + "qwen" ], "dependencies": [ "framework-language", diff --git a/scripts/consult.js b/scripts/consult.js index b8159ba3..f3d9c1fa 100644 --- a/scripts/consult.js +++ b/scripts/consult.js @@ -11,6 +11,21 @@ const DEFAULT_TARGET = 'claude'; const DEFAULT_LIMIT = 5; const MAX_LIMIT = 20; const SCHEMA_VERSION = 'ecc.consult.v1'; +const FUZZY_EXCLUDED_TOKENS = new Set(['review']); +const MACHINE_LEARNING_CONTEXT_TOKENS = new Set([ + 'data-science', + 'evals', + 'evaluation', + 'inference', + 'ml', + 'mle', + 'mlops', + 'model', + 'models', + 'pytorch', + 'serving', + 'training', +]); const STOP_WORDS = new Set([ 'a', @@ -74,6 +89,7 @@ const COMPONENT_ALIASES = Object.freeze({ 'mlops', 'model', 'models', + 'pytorch', 'training', 'inference', 'serving', @@ -252,6 +268,7 @@ function scoreAgainstQuery(queryTokens, corpusTokens, options = {}) { if ( token.length >= 4 + && !FUZZY_EXCLUDED_TOKENS.has(token) && [...corpus].some(corpusToken => ( corpusToken.length >= 4 && (corpusToken.includes(token) || token.includes(corpusToken)) @@ -272,6 +289,7 @@ function scoreAgainstQuery(queryTokens, corpusTokens, options = {}) { function preferredComponentBonus(component, queryTokens) { let bonus = 0; const suffix = component.id.split(':')[1]; + const hasMachineLearningContext = queryTokens.some(token => MACHINE_LEARNING_CONTEXT_TOKENS.has(token)); if (queryTokens[0] === suffix) { bonus += 5; @@ -281,7 +299,17 @@ function preferredComponentBonus(component, queryTokens) { bonus += 3; } - if (component.id === 'capability:security' && queryTokens.some(token => ['audit', 'review', 'security'].includes(token))) { + if (component.id === 'agent:mle-reviewer' && hasMachineLearningContext) { + bonus += 2; + } + + if ( + component.id === 'capability:security' + && ( + queryTokens.some(token => ['audit', 'security', 'threat', 'vulnerability'].includes(token)) + || (!hasMachineLearningContext && queryTokens.includes('review')) + ) + ) { bonus += 4; } diff --git a/skills/mle-workflow/SKILL.md b/skills/mle-workflow/SKILL.md index 8df72319..e2626628 100644 --- a/skills/mle-workflow/SKILL.md +++ b/skills/mle-workflow/SKILL.md @@ -37,6 +37,8 @@ Use only the lanes that fit the system in front of you. This skill is useful for Do not treat MLE as separate from software engineering. Most ECC SWE workflows apply directly to ML systems, often with stricter failure modes: +The recommended `minimal --with capability:machine-learning` install keeps the core agent surface available alongside this skill. For skill-only or agent-limited harnesses, pair `skill:mle-workflow` with `agent:mle-reviewer` where the target supports agents. + | SWE surface | MLE use | |-------------|---------| | `product-capability` / `architecture-decision-records` | Turn model work into explicit product contracts and record irreversible data, model, and rollout choices | diff --git a/tests/lib/install-manifests.test.js b/tests/lib/install-manifests.test.js index 5c0cf6ba..35fd35e0 100644 --- a/tests/lib/install-manifests.test.js +++ b/tests/lib/install-manifests.test.js @@ -301,6 +301,43 @@ function runTests() { )), 'Should install the MLE workflow skill'); })) passed++; else failed++; + if (test('resolves machine-learning component on JoyCode and Qwen targets', () => { + for (const target of ['joycode', 'qwen']) { + const plan = resolveInstallPlan({ + includeComponentIds: ['capability:machine-learning'], + target, + projectRoot: '/workspace/ml-app', + homeDir: '/Users/example', + }); + + assert.ok(plan.selectedModuleIds.includes('machine-learning'), + `Should include machine-learning module for ${target}`); + assert.ok(!plan.skippedModuleIds.includes('machine-learning'), + `Should not skip machine-learning module for ${target}`); + assert.ok(plan.operations.some(operation => ( + operation.sourceRelativePath === 'skills/mle-workflow' + )), `Should install the MLE workflow skill for ${target}`); + } + })) passed++; else failed++; + + if (test('minimal machine-learning install includes MLE reviewer agent surface', () => { + const plan = resolveInstallPlan({ + profileId: 'minimal', + includeComponentIds: ['capability:machine-learning'], + target: 'claude', + projectRoot: '/workspace/ml-app', + }); + + assert.ok(plan.selectedModuleIds.includes('agents-core'), + 'Minimal install should keep the agent surface available'); + assert.ok(plan.operations.some(operation => ( + operation.sourceRelativePath === 'agents' + )), 'Should install the agent directory that contains mle-reviewer.md'); + assert.ok(plan.operations.some(operation => ( + operation.sourceRelativePath === 'skills/mle-workflow' + )), 'Should install the MLE workflow skill'); + })) passed++; else failed++; + if (test('resolves explicit modules with dependency expansion', () => { const plan = resolveInstallPlan({ moduleIds: ['security'] }); assert.ok(plan.selectedModuleIds.includes('security'), 'Should include requested module'); diff --git a/tests/scripts/consult.test.js b/tests/scripts/consult.test.js index 6523c506..4520de32 100644 --- a/tests/scripts/consult.test.js +++ b/tests/scripts/consult.test.js @@ -22,6 +22,14 @@ function parseJson(stdout) { return JSON.parse(stdout.trim()); } +function findMatch(payload, componentId) { + return payload.matches.find(match => match.componentId === componentId); +} + +function findMatchIndex(payload, componentId) { + return payload.matches.findIndex(match => match.componentId === componentId); +} + function test(name, fn) { try { fn(); @@ -88,9 +96,13 @@ function runTests() { assert.strictEqual(result.status, 0, result.stderr); const payload = parseJson(result.stdout); - assert.strictEqual(payload.matches[0].componentId, 'capability:machine-learning'); - assert.ok(payload.matches[0].installCommand.includes('--with capability:machine-learning')); - assert.ok(payload.matches.some(match => match.componentId === 'agent:mle-reviewer')); + const capabilityIndex = findMatchIndex(payload, 'capability:machine-learning'); + const reviewerIndex = findMatchIndex(payload, 'agent:mle-reviewer'); + assert.ok(capabilityIndex >= 0, 'Should include capability:machine-learning'); + assert.ok(reviewerIndex >= 0, 'Should include agent:mle-reviewer'); + assert.ok(capabilityIndex < reviewerIndex, + 'The workflow capability should rank ahead of the reviewer agent for broad MLE setup queries'); + assert.ok(findMatch(payload, 'capability:machine-learning').installCommand.includes('--with capability:machine-learning')); assert.ok(!payload.profiles.some(profile => profile.id === 'mle')); })) passed++; else failed++; @@ -99,10 +111,41 @@ function runTests() { assert.strictEqual(result.status, 0, result.stderr); const payload = parseJson(result.stdout); - const reviewer = payload.matches.find(match => match.componentId === 'agent:mle-reviewer'); + const capabilityIndex = findMatchIndex(payload, 'capability:machine-learning'); + const securityIndex = findMatchIndex(payload, 'capability:security'); + const reviewerIndex = findMatchIndex(payload, 'agent:mle-reviewer'); + const codeReviewerIndex = findMatchIndex(payload, 'agent:code-reviewer'); + const reviewer = findMatch(payload, 'agent:mle-reviewer'); assert.ok(reviewer, 'Should include agent:mle-reviewer'); assert.ok(reviewer.reasons.includes('matched "model"')); assert.ok(!reviewer.reasons.includes('matched "review"')); + assert.ok(!reviewer.reasons.includes('fuzzy matched "review"')); + assert.ok(capabilityIndex >= 0, 'Should include capability:machine-learning'); + assert.ok(securityIndex < 0 || capabilityIndex < securityIndex, + 'Model review queries should prefer the MLE capability over generic security review'); + assert.ok(codeReviewerIndex < 0 || reviewerIndex < codeReviewerIndex, + 'Model review queries should prefer the MLE reviewer over generic code review'); + })) passed++; else failed++; + + if (test('surfaces MLE reviewer for PyTorch model review queries', () => { + const result = run(['pytorch', 'model', 'review', '--json']); + + assert.strictEqual(result.status, 0, result.stderr); + const payload = parseJson(result.stdout); + const reviewer = findMatch(payload, 'agent:mle-reviewer'); + assert.ok(findMatch(payload, 'capability:machine-learning'), 'Should include capability:machine-learning'); + assert.ok(reviewer, 'Should include agent:mle-reviewer'); + assert.ok(reviewer.reasons.includes('matched "pytorch"')); + })) passed++; else failed++; + + if (test('does not route generic review queries to MLE components', () => { + const result = run(['review', '--json']); + + assert.strictEqual(result.status, 0, result.stderr); + const payload = parseJson(result.stdout); + assert.ok(!findMatch(payload, 'capability:machine-learning')); + assert.ok(!findMatch(payload, 'agent:mle-reviewer')); + assert.ok(!payload.profiles.some(profile => profile.id === 'mle')); })) passed++; else failed++; if (test('works from outside the ECC repository', () => {