mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-20 19:29:56 +08:00
test: agent conciseness, focus stealing, opus model, switchTab opts
Tests for the three UX fixes:
- System prompt contains STOP/CONCISE/Do NOT keep exploring
- sidebar agent uses opus (not sonnet) for prompt injection resistance
- switchTab has bringToFront option, defaults to true (opt-out)
- handleCommand tab pinning uses bringToFront: false (no focus steal)
- Updated stale tests: switchTab signature, allowedTools excludes Write, narration -> conciseness, tab pinning restore calls

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -513,17 +513,17 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
|
|||||||
expect(handleFn).toContain('tabId');
|
expect(handleFn).toContain('tabId');
|
||||||
// Should save and restore the active tab
|
// Should save and restore the active tab
|
||||||
expect(handleFn).toContain('savedTabId');
|
expect(handleFn).toContain('savedTabId');
|
||||||
expect(handleFn).toContain('browserManager.switchTab(tabId)');
|
expect(handleFn).toContain('switchTab(tabId');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('handleCommand restores active tab after command (success path)', () => {
|
test('handleCommand restores active tab after command (success path)', () => {
|
||||||
// On success, should restore savedTabId
|
// On success, should restore savedTabId without stealing focus
|
||||||
const handleFn = serverSrc.slice(
|
const handleFn = serverSrc.slice(
|
||||||
serverSrc.indexOf('async function handleCommand('),
|
serverSrc.indexOf('async function handleCommand('),
|
||||||
serverSrc.length,
|
serverSrc.length,
|
||||||
);
|
);
|
||||||
// Count restore calls — should appear in both success and error paths
|
// Count restore calls — should appear in both success and error paths
|
||||||
const restoreCount = (handleFn.match(/browserManager\.switchTab\(savedTabId\)/g) || []).length;
|
const restoreCount = (handleFn.match(/switchTab\(savedTabId/g) || []).length;
|
||||||
expect(restoreCount).toBeGreaterThanOrEqual(2); // success + error paths
|
expect(restoreCount).toBeGreaterThanOrEqual(2); // success + error paths
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -532,7 +532,7 @@ describe('BROWSE_TAB tab pinning (cross-tab isolation)', () => {
|
|||||||
const catchBlock = serverSrc.slice(
|
const catchBlock = serverSrc.slice(
|
||||||
serverSrc.indexOf('} catch (err: any) {', serverSrc.indexOf('async function handleCommand(')),
|
serverSrc.indexOf('} catch (err: any) {', serverSrc.indexOf('async function handleCommand(')),
|
||||||
);
|
);
|
||||||
expect(catchBlock).toContain('switchTab(savedTabId)');
|
expect(catchBlock).toContain('switchTab(savedTabId');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('tab pinning only activates when tabId is provided', () => {
|
test('tab pinning only activates when tabId is provided', () => {
|
||||||
|
|||||||
@@ -41,13 +41,13 @@ describe('sidebar system prompt (server.ts)', () => {
|
|||||||
expect(promptSection).toContain('url`');
|
expect(promptSection).toContain('url`');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('system prompt includes narration instructions', () => {
|
test('system prompt includes conciseness and stop instructions', () => {
|
||||||
const promptSection = serverSrc.slice(
|
const promptSection = serverSrc.slice(
|
||||||
serverSrc.indexOf('const systemPrompt = ['),
|
serverSrc.indexOf('const systemPrompt = ['),
|
||||||
serverSrc.indexOf("].join('\\n');", serverSrc.indexOf('const systemPrompt = [')) + 15,
|
serverSrc.indexOf("].join('\\n');", serverSrc.indexOf('const systemPrompt = [')) + 15,
|
||||||
);
|
);
|
||||||
expect(promptSection).toContain('Narrate');
|
expect(promptSection).toContain('CONCISE');
|
||||||
expect(promptSection).toContain('plain English');
|
expect(promptSection).toContain('STOP');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('--resume is never used in spawnClaude args', () => {
|
test('--resume is never used in spawnClaude args', () => {
|
||||||
@@ -385,12 +385,11 @@ describe('browser tab bar (sidepanel.html)', () => {
|
|||||||
describe('sidebar→browser tab switch', () => {
|
describe('sidebar→browser tab switch', () => {
|
||||||
const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');
|
const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');
|
||||||
|
|
||||||
test('switchTab calls bringToFront so browser visually switches', () => {
|
test('switchTab supports bringToFront option', () => {
|
||||||
const switchFn = bmSrc.slice(
|
expect(bmSrc).toContain('switchTab(id: number, opts?');
|
||||||
bmSrc.indexOf('switchTab(id: number)'),
|
expect(bmSrc).toContain('bringToFront');
|
||||||
bmSrc.indexOf('switchTab(id: number)') + 400,
|
// Default behavior still brings to front (opt-out, not opt-in)
|
||||||
);
|
expect(bmSrc).toContain('bringToFront !== false');
|
||||||
expect(switchFn).toContain('bringToFront');
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -974,6 +973,48 @@ describe('chat message dedup (prevents repeat rendering)', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ─── Agent conciseness and focus stealing ───────────────────────
|
||||||
|
|
||||||
|
describe('sidebar agent conciseness + no focus stealing', () => {
|
||||||
|
const serverSrc = fs.readFileSync(path.join(ROOT, 'src', 'server.ts'), 'utf-8');
|
||||||
|
const bmSrc = fs.readFileSync(path.join(ROOT, 'src', 'browser-manager.ts'), 'utf-8');
|
||||||
|
|
||||||
|
test('system prompt tells agent to STOP when task is done', () => {
|
||||||
|
const promptSection = serverSrc.slice(
|
||||||
|
serverSrc.indexOf('const systemPrompt = ['),
|
||||||
|
serverSrc.indexOf("].join('\\n');", serverSrc.indexOf('const systemPrompt = [')),
|
||||||
|
);
|
||||||
|
expect(promptSection).toContain('STOP');
|
||||||
|
expect(promptSection).toContain('CONCISE');
|
||||||
|
expect(promptSection).toContain('Do NOT keep exploring');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('sidebar agent uses opus (not sonnet) for prompt injection resistance', () => {
|
||||||
|
const spawnFn = serverSrc.slice(
|
||||||
|
serverSrc.indexOf('function spawnClaude('),
|
||||||
|
serverSrc.indexOf('\nfunction ', serverSrc.indexOf('function spawnClaude(') + 1),
|
||||||
|
);
|
||||||
|
expect(spawnFn).toContain("'opus'");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('switchTab has bringToFront option', () => {
|
||||||
|
expect(bmSrc).toContain('bringToFront?: boolean');
|
||||||
|
expect(bmSrc).toContain('bringToFront !== false');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handleCommand tab pinning does NOT steal focus', () => {
|
||||||
|
// All switchTab calls in handleCommand should use bringToFront: false
|
||||||
|
const handleFn = serverSrc.slice(
|
||||||
|
serverSrc.indexOf('async function handleCommand('),
|
||||||
|
serverSrc.indexOf('\n// ', serverSrc.indexOf('async function handleCommand(') + 200),
|
||||||
|
);
|
||||||
|
const switchCalls = handleFn.match(/switchTab\([^)]+\)/g) || [];
|
||||||
|
for (const call of switchCalls) {
|
||||||
|
expect(call).toContain('bringToFront: false');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// ─── LLM-based cleanup architecture ─────────────────────────────
|
// ─── LLM-based cleanup architecture ─────────────────────────────
|
||||||
|
|
||||||
describe('LLM-based cleanup (smart agent cleanup)', () => {
|
describe('LLM-based cleanup (smart agent cleanup)', () => {
|
||||||
|
|||||||
@@ -1559,12 +1559,13 @@ describe('sidebar agent (#584)', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// #584 — Server Write: server.ts allowedTools includes Write (DRY parity)
|
// #584 — Server Write: server.ts allowedTools excludes Write (agent is read-only + Bash)
|
||||||
test('server.ts allowedTools includes Write', () => {
|
test('server.ts allowedTools excludes Write (agent is read-only + Bash)', () => {
|
||||||
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
|
const content = fs.readFileSync(path.join(ROOT, 'browse', 'src', 'server.ts'), 'utf-8');
|
||||||
// Find the sidebar allowedTools in the headed-mode path
|
// Find the sidebar allowedTools in the headed-mode path
|
||||||
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
const match = content.match(/--allowedTools['"]\s*,\s*['"]([^'"]+)['"]/);
|
||||||
expect(match).not.toBeNull();
|
expect(match).not.toBeNull();
|
||||||
expect(match![1]).toContain('Write');
|
expect(match![1]).toContain('Bash');
|
||||||
|
expect(match![1]).not.toContain('Write');
|
||||||
});
|
});
|
||||||
|
|
||||||
// #584 — Sidebar stderr: stderr handler is not empty
|
// #584 — Sidebar stderr: stderr handler is not empty
|
||||||
|
|||||||
Reference in New Issue
Block a user