feat: aggressive cleanup heuristics + preserve top nav bar

Deterministic cleanup improvements (used as first pass before LLM analysis):
- New 'clutter' category: audio players, podcast widgets, sidebar puzzles/games,
  recirculation widgets (taboola, outbrain, nativo), cross-promotion banners
- Text-content detection: removes "ADVERTISEMENT", "Article continues below",
  "Sponsored", "Paid content" labels and their parent wrappers
- Sticky fix: preserves the topmost full-width element near the viewport top (the
  site nav bar) instead of hiding all sticky/fixed elements. Sticky/fixed elements
  are sorted by vertical position, and the first one that spans >80% of the
  viewport width is preserved.

Tests: clutter category, ad label removal, nav bar preservation logic.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-29 23:44:37 -07:00
parent fa03b66c61
commit 0940d216ea
2 changed files with 116 additions and 26 deletions

View File

@@ -771,22 +771,24 @@ describe('cleanup and screenshot buttons', () => {
expect(html).toContain('quick-actions');
});
test('sidepanel.js cleanup handler POSTs to /command with cleanup', () => {
// Substring check against `js` (presumably the sidepanel.js source text — confirm
// where it is loaded). It verifies the literal payload fragments exist somewhere
// in the file; it does not execute the handler or prove where they are used.
expect(js).toContain("command: 'cleanup'");
expect(js).toContain("args: ['--all']");
});
test('sidepanel.js screenshot handler POSTs to /command with screenshot', () => {
expect(js).toContain("command: 'screenshot'");
});
test('sidepanel.js cleanup resets inspector state after success', () => {
// runCleanup should call inspectorShowEmpty after cleanup
test('cleanup button sends smart prompt to sidebar agent (not just deterministic selectors)', () => {
// Should use /sidebar-command endpoint (agent-based) not just /command (deterministic)
const cleanupFn = js.slice(
js.indexOf('async function runCleanup('),
js.indexOf('async function runScreenshot('),
);
expect(cleanupFn).toContain('inspectorShowEmpty');
expect(cleanupFn).toContain('sidebar-command');
expect(cleanupFn).toContain('cleanupPrompt');
// Should include both deterministic first pass AND agent snapshot analysis
expect(cleanupFn).toContain('cleanup --all');
expect(cleanupFn).toContain('snapshot -i');
// Should instruct agent to KEEP site branding
expect(cleanupFn).toContain('KEEP');
expect(cleanupFn).toContain('header/masthead/logo');
});
test('sidepanel.js screenshot handler POSTs to /command with screenshot', () => {
expect(js).toContain("command: 'screenshot'");
});
test('sidepanel.js has notification rendering for type notification', () => {
@@ -880,7 +882,31 @@ describe('cleanup heuristics (write-commands.ts)', () => {
});
test('sticky cleanup skips gstack control indicator', () => {
expect(wcSrc).toContain("el.id === 'gstack-ctrl'");
expect(wcSrc).toContain("gstack-ctrl");
});
test('CLEANUP_SELECTORS has clutter category', () => {
// Substring checks against `wcSrc` (presumably the write-commands.ts source
// text, per the enclosing describe title — confirm where it is loaded).
// First assert the category key itself is declared...
expect(wcSrc).toContain('clutter: [');
// ...then spot-check keywords expected among the clutter selectors. These match
// anywhere in the source, not specifically inside the `clutter` array.
expect(wcSrc).toContain('audio-player');
expect(wcSrc).toContain('podcast-player');
expect(wcSrc).toContain('puzzle');
expect(wcSrc).toContain('recirculation');
expect(wcSrc).toContain('everlit');
});
test('cleanup removes "ADVERTISEMENT" text labels', () => {
// Substring checks on the source text only: the pattern list identifier and
// two of its regex literals must appear verbatim, so reformatting either regex
// in the implementation will fail this test even if behavior is unchanged.
expect(wcSrc).toContain('adTextPatterns');
expect(wcSrc).toContain('/^advertisement$/i');
expect(wcSrc).toContain('/article continues/i');
// NOTE(review): 'ad labels' is presumably part of a reported message or
// comment in the implementation — confirm against write-commands.ts.
expect(wcSrc).toContain('ad labels');
});
test('sticky cleanup preserves topmost full-width nav bar', () => {
// Source-text assertions: they confirm the identifiers/expressions exist in
// `wcSrc`, not that the preservation logic behaves correctly at runtime.
// Should preserve the first full-width element near the top
expect(wcSrc).toContain('preservedTopNav');
// The width threshold expression must appear verbatim (80% of viewport width).
expect(wcSrc).toContain('viewportWidth * 0.8');
// Should sort sticky elements by vertical position
// (exact-text match: renaming the comparator parameters would break this check)
expect(wcSrc).toContain('sort((a, b) => a.top - b.top)');
});
});