mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-19 19:02:29 +08:00
feat: hidden element stripping for scoped token text extraction
Detects CSS-hidden elements (opacity, font-size, off-screen, same-color, clip-path) and ARIA label injection patterns. Marks elements with data-gstack-hidden, extracts text from a clean clone (no DOM mutation), then removes markers. Only active for scoped tokens on text command. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { randomBytes } from 'crypto';
|
import { randomBytes } from 'crypto';
|
||||||
|
import type { Page, Frame } from 'playwright';
|
||||||
|
|
||||||
// ─── Datamarking (Layer 1) ──────────────────────────────────────
|
// ─── Datamarking (Layer 1) ──────────────────────────────────────
|
||||||
|
|
||||||
@@ -56,6 +57,139 @@ export function datamarkContent(content: string): string {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Hidden Element Stripping (Layer 2) ─────────────────────────
|
||||||
|
|
||||||
|
/**
 * Injection-like phrases looked for in ARIA label text.
 *
 * Every pattern is case-insensitive (`i`) and deliberately carries no `g`/`y`
 * flag, so `.test()` stays stateless when a pattern is reused across elements.
 * Only `source` and `flags` can be shipped into a page context, so keep the
 * patterns free of anything that depends on external state.
 */
const ARIA_INJECTION_PATTERNS = [
  // Accepts one or more stacked qualifiers so that "ignore all previous
  // instructions" is caught as well as "ignore previous instructions".
  /ignore\s+(?:(?:all|any|the|previous|prior|above)\s+){1,4}instructions?/i,
  /you\s+are\s+(now|a)\s+/i,
  /system\s*:\s*/i,
  /\bdo\s+not\s+(follow|obey|listen)/i,
  /\bexecute\s+(the\s+)?following/i,
  /\bforget\s+(everything|all|your)/i,
  /\bnew\s+instructions?\s*:/i,
];
|
||||||
|
|
||||||
|
/**
 * Detect hidden elements and ARIA injection on a page.
 * Marks every match with the `data-gstack-hidden` attribute (the caller is
 * expected to remove the markers again once extraction is done).
 *
 * Detection criteria, for elements that have text content:
 * - display:none, or visibility:hidden/collapse
 * - opacity < 0.1
 * - font-size < 1px
 * - off-screen (absolute/fixed box more than 100px outside the viewport)
 * - same foreground/background color (only for text longer than 10 chars)
 * - clip/clip-path hiding
 *
 * Detection criteria, for every element regardless of text content:
 * - `aria-label` / `aria-labelledby` text matching an injection pattern
 *
 * Elements nested inside an already-marked ancestor are not inspected again:
 * removing the ancestor removes them too. This means a descendant that
 * re-enables `visibility: visible` inside a hidden ancestor is stripped along
 * with it (errs on the side of stripping).
 *
 * @param page - Page or frame whose document is scanned and marked.
 * @returns One human-readable description per finding, for logging.
 */
export async function markHiddenElements(page: Page | Frame): Promise<string[]> {
  // Ship source AND flags. Sending only `.source` drops the `i` flag, which
  // would make every pattern case-sensitive inside the page. `g`/`y` are
  // removed so `.test()` never carries `lastIndex` state between elements.
  const serializedPatterns = ARIA_INJECTION_PATTERNS.map(p => ({
    source: p.source,
    flags: p.flags.replace(/[gy]/g, ''),
  }));

  return await page.evaluate((ariaPatterns: { source: string; flags: string }[]) => {
    const found: string[] = [];
    // Compile once, not once per element per pattern.
    const matchers = ariaPatterns.map(p => new RegExp(p.source, p.flags));
    const elements = document.querySelectorAll('body *');

    for (const el of elements) {
      if (!(el instanceof HTMLElement)) continue;

      // Non-rendered containers: they are never visible text, and flagging
      // them as "display:none" would only flood the findings.
      const tag = el.tagName;
      if (tag === 'SCRIPT' || tag === 'STYLE' || tag === 'NOSCRIPT' || tag === 'TEMPLATE') continue;

      // querySelectorAll yields document order, so ancestors are handled
      // first; anything below a marked ancestor is already covered.
      if (el.parentElement?.closest('[data-gstack-hidden]')) continue;

      const text = el.textContent?.trim() || '';

      // CSS-hiding checks only matter when there is text to hide.
      if (text) {
        const style = window.getComputedStyle(el);
        let reason = '';

        if (style.display === 'none') {
          reason = 'display:none';
        } else if (style.visibility === 'hidden' || style.visibility === 'collapse') {
          reason = 'visibility:hidden';
        } else if (parseFloat(style.opacity) < 0.1) {
          reason = 'opacity < 0.1';
        } else if (parseFloat(style.fontSize) < 1) {
          reason = 'font-size < 1px';
        }

        // Each remaining check is independent: an absolutely positioned
        // element that is on-screen must still reach the color/clip checks.
        if (!reason && (style.position === 'absolute' || style.position === 'fixed')) {
          const rect = el.getBoundingClientRect();
          if (
            rect.right < -100 ||
            rect.bottom < -100 ||
            rect.left > window.innerWidth + 100 ||
            rect.top > window.innerHeight + 100
          ) {
            reason = 'off-screen';
          }
        }
        if (!reason && text.length > 10 && style.color === style.backgroundColor) {
          reason = 'same fg/bg color';
        }
        if (!reason && (style.clipPath === 'inset(100%)' || style.clip === 'rect(0px, 0px, 0px, 0px)')) {
          reason = 'clip hiding';
        }

        if (reason) {
          el.setAttribute('data-gstack-hidden', 'true');
          found.push(`[${el.tagName.toLowerCase()}] ${reason}: "${text.slice(0, 60)}..."`);
        }
      }

      // ARIA label check runs even for text-less elements (icon buttons etc.).
      const labelParts: string[] = [];
      const ariaLabel = el.getAttribute('aria-label');
      if (ariaLabel) labelParts.push(ariaLabel);

      // aria-labelledby is a space-separated list of element ids.
      const ariaLabelledBy = el.getAttribute('aria-labelledby');
      if (ariaLabelledBy) {
        for (const id of ariaLabelledBy.split(/\s+/)) {
          if (!id) continue;
          const labelEl = document.getElementById(id);
          if (labelEl?.textContent) labelParts.push(labelEl.textContent);
        }
      }

      const labelText = labelParts.join(' ');
      if (labelText && matchers.some(re => re.test(labelText))) {
        el.setAttribute('data-gstack-hidden', 'true');
        found.push(`[${el.tagName.toLowerCase()}] ARIA injection: "${labelText.slice(0, 60)}..."`);
      }
    }

    return found;
  }, serializedPatterns);
}
|
||||||
|
|
||||||
|
/**
 * Produce the page text for the `text` command with marked elements left out.
 *
 * Works on a deep copy of `<body>` so the live DOM is not touched: noise tags
 * (script, style, noscript, svg) and anything carrying `data-gstack-hidden`
 * are dropped from the copy, then its `innerText` is normalised to one
 * trimmed, non-empty line per row.
 *
 * NOTE(review): the copy is never attached to the document. Per the HTML
 * spec, `innerText` on an element that is not being rendered returns the same
 * value as `textContent`, so CSS is presumably not applied here; confirm that
 * every kind of hidden text is covered by a marker before relying on this.
 *
 * @param page - Page or frame to read from.
 * @returns The cleaned text, or an empty string when the document has no body.
 */
export async function getCleanTextWithStripping(page: Page | Frame): Promise<string> {
  return await page.evaluate(() => {
    const root = document.body;
    if (!root) return '';

    const copy = root.cloneNode(true) as HTMLElement;

    // Order matters only for readability: noise first, then marked elements.
    const dropSelectors = ['script, style, noscript, svg', '[data-gstack-hidden]'];
    for (const selector of dropSelectors) {
      for (const node of Array.from(copy.querySelectorAll(selector))) {
        node.remove();
      }
    }

    const kept: string[] = [];
    for (const rawLine of copy.innerText.split('\n')) {
      const line = rawLine.trim();
      if (line.length > 0) kept.push(line);
    }
    return kept.join('\n');
  });
}
|
||||||
|
|
||||||
|
/**
 * Strip every `data-gstack-hidden` marker from the document.
 * Call this once text extraction has finished so the page is left as it was.
 *
 * @param page - Page or frame whose document should be unmarked.
 */
export async function cleanupHiddenMarkers(page: Page | Frame): Promise<void> {
  await page.evaluate(() => {
    const marked = Array.from(document.querySelectorAll('[data-gstack-hidden]'));
    for (const node of marked) {
      node.removeAttribute('data-gstack-hidden');
    }
  });
}
|
||||||
|
|
||||||
// ─── Content Envelope (wrapping) ────────────────────────────────
|
// ─── Content Envelope (wrapping) ────────────────────────────────
|
||||||
|
|
||||||
const ENVELOPE_BEGIN = '═══ BEGIN UNTRUSTED WEB CONTENT ═══';
|
const ENVELOPE_BEGIN = '═══ BEGIN UNTRUSTED WEB CONTENT ═══';
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import { COMMAND_DESCRIPTIONS, PAGE_CONTENT_COMMANDS, wrapUntrustedContent } fro
|
|||||||
import {
|
import {
|
||||||
wrapUntrustedPageContent, datamarkContent,
|
wrapUntrustedPageContent, datamarkContent,
|
||||||
runContentFilters, type ContentFilterResult,
|
runContentFilters, type ContentFilterResult,
|
||||||
|
markHiddenElements, getCleanTextWithStripping, cleanupHiddenMarkers,
|
||||||
} from './content-security';
|
} from './content-security';
|
||||||
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
import { handleSnapshot, SNAPSHOT_FLAGS } from './snapshot';
|
||||||
import {
|
import {
|
||||||
@@ -957,7 +958,23 @@ async function handleCommandInternal(
|
|||||||
let result: string;
|
let result: string;
|
||||||
|
|
||||||
if (READ_COMMANDS.has(command)) {
|
if (READ_COMMANDS.has(command)) {
|
||||||
result = await handleReadCommand(command, args, browserManager);
|
const isScoped = tokenInfo && tokenInfo.clientId !== 'root';
|
||||||
|
// Hidden element stripping for scoped tokens on text command
|
||||||
|
if (isScoped && command === 'text') {
|
||||||
|
const page = browserManager.getPage();
|
||||||
|
const strippedDescs = await markHiddenElements(page);
|
||||||
|
if (strippedDescs.length > 0) {
|
||||||
|
console.warn(`[browse] Content security: stripped ${strippedDescs.length} hidden elements for ${tokenInfo.clientId}`);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const target = browserManager.getActiveFrameOrPage();
|
||||||
|
result = await getCleanTextWithStripping(target);
|
||||||
|
} finally {
|
||||||
|
await cleanupHiddenMarkers(page);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result = await handleReadCommand(command, args, browserManager);
|
||||||
|
}
|
||||||
} else if (WRITE_COMMANDS.has(command)) {
|
} else if (WRITE_COMMANDS.has(command)) {
|
||||||
result = await handleWriteCommand(command, args, browserManager);
|
result = await handleWriteCommand(command, args, browserManager);
|
||||||
} else if (META_COMMANDS.has(command)) {
|
} else if (META_COMMANDS.has(command)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user