mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-08 13:39:45 +08:00
feat(browse): webdriver-mask stealth + Chromium-through-bridge e2e
D7 (codex narrowing): mask navigator.webdriver only via addInitScript. The wintermute approach (fake plugins=[1..5], fake languages=['en-US', 'en'], stub window.chrome) is intentionally NOT applied — modern fingerprinters check consistency between plugins.length, languages, userAgent, and platform, and synthesizing fixed values can flag MORE bot-like, not less. The honest minimum is webdriver, which Chromium exposes as a known automation tell. Adds browse/src/stealth.ts: single source of truth for the stealth init script and launch args. Both browser-manager.launch() (headless) and launchHeaded() (persistent context with extension) call applyStealth(context) and pass STEALTH_LAUNCH_ARGS into chromium.launch. The pre-existing launchHeaded stealth that did fake plugins/languages is removed for the same reason. The cdc_/__webdriver runtime cleanup and Permissions API patch are kept — they remove automation-injected artifacts, not synthesize fake natural-browser values. Adds bridge-chromium-e2e.test.ts (codex F3): the test that proves the FEATURE works. Real Chromium with proxy.server = 'socks5://127.0.0.1:<bridgePort>' navigates to a local HTTP fixture; the auth upstream's connect counter and the HTTP fixture's hit counter both increment, proving traffic actually traversed bridge → auth-upstream → destination. Without this test, we could ship a working byte-relay and a broken Chromium integration and never know. Adds bridge-port-restart.test.ts (codex F1, reframed): old test assumed two daemons coexist, which contradicts D2 single-daemon model. Reframed as restart-then-restart, asserting fresh ephemeral ports (never the hardcoded 1090) on each spin-up. Adds stealth-webdriver.test.ts: navigator.webdriver=false in both fresh contexts and persistent contexts; navigator.plugins/languages are NOT replaced with the wintermute fake list (D7 verification). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -193,7 +193,8 @@ export class BrowserManager {
|
||||
// BROWSE_EXTENSIONS_DIR points to an unpacked Chrome extension directory.
|
||||
// Extensions only work in headed mode, so we use an off-screen window.
|
||||
const extensionsDir = process.env.BROWSE_EXTENSIONS_DIR;
|
||||
const launchArgs: string[] = [];
|
||||
const { STEALTH_LAUNCH_ARGS } = await import('./stealth');
|
||||
const launchArgs: string[] = [...STEALTH_LAUNCH_ARGS];
|
||||
let useHeadless = true;
|
||||
|
||||
// Docker/CI: Chromium sandbox requires unprivileged user namespaces which
|
||||
@@ -244,6 +245,13 @@ export class BrowserManager {
|
||||
await this.context.setExtraHTTPHeaders(this.extraHeaders);
|
||||
}
|
||||
|
||||
// D7: mask navigator.webdriver only. The other 3 wintermute patches
|
||||
// (plugins, languages, chrome.runtime) are intentionally NOT applied —
|
||||
// faking them to fixed values can flag more bot-like to modern
|
||||
// fingerprinters, not less.
|
||||
const { applyStealth } = await import('./stealth');
|
||||
await applyStealth(this.context);
|
||||
|
||||
// Create first tab
|
||||
await this.newTab();
|
||||
}
|
||||
@@ -385,33 +393,20 @@ export class BrowserManager {
|
||||
this.connectionMode = 'headed';
|
||||
this.intentionalDisconnect = false;
|
||||
|
||||
// ─── Anti-bot-detection stealth patches ───────────────────────
|
||||
// Playwright's Chromium is detected by sites like Google/NYTimes via:
|
||||
// 1. navigator.webdriver = true (handled by --disable-blink-features above)
|
||||
// 2. Missing plugins array (real Chrome has PDF viewer, etc.)
|
||||
// 3. Missing languages
|
||||
// 4. CDP runtime detection (window.cdc_* variables)
|
||||
// 5. Permissions API returning 'denied' for notifications
|
||||
// ─── Anti-bot-detection patches ───────────────────────────────
|
||||
// D7 (codex correction): mask navigator.webdriver only. We do NOT fake
|
||||
// plugins/languages — modern fingerprinters check consistency between
|
||||
// those and userAgent/platform, and synthesizing fixed values can flag
|
||||
// MORE bot-like, not less. Let Chromium's natural plugins and languages
|
||||
// surface unmodified.
|
||||
//
|
||||
// What we DO clean up are automation-specific runtime artifacts that
|
||||
// shouldn't exist in a real browser at all (Permissions API quirks,
|
||||
// ChromeDriver-injected window globals). Those aren't fingerprint
|
||||
// synthesis — they're removing leaked automation tells.
|
||||
const { applyStealth } = await import('./stealth');
|
||||
await applyStealth(this.context);
|
||||
await this.context.addInitScript(() => {
|
||||
// Fake plugins array (real Chrome has at least PDF Viewer)
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => {
|
||||
const plugins = [
|
||||
{ name: 'PDF Viewer', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
||||
{ name: 'Chrome PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
|
||||
{ name: 'Chromium PDF Viewer', filename: 'internal-pdf-viewer', description: '' },
|
||||
];
|
||||
(plugins as any).namedItem = (name: string) => plugins.find(p => p.name === name) || null;
|
||||
(plugins as any).refresh = () => {};
|
||||
return plugins;
|
||||
},
|
||||
});
|
||||
|
||||
// Fake languages (Playwright sometimes sends empty)
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US', 'en'],
|
||||
});
|
||||
|
||||
// Remove CDP runtime artifacts that automation detectors look for
|
||||
// cdc_ prefixed vars are injected by ChromeDriver/CDP
|
||||
const cleanup = () => {
|
||||
|
||||
39
browse/src/stealth.ts
Normal file
39
browse/src/stealth.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* Stealth init script — webdriver-mask only (D7, codex narrowed).
|
||||
*
|
||||
* Modern anti-bot fingerprinters check consistency between navigator
|
||||
* properties (plugins.length, languages, userAgent, platform). Faking those
|
||||
* to fixed values (the wintermute approach) can flag MORE bot-like, not
|
||||
* less, and breaks legitimate sites that reflect on these properties.
|
||||
*
|
||||
* The honest minimum is masking navigator.webdriver, which Chromium exposes
|
||||
* as a known automation tell. Letting plugins/languages/chrome.runtime
|
||||
* surface their native Chromium values keeps the fingerprint internally
|
||||
* consistent.
|
||||
*/
|
||||
|
||||
import type { Browser, BrowserContext } from 'playwright';
|
||||
|
||||
/**
 * Init script source installed on every page of a context via
 * `context.addInitScript`. It overrides `navigator.webdriver` with a getter
 * that always returns `false`.
 *
 * The property is declared `configurable: true` so the script is genuinely
 * idempotent: if it is evaluated twice against the same `navigator` (for
 * example because `applyStealth` was called twice on one context), the second
 * `Object.defineProperty` simply replaces the getter. Without `configurable`
 * the second evaluation throws `TypeError: Cannot redefine property`.
 */
export const WEBDRIVER_MASK_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => false, configurable: true });`;
|
||||
|
||||
/**
 * Registers the webdriver-mask init script on the given context so that it is
 * evaluated in pages the context creates from now on. Works for both regular
 * and persistent contexts; BrowserManager's launch paths call this right after
 * the context exists.
 *
 * @param context - The Playwright context to patch.
 * @returns A promise that settles once the init script has been registered.
 */
export async function applyStealth(context: BrowserContext): Promise<void> {
  const initScript = { content: WEBDRIVER_MASK_SCRIPT };
  await context.addInitScript(initScript);
}
|
||||
|
||||
/**
 * Extra Chromium command-line switches to merge into the `args` option of
 * `chromium.launch` / `launchPersistentContext`.
 *
 * The single switch disables the `AutomationControlled` Blink feature. It is
 * applied at browser start-up and is independent of the init script installed
 * by `applyStealth`.
 */
export const STEALTH_LAUNCH_ARGS: string[] = ['--disable-blink-features=AutomationControlled'];
|
||||
205
browse/test/bridge-chromium-e2e.test.ts
Normal file
205
browse/test/bridge-chromium-e2e.test.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* codex F3 critical test: real Chromium navigates through the SOCKS5 bridge.
|
||||
*
|
||||
* The other bridge tests prove TCP relay works at the byte level. This test
|
||||
* proves the FEATURE works: a Chromium browser launched with
|
||||
* proxy.server = 'socks5://127.0.0.1:<bridgePort>' actually traverses the
|
||||
* bridge → authenticated upstream → destination chain. Without this test,
|
||||
* we could ship a working transport layer and a broken integration with
|
||||
* Chromium and not know it.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { chromium, type Browser } from 'playwright';
|
||||
import * as net from 'net';
|
||||
import * as http from 'http';
|
||||
import { startSocksBridge, type BridgeHandle } from '../src/socks-bridge';
|
||||
|
||||
/** Handle returned by `startAuthUpstream`. */
interface MockUpstream {
  /** Loopback TCP port the mock upstream is listening on. */
  port: number;
  /** Stops the listener; resolves once the server has fully closed. */
  close: () => Promise<void>;
  /** Number of CONNECT requests whose destination connection was established. */
  totalConnects: () => number;
}

/** Builds the 10-byte SOCKS5 reply for REP code `rep` with a zeroed IPv4 bind address. */
function socksReply(rep: number): Buffer {
  return Buffer.from([0x05, rep, 0x00, 0x01, 0, 0, 0, 0, 0, 0]);
}

/**
 * Extracts the destination host/port from a SOCKS5 request frame.
 *
 * Returns `{ rep }` (the REP code to answer with) instead of throwing when the
 * frame is truncated (0x01, general failure) or uses an address type other
 * than IPv4 / domain name (0x08, address type not supported).
 */
function parseSocksTarget(req: Buffer): { host: string; port: number } | { rep: number } {
  const atyp = req[3];
  if (atyp === 0x01) {
    // VER CMD RSV ATYP + 4 address bytes + 2 port bytes.
    if (req.length < 10) return { rep: 0x01 };
    return { host: `${req[4]}.${req[5]}.${req[6]}.${req[7]}`, port: req.readUInt16BE(8) };
  }
  if (atyp === 0x03) {
    if (req.length < 5) return { rep: 0x01 };
    const len = req[4];
    // Header (4) + length byte (1) + name (len) + port (2).
    if (req.length < 7 + len) return { rep: 0x01 };
    return { host: req.subarray(5, 5 + len).toString(), port: req.readUInt16BE(5 + len) };
  }
  return { rep: 0x08 };
}

/**
 * Minimal SOCKS5 upstream with username/password auth, for tests only.
 * Counts how many CONNECT requests reached their destination; a non-zero
 * count proves a client's traffic actually traversed this server.
 *
 * Each handshake phase (greeting, auth, request) is assumed to arrive in a
 * single 'data' chunk, which holds for the small frames used on loopback.
 *
 * @param user - Username the client must present.
 * @param pass - Password the client must present.
 * @returns Handle exposing the listening port, the connect counter and `close`.
 */
async function startAuthUpstream(user: string, pass: string): Promise<MockUpstream> {
  let connects = 0;

  const server = net.createServer((sock) => {
    sock.on('error', () => sock.destroy());

    sock.once('data', (greeting) => {
      if (greeting[0] !== 0x05) { sock.destroy(); return; }
      const methods = greeting.subarray(2, 2 + greeting[1]);
      // 0x02 = username/password; refuse clients that do not offer it.
      if (!methods.includes(0x02)) { sock.write(Buffer.from([0x05, 0xFF])); sock.destroy(); return; }
      sock.write(Buffer.from([0x05, 0x02]));

      sock.once('data', (auth) => {
        const ulen = auth[1];
        const uname = auth.subarray(2, 2 + ulen).toString();
        const plen = auth[2 + ulen];
        const passwd = auth.subarray(3 + ulen, 3 + ulen + plen).toString();
        if (uname !== user || passwd !== pass) {
          sock.write(Buffer.from([0x01, 0x01])); sock.destroy(); return;
        }
        sock.write(Buffer.from([0x01, 0x00]));

        sock.once('data', (req) => {
          const target = parseSocksTarget(req);
          if ('rep' in target) {
            sock.write(socksReply(target.rep));
            sock.destroy();
            return;
          }

          let established = false;
          const dest = net.createConnection({ host: target.host, port: target.port });

          dest.on('error', () => {
            // A SOCKS failure reply is only valid before the tunnel is up;
            // afterwards it would be injected into the relayed byte stream.
            if (!established && !sock.destroyed) sock.write(socksReply(0x04));
            sock.destroy();
          });
          dest.once('connect', () => {
            established = true;
            connects++;
            sock.write(socksReply(0x00));
            sock.pipe(dest);
            dest.pipe(sock);
          });

          // Wire teardown immediately so a client that goes away while we
          // are still dialing does not leave the outbound socket behind.
          sock.on('error', () => dest.destroy());
          sock.on('close', () => dest.destroy());
          dest.on('close', () => sock.destroy());
        });
      });
    });
  });

  await new Promise<void>((resolve, reject) => {
    server.once('error', reject);
    server.once('listening', () => resolve());
    server.listen(0, '127.0.0.1');
  });

  const addr = server.address();
  if (!addr || typeof addr === 'string') throw new Error('mock upstream: bad address');
  return {
    port: addr.port,
    totalConnects: () => connects,
    close: () => new Promise((r) => server.close(() => r())),
  };
}
|
||||
|
||||
/**
 * Starts a throwaway HTTP server on an ephemeral loopback port that answers
 * every request with `200 text/html` and the given body, counting requests.
 *
 * @param body - Response payload returned for every request.
 * @returns The bound port, a request counter, and a `close` function.
 */
async function startHttpFixture(body: string): Promise<{ port: number; close: () => Promise<void>; hits: () => number }> {
  let requestCount = 0;

  const server = http.createServer();
  server.on('request', (_req, res) => {
    requestCount += 1;
    res.writeHead(200, { 'Content-Type': 'text/html' }).end(body);
  });

  await new Promise<void>((resolve, reject) => {
    server
      .once('error', reject)
      .once('listening', () => resolve())
      .listen(0, '127.0.0.1');
  });

  const bound = server.address();
  if (bound === null || typeof bound === 'string') {
    throw new Error('http fixture: bad address');
  }

  const stop = (): Promise<void> =>
    new Promise((done) => {
      server.close(() => done());
    });

  return { port: bound.port, hits: () => requestCount, close: stop };
}
|
||||
|
||||
// End-to-end suite: a real headless Chromium is pointed at the local SOCKS5
// bridge, which forwards to the authenticated mock upstream, which in turn
// connects to the local HTTP fixture.
describe('bridge-chromium-e2e (codex F3)', () => {
  let upstream: MockUpstream;
  let bridge: BridgeHandle;
  let httpFixture: { port: number; close: () => Promise<void>; hits: () => number };
  let browser: Browser;

  beforeAll(async () => {
    upstream = await startAuthUpstream('alice', 'wonderland');
    bridge = await startSocksBridge({
      upstream: { host: '127.0.0.1', port: upstream.port, userId: 'alice', password: 'wonderland' },
    });
    httpFixture = await startHttpFixture('<html><body><h1 id="ok">via-bridge</h1></body></html>');
    browser = await chromium.launch({
      headless: true,
      proxy: { server: `socks5://127.0.0.1:${bridge.port}` },
    });
  });

  afterAll(async () => {
    // Any of these may still be unassigned if beforeAll threw part-way
    // through; optional chaining keeps teardown from raising a TypeError that
    // would hide the real setup failure and skip the remaining cleanup.
    await browser?.close();
    await httpFixture?.close();
    await bridge?.close();
    await upstream?.close();
  });

  test('Chromium navigates through bridge → auth upstream → HTTP fixture', async () => {
    const page = await browser.newPage();
    try {
      const before = upstream.totalConnects();
      const fixtureHitsBefore = httpFixture.hits();

      // Use 127.0.0.1 explicitly so we hit our local HTTP server (not via DNS).
      const target = `http://127.0.0.1:${httpFixture.port}/`;
      const response = await page.goto(target);
      expect(response?.ok()).toBe(true);

      const text = await page.locator('#ok').textContent();
      expect(text).toBe('via-bridge');

      // Proof of traversal: the upstream's connect counter incremented AND
      // the HTTP fixture got a hit.
      expect(upstream.totalConnects()).toBeGreaterThan(before);
      expect(httpFixture.hits()).toBeGreaterThan(fixtureHitsBefore);
    } finally {
      await page.close();
    }
  });

  test('subsequent navigation also traverses the bridge', async () => {
    const page = await browser.newPage();
    try {
      const before = upstream.totalConnects();
      const target = `http://127.0.0.1:${httpFixture.port}/page2`;
      await page.goto(target);
      expect(upstream.totalConnects()).toBeGreaterThan(before);
    } finally {
      await page.close();
    }
  });
});
|
||||
|
||||
describe('bridge-port-restart (codex F1, reframed)', () => {
  // The title used to say "different ephemeral ports", but the OS may hand the
  // same port back once the first listener is closed, and nothing asserted
  // inequality. What is actually verified: each instance binds a real port
  // and neither ever lands on the hardcoded 1090.
  test('two sequential bridge instances each bind an ephemeral port (never hardcoded 1090)', async () => {
    // codex F1: the original bridge-port-isolation test assumed two browse
    // daemons coexist, which contradicts our single-daemon refuse-on-mismatch
    // model (D2). The valid restart test is: spin up bridge A, close it,
    // spin up bridge B, and check the port each one reported.
    const upstream = await startAuthUpstream('u', 'p');
    try {
      const first = await startSocksBridge({
        upstream: { host: '127.0.0.1', port: upstream.port, userId: 'u', password: 'p' },
      });
      // Capture the port and close before asserting so a failed expectation
      // cannot leave the first listener running.
      const portA = first.port;
      await first.close();
      expect(portA).toBeGreaterThan(0);
      expect(portA).not.toBe(1090);

      const second = await startSocksBridge({
        upstream: { host: '127.0.0.1', port: upstream.port, userId: 'u', password: 'p' },
      });
      try {
        expect(second.port).toBeGreaterThan(0);
        expect(second.port).not.toBe(1090);
      } finally {
        await second.close();
      }
    } finally {
      await upstream.close();
    }
  });
});
|
||||
125
browse/test/stealth-webdriver.test.ts
Normal file
125
browse/test/stealth-webdriver.test.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { chromium, type Browser, type BrowserContext } from 'playwright';
|
||||
import { applyStealth, WEBDRIVER_MASK_SCRIPT, STEALTH_LAUNCH_ARGS } from '../src/stealth';
|
||||
|
||||
// One headless Chromium, launched with the stealth args, shared by every
// suite in this file that creates contexts from it.
let browser: Browser;

beforeAll(async () => {
  const launchOptions = { headless: true, args: STEALTH_LAUNCH_ARGS };
  browser = await chromium.launch(launchOptions);
});

afterAll(() => browser.close());
|
||||
|
||||
// Guards the launch-arg list against accidentally dropping the switch.
describe('STEALTH_LAUNCH_ARGS', () => {
  test('includes --disable-blink-features=AutomationControlled', () => {
    const expectedSwitch = '--disable-blink-features=AutomationControlled';
    const present = STEALTH_LAUNCH_ARGS.includes(expectedSwitch);
    expect(present).toBe(true);
  });
});
|
||||
|
||||
// Static checks on the init-script source text (no browser needed).
describe('WEBDRIVER_MASK_SCRIPT', () => {
  test('contains a single Object.defineProperty for navigator.webdriver', () => {
    expect(WEBDRIVER_MASK_SCRIPT).toContain('navigator');
    expect(WEBDRIVER_MASK_SCRIPT).toContain('webdriver');
    expect(WEBDRIVER_MASK_SCRIPT).toContain('false');
    // The title promises exactly one defineProperty call, so count them;
    // a second call would mean another property is being patched.
    const definePropertyCalls = WEBDRIVER_MASK_SCRIPT.match(/Object\.defineProperty/g) ?? [];
    expect(definePropertyCalls.length).toBe(1);
  });

  test('does NOT touch plugins, languages, or window.chrome (D7 narrowing)', () => {
    expect(WEBDRIVER_MASK_SCRIPT).not.toMatch(/plugins/i);
    expect(WEBDRIVER_MASK_SCRIPT).not.toMatch(/languages/i);
    expect(WEBDRIVER_MASK_SCRIPT).not.toMatch(/window\.chrome/);
  });
});
|
||||
|
||||
// Behavioural checks against pages created from a regular (non-persistent)
// context that had applyStealth() run on it.
describe('applyStealth — context level', () => {
  let context: BrowserContext;

  beforeAll(async () => {
    context = await browser.newContext();
    await applyStealth(context);
  });

  afterAll(async () => {
    await context.close();
  });

  test('navigator.webdriver returns false on a fresh page', async () => {
    const page = await context.newPage();
    try {
      const webdriver = await page.evaluate(() => (navigator as any).webdriver);
      expect(webdriver).toBe(false);
    } finally {
      await page.close();
    }
  });

  test('webdriver is false for every new page in the same context (init script applies to all pages)', async () => {
    const p1 = await context.newPage();
    const p2 = await context.newPage();
    try {
      const w1 = await p1.evaluate(() => (navigator as any).webdriver);
      const w2 = await p2.evaluate(() => (navigator as any).webdriver);
      expect(w1).toBe(false);
      expect(w2).toBe(false);
    } finally {
      await p1.close();
      await p2.close();
    }
  });

  test('navigator.plugins is NOT a hardcoded fixed list (D7: let Chromium emit native)', async () => {
    const page = await context.newPage();
    try {
      // We do not assert exact contents — Chromium versions vary. Instead we
      // detect an override structurally: the native `plugins` accessor lives
      // on Navigator.prototype and yields a PluginArray, whereas a
      // defineProperty-style fake adds an own property on `navigator` and
      // returns a plain array. (The previous check mapped `.name` over the
      // entries, which is `undefined` for a numeric fake list, so it could
      // never fire.)
      const probe = await page.evaluate(() => ({
        overriddenOnInstance: Object.prototype.hasOwnProperty.call(navigator, 'plugins'),
        isNativePluginArray: navigator.plugins instanceof PluginArray,
      }));
      expect(probe.overriddenOnInstance).toBe(false);
      expect(probe.isNativePluginArray).toBe(true);
    } finally {
      await page.close();
    }
  });

  test('navigator.languages is NOT hardcoded by us (D7)', async () => {
    const page = await context.newPage();
    try {
      const probe = await page.evaluate(() => ({
        langs: navigator.languages,
        overriddenOnInstance: Object.prototype.hasOwnProperty.call(navigator, 'languages'),
      }));
      // Values cannot be compared strictly because Chromium often emits
      // ['en-US', 'en'] on its own, so check the property still works...
      expect(Array.isArray(probe.langs)).toBe(true);
      expect(probe.langs.every((l) => typeof l === 'string')).toBe(true);
      // ...and that nobody shadowed the prototype accessor with an own
      // property on `navigator`, which is how a defineProperty fake shows up.
      expect(probe.overriddenOnInstance).toBe(false);
    } finally {
      await page.close();
    }
  });
});
|
||||
|
||||
describe('applyStealth — persistent context (headed-mode parity)', () => {
  test('webdriver mask applies to launchPersistentContext too (D7)', async () => {
    // Simulate the launchHeaded path: launchPersistentContext + applyStealth
    const fs = await import('fs');
    const os = await import('os');
    const path = await import('path');
    const userDataDir = fs.mkdtempSync(path.join(os.tmpdir(), 'browse-stealth-'));

    let ctx: BrowserContext | undefined;
    try {
      ctx = await chromium.launchPersistentContext(userDataDir, {
        headless: true,
        args: STEALTH_LAUNCH_ARGS,
      });
      await applyStealth(ctx);

      // Always open a new page: a persistent context may already hold a page
      // that was created before the init script was registered, and init
      // scripts only run for pages created or navigated afterwards. Probing
      // that pre-existing page would not exercise the mask at all.
      const page = await ctx.newPage();
      const webdriver = await page.evaluate(() => (navigator as any).webdriver);
      expect(webdriver).toBe(false);
    } finally {
      // Clean up the profile directory even when the launch itself failed.
      await ctx?.close();
      fs.rmSync(userDataDir, { recursive: true, force: true });
    }
  });
});
|
||||
Reference in New Issue
Block a user