From 9cb98a710333acd2fb226dde398e7eea086d2480 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Thu, 7 May 2026 13:37:14 -0700
Subject: [PATCH] feat(browse): --navigate flag on download for browser-triggered files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the --navigate strategy from community PR #1355 (originally from
@garrytan-agents). When set, download navigates to the URL with
waitUntil:'commit' and captures the resulting browser download via
page.waitForEvent('download'), then saves it via download.saveAs().
Handles URLs that trigger file downloads via Content-Disposition
headers, multi-hop CDN redirects requiring browser cookies, or anti-bot
CDN chains where page.request.fetch() can't follow the auth/redirect
chain.

The default path still uses the existing direct-fetch strategy;
--navigate is opt-in. It goes through the same validateNavigationUrl
SSRF gate as goto, so download --navigate cannot reach IPv4 metadata
endpoints (AWS IMDSv1, GCP/Azure equivalents) or arbitrary internal
hosts.

Infers the content type from the suggested filename for common
extensions (epub, pdf, zip, gz, mp3/mp4, jpg/jpeg/png, txt, html,
json), falling back to application/octet-stream. Same 200MB cap as
Strategy 1.

Frames the use case generically (anti-bot CDN, Content-Disposition,
redirect chains) rather than naming any specific site, per project
voice rules.

Co-Authored-By: @garrytan-agents
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 browse/src/commands.ts       |  2 +-
 browse/src/write-commands.ts | 59 ++++++++++++++++++++++++++++++++++--
 gstack/llms.txt              |  2 +-
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/browse/src/commands.ts b/browse/src/commands.ts
index 493c19ea..1af127d5 100644
--- a/browse/src/commands.ts
+++ b/browse/src/commands.ts
@@ -134,7 +134,7 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; description: string; usage: string }> = {
-  'download': { category: 'Extraction', description: 'Download URL or media element to disk using browser cookies', usage: 'download <url|selector> [path] [--base64]' },
+  'download': { category: 'Extraction', description: 'Download URL or media element to disk using browser cookies. Use --navigate for URLs that trigger browser downloads (CDN redirects, Content-Disposition, anti-bot protected sites)', usage: 'download <url|selector> [path] [--base64] [--navigate]' },
   'scrape': { category: 'Extraction', description: 'Bulk download all media from page. Writes manifest.json', usage: 'scrape [--selector sel] [--dir path] [--limit N]' },
   'archive': { category: 'Extraction', description: 'Save complete page as MHTML via CDP', usage: 'archive [path]' },
   // Visual
diff --git a/browse/src/write-commands.ts b/browse/src/write-commands.ts
index 73896ba3..61c84d83 100644
--- a/browse/src/write-commands.ts
+++ b/browse/src/write-commands.ts
@@ -1137,9 +1137,10 @@ export async function handleWriteCommand(
     }
 
     case 'download': {
-      if (args.length === 0) throw new Error('Usage: download <url|selector> [path] [--base64]');
+      if (args.length === 0) throw new Error('Usage: download <url|selector> [path] [--base64] [--navigate]');
       const isBase64 = args.includes('--base64');
-      const filteredArgs = args.filter(a => a !== '--base64');
+      const useNavigate = args.includes('--navigate');
+      const filteredArgs = args.filter(a => a !== '--base64' && a !== '--navigate');
       let url = filteredArgs[0];
       const outputPath = filteredArgs[1];
 
@@ -1200,6 +1201,60 @@
         if (!match) throw new Error('Failed to decode blob data');
         contentType = match[1];
         buffer = Buffer.from(match[2], 'base64');
+      } else if (useNavigate) {
+        // Strategy 2: Navigate to URL and capture browser-triggered download.
+        // Handles URLs that trigger file downloads via redirects,
+        // Content-Disposition headers, or anti-bot CDN chains where
+        // page.request.fetch() can't follow the auth/redirect chain.
+        await validateNavigationUrl(url);
+        const downloadPromise = page.waitForEvent('download', { timeout: 60000 });
+        // Use goto with 'commit' wait — the page may redirect to trigger
+        // the download, so 'domcontentloaded' may never fire.
+        page.goto(url, { waitUntil: 'commit', timeout: 30000 }).catch(() => {
+          // Navigation may "fail" because the response is a download,
+          // not a page. The download event handles it.
+        });
+        const download = await downloadPromise;
+        const failure = await download.failure();
+        if (failure) {
+          throw new Error(`Download failed: ${failure}`);
+        }
+        // Save to temp location first, then read into buffer
+        const tempPath = path.join(TEMP_DIR, `browse-nav-download-${Date.now()}`);
+        await download.saveAs(tempPath);
+        buffer = fs.readFileSync(tempPath);
+        // Try to infer content type from suggested filename
+        const suggested = download.suggestedFilename();
+        if (suggested) {
+          const extMatch = suggested.match(/\.([a-z0-9]+)$/i);
+          if (extMatch) {
+            const extLower = extMatch[1].toLowerCase();
+            const mimeMap: Record<string, string> = {
+              epub: 'application/epub+zip', pdf: 'application/pdf',
+              zip: 'application/zip', gz: 'application/gzip',
+              mp3: 'audio/mpeg', mp4: 'video/mp4',
+              jpg: 'image/jpeg', jpeg: 'image/jpeg', png: 'image/png',
+              txt: 'text/plain', html: 'text/html', json: 'application/json',
+            };
+            contentType = mimeMap[extLower] || 'application/octet-stream';
+          }
+        }
+        // Clean up temp file if we're going to write elsewhere
+        if (outputPath || isBase64) {
+          try { fs.unlinkSync(tempPath); } catch { /* ignore */ }
+        } else {
+          // No explicit output path — rename temp file with inferred extension.
+          const ext = contentType.split(';')[0].includes('/')
+            ? mimeToExt(contentType.split(';')[0].trim())
+            : '.bin';
+          const finalPath = path.join(TEMP_DIR, `browse-download-${Date.now()}${ext}`);
+          fs.renameSync(tempPath, finalPath);
+          const sizeKB = Math.round(buffer.length / 1024);
+          return `Downloaded: ${finalPath} (${sizeKB}KB, ${contentType.split(';')[0].trim()})${suggested ? ` [${suggested}]` : ''}`;
+        }
+        if (buffer.length > 200 * 1024 * 1024) {
+          throw new Error('File too large (>200MB).');
+        }
       } else {
         // Strategy 1: Direct URL via page.request.fetch().
         // Gate the URL through the same validator `goto` uses. Without
diff --git a/gstack/llms.txt b/gstack/llms.txt
index 7fb00400..8c5d4a39 100644
--- a/gstack/llms.txt
+++ b/gstack/llms.txt
@@ -64,7 +64,7 @@ Run with `browse <command> [args]`. Full reference: `browse/SKILL.md`.
 
 ### Extraction
 - `archive [path]`: Save complete page as MHTML via CDP
-- `download <url|selector> [path] [--base64]`: Download URL or media element to disk using browser cookies
+- `download <url|selector> [path] [--base64] [--navigate]`: Download URL or media element to disk using browser cookies.
 - `scrape [--selector sel] [--dir path] [--limit N]`: Bulk download all media from page.
 
 ### Inspection
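
For readers unfamiliar with the Playwright pattern behind Strategy 2, the following is a minimal standalone sketch of the navigate-and-capture flow, assuming an already-connected Playwright Page; the helper name captureNavigationDownload is illustrative and not part of this patch.

    import type { Page } from 'playwright';

    // Hypothetical helper showing the navigate-and-capture pattern used by
    // download --navigate; not part of the patch above.
    async function captureNavigationDownload(page: Page, url: string, destPath: string): Promise<string> {
      // Register the download listener before navigating so the event is not missed.
      const downloadPromise = page.waitForEvent('download', { timeout: 60000 });
      // 'commit' resolves as soon as the navigation is committed; later states such as
      // 'domcontentloaded' may never fire when the response is a file download.
      page.goto(url, { waitUntil: 'commit', timeout: 30000 }).catch(() => {
        // The navigation promise often rejects when the server answers with
        // Content-Disposition: attachment; the download event carries the payload.
      });
      const download = await downloadPromise;
      const failure = await download.failure();
      if (failure) throw new Error(`Download failed: ${failure}`);
      await download.saveAs(destPath);
      return download.suggestedFilename();
    }

A caller would invoke it as, for example, await captureNavigationDownload(page, 'https://example.com/file', '/tmp/out.bin'), which mirrors what the download command does internally when --navigate is passed.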