Merge remote-tracking branch 'origin/main' into garrytan/resolver-factoring

# Conflicts: # CHANGELOG.md # investigate/SKILL.md # investigate/SKILL.md.tmpl # office-hours/SKILL.md # office-hours/SKILL.md.tmpl # ship/SKILL.md # ship/SKILL.md.tmpl
2026-05-21 20:28:24 +08:00 · 2026-03-29 21:21:46 -07:00
parent 22582df24b cdd6f7865d
commit 1164c879db
70 changed files with 898 additions and 367 deletions
--- a/browse/SKILL.md
+++ b/browse/SKILL.md
@@ -8,7 +8,7 @@ description: |
  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
  user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
-  site", "take a screenshot", or "dogfood this".
+  site", "take a screenshot", or "dogfood this". (gstack)
 allowed-tools:
  - Bash
  - Read
@@ -26,7 +26,7 @@ _UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/sk
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
-find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+find ~/.gstack/sessions -mmin +120 -type f -exec rm {} + 2>/dev/null || true
 _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true")
 _PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no")
@@ -48,7 +48,9 @@ _SESSION_ID="$$-$(date +%s)"
 echo "TELEMETRY: ${_TEL:-off}"
 echo "TEL_PROMPTED: $_TEL_PROMPTED"
 mkdir -p ~/.gstack/analytics
-echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}'  >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
+if [ "${_TEL:-off}" != "off" ]; then
+  echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}'  >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
+fi
 # zsh-compatible: use find instead of glob to avoid NOMATCH error
 for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do
  if [ -f "$_PF" ]; then
@@ -269,20 +271,22 @@ Run this bash:
 _TEL_END=$(date +%s)
 _TEL_DUR=$(( _TEL_END - _TEL_START ))
 rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true
-# Local analytics (always available, no binary needed)
-echo '{"skill":"SKILL_NAME","duration_s":"'"$_TEL_DUR"'","outcome":"OUTCOME","browse":"USED_BROWSE","session":"'"$_SESSION_ID"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
-# Remote telemetry (opt-in, requires binary)
-if [ "$_TEL" != "off" ] && [ -x ~/.claude/skills/gstack/bin/gstack-telemetry-log ]; then
-  ~/.claude/skills/gstack/bin/gstack-telemetry-log \
-    --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \
-    --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null &
+# Local + remote telemetry (both gated by _TEL; unset or empty counts as "off")
+if [ "${_TEL:-off}" != "off" ]; then
+  echo '{"skill":"SKILL_NAME","duration_s":"'"$_TEL_DUR"'","outcome":"OUTCOME","browse":"USED_BROWSE","session":"'"$_SESSION_ID"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true
+  if [ -x ~/.claude/skills/gstack/bin/gstack-telemetry-log ]; then
+    ~/.claude/skills/gstack/bin/gstack-telemetry-log \
+      --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \
+      --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null &
+  fi
 fi
 ```

 Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with
 success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used.
-If you cannot determine the outcome, use "unknown". The local JSONL always logs. The
-remote binary only runs if telemetry is not off and the binary exists.
+If you cannot determine the outcome, use "unknown". Both the local JSONL log and remote
+telemetry only run if telemetry is not off. Remote telemetry additionally requires
+the `gstack-telemetry-log` binary to exist and be executable.

 ## Plan Status Footer

--- a/browse/SKILL.md.tmpl
+++ b/browse/SKILL.md.tmpl
@@ -8,7 +8,7 @@ description: |
  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
  user flow, or file a bug with evidence. Use when asked to "open in browser", "test the
-  site", "take a screenshot", or "dogfood this".
+  site", "take a screenshot", or "dogfood this". (gstack)
 allowed-tools:
  - Bash
  - Read
--- a/browse/src/server.ts
+++ b/browse/src/server.ts
@@ -430,7 +430,7 @@ function spawnClaude(userMessage: string, extensionUrl?: string | null): void {

  const prompt = `${systemPrompt}\n\n<user-message>\n${escapedMessage}\n</user-message>`;
  const args = ['-p', prompt, '--model', 'opus', '--output-format', 'stream-json', '--verbose',
-    '--allowedTools', 'Bash,Read,Glob,Grep'];
+    '--allowedTools', 'Bash,Read,Glob,Grep,Write'];
  if (sidebarSession?.claudeSessionId) {
    args.push('--resume', sidebarSession.claudeSessionId);
  }
--- a/browse/src/sidebar-agent.ts
+++ b/browse/src/sidebar-agent.ts
@@ -161,8 +161,10 @@ async function askClaude(queueEntry: any): Promise<void> {
  return new Promise((resolve) => {
    // Use args from queue entry (server sets --model, --allowedTools, prompt framing).
    // Fall back to defaults only if queue entry has no args (backward compat).
+    // Write doesn't expand attack surface beyond what Bash already provides.
+    // The security boundary is the localhost-only message path, not the tool allowlist.
    let claudeArgs = args || ['-p', prompt, '--output-format', 'stream-json', '--verbose',
-      '--allowedTools', 'Bash,Read,Glob,Grep'];
+      '--allowedTools', 'Bash,Read,Glob,Grep,Write'];

    // Validate cwd exists — queue may reference a stale worktree
    let effectiveCwd = cwd || process.cwd();
@@ -188,20 +190,30 @@ async function askClaude(queueEntry: any): Promise<void> {
      }
    });

-    proc.stderr.on('data', () => {}); // Claude logs to stderr, ignore
+    let stderrBuffer = '';
+    proc.stderr.on('data', (data: Buffer) => {
+      stderrBuffer += data.toString();
+    });

    proc.on('close', (code) => {
      if (buffer.trim()) {
        try { handleStreamEvent(JSON.parse(buffer)); } catch {}
      }
-      sendEvent({ type: 'agent_done' }).then(() => {
+      const doneEvent: Record<string, any> = { type: 'agent_done' };
+      if (code !== 0 && stderrBuffer.trim()) {
+        doneEvent.stderr = stderrBuffer.trim().slice(-500);
+      }
+      sendEvent(doneEvent).then(() => {
        isProcessing = false;
        resolve();
      });
    });

    proc.on('error', (err) => {
-      sendEvent({ type: 'agent_error', error: err.message }).then(() => {
+      const errorMsg = stderrBuffer.trim()
+        ? `${err.message}\nstderr: ${stderrBuffer.trim().slice(-500)}`
+        : err.message;
+      sendEvent({ type: 'agent_error', error: errorMsg }).then(() => {
        isProcessing = false;
        resolve();
      });
@@ -211,7 +223,10 @@ async function askClaude(queueEntry: any): Promise<void> {
    const timeoutMs = parseInt(process.env.SIDEBAR_AGENT_TIMEOUT || '300000', 10);
    setTimeout(() => {
      try { proc.kill(); } catch {}
-      sendEvent({ type: 'agent_error', error: `Timed out after ${timeoutMs / 1000}s` }).then(() => {
+      const timeoutMsg = stderrBuffer.trim()
+        ? `Timed out after ${timeoutMs / 1000}s\nstderr: ${stderrBuffer.trim().slice(-500)}`
+        : `Timed out after ${timeoutMs / 1000}s`;
+      sendEvent({ type: 'agent_error', error: timeoutMsg }).then(() => {
        isProcessing = false;
        resolve();
      });
--- a/browse/test/sidebar-security.test.ts
+++ b/browse/test/sidebar-security.test.ts
@@ -115,6 +115,6 @@ describe('Sidebar prompt injection defense', () => {

  test('sidebar-agent falls back to defaults if queue has no args', () => {
    // Backward compatibility: if old queue entries lack args, use defaults
-    expect(AGENT_SRC).toContain("'--allowedTools', 'Bash,Read,Glob,Grep'");
+    expect(AGENT_SRC).toContain("'--allowedTools', 'Bash,Read,Glob,Grep,Write'");
  });
 });