diff --git a/make-pdf/SKILL.md b/make-pdf/SKILL.md new file mode 100644 index 00000000..475889a9 --- /dev/null +++ b/make-pdf/SKILL.md @@ -0,0 +1,627 @@ +--- +name: make-pdf +preamble-tier: 1 +version: 1.0.0 +description: | + Turn any markdown file into a publication-quality PDF. Proper 1in margins, + intelligent page breaks, page numbers, cover pages, running headers, curly + quotes and em dashes, clickable TOC, diagonal DRAFT watermark. Output you'd + send to a VC partner, a book agent, a judge, or Rick Rubin's team. Not a + draft artifact — a finished artifact. Use when asked to "make a PDF", + "export to PDF", "turn this markdown into a PDF", or "generate a document". + (gstack) + Voice triggers (speech-to-text aliases): "make this a pdf", "make it a pdf", "export to pdf", "turn this into a pdf", "turn this markdown into a pdf", "generate a pdf", "make a pdf from", "pdf this markdown". +triggers: + - markdown to pdf + - generate pdf + - make pdf + - export pdf +allowed-tools: + - Bash + - Read + - AskUserQuestion +--- + + + +## Preamble (run first) + +```bash +_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +[ -n "$_UPD" ] && echo "$_UPD" || true +mkdir -p ~/.gstack/sessions +touch ~/.gstack/sessions/"$PPID" +_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ') +find ~/.gstack/sessions -mmin +120 -type f -exec rm {} + 2>/dev/null || true +_PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") +_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") +echo "BRANCH: $_BRANCH" +_SKILL_PREFIX=$(~/.claude/skills/gstack/bin/gstack-config get skill_prefix 2>/dev/null || echo "false") +echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" +echo "SKILL_PREFIX: $_SKILL_PREFIX" +source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true +REPO_MODE=${REPO_MODE:-unknown} +echo "REPO_MODE: $REPO_MODE" +_LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no") +echo "LAKE_INTRO: $_LAKE_SEEN" +_TEL=$(~/.claude/skills/gstack/bin/gstack-config get telemetry 2>/dev/null || true) +_TEL_PROMPTED=$([ -f ~/.gstack/.telemetry-prompted ] && echo "yes" || echo "no") +_TEL_START=$(date +%s) +_SESSION_ID="$$-$(date +%s)" +echo "TELEMETRY: ${_TEL:-off}" +echo "TEL_PROMPTED: $_TEL_PROMPTED" +# Question tuning (opt-in; see /plan-tune + docs/designs/PLAN_TUNING_V0.md) +_QUESTION_TUNING=$(~/.claude/skills/gstack/bin/gstack-config get question_tuning 2>/dev/null || echo "false") +echo "QUESTION_TUNING: $_QUESTION_TUNING" +# Writing style (V1: default = ELI10-style, terse = V0 prose. See docs/designs/PLAN_TUNING_V1.md) +_EXPLAIN_LEVEL=$(~/.claude/skills/gstack/bin/gstack-config get explain_level 2>/dev/null || echo "default") +if [ "$_EXPLAIN_LEVEL" != "default" ] && [ "$_EXPLAIN_LEVEL" != "terse" ]; then _EXPLAIN_LEVEL="default"; fi +echo "EXPLAIN_LEVEL: $_EXPLAIN_LEVEL" +# V1 upgrade migration pending-prompt flag +_WRITING_STYLE_PENDING=$([ -f ~/.gstack/.writing-style-prompt-pending ] && echo "yes" || echo "no") +echo "WRITING_STYLE_PENDING: $_WRITING_STYLE_PENDING" +mkdir -p ~/.gstack/analytics +if [ "$_TEL" != "off" ]; then +echo '{"skill":"make-pdf","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true +fi +# zsh-compatible: use find instead of glob to avoid NOMATCH error +for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do + if [ -f "$_PF" ]; then + if [ "$_TEL" != "off" ] && [ -x "~/.claude/skills/gstack/bin/gstack-telemetry-log" ]; then + ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true + fi + rm -f "$_PF" 2>/dev/null || true + fi + break +done +# Learnings count +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +_LEARN_FILE="${GSTACK_HOME:-$HOME/.gstack}/projects/${SLUG:-unknown}/learnings.jsonl" +if [ -f "$_LEARN_FILE" ]; then + _LEARN_COUNT=$(wc -l < "$_LEARN_FILE" 2>/dev/null | tr -d ' ') + echo "LEARNINGS: $_LEARN_COUNT entries loaded" + if [ "$_LEARN_COUNT" -gt 5 ] 2>/dev/null; then + ~/.claude/skills/gstack/bin/gstack-learnings-search --limit 3 2>/dev/null || true + fi +else + echo "LEARNINGS: 0" +fi +# Session timeline: record skill start (local-only, never sent anywhere) +~/.claude/skills/gstack/bin/gstack-timeline-log '{"skill":"make-pdf","event":"started","branch":"'"$_BRANCH"'","session":"'"$_SESSION_ID"'"}' 2>/dev/null & +# Check if CLAUDE.md has routing rules +_HAS_ROUTING="no" +if [ -f CLAUDE.md ] && grep -q "## Skill routing" CLAUDE.md 2>/dev/null; then + _HAS_ROUTING="yes" +fi +_ROUTING_DECLINED=$(~/.claude/skills/gstack/bin/gstack-config get routing_declined 2>/dev/null || echo "false") +echo "HAS_ROUTING: $_HAS_ROUTING" +echo "ROUTING_DECLINED: $_ROUTING_DECLINED" +# Vendoring deprecation: detect if CWD has a vendored gstack copy +_VENDORED="no" +if [ -d ".claude/skills/gstack" ] && [ ! -L ".claude/skills/gstack" ]; then + if [ -f ".claude/skills/gstack/VERSION" ] || [ -d ".claude/skills/gstack/.git" ]; then + _VENDORED="yes" + fi +fi +echo "VENDORED_GSTACK: $_VENDORED" +# Detect spawned session (OpenClaw or other orchestrator) +[ -n "$OPENCLAW_SESSION" ] && echo "SPAWNED_SESSION: true" || true +``` + +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. + +If `SKILL_PREFIX` is `"true"`, the user has namespaced skill names. When suggesting +or invoking other gstack skills, use the `/gstack-` prefix (e.g., `/gstack-qa` instead +of `/qa`, `/gstack-ship` instead of `/ship`). Disk paths are unaffected — always use +`~/.claude/skills/gstack/[skill-name]/SKILL.md` for reading skill files. + +If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. + +If `WRITING_STYLE_PENDING` is `yes`: You're on the first skill run after upgrading +to gstack v1. Ask the user once about the new default writing style. Use AskUserQuestion: + +> v1 prompts = simpler. Technical terms get a one-sentence gloss on first use, +> questions are framed in outcome terms, sentences are shorter. +> +> Keep the new default, or prefer the older tighter prose? + +Options: +- A) Keep the new default (recommended — good writing helps everyone) +- B) Restore V0 prose — set `explain_level: terse` + +If A: leave `explain_level` unset (defaults to `default`). +If B: run `~/.claude/skills/gstack/bin/gstack-config set explain_level terse`. + +Always run (regardless of choice): +```bash +rm -f ~/.gstack/.writing-style-prompt-pending +touch ~/.gstack/.writing-style-prompted +``` + +This only happens once. If `WRITING_STYLE_PENDING` is `no`, skip this entirely. + +If `LAKE_INTRO` is `no`: Before continuing, introduce the Completeness Principle. +Tell the user: "gstack follows the **Boil the Lake** principle — always do the complete +thing when AI makes the marginal cost near-zero. Read more: https://garryslist.org/posts/boil-the-ocean" +Then offer to open the essay in their default browser: + +```bash +open https://garryslist.org/posts/boil-the-ocean +touch ~/.gstack/.completeness-intro-seen +``` + +Only run `open` if the user says yes. Always run `touch` to mark as seen. This only happens once. + +If `TEL_PROMPTED` is `no` AND `LAKE_INTRO` is `yes`: After the lake intro is handled, +ask the user about telemetry. Use AskUserQuestion: + +> Help gstack get better! Community mode shares usage data (which skills you use, how long +> they take, crash info) with a stable device ID so we can track trends and fix bugs faster. +> No code, file paths, or repo names are ever sent. +> Change anytime with `gstack-config set telemetry off`. + +Options: +- A) Help gstack get better! (recommended) +- B) No thanks + +If A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry community` + +If B: ask a follow-up AskUserQuestion: + +> How about anonymous mode? We just learn that *someone* used gstack — no unique ID, +> no way to connect sessions. Just a counter that helps us know if anyone's out there. + +Options: +- A) Sure, anonymous is fine +- B) No thanks, fully off + +If B→A: run `~/.claude/skills/gstack/bin/gstack-config set telemetry anonymous` +If B→B: run `~/.claude/skills/gstack/bin/gstack-config set telemetry off` + +Always run: +```bash +touch ~/.gstack/.telemetry-prompted +``` + +This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. + +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + +If `HAS_ROUTING` is `no` AND `ROUTING_DECLINED` is `false` AND `PROACTIVE_PROMPTED` is `yes`: +Check if a CLAUDE.md file exists in the project root. If it does not exist, create it. + +Use AskUserQuestion: + +> gstack works best when your project's CLAUDE.md includes skill routing rules. +> This tells Claude to use specialized workflows (like /ship, /investigate, /qa) +> instead of answering directly. It's a one-time addition, about 15 lines. + +Options: +- A) Add routing rules to CLAUDE.md (recommended) +- B) No thanks, I'll invoke skills manually + +If A: Append this section to the end of CLAUDE.md: + +```markdown + +## Skill routing + +When the user's request matches an available skill, ALWAYS invoke it using the Skill +tool as your FIRST action. Do NOT answer directly, do NOT use other tools first. +The skill has specialized workflows that produce better results than ad-hoc answers. + +Key routing rules: +- Product ideas, "is this worth building", brainstorming → invoke office-hours +- Bugs, errors, "why is this broken", 500 errors → invoke investigate +- Ship, deploy, push, create PR → invoke ship +- QA, test the site, find bugs → invoke qa +- Code review, check my diff → invoke review +- Update docs after shipping → invoke document-release +- Weekly retro → invoke retro +- Design system, brand → invoke design-consultation +- Visual audit, design polish → invoke design-review +- Architecture review → invoke plan-eng-review +- Save progress, save state, save my work → invoke context-save +- Resume, where was I, pick up where I left off → invoke context-restore +- Code quality, health check → invoke health +``` + +Then commit the change: `git add CLAUDE.md && git commit -m "chore: add gstack skill routing rules to CLAUDE.md"` + +If B: run `~/.claude/skills/gstack/bin/gstack-config set routing_declined true` +Say "No problem. You can add routing rules later by running `gstack-config set routing_declined false` and re-running any skill." + +This only happens once per project. If `HAS_ROUTING` is `yes` or `ROUTING_DECLINED` is `true`, skip this entirely. + +If `VENDORED_GSTACK` is `yes`: This project has a vendored copy of gstack at +`.claude/skills/gstack/`. Vendoring is deprecated. We will not keep vendored copies +up to date, so this project's gstack will fall behind. + +Use AskUserQuestion (one-time per project, check for `~/.gstack/.vendoring-warned-$SLUG` marker): + +> This project has gstack vendored in `.claude/skills/gstack/`. Vendoring is deprecated. +> We won't keep this copy up to date, so you'll fall behind on new features and fixes. +> +> Want to migrate to team mode? It takes about 30 seconds. + +Options: +- A) Yes, migrate to team mode now +- B) No, I'll handle it myself + +If A: +1. Run `git rm -r .claude/skills/gstack/` +2. Run `echo '.claude/skills/gstack/' >> .gitignore` +3. Run `~/.claude/skills/gstack/bin/gstack-team-init required` (or `optional`) +4. Run `git add .claude/ .gitignore CLAUDE.md && git commit -m "chore: migrate gstack from vendored to team mode"` +5. Tell the user: "Done. Each developer now runs: `cd ~/.claude/skills/gstack && ./setup --team`" + +If B: say "OK, you're on your own to keep the vendored copy up to date." + +Always run (regardless of choice): +```bash +eval "$(~/.claude/skills/gstack/bin/gstack-slug 2>/dev/null)" 2>/dev/null || true +touch ~/.gstack/.vendoring-warned-${SLUG:-unknown} +``` + +This only happens once per project. If the marker file exists, skip entirely. + +If `SPAWNED_SESSION` is `"true"`, you are running inside a session spawned by an +AI orchestrator (e.g., OpenClaw). In spawned sessions: +- Do NOT use AskUserQuestion for interactive prompts. Auto-choose the recommended option. +- Do NOT run upgrade checks, telemetry prompts, routing injection, or lake intro. +- Focus on completing the task and reporting results via prose output. +- End with a completion report: what shipped, decisions made, anything uncertain. + + + +## Voice + +**Tone:** direct, concrete, sharp, never corporate, never academic. Sound like a builder, not a consultant. Name the file, the function, the command. No filler, no throat-clearing. + +**Writing rules:** No em dashes (use commas, periods, "..."). No AI vocabulary (delve, crucial, robust, comprehensive, nuanced, etc.). Short paragraphs. End with what to do. + +The user always has context you don't. Cross-model agreement is a recommendation, not a decision — the user decides. + +## Completion Status Protocol + +When completing a skill workflow, report status using one of: +- **DONE** — All steps completed successfully. Evidence provided for each claim. +- **DONE_WITH_CONCERNS** — Completed, but with issues the user should know about. List each concern. +- **BLOCKED** — Cannot proceed. State what is blocking and what was tried. +- **NEEDS_CONTEXT** — Missing information required to continue. State exactly what you need. + +### Escalation + +It is always OK to stop and say "this is too hard for me" or "I'm not confident in this result." + +Bad work is worse than no work. You will not be penalized for escalating. +- If you have attempted a task 3 times without success, STOP and escalate. +- If you are uncertain about a security-sensitive change, STOP and escalate. +- If the scope of work exceeds what you can verify, STOP and escalate. + +Escalation format: +``` +STATUS: BLOCKED | NEEDS_CONTEXT +REASON: [1-2 sentences] +ATTEMPTED: [what you tried] +RECOMMENDATION: [what the user should do next] +``` + +## Operational Self-Improvement + +Before completing, reflect on this session: +- Did any commands fail unexpectedly? +- Did you take a wrong approach and have to backtrack? +- Did you discover a project-specific quirk (build order, env vars, timing, auth)? +- Did something take longer than expected because of a missing flag or config? + +If yes, log an operational learning for future sessions: + +```bash +~/.claude/skills/gstack/bin/gstack-learnings-log '{"skill":"SKILL_NAME","type":"operational","key":"SHORT_KEY","insight":"DESCRIPTION","confidence":N,"source":"observed"}' +``` + +Replace SKILL_NAME with the current skill name. Only log genuine operational discoveries. +Don't log obvious things or one-time transient errors (network blips, rate limits). +A good test: would knowing this save 5+ minutes in a future session? If yes, log it. + +## Telemetry (run last) + +After the skill workflow completes (success, error, or abort), log the telemetry event. +Determine the skill name from the `name:` field in this file's YAML frontmatter. +Determine the outcome from the workflow result (success if completed normally, error +if it failed, abort if the user interrupted). + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This command writes telemetry to +`~/.gstack/analytics/` (user config directory, not project files). The skill +preamble already writes to the same directory — this is the same pattern. +Skipping this command loses session duration and outcome data. + +Run this bash: + +```bash +_TEL_END=$(date +%s) +_TEL_DUR=$(( _TEL_END - _TEL_START )) +rm -f ~/.gstack/analytics/.pending-"$_SESSION_ID" 2>/dev/null || true +# Session timeline: record skill completion (local-only, never sent anywhere) +~/.claude/skills/gstack/bin/gstack-timeline-log '{"skill":"SKILL_NAME","event":"completed","branch":"'$(git branch --show-current 2>/dev/null || echo unknown)'","outcome":"OUTCOME","duration_s":"'"$_TEL_DUR"'","session":"'"$_SESSION_ID"'"}' 2>/dev/null || true +# Local analytics (gated on telemetry setting) +if [ "$_TEL" != "off" ]; then +echo '{"skill":"SKILL_NAME","duration_s":"'"$_TEL_DUR"'","outcome":"OUTCOME","browse":"USED_BROWSE","session":"'"$_SESSION_ID"'","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true +fi +# Remote telemetry (opt-in, requires binary) +if [ "$_TEL" != "off" ] && [ -x ~/.claude/skills/gstack/bin/gstack-telemetry-log ]; then + ~/.claude/skills/gstack/bin/gstack-telemetry-log \ + --skill "SKILL_NAME" --duration "$_TEL_DUR" --outcome "OUTCOME" \ + --used-browse "USED_BROWSE" --session-id "$_SESSION_ID" 2>/dev/null & +fi +``` + +Replace `SKILL_NAME` with the actual skill name from frontmatter, `OUTCOME` with +success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was used. +If you cannot determine the outcome, use "unknown". The local JSONL always logs. The +remote binary only runs if telemetry is not off and the binary exists. + +## Plan Mode Safe Operations + +When in plan mode, these operations are always allowed because they produce +artifacts that inform the plan, not code changes: + +- `$B` commands (browse: screenshots, page inspection, navigation, snapshots) +- `$D` commands (design: generate mockups, variants, comparison boards, iterate) +- `codex exec` / `codex review` (outside voice, plan review, adversarial challenge) +- Writing to `~/.gstack/` (config, analytics, review logs, design artifacts, learnings) +- Writing to the plan file (already allowed by plan mode) +- `open` commands for viewing generated artifacts (comparison boards, HTML previews) + +These are read-only in spirit — they inspect the live site, generate visual artifacts, +or get independent opinions. They do NOT modify project source files. + +## Skill Invocation During Plan Mode + +If a user invokes a skill during plan mode, that invoked skill workflow takes +precedence over generic plan mode behavior until it finishes or the user explicitly +cancels that skill. + +Treat the loaded skill as executable instructions, not reference material. Follow +it step by step. Do not summarize, skip, reorder, or shortcut its steps. + +If the skill says to use AskUserQuestion, do that. Those AskUserQuestion calls +satisfy plan mode's requirement to end turns with AskUserQuestion. + +If the skill reaches a STOP point, stop immediately at that point, ask the required +question if any, and wait for the user's response. Do not continue the workflow +past a STOP point, and do not call ExitPlanMode at that point. + +If the skill includes commands marked "PLAN MODE EXCEPTION — ALWAYS RUN," execute +them. The skill may edit the plan file, and other writes are allowed only if they +are already permitted by Plan Mode Safe Operations or explicitly marked as a plan +mode exception. + +Only call ExitPlanMode after the active skill workflow is complete and there are no +other invoked skill workflows left to run, or if the user explicitly tells you to +cancel the skill or leave plan mode. + +## Plan Status Footer + +When you are in plan mode and about to call ExitPlanMode: + +1. Check if the plan file already has a `## GSTACK REVIEW REPORT` section. +2. If it DOES — skip (a review skill already wrote a richer report). +3. If it does NOT — run this command: + +\`\`\`bash +~/.claude/skills/gstack/bin/gstack-review-read +\`\`\` + +Then write a `## GSTACK REVIEW REPORT` section to the end of the plan file: + +- If the output contains review entries (JSONL lines before `---CONFIG---`): format the + standard report table with runs/status/findings per skill, same format as the review + skills use. +- If the output is `NO_REVIEWS` or empty: write this placeholder table: + +\`\`\`markdown +## GSTACK REVIEW REPORT + +| Review | Trigger | Why | Runs | Status | Findings | +|--------|---------|-----|------|--------|----------| +| CEO Review | \`/plan-ceo-review\` | Scope & strategy | 0 | — | — | +| Codex Review | \`/codex review\` | Independent 2nd opinion | 0 | — | — | +| Eng Review | \`/plan-eng-review\` | Architecture & tests (required) | 0 | — | — | +| Design Review | \`/plan-design-review\` | UI/UX gaps | 0 | — | — | +| DX Review | \`/plan-devex-review\` | Developer experience gaps | 0 | — | — | + +**VERDICT:** NO REVIEWS YET — run \`/autoplan\` for full review pipeline, or individual reviews above. +\`\`\` + +**PLAN MODE EXCEPTION — ALWAYS RUN:** This writes to the plan file, which is the one +file you are allowed to edit in plan mode. The plan file review report is part of the +plan's living status. + +# make-pdf: publication-quality PDFs from markdown + +Turn `.md` files into PDFs that look like Faber & Faber essays: 1in margins, +left-aligned body, Helvetica throughout, curly quotes and em dashes, optional +cover page and clickable TOC, diagonal DRAFT watermark when you need it. +Copy-paste from the PDF produces clean words, never "S a i l i n g". + +## MAKE-PDF SETUP (run this check BEFORE any make-pdf command) + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +P="" +[ -n "$MAKE_PDF_BIN" ] && [ -x "$MAKE_PDF_BIN" ] && P="$MAKE_PDF_BIN" +[ -z "$P" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" ] && P="$_ROOT/.claude/skills/gstack/make-pdf/dist/pdf" +[ -z "$P" ] && P="$HOME/.claude/skills/gstack/make-pdf/dist/pdf" +if [ -x "$P" ]; then + echo "MAKE_PDF_READY: $P" + alias _p_="$P" # shellcheck alias helper (not exported) + export P # available as $P in subsequent blocks within the same skill invocation +else + echo "MAKE_PDF_NOT_AVAILABLE (run './setup' in the gstack repo to build it)" +fi +``` + +If `MAKE_PDF_NOT_AVAILABLE` is printed: tell the user the binary is not +built. Have them run `./setup` from the gstack repo, then retry. + +If `MAKE_PDF_READY` is printed: `$P` is the binary path for the rest of +the skill. Use `$P` (not an explicit path) so the skill body stays portable. + +Core commands: +- `$P generate [output.pdf]` — render markdown to PDF (80% use case) +- `$P generate --cover --toc essay.md out.pdf` — full publication layout +- `$P generate --watermark DRAFT memo.md draft.pdf` — diagonal DRAFT watermark +- `$P preview ` — render HTML and open in browser (fast iteration) +- `$P setup` — verify browse + Chromium + pdftotext and run a smoke test +- `$P --help` — full flag reference + +Output contract: +- `stdout`: ONLY the output path on success. One line. +- `stderr`: progress (`Rendering HTML... Generating PDF...`) unless `--quiet`. +- Exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable. + +## Core patterns + +### 80% case — memo/letter + +One command, no flags. Gets a clean PDF with running header + page numbers ++ CONFIDENTIAL footer by default. + +```bash +$P generate letter.md # writes /tmp/letter.pdf +$P generate letter.md letter.pdf # explicit output path +``` + +### Publication mode — cover + TOC + chapter breaks + +```bash +$P generate --cover --toc --author "Garry Tan" --title "On Horizons" \ + essay.md essay.pdf +``` + +Each top-level H1 in the markdown starts a new page. Disable with +`--no-chapter-breaks` for memos that happen to have multiple H1s. + +### Draft-stage watermark + +```bash +$P generate --watermark DRAFT memo.md draft.pdf +``` + +Diagonal 10% opacity DRAFT across every page. When the draft is final, drop +the flag and regenerate. + +### Fast iteration via preview + +```bash +$P preview essay.md +``` + +Renders HTML with the same print CSS and opens it in your browser. Refresh +as you edit the markdown. Skip the PDF round trip until you're ready. + +### Brand-free (no CONFIDENTIAL footer) + +```bash +$P generate --no-confidential memo.md memo.pdf +``` + +## Common flags + +``` +Page layout: + --margins 1in (default) | 72pt | 2.54cm | 25mm + --page-size letter|a4|legal + +Structure: + --cover Cover page (title, author, date, hairline rule) + --toc Clickable TOC with page numbers + --no-chapter-breaks Don't start a new page at every H1 + +Branding: + --watermark Diagonal watermark ("DRAFT", "CONFIDENTIAL") + --header-template Custom running header + --footer-template Custom footer (mutex with --page-numbers) + --no-confidential Suppress the CONFIDENTIAL right-footer + +Output: + --page-numbers "N of M" footer (default on) + --tagged Accessible PDF (default on) + --outline PDF bookmarks from headings (default on) + --quiet Suppress progress on stderr + --verbose Per-stage timings + +Network: + --allow-network Fetch external images. Off by default + (blocks tracking pixels). + +Metadata: + --title "..." Document title (defaults to first H1) + --author "..." Author for cover + PDF metadata + --date "..." Date for cover (defaults to today) +``` + +## When Claude should run it + +Watch for markdown-to-PDF intent. Any of these patterns → run `$P generate`: + +- "Can you make this markdown a PDF" +- "Export it as a PDF" +- "Turn this letter into a PDF" +- "I need a PDF of the essay" +- "Print this as a PDF for me" + +If the user has a `.md` file open and says "make it look nice", propose +`$P generate --cover --toc` and ask before running. + +## Debugging + +- Output looks empty / blank → check browse daemon is running: `$B status`. +- Fragmented text on copy-paste → highlight.js output (Phase 4). Retry with + `--no-syntax` once that flag exists. For now, remove fenced code blocks + and regenerate. +- Paged.js timeout → probably no headings in the markdown. Drop `--toc`. +- External image missing → add `--allow-network` (understand you're giving + the markdown file permission to fetch from its image URLs). +- Generated PDF too tall/wide → `--page-size a4` or `--margins 0.75in`. + +## Output contract + +``` +stdout: /tmp/letter.pdf ← just the path, one line +stderr: Rendering HTML... ← progress spinner (unless --quiet) + Generating PDF... + Done in 1.5s. 43 words · 22KB · /tmp/letter.pdf + +exit code: 0 success / 1 bad args / 2 render error / 3 Paged.js timeout + / 4 browse unavailable +``` + +Capture the path: `PDF=$($P generate letter.md)` — then use `$PDF`. diff --git a/make-pdf/SKILL.md.tmpl b/make-pdf/SKILL.md.tmpl new file mode 100644 index 00000000..38668290 --- /dev/null +++ b/make-pdf/SKILL.md.tmpl @@ -0,0 +1,161 @@ +--- +name: make-pdf +preamble-tier: 1 +version: 1.0.0 +description: | + Turn any markdown file into a publication-quality PDF. Proper 1in margins, + intelligent page breaks, page numbers, cover pages, running headers, curly + quotes and em dashes, clickable TOC, diagonal DRAFT watermark. Output you'd + send to a VC partner, a book agent, a judge, or Rick Rubin's team. Not a + draft artifact — a finished artifact. Use when asked to "make a PDF", + "export to PDF", "turn this markdown into a PDF", or "generate a document". + (gstack) +voice-triggers: + - "make this a pdf" + - "make it a pdf" + - "export to pdf" + - "turn this into a pdf" + - "turn this markdown into a pdf" + - "generate a pdf" + - "make a pdf from" + - "pdf this markdown" +triggers: + - markdown to pdf + - generate pdf + - make pdf + - export pdf +allowed-tools: + - Bash + - Read + - AskUserQuestion +--- + +{{PREAMBLE}} + +# make-pdf: publication-quality PDFs from markdown + +Turn `.md` files into PDFs that look like Faber & Faber essays: 1in margins, +left-aligned body, Helvetica throughout, curly quotes and em dashes, optional +cover page and clickable TOC, diagonal DRAFT watermark when you need it. +Copy-paste from the PDF produces clean words, never "S a i l i n g". + +{{MAKE_PDF_SETUP}} + +## Core patterns + +### 80% case — memo/letter + +One command, no flags. Gets a clean PDF with running header + page numbers ++ CONFIDENTIAL footer by default. + +```bash +$P generate letter.md # writes /tmp/letter.pdf +$P generate letter.md letter.pdf # explicit output path +``` + +### Publication mode — cover + TOC + chapter breaks + +```bash +$P generate --cover --toc --author "Garry Tan" --title "On Horizons" \ + essay.md essay.pdf +``` + +Each top-level H1 in the markdown starts a new page. Disable with +`--no-chapter-breaks` for memos that happen to have multiple H1s. + +### Draft-stage watermark + +```bash +$P generate --watermark DRAFT memo.md draft.pdf +``` + +Diagonal 10% opacity DRAFT across every page. When the draft is final, drop +the flag and regenerate. + +### Fast iteration via preview + +```bash +$P preview essay.md +``` + +Renders HTML with the same print CSS and opens it in your browser. Refresh +as you edit the markdown. Skip the PDF round trip until you're ready. + +### Brand-free (no CONFIDENTIAL footer) + +```bash +$P generate --no-confidential memo.md memo.pdf +``` + +## Common flags + +``` +Page layout: + --margins 1in (default) | 72pt | 2.54cm | 25mm + --page-size letter|a4|legal + +Structure: + --cover Cover page (title, author, date, hairline rule) + --toc Clickable TOC with page numbers + --no-chapter-breaks Don't start a new page at every H1 + +Branding: + --watermark Diagonal watermark ("DRAFT", "CONFIDENTIAL") + --header-template Custom running header + --footer-template Custom footer (mutex with --page-numbers) + --no-confidential Suppress the CONFIDENTIAL right-footer + +Output: + --page-numbers "N of M" footer (default on) + --tagged Accessible PDF (default on) + --outline PDF bookmarks from headings (default on) + --quiet Suppress progress on stderr + --verbose Per-stage timings + +Network: + --allow-network Fetch external images. Off by default + (blocks tracking pixels). + +Metadata: + --title "..." Document title (defaults to first H1) + --author "..." Author for cover + PDF metadata + --date "..." Date for cover (defaults to today) +``` + +## When Claude should run it + +Watch for markdown-to-PDF intent. Any of these patterns → run `$P generate`: + +- "Can you make this markdown a PDF" +- "Export it as a PDF" +- "Turn this letter into a PDF" +- "I need a PDF of the essay" +- "Print this as a PDF for me" + +If the user has a `.md` file open and says "make it look nice", propose +`$P generate --cover --toc` and ask before running. + +## Debugging + +- Output looks empty / blank → check browse daemon is running: `$B status`. +- Fragmented text on copy-paste → highlight.js output (Phase 4). Retry with + `--no-syntax` once that flag exists. For now, remove fenced code blocks + and regenerate. +- Paged.js timeout → probably no headings in the markdown. Drop `--toc`. +- External image missing → add `--allow-network` (understand you're giving + the markdown file permission to fetch from its image URLs). +- Generated PDF too tall/wide → `--page-size a4` or `--margins 0.75in`. + +## Output contract + +``` +stdout: /tmp/letter.pdf ← just the path, one line +stderr: Rendering HTML... ← progress spinner (unless --quiet) + Generating PDF... + Done in 1.5s. 43 words · 22KB · /tmp/letter.pdf + +exit code: 0 success / 1 bad args / 2 render error / 3 Paged.js timeout + / 4 browse unavailable +``` + +Capture the path: `PDF=$($P generate letter.md)` — then use `$PDF`. diff --git a/make-pdf/src/browseClient.ts b/make-pdf/src/browseClient.ts new file mode 100644 index 00000000..92845907 --- /dev/null +++ b/make-pdf/src/browseClient.ts @@ -0,0 +1,326 @@ +/** + * Typed shell-out wrapper for the browse CLI. + * + * Every browse call goes through this file. Reasons: + * - One place to do binary resolution. + * - One place to enforce the --from-file convention for large payloads + * (Windows argv cap is 8191 chars; 200KB HTML dies without this). + * - One place that maps non-zero exit codes to typed errors. + * + * Binary resolution order (Codex round 2 #4): + * 1. $BROWSE_BIN env override + * 2. sibling dir: dirname(argv[0])/../browse/dist/browse + * 3. ~/.claude/skills/gstack/browse/dist/browse + * 4. PATH lookup: `browse` + * 5. error with setup hint + */ + +import { execFileSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import * as crypto from "node:crypto"; + +import { BrowseClientError } from "./types"; + +export interface LoadHtmlOptions { + html: string; // raw HTML string + waitUntil?: "load" | "domcontentloaded" | "networkidle"; + tabId: number; +} + +export interface PdfOptions { + output: string; + tabId: number; + format?: string; + width?: string; + height?: string; + marginTop?: string; + marginRight?: string; + marginBottom?: string; + marginLeft?: string; + headerTemplate?: string; + footerTemplate?: string; + pageNumbers?: boolean; + tagged?: boolean; + outline?: boolean; + printBackground?: boolean; + preferCSSPageSize?: boolean; + toc?: boolean; +} + +export interface JsOptions { + tabId: number; + expression: string; // JS expression to evaluate +} + +/** + * Locate the browse binary. Throws a BrowseClientError with a + * canonical setup message if not found. + */ +export function resolveBrowseBin(): string { + const envOverride = process.env.BROWSE_BIN; + if (envOverride && isExecutable(envOverride)) return envOverride; + + // Sibling: look relative to this process's binary + // (for when make-pdf and browse live next to each other in dist/) + const selfDir = path.dirname(process.argv[0]); + const siblingCandidates = [ + path.resolve(selfDir, "../browse/dist/browse"), + path.resolve(selfDir, "../../browse/dist/browse"), + path.resolve(selfDir, "../browse"), + ]; + for (const candidate of siblingCandidates) { + if (isExecutable(candidate)) return candidate; + } + + // Global install + const home = os.homedir(); + const globalPath = path.join(home, ".claude/skills/gstack/browse/dist/browse"); + if (isExecutable(globalPath)) return globalPath; + + // PATH lookup + try { + const which = execFileSync("which", ["browse"], { encoding: "utf8" }).trim(); + if (which && isExecutable(which)) return which; + } catch { + // `which` exited non-zero; fall through to error + } + + throw new BrowseClientError( + /* exitCode */ 127, + "resolve", + [ + "browse binary not found.", + "", + "make-pdf needs browse (the gstack Chromium daemon) to render PDFs.", + "Tried:", + ` - $BROWSE_BIN (${envOverride || "unset"})`, + ` - sibling: ${siblingCandidates.join(", ")}`, + ` - global: ${globalPath}`, + " - PATH: `browse`", + "", + "To fix: run gstack setup from the gstack repo:", + " cd ~/.claude/skills/gstack && ./setup", + "", + "Or set BROWSE_BIN explicitly:", + " export BROWSE_BIN=/path/to/browse", + ].join("\n"), + ); +} + +function isExecutable(p: string): boolean { + try { + fs.accessSync(p, fs.constants.X_OK); + return true; + } catch { + return false; + } +} + +/** + * Run a browse command. Returns stdout on success. + * Throws BrowseClientError on non-zero exit. + */ +function runBrowse(args: string[]): string { + const bin = resolveBrowseBin(); + try { + return execFileSync(bin, args, { + encoding: "utf8", + maxBuffer: 16 * 1024 * 1024, // 16MB; tab content can be large + stdio: ["ignore", "pipe", "pipe"], + }); + } catch (err: any) { + const exitCode = typeof err.status === "number" ? err.status : 1; + const stderr = typeof err.stderr === "string" + ? err.stderr + : (err.stderr?.toString() ?? ""); + throw new BrowseClientError(exitCode, args[0] || "unknown", stderr); + } +} + +/** + * Write a payload to a tmp file and return the path. Used for any payload + * >4KB to avoid Windows argv limits (Codex round 2 #3). + */ +function writePayloadFile(payload: Record): string { + const hash = crypto.createHash("sha256") + .update(JSON.stringify(payload)) + .digest("hex") + .slice(0, 12); + const tmpPath = path.join(os.tmpdir(), `make-pdf-browse-${process.pid}-${hash}.json`); + fs.writeFileSync(tmpPath, JSON.stringify(payload), "utf8"); + return tmpPath; +} + +function cleanupPayloadFile(p: string): void { + try { fs.unlinkSync(p); } catch { /* best-effort */ } +} + +// ─── Public API ───────────────────────────────────────────────── + +/** + * Open a new tab. Returns the tabId. + * Requires `$B newtab --json` to be available (added in the browse flag + * extension for this feature). If --json isn't supported yet, the fallback + * parses "Opened tab N" from stdout. + */ +export function newtab(url?: string): number { + const args = ["newtab"]; + if (url) args.push(url); + // Try --json first (preferred path for programmatic use) + try { + const out = runBrowse([...args, "--json"]); + const parsed = JSON.parse(out); + if (typeof parsed.tabId === "number") return parsed.tabId; + } catch { + // Fall back to stdout-string parsing. Brittle, but works on older browse builds. + } + const out = runBrowse(args); + const m = out.match(/tab\s+(\d+)/i); + if (!m) throw new BrowseClientError(1, "newtab", `could not parse tab id from: ${out}`); + return parseInt(m[1], 10); +} + +/** + * Close a tab (by id or the active tab). + */ +export function closetab(tabId?: number): void { + const args = ["closetab"]; + if (tabId !== undefined) args.push(String(tabId)); + runBrowse(args); +} + +/** + * Load raw HTML into a specific tab. + * Uses --from-file for any payload >4KB (Codex round 2 #3). + */ +export function loadHtml(opts: LoadHtmlOptions): void { + // Always use --from-file to dodge argv limits. The HTML is almost always >4KB. + const payload = { + html: opts.html, + waitUntil: opts.waitUntil ?? "domcontentloaded", + }; + const payloadFile = writePayloadFile(payload); + try { + runBrowse([ + "load-html", + "--from-file", payloadFile, + "--tab-id", String(opts.tabId), + ]); + } finally { + cleanupPayloadFile(payloadFile); + } +} + +/** + * Evaluate a JS expression in a tab. Returns the serialized result as string. + */ +export function js(opts: JsOptions): string { + return runBrowse([ + "js", + opts.expression, + "--tab-id", String(opts.tabId), + ]).trim(); +} + +/** + * Poll a boolean JS expression until it evaluates to true, or timeout. + * Returns true if it succeeded, false if timed out. + */ +export function waitForExpression(opts: { + expression: string; + tabId: number; + timeoutMs: number; + pollIntervalMs?: number; +}): boolean { + const poll = opts.pollIntervalMs ?? 200; + const deadline = Date.now() + opts.timeoutMs; + while (Date.now() < deadline) { + try { + const result = js({ expression: opts.expression, tabId: opts.tabId }); + if (result === "true") return true; + } catch { + // Tab may still be loading; keep polling + } + const wait = Math.min(poll, Math.max(0, deadline - Date.now())); + if (wait <= 0) break; + // Synchronous sleep is fine — this only runs once per PDF render + const end = Date.now() + wait; + while (Date.now() < end) { /* busy wait */ } + } + return false; +} + +/** + * Generate a PDF from the given tab. Uses --from-file when header/footer + * templates are present (they can be HTML strings of arbitrary size). + */ +export function pdf(opts: PdfOptions): void { + // If any large payload is present, send via --from-file + const hasLargePayload = + (opts.headerTemplate && opts.headerTemplate.length > 1024) || + (opts.footerTemplate && opts.footerTemplate.length > 1024); + + if (hasLargePayload) { + const payloadFile = writePayloadFile({ + output: opts.output, + tabId: opts.tabId, + ...optionsToPdfFlags(opts), + }); + try { + runBrowse(["pdf", "--from-file", payloadFile]); + } finally { + cleanupPayloadFile(payloadFile); + } + return; + } + + // Small payload: pass flags via argv + const args = ["pdf", opts.output, "--tab-id", String(opts.tabId)]; + pushFlagsFromOptions(args, opts); + runBrowse(args); +} + +function optionsToPdfFlags(opts: PdfOptions): Record { + // Shape mirrors what the browse `pdf` case expects when reading --from-file + const out: Record = {}; + if (opts.format) out.format = opts.format; + if (opts.width) out.width = opts.width; + if (opts.height) out.height = opts.height; + if (opts.marginTop) out.marginTop = opts.marginTop; + if (opts.marginRight) out.marginRight = opts.marginRight; + if (opts.marginBottom) out.marginBottom = opts.marginBottom; + if (opts.marginLeft) out.marginLeft = opts.marginLeft; + if (opts.headerTemplate !== undefined) out.headerTemplate = opts.headerTemplate; + if (opts.footerTemplate !== undefined) out.footerTemplate = opts.footerTemplate; + if (opts.pageNumbers !== undefined) out.pageNumbers = opts.pageNumbers; + if (opts.tagged !== undefined) out.tagged = opts.tagged; + if (opts.outline !== undefined) out.outline = opts.outline; + if (opts.printBackground !== undefined) out.printBackground = opts.printBackground; + if (opts.preferCSSPageSize !== undefined) out.preferCSSPageSize = opts.preferCSSPageSize; + if (opts.toc !== undefined) out.toc = opts.toc; + return out; +} + +function pushFlagsFromOptions(args: string[], opts: PdfOptions): void { + if (opts.format) { args.push("--format", opts.format); } + if (opts.width) { args.push("--width", opts.width); } + if (opts.height) { args.push("--height", opts.height); } + if (opts.marginTop) { args.push("--margin-top", opts.marginTop); } + if (opts.marginRight) { args.push("--margin-right", opts.marginRight); } + if (opts.marginBottom) { args.push("--margin-bottom", opts.marginBottom); } + if (opts.marginLeft) { args.push("--margin-left", opts.marginLeft); } + if (opts.headerTemplate !== undefined) { + args.push("--header-template", opts.headerTemplate); + } + if (opts.footerTemplate !== undefined) { + args.push("--footer-template", opts.footerTemplate); + } + if (opts.pageNumbers === true) args.push("--page-numbers"); + if (opts.tagged === true) args.push("--tagged"); + if (opts.outline === true) args.push("--outline"); + if (opts.printBackground === true) args.push("--print-background"); + if (opts.preferCSSPageSize === true) args.push("--prefer-css-page-size"); + if (opts.toc === true) args.push("--toc"); +} diff --git a/make-pdf/src/cli.ts b/make-pdf/src/cli.ts new file mode 100644 index 00000000..62a3b948 --- /dev/null +++ b/make-pdf/src/cli.ts @@ -0,0 +1,256 @@ +#!/usr/bin/env bun +/** + * make-pdf CLI — argv parse, dispatch, exit. + * + * Output contract (per CEO plan DX spec): + * stdout: ONLY the output path on success. One line. Nothing else. + * stderr: progress spinner per stage, final "Done in Xs. N pages." + * --quiet: suppress progress. Errors still print. + * --verbose: per-stage timings. + * exit 0 success / 1 bad args / 2 render error / 3 Paged.js timeout / 4 browse unavailable. + */ + +import { COMMANDS } from "./commands"; +import { ExitCode, BrowseClientError } from "./types"; +import type { GenerateOptions, PreviewOptions } from "./types"; + +interface ParsedArgs { + command: string; + positional: string[]; + flags: Record; +} + +function parseArgs(argv: string[]): ParsedArgs { + const args = argv.slice(2); + if (args.length === 0) { + printUsage(); + process.exit(ExitCode.Success); + } + + // First non-flag arg is the command. + let command = ""; + const positional: string[] = []; + const flags: Record = {}; + + for (let i = 0; i < args.length; i++) { + const a = args[i]; + if (a.startsWith("--")) { + const key = a.slice(2); + const next = args[i + 1]; + if (next !== undefined && !next.startsWith("--")) { + flags[key] = next; + i++; + } else { + flags[key] = true; + } + } else if (!command) { + command = a; + } else { + positional.push(a); + } + } + + return { command, positional, flags }; +} + +function printUsage(): void { + const lines = [ + "make-pdf — turn markdown into publication-quality PDFs", + "", + "Usage:", + ]; + for (const [name, info] of COMMANDS) { + lines.push(` $P ${info.usage}`); + lines.push(` ${info.description}`); + } + lines.push(""); + lines.push("Page layout:"); + lines.push(" --margins All four margins (default: 1in). in, pt, cm, mm."); + lines.push(" --page-size letter|a4|legal (aliases: --format)"); + lines.push(""); + lines.push("Document structure:"); + lines.push(" --cover Add a cover page."); + lines.push(" --toc Generate clickable table of contents."); + lines.push(" --no-chapter-breaks Don't start a new page at every H1."); + lines.push(""); + lines.push("Branding:"); + lines.push(" --watermark Diagonal watermark on every page."); + lines.push(" --header-template "); + lines.push(" --footer-template Mutex with --page-numbers."); + lines.push(" --no-confidential Suppress the CONFIDENTIAL footer."); + lines.push(""); + lines.push("Output control:"); + lines.push(" --page-numbers / --no-page-numbers (default: on)"); + lines.push(" --tagged / --no-tagged (default: on, accessible PDF)"); + lines.push(" --outline / --no-outline (default: on, PDF bookmarks)"); + lines.push(" --quiet Suppress progress on stderr."); + lines.push(" --verbose Per-stage timings on stderr."); + lines.push(""); + lines.push("Network:"); + lines.push(" --allow-network Load external images (off by default)."); + lines.push(""); + lines.push("Examples:"); + lines.push(" $P generate letter.md"); + lines.push(" $P generate --cover --toc essay.md essay.pdf"); + lines.push(" $P generate --watermark DRAFT memo.md draft.pdf"); + lines.push(" $P preview letter.md"); + lines.push(""); + lines.push("Run `$P setup` to verify browse + Chromium + pdftotext install."); + console.error(lines.join("\n")); +} + +function generateOptionsFromFlags(parsed: ParsedArgs): GenerateOptions { + const p = parsed.positional; + if (p.length === 0) { + console.error("$P generate: missing "); + console.error("Usage: $P generate [output.pdf] [options]"); + process.exit(ExitCode.BadArgs); + } + const f = parsed.flags; + const booleanFlag = (key: string, def: boolean): boolean => { + if (f[key] === true) return true; + if (f[`no-${key}`] === true) return false; + return def; + }; + return { + input: p[0], + output: p[1], + margins: f.margins as string | undefined, + marginTop: f["margin-top"] as string | undefined, + marginRight: f["margin-right"] as string | undefined, + marginBottom: f["margin-bottom"] as string | undefined, + marginLeft: f["margin-left"] as string | undefined, + pageSize: ((f["page-size"] ?? f.format) as any), + cover: f.cover === true, + toc: f.toc === true, + noChapterBreaks: f["no-chapter-breaks"] === true, + watermark: typeof f.watermark === "string" ? f.watermark : undefined, + headerTemplate: typeof f["header-template"] === "string" + ? f["header-template"] : undefined, + footerTemplate: typeof f["footer-template"] === "string" + ? f["footer-template"] : undefined, + confidential: booleanFlag("confidential", true), + pageNumbers: booleanFlag("page-numbers", true), + tagged: booleanFlag("tagged", true), + outline: booleanFlag("outline", true), + quiet: f.quiet === true, + verbose: f.verbose === true, + allowNetwork: f["allow-network"] === true, + title: typeof f.title === "string" ? f.title : undefined, + author: typeof f.author === "string" ? f.author : undefined, + date: typeof f.date === "string" ? f.date : undefined, + }; +} + +function previewOptionsFromFlags(parsed: ParsedArgs): PreviewOptions { + const p = parsed.positional; + if (p.length === 0) { + console.error("$P preview: missing "); + console.error("Usage: $P preview [options]"); + process.exit(ExitCode.BadArgs); + } + const f = parsed.flags; + const booleanFlag = (key: string, def: boolean): boolean => { + if (f[key] === true) return true; + if (f[`no-${key}`] === true) return false; + return def; + }; + return { + input: p[0], + cover: f.cover === true, + toc: f.toc === true, + watermark: typeof f.watermark === "string" ? f.watermark : undefined, + noChapterBreaks: f["no-chapter-breaks"] === true, + confidential: booleanFlag("confidential", true), + allowNetwork: f["allow-network"] === true, + title: typeof f.title === "string" ? f.title : undefined, + author: typeof f.author === "string" ? f.author : undefined, + date: typeof f.date === "string" ? f.date : undefined, + quiet: f.quiet === true, + verbose: f.verbose === true, + }; +} + +async function main(): Promise { + const parsed = parseArgs(process.argv); + + if (!parsed.command) { + printUsage(); + process.exit(ExitCode.BadArgs); + } + + if (!COMMANDS.has(parsed.command)) { + console.error(`$P: unknown command: ${parsed.command}`); + console.error(""); + printUsage(); + process.exit(ExitCode.BadArgs); + } + + try { + switch (parsed.command) { + case "version": { + // Read from VERSION file or fall back to a hard-coded default. + try { + const fs = await import("node:fs"); + const path = await import("node:path"); + const versionFile = path.resolve( + path.dirname(process.argv[1] || ""), + "../../VERSION", + ); + const version = fs.readFileSync(versionFile, "utf8").trim(); + console.log(version); + } catch { + console.log("make-pdf (version unknown)"); + } + process.exit(ExitCode.Success); + } + + case "setup": { + const { runSetup } = await import("./setup"); + await runSetup(); + process.exit(ExitCode.Success); + } + + case "generate": { + const opts = generateOptionsFromFlags(parsed); + const { generate } = await import("./orchestrator"); + const outputPath = await generate(opts); + // Contract: stdout = output path only + console.log(outputPath); + process.exit(ExitCode.Success); + } + + case "preview": { + const opts = previewOptionsFromFlags(parsed); + const { preview } = await import("./orchestrator"); + const htmlPath = await preview(opts); + console.log(htmlPath); + process.exit(ExitCode.Success); + } + + default: + // Unreachable: COMMANDS.has guarded above + process.exit(ExitCode.BadArgs); + } + } catch (err: any) { + if (err instanceof BrowseClientError) { + console.error(`$P: ${err.message}`); + process.exit(ExitCode.BrowseUnavailable); + } + if (err?.code === "ENOENT") { + console.error(`$P: file not found: ${err.path ?? err.message}`); + process.exit(ExitCode.BadArgs); + } + if (err?.name === "PagedJsTimeout") { + console.error(`$P: ${err.message}`); + process.exit(ExitCode.PagedJsTimeout); + } + console.error(`$P: ${err?.message ?? String(err)}`); + if (parsed.flags.verbose && err?.stack) { + console.error(err.stack); + } + process.exit(ExitCode.RenderError); + } +} + +main(); diff --git a/make-pdf/src/commands.ts b/make-pdf/src/commands.ts new file mode 100644 index 00000000..a5e781d1 --- /dev/null +++ b/make-pdf/src/commands.ts @@ -0,0 +1,62 @@ +/** + * Command registry for make-pdf — single source of truth. + * + * Dependency graph: + * commands.ts ──▶ cli.ts (runtime dispatch) + * ──▶ gen-skill-docs.ts (generates usage table in SKILL.md) + * ──▶ tests (validation) + * + * Zero side effects. Safe to import from build scripts. + */ + +export const COMMANDS = new Map([ + ["generate", { + description: "Render a markdown file to a publication-quality PDF", + usage: "generate [output.pdf] [options]", + category: "Primary", + flags: [ + // Page layout + "--margins", "--margin-top", "--margin-right", "--margin-bottom", "--margin-left", + "--page-size", "--format", + // Structure + "--cover", "--toc", "--no-chapter-breaks", + // Branding + "--watermark", "--header-template", "--footer-template", "--no-confidential", + // Output + "--page-numbers", "--no-page-numbers", "--tagged", "--no-tagged", + "--outline", "--no-outline", "--quiet", "--verbose", + // Network + "--allow-network", + // Metadata + "--title", "--author", "--date", + ], + }], + ["preview", { + description: "Render markdown to HTML and open it in the browser (fast iteration)", + usage: "preview [options]", + category: "Primary", + flags: [ + "--cover", "--toc", "--no-chapter-breaks", "--watermark", + "--no-confidential", "--allow-network", + "--title", "--author", "--date", + "--quiet", "--verbose", + ], + }], + ["setup", { + description: "Verify browse + Chromium + pdftotext, then run a smoke test", + usage: "setup", + category: "Setup", + flags: [], + }], + ["version", { + description: "Print make-pdf version", + usage: "version", + category: "Setup", + flags: [], + }], +]); diff --git a/make-pdf/src/orchestrator.ts b/make-pdf/src/orchestrator.ts new file mode 100644 index 00000000..31710ecf --- /dev/null +++ b/make-pdf/src/orchestrator.ts @@ -0,0 +1,228 @@ +/** + * Orchestrator — ties render, browseClient, and filesystem together. + * + * generate(opts): markdown → PDF on disk. Returns output path. + * preview(opts): markdown → HTML, opens it in a browser. + * + * Progress indication (per DX spec): + * - stdout: ONLY the output path, printed by cli.ts after this returns. + * - stderr: spinner + per-stage status lines, unless opts.quiet. + * - --verbose: stage timings. + * + * Tab lifecycle: every generate opens a dedicated tab via $B newtab --json, + * runs load-html/js/pdf against --tab-id , and closes the tab in a + * try/finally. Parallel $P generate calls never race on the active tab. + */ + +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import * as crypto from "node:crypto"; +import { spawn } from "node:child_process"; + +import { render } from "./render"; +import type { GenerateOptions, PreviewOptions } from "./types"; +import { ExitCode } from "./types"; +import * as browseClient from "./browseClient"; + +class ProgressReporter { + private readonly quiet: boolean; + private readonly verbose: boolean; + private readonly stageStart = new Map(); + private readonly totalStart: number; + constructor(opts: { quiet?: boolean; verbose?: boolean }) { + this.quiet = opts.quiet === true; + this.verbose = opts.verbose === true; + this.totalStart = Date.now(); + } + begin(stage: string): void { + this.stageStart.set(stage, Date.now()); + if (this.quiet) return; + process.stderr.write(`\r\x1b[K${stage}...`); + } + end(stage: string, extra?: string): void { + const start = this.stageStart.get(stage) ?? Date.now(); + const ms = Date.now() - start; + if (this.quiet) return; + if (this.verbose) { + process.stderr.write(`\r\x1b[K${stage} (${ms}ms)${extra ? ` — ${extra}` : ""}\n`); + } + } + done(extra: string): void { + if (this.quiet) return; + const total = ((Date.now() - this.totalStart) / 1000).toFixed(1); + process.stderr.write(`\r\x1b[KDone in ${total}s. ${extra}\n`); + } + fail(stage: string, err: Error): void { + if (!this.quiet) process.stderr.write("\r\x1b[K"); + // Always emit failure info, even in quiet mode — this is an error path. + process.stderr.write(`${stage} failed: ${err.message}\n`); + } +} + +/** + * generate — full pipeline. Returns the output PDF path on success. + */ +export async function generate(opts: GenerateOptions): Promise { + const progress = new ProgressReporter(opts); + const input = path.resolve(opts.input); + + if (!fs.existsSync(input)) { + throw new Error(`input file not found: ${input}`); + } + + const outputPath = path.resolve( + opts.output ?? path.join(os.tmpdir(), `${deriveSlug(input)}.pdf`), + ); + + // Stage 1: read markdown + progress.begin("Reading markdown"); + const markdown = fs.readFileSync(input, "utf8"); + progress.end("Reading markdown"); + + // Stage 2: render HTML + progress.begin("Rendering HTML"); + const rendered = render({ + markdown, + title: opts.title, + author: opts.author, + date: opts.date, + cover: opts.cover, + toc: opts.toc, + watermark: opts.watermark, + noChapterBreaks: opts.noChapterBreaks, + confidential: opts.confidential, + pageSize: opts.pageSize, + margins: opts.margins, + }); + progress.end("Rendering HTML", `${rendered.meta.wordCount} words`); + + // Stage 3: write HTML to a tmp file browse can read + // (We don't actually write it; we pass inline via --from-file JSON.) + // But for preview mode and debugging, we still write to tmp. + const htmlTmp = tmpFile("html"); + fs.writeFileSync(htmlTmp, rendered.html, "utf8"); + + // Stage 4: spin up a dedicated tab, load HTML, (wait for Paged.js if TOC), + // then emit PDF. Always close the tab. + progress.begin("Opening tab"); + const tabId = browseClient.newtab(); + progress.end("Opening tab", `tabId=${tabId}`); + + try { + progress.begin("Loading HTML into Chromium"); + browseClient.loadHtml({ + html: rendered.html, + waitUntil: "domcontentloaded", + tabId, + }); + progress.end("Loading HTML into Chromium"); + + if (opts.toc) { + progress.begin("Paginating with Paged.js"); + // Browse's $B pdf already waits internally when --toc is passed. + // We pass toc=true to browseClient.pdf() below. + progress.end("Paginating with Paged.js", "Paged.js after"); + } + + progress.begin("Generating PDF"); + browseClient.pdf({ + output: outputPath, + tabId, + format: opts.pageSize ?? "letter", + marginTop: opts.marginTop ?? opts.margins ?? "1in", + marginRight: opts.marginRight ?? opts.margins ?? "1in", + marginBottom: opts.marginBottom ?? opts.margins ?? "1in", + marginLeft: opts.marginLeft ?? opts.margins ?? "1in", + headerTemplate: opts.headerTemplate, + footerTemplate: opts.footerTemplate, + pageNumbers: opts.pageNumbers !== false && !opts.footerTemplate, + tagged: opts.tagged !== false, + outline: opts.outline !== false, + printBackground: !!opts.watermark, + toc: opts.toc, + }); + progress.end("Generating PDF"); + + const stat = fs.statSync(outputPath); + const kb = Math.round(stat.size / 1024); + progress.done(`${rendered.meta.wordCount} words · ${kb}KB · ${outputPath}`); + } finally { + // Always clean up the tab — even on crash, timeout, or Chromium hang. + try { + browseClient.closetab(tabId); + } catch { + // best-effort; we already exited the main path + } + // Cleanup tmp HTML + try { fs.unlinkSync(htmlTmp); } catch { /* best-effort */ } + } + + return outputPath; +} + +/** + * preview — render HTML and open it. No PDF round trip. + */ +export async function preview(opts: PreviewOptions): Promise { + const progress = new ProgressReporter(opts); + const input = path.resolve(opts.input); + if (!fs.existsSync(input)) { + throw new Error(`input file not found: ${input}`); + } + + progress.begin("Rendering HTML"); + const markdown = fs.readFileSync(input, "utf8"); + const rendered = render({ + markdown, + title: opts.title, + author: opts.author, + date: opts.date, + cover: opts.cover, + toc: opts.toc, + watermark: opts.watermark, + noChapterBreaks: opts.noChapterBreaks, + confidential: opts.confidential, + }); + progress.end("Rendering HTML", `${rendered.meta.wordCount} words`); + + // Write to a stable path under /tmp so the user can reload in the same tab. + const previewPath = path.join(os.tmpdir(), `make-pdf-preview-${deriveSlug(input)}.html`); + fs.writeFileSync(previewPath, rendered.html, "utf8"); + + progress.begin("Opening preview"); + tryOpen(previewPath); + progress.end("Opening preview"); + + progress.done(`Preview at ${previewPath}`); + return previewPath; +} + +// ─── helpers ────────────────────────────────────────────── + +function deriveSlug(p: string): string { + const base = path.basename(p).replace(/\.[^.]+$/, ""); + return base.replace(/[^a-zA-Z0-9-_]+/g, "-").slice(0, 64) || "document"; +} + +function tmpFile(ext: string): string { + const hash = crypto.randomBytes(6).toString("hex"); + return path.join(os.tmpdir(), `make-pdf-${process.pid}-${hash}.${ext}`); +} + +function tryOpen(pathOrUrl: string): void { + const platform = process.platform; + const cmd = platform === "darwin" ? "open" : + platform === "win32" ? "cmd" : + "xdg-open"; + const args = platform === "win32" ? ["/c", "start", "", pathOrUrl] : [pathOrUrl]; + try { + const child = spawn(cmd, args, { detached: true, stdio: "ignore" }); + child.unref(); + } catch { + // Non-fatal; the caller already has the path and will print it. + } +} + +/** Setup-only re-export so cli.ts can dynamic-import without another file. */ +export { ExitCode }; diff --git a/make-pdf/src/pdftotext.ts b/make-pdf/src/pdftotext.ts new file mode 100644 index 00000000..33e79fc6 --- /dev/null +++ b/make-pdf/src/pdftotext.ts @@ -0,0 +1,254 @@ +/** + * pdftotext wrapper — the tool behind the copy-paste CI gate. + * + * Codex round 2 surfaced two real problems we address here: + * + * #18: pdftotext (Poppler) vs pdftotext (Xpdf) vs pdftotext-next vary on + * whitespace, line wrap, Unicode normalization, form feeds, and + * extraction order. Cross-platform exact diffing is a non-starter. + * We normalize aggressively and diff the normalized form. + * + * #19: the regex /(?:\b\w\s){4,}/ only catches one failure shape (letters + * spaced out). It misses word-order corruption, missing whitespace + * between paragraphs, and homoglyph substitution. We add a word-token + * diff and a paragraph-boundary assertion on top. + * + * Resolution order for the pdftotext binary: + * 1. $PDFTOTEXT_BIN env override + * 2. `which pdftotext` on PATH + * 3. standard Homebrew paths on macOS + * 4. throws a friendly "install poppler" error + * + * The wrapper is *optional at runtime*: production renders don't need it. + * Only the CI gate and unit tests invoke pdftotext. + */ + +import { execFileSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +export class PdftotextUnavailableError extends Error { + constructor(message: string) { + super(message); + this.name = "PdftotextUnavailableError"; + } +} + +export interface PdftotextInfo { + bin: string; + version: string; // "pdftotext version 24.02.0" or similar + flavor: "poppler" | "xpdf" | "unknown"; +} + +/** + * Locate pdftotext. Throws PdftotextUnavailableError if none is found. + */ +export function resolvePdftotext(): PdftotextInfo { + const envOverride = process.env.PDFTOTEXT_BIN; + if (envOverride && isExecutable(envOverride)) { + return describeBinary(envOverride); + } + + // Try PATH + try { + const which = execFileSync("which", ["pdftotext"], { encoding: "utf8" }).trim(); + if (which && isExecutable(which)) return describeBinary(which); + } catch { + // fall through + } + + // Common macOS Homebrew locations + const macCandidates = [ + "/opt/homebrew/bin/pdftotext", // Apple Silicon + "/usr/local/bin/pdftotext", // Intel Mac or Linuxbrew + "/usr/bin/pdftotext", // distro package + ]; + for (const candidate of macCandidates) { + if (isExecutable(candidate)) return describeBinary(candidate); + } + + throw new PdftotextUnavailableError([ + "pdftotext not found.", + "", + "make-pdf needs pdftotext to run the copy-paste CI gate.", + "(Runtime rendering does NOT need it. This only affects tests.)", + "", + "To install:", + " macOS: brew install poppler", + " Ubuntu: sudo apt-get install poppler-utils", + " Fedora: sudo dnf install poppler-utils", + "", + "Or set PDFTOTEXT_BIN to an explicit path:", + " export PDFTOTEXT_BIN=/path/to/pdftotext", + ].join("\n")); +} + +function isExecutable(p: string): boolean { + try { + fs.accessSync(p, fs.constants.X_OK); + return true; + } catch { + return false; + } +} + +function describeBinary(bin: string): PdftotextInfo { + let version = "unknown"; + let flavor: PdftotextInfo["flavor"] = "unknown"; + try { + // pdftotext -v writes to stderr and exits 0 on poppler, 99 on some xpdf builds. + const result = execFileSync(bin, ["-v"], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + }); + version = (result || "").trim().split("\n")[0] || "unknown"; + } catch (err: any) { + // Many pdftotext builds exit non-zero on -v but still write to stderr. + const stderr = err?.stderr?.toString?.() ?? ""; + version = stderr.trim().split("\n")[0] || "unknown"; + } + const v = version.toLowerCase(); + if (v.includes("poppler")) flavor = "poppler"; + else if (v.includes("xpdf")) flavor = "xpdf"; + return { bin, version, flavor }; +} + +/** + * Run pdftotext on a PDF and return the extracted text. + * + * Uses `-layout` by default because that's what downstream normalization + * expects. Callers that need raw text can pass layout=false. + */ +export function pdftotext(pdfPath: string, opts?: { layout?: boolean }): string { + const info = resolvePdftotext(); + const layout = opts?.layout ?? true; + const args: string[] = []; + if (layout) args.push("-layout"); + args.push(pdfPath, "-"); // "-" = stdout + try { + return execFileSync(info.bin, args, { + encoding: "utf8", + maxBuffer: 32 * 1024 * 1024, + }); + } catch (err: any) { + throw new Error(`pdftotext failed on ${pdfPath}: ${err.message}`); + } +} + +/** + * Normalize extracted text for cross-platform, cross-flavor diffing. + * + * What we strip / normalize: + * - Unicode: NFC canonical composition (macOS emits NFD; Linux emits NFC; + * this dodges the fundamental encoding diff). + * - CR and CRLF → LF (Windows Xpdf emits CRLF). + * - Form feeds (\f) → double newline (Poppler emits \f at page breaks). + * - Trailing spaces on every line. + * - Runs of 3+ blank lines → 2 blank lines. + * - Leading/trailing whitespace on the whole string. + * - Non-breaking space (U+00A0) → regular space. + * - Zero-width space (U+200B) and zero-width non-joiner (U+200C) → empty. + * - Soft hyphen (U+00AD) → empty (pdftotext -layout sometimes emits these + * for hyphens: auto breaks). + */ +export function normalize(raw: string): string { + let s = raw; + s = s.normalize("NFC"); + s = s.replace(/\r\n/g, "\n"); + s = s.replace(/\r/g, "\n"); + s = s.replace(/\f/g, "\n\n"); + s = s.replace(/\u00a0/g, " "); + s = s.replace(/[\u200b\u200c\u00ad]/g, ""); + s = s.replace(/[ \t]+$/gm, ""); + s = s.replace(/\n{3,}/g, "\n\n"); + s = s.trim(); + return s; +} + +/** + * The canonical copy-paste gate used in the E2E tests. + * + * Returns { ok: true } when all three assertions pass; returns + * { ok: false, reasons: [...] } with one or more failure reasons otherwise. + */ +export interface GateResult { + ok: boolean; + reasons: string[]; + extracted: string; +} + +export function copyPasteGate(pdfPath: string, expected: string): GateResult { + const extracted = normalize(pdftotext(pdfPath, { layout: true })); + const expectedNorm = normalize(expected); + const reasons: string[] = []; + + // Assertion 1: every expected paragraph appears as a whole line or + // contiguous block in the extracted text. + const expectedParagraphs = splitParagraphs(expectedNorm); + for (const paragraph of expectedParagraphs) { + const compact = collapseWhitespace(paragraph); + const extractedCompact = collapseWhitespace(extracted); + if (!extractedCompact.includes(compact)) { + reasons.push( + `expected paragraph not found in extracted text: ${truncate(paragraph, 80)}`, + ); + } + } + + // Assertion 2: no "S a i l i n g"-style single-char runs. + // Count groups of 4+ consecutive letter-then-space tokens. False positive + // risk on things like "A B C D" (initials) — mitigate by requiring the + // letters spell a known-word substring of the expected text. + const fragRegex = /((?:\b\w\s){4,})/g; + let fragMatch: RegExpExecArray | null; + while ((fragMatch = fragRegex.exec(extracted)) !== null) { + const letters = fragMatch[1].replace(/\s/g, ""); + // Only flag if the reassembled letters appear in the expected text. + if (expectedNorm.toLowerCase().includes(letters.toLowerCase()) && letters.length >= 4) { + reasons.push( + `per-glyph emission detected (the "S ai li ng" bug): "${fragMatch[1].trim()}" reassembles to "${letters}"`, + ); + } + } + + // Assertion 3: paragraph boundaries preserved. Count double-newlines + // in both; they should differ by no more than ±2 (header/footer noise). + const expectedBreaks = (expectedNorm.match(/\n\n/g) || []).length; + const extractedBreaks = (extracted.match(/\n\n/g) || []).length; + if (Math.abs(expectedBreaks - extractedBreaks) > 4) { + reasons.push( + `paragraph boundary count drift: expected ~${expectedBreaks}, got ${extractedBreaks}`, + ); + } + + return { ok: reasons.length === 0, reasons, extracted }; +} + +function splitParagraphs(s: string): string[] { + return s.split(/\n\n+/).map(p => p.trim()).filter(p => p.length > 0); +} + +function collapseWhitespace(s: string): string { + return s.replace(/\s+/g, " ").trim(); +} + +function truncate(s: string, n: number): string { + return s.length > n ? s.slice(0, n) + "..." : s; +} + +/** + * Emit diagnostic info to stderr — useful for CI failure debugging. + * Call this once before running any gate in a CI log. + */ +export function logDiagnostics(): void { + try { + const info = resolvePdftotext(); + process.stderr.write( + `[pdftotext] bin=${info.bin} flavor=${info.flavor} version="${info.version}" ` + + `os=${os.platform()}-${os.arch()} node=${process.version}\n`, + ); + } catch (err: any) { + process.stderr.write(`[pdftotext] unavailable: ${err.message}\n`); + } +} diff --git a/make-pdf/src/print-css.ts b/make-pdf/src/print-css.ts new file mode 100644 index 00000000..a4b71dae --- /dev/null +++ b/make-pdf/src/print-css.ts @@ -0,0 +1,350 @@ +/** + * Print stylesheet generator. + * + * Source of truth: .context/designs/make-pdf-print-reference.html and siblings. + * Mirror those CSS rules here. The HTML references were approved via + * /plan-design-review with explicit design decisions locked in the plan: + * + * - Helvetica only (system font, no bundled webfonts — dodges the + * per-glyph Tj bug that breaks copy-paste extraction). + * - All paragraphs flush-left. No first-line indent, no justify, no + * p+p indent. text-align: left everywhere. 12pt margin-bottom. + * - Cover page has the same 1in margins as every other page. No flexbox + * center, no inset padding, no vertical centering. Distinction comes + * from eyebrow + larger title + hairline rule, not from centering. + * - `@page :first` suppresses running header/footer but does NOT override + * the 1in margin. + * - No , no external CSS/fonts — everything inlined. + * - CJK fallback: Helvetica, Arial, Hiragino Kaku Gothic ProN, Noto Sans + * CJK JP, Microsoft YaHei, sans-serif. + */ + +export interface PrintCssOptions { + // Document structure + cover?: boolean; + toc?: boolean; + noChapterBreaks?: boolean; + + // Branding + watermark?: string; + confidential?: boolean; + + // Header (running title, top of page) + runningHeader?: string; + + // Page size (in CSS `@page size:` terms) + pageSize?: "letter" | "a4" | "legal" | "tabloid"; + + // Margins (default 1in) + margins?: string; +} + +/** + * Produce a CSS block (no `, + ``, + ``, + watermarkBlock, + coverBlock, + tocBlock, + chapterHtml, + ``, + ``, + ].filter(Boolean).join("\n"); + + return { + html: fullHtml, + printCss: css, + bodyHtml: typographicHtml, + meta: { + title: derivedTitle, + author: derivedAuthor, + date: derivedDate, + wordCount: countWords(stripTags(typographicHtml)), + }, + }; +} + +/** + * Decode the HTML entities that marked emits for text-node quotes/apostrophes. + * Only the four that matter for smartypants — leaves & alone because it + * can be legitimately doubled (&amp;) and we don't want to double-decode. + */ +function decodeTypographicEntities(html: string): string { + return html + .replace(/"/g, "\"") + .replace(/'/g, "'") + .replace(/'/g, "'") + .replace(/'/g, "'"); +} + +// ─── Sanitizer ──────────────────────────────────────────────────────── + +/** + * Strip dangerous HTML from markdown-produced output. + * + * We can't use DOMPurify (server-side; adds a jsdom dep). A conservative + * regex sanitizer is fine for this use case because: + * 1. marked produces structured HTML (never malformed) + * 2. we only need to strip a fixed blacklist of elements + attrs + * 3. the output goes through Chromium's parser again, which normalizes + * + * What's stripped: + * -

world

`; + const out = sanitizeUntrustedHtml(input); + expect(out).not.toContain("hello

"); + expect(out).toContain("

world

"); + }); + + test("strips `; + expect(sanitizeUntrustedHtml(input)).not.toContain(" { + const input = `click`; + const out = sanitizeUntrustedHtml(input); + expect(out).not.toContain("onclick"); + expect(out).toContain("href=\"#\""); + }); + + test("strips event handlers with mixed case (onClick, ONCLICK)", () => { + const input1 = `a`; + const input2 = `b`; + expect(sanitizeUntrustedHtml(input1)).not.toContain("onClick"); + expect(sanitizeUntrustedHtml(input2)).not.toContain("ONCLICK"); + }); + + test("rewrites javascript: URLs in href to #", () => { + const input = `bad`; + const out = sanitizeUntrustedHtml(input); + expect(out).not.toContain("javascript:"); + expect(out).toContain('href="#"'); + }); + + test("strips inline SVG `; + const out = sanitizeUntrustedHtml(input); + expect(out).not.toContain(", , , , ,
", () => { + const input = ` + + + + + +
+ `; + const out = sanitizeUntrustedHtml(input); + expect(out).not.toContain(" { + const input = `
hi
`; + expect(sanitizeUntrustedHtml(input)).not.toContain("srcdoc"); + }); +}); + +// ─── end-to-end render ────────────────────────────────────────────── + +describe("render (end-to-end)", () => { + test("produces a full HTML document with title, body, and CSS", () => { + const result = render({ + markdown: `# Hello\n\nA paragraph with "quotes" and -- dashes.\n`, + }); + expect(result.html).toContain(""); + expect(result.html).toContain("Hello"); + expect(result.html).toContain("... + expect(result.html).toMatch(/