Browse Source

feat(html2pptx): data-pptx-merge container collapses paragraphs into one editable text frame

Closes #21.

By default each <p>/<h*> becomes its own PowerPoint text box, so multi-paragraph
cards exported from HTML produce stacks of separate text frames that are hard to
edit. Adding `data-pptx-merge="true"` to a wrapper <div> now merges all its
<p>/<h*> descendants into a single text frame, with each paragraph emitted as one
or more runs and a breakLine separating consecutive paragraphs.

Per-paragraph fontSize / fontFace / color / bold / italic / underline and inline
<b><i><u><span> formatting are preserved as run options. Text-box-level align and
lineSpacing are taken from the first paragraph, because they are set once on the
merged text frame rather than per run. The container's own background / border /
shadow / border-radius still render as a shape, identical to a normal <div>.

Opt-in by design: HTML without the attribute keeps byte-identical behavior.
Smoke-tested with a fixture containing one merge container and one regular
container — merge produced 1 text frame x 4 paragraphs, regular produced 3
independent text frames as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
alchain 1 tháng trước cách đây
mục cha
commit
c4efae9c7b
2 tập tin đã thay đổi với 234 bổ sung và 2 xóa
  1. 33 0
      references/editable-pptx.md
  2. 201 2
      scripts/html2pptx.js

+ 33 - 0
references/editable-pptx.md

@@ -104,6 +104,39 @@ background: #FF6B6B;
 
 ---
 
+## 合并文本框(`data-pptx-merge`)
+
+**默认行为**:HTML 里每个 `<p>`/`<h1>`-`<h6>` 在 PPTX 里都是**独立文本框**。卡片里写 3 个 `<p>` → PPT 里 3 个文本框摞着,编辑时不能整段回车换行加段,得逐个改字号/对齐。
+
+**解决方法**:给外层 div 加 `data-pptx-merge="true"`,容器内的所有 `<p>/<h*>` 会合并为**一个可编辑文本框**,每段之间用段落分隔符隔开,PPT 里就是一段一段连续编辑。
+
+```html
+<!-- ✅ 合并写法:4 段全部在一个文本框里 -->
+<div class="card" data-pptx-merge="true"
+     style="position: absolute; top: 60pt; left: 60pt; width: 420pt;
+            background: #1A4A8A; border-radius: 8pt; padding: 20pt 24pt;">
+  <h2 style="font-size: 24pt; color: #FFFFFF;">标题</h2>
+  <p  style="font-size: 14pt; color: #DDEEFF;">第一段正文。</p>
+  <p  style="font-size: 14pt; color: #FFD166;">第二段:换颜色作为强调。</p>
+  <p  style="font-size: 14pt; color: #DDEEFF;">第三段:同一个文本框里继续写。</p>
+</div>
+```
+
+**保留的样式**(per-paragraph 作为 run options 写入):`font-size`、`color`、`font-family`、`font-weight`(bold)、`font-style`(italic)、`text-decoration: underline`、`<b>/<i>/<u>/<strong>/<em>/<span>` 内联样式。
+
+**取自第一段、整框统一**:`text-align`、`line-height`。合并后这两项只在文本框级别设置一次,取第一段的计算值,后面各段自己的对齐和行距不会生效——所以一框里只有一种对齐和行距。如果几段对齐不同,请别用 merge,让它们各自独立。
+
+**容器自身的 `background`/`border`/`box-shadow`/`border-radius`** 照常作为 shape 渲染,行为和普通 div 完全一样——也就是说蓝色卡片底 + 文本仍然是「shape + text frame」两层,只是文本层从 3-4 个文本框塌缩成 1 个。
+
+**限制**:
+- 不能嵌套 `data-pptx-merge`(会报错)。
+- 容器不能用 `background-image`(同 4 条硬约束规则 4)。
+- 容器内不要再放有 `background`/`border` 的子 div——它们仍会被当作独立 shape 渲染,但里面的文字已被合并走了,可能产生视觉错位。
+
+**什么时候用**:内容会反复改、要在 PPT 里继续编辑的场景。一次性导出归档的不用加——不加该属性时,导出行为与之前完全一致。
+
+---
+
 ## Path A HTML 模板骨架
 
 每张 slide 一个独立 HTML 文件,彼此作用域隔离(避开单文件 deck 的 CSS 污染)。

+ 201 - 2
scripts/html2pptx.js

@@ -91,8 +91,8 @@ function validateTextBoxPosition(slideData, bodyDimensions) {
   const minBottomMargin = 0.5; // 0.5 inches from bottom
 
   for (const el of slideData.elements) {
-    // Check text elements (p, h1-h6, list)
-    if (['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'list'].includes(el.type)) {
+    // Check text elements (p, h1-h6, list, merged-text)
+    if (['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'list', 'merged-text'].includes(el.type)) {
       const fontSize = el.style?.fontSize || 0;
       const bottomEdge = el.position.y + el.position.h;
       const distanceFromBottom = slideHeightInches - bottomEdge;
@@ -185,6 +185,26 @@ function addElements(slideData, targetSlide, pres) {
       };
       if (el.style.margin) listOptions.margin = el.style.margin;
       targetSlide.addText(el.items, listOptions);
+    } else if (el.type === 'merged-text') {
+      // data-pptx-merge container — all paragraphs in one editable text frame.
+      const mergedOptions = {
+        x: el.position.x,
+        y: el.position.y,
+        w: el.position.w,
+        h: el.position.h,
+        fontSize: el.style.fontSize,
+        fontFace: el.style.fontFace,
+        color: el.style.color,
+        align: el.style.align,
+        valign: 'top',
+        lineSpacing: el.style.lineSpacing,
+        paraSpaceBefore: el.style.paraSpaceBefore,
+        paraSpaceAfter: el.style.paraSpaceAfter,
+        margin: el.style.margin,
+        inset: 0
+      };
+      if (el.style.transparency != null) mergedOptions.transparency = el.style.transparency;
+      targetSlide.addText(el.items, mergedOptions);
     } else {
       // Check if text is single-line (height suggests one line)
       const lineHeight = el.style.lineSpacing || el.style.fontSize * 1.2;
@@ -534,6 +554,185 @@ async function extractSlideData(page) {
     document.querySelectorAll('*').forEach((el) => {
       if (processed.has(el)) return;
 
+      // [data-pptx-merge="true"] — opt-in: merge all <p>/<h1>-<h6> descendants
+      // into ONE PowerPoint text frame (single editable text box).
+      // Each child paragraph becomes a run with breakLine:true at the end;
+      // per-paragraph fontSize/color/bold/italic/underline are preserved as run options.
+      // The container's bg/border (if any) still becomes its own shape, same as a normal div.
+      if (el.tagName === 'DIV' && el.dataset && el.dataset.pptxMerge === 'true') {
+        const containerRect = el.getBoundingClientRect();
+        if (containerRect.width === 0 || containerRect.height === 0) {
+          processed.add(el);
+          return;
+        }
+
+        // Reject nested merge containers — undefined behavior.
+        if (el.querySelector('[data-pptx-merge="true"]')) {
+          errors.push(
+            `data-pptx-merge container cannot contain another data-pptx-merge container. ` +
+            'Nested merge is not supported.'
+          );
+          processed.add(el);
+          return;
+        }
+
+        const mergeComputed = window.getComputedStyle(el);
+
+        // Container background image — same restriction as regular divs.
+        if (mergeComputed.backgroundImage && mergeComputed.backgroundImage !== 'none') {
+          errors.push(
+            'Background images on data-pptx-merge container are not supported. ' +
+            'Use solid colors or borders, or layer images via slide.addImage().'
+          );
+          return;
+        }
+
+        // Emit a shape for the container's bg/uniform-border (mirrors the regular div branch).
+        const mHasBg = mergeComputed.backgroundColor && mergeComputed.backgroundColor !== 'rgba(0, 0, 0, 0)';
+        const mBorders = [
+          mergeComputed.borderTopWidth,
+          mergeComputed.borderRightWidth,
+          mergeComputed.borderBottomWidth,
+          mergeComputed.borderLeftWidth
+        ].map(b => parseFloat(b) || 0);
+        const mHasBorder = mBorders.some(b => b > 0);
+        const mHasUniformBorder = mHasBorder && mBorders.every(b => b === mBorders[0]);
+
+        if (mHasBg || mHasUniformBorder) {
+          elements.push({
+            type: 'shape',
+            text: '',
+            position: {
+              x: pxToInch(containerRect.left),
+              y: pxToInch(containerRect.top),
+              w: pxToInch(containerRect.width),
+              h: pxToInch(containerRect.height)
+            },
+            shape: {
+              fill: mHasBg ? rgbToHex(mergeComputed.backgroundColor) : null,
+              transparency: mHasBg ? extractAlpha(mergeComputed.backgroundColor) : null,
+              line: mHasUniformBorder ? {
+                color: rgbToHex(mergeComputed.borderColor),
+                width: pxToPoints(mergeComputed.borderWidth)
+              } : null,
+              rectRadius: (() => {
+                const radius = mergeComputed.borderRadius;
+                const radiusValue = parseFloat(radius);
+                if (radiusValue === 0) return 0;
+                if (radius.includes('%')) {
+                  if (radiusValue >= 50) return 1;
+                  const minDim = Math.min(containerRect.width, containerRect.height);
+                  return (radiusValue / 100) * pxToInch(minDim);
+                }
+                if (radius.includes('pt')) return radiusValue / 72;
+                return radiusValue / PX_PER_IN;
+              })(),
+              shadow: parseBoxShadow(mergeComputed.boxShadow)
+            }
+          });
+        }
+
+        // Collect <p>/<h*> descendants in document order.
+        const textDescendants = Array.from(el.querySelectorAll('p, h1, h2, h3, h4, h5, h6'));
+        if (textDescendants.length === 0) {
+          errors.push(
+            `data-pptx-merge container has no <p>/<h*> children to merge. ` +
+            'Remove the data-pptx-merge attribute or add text elements.'
+          );
+          processed.add(el);
+          return;
+        }
+
+        // Use the first text element's computed style as the textbox-level base
+        // (align / lineSpacing / paraSpace are paragraph/textbox-level in pptxgenjs, not per-run).
+        const firstComputed = window.getComputedStyle(textDescendants[0]);
+        const baseStyle = {
+          fontSize: pxToPoints(firstComputed.fontSize),
+          fontFace: firstComputed.fontFamily.split(',')[0].replace(/['"]/g, '').trim(),
+          color: rgbToHex(firstComputed.color),
+          align: firstComputed.textAlign === 'start' ? 'left' : firstComputed.textAlign,
+          lineSpacing: firstComputed.lineHeight && firstComputed.lineHeight !== 'normal'
+            ? pxToPoints(firstComputed.lineHeight)
+            : null,
+          paraSpaceBefore: 0,
+          paraSpaceAfter: 0,
+          // Container padding becomes the textbox internal margin (PptxGenJS: [left, right, bottom, top]).
+          margin: [
+            pxToPoints(mergeComputed.paddingLeft),
+            pxToPoints(mergeComputed.paddingRight),
+            pxToPoints(mergeComputed.paddingBottom),
+            pxToPoints(mergeComputed.paddingTop)
+          ]
+        };
+        const baseTransparency = extractAlpha(firstComputed.color);
+        if (baseTransparency !== null) baseStyle.transparency = baseTransparency;
+
+        // Build the merged runs.
+        const mergedRuns = [];
+        textDescendants.forEach((textEl, idx) => {
+          const isLast = idx === textDescendants.length - 1;
+          const tComputed = window.getComputedStyle(textEl);
+          const transformStr = tComputed.textTransform;
+
+          // Per-paragraph style overrides — only include if they differ from base.
+          const elemFontSize = pxToPoints(tComputed.fontSize);
+          const elemFontFace = tComputed.fontFamily.split(',')[0].replace(/['"]/g, '').trim();
+          const elemColor = rgbToHex(tComputed.color);
+          const elemBold = tComputed.fontWeight === 'bold' || parseInt(tComputed.fontWeight) >= 600;
+          const elemItalic = tComputed.fontStyle === 'italic';
+          const elemUnderline = tComputed.textDecoration.includes('underline');
+
+          const runBaseOptions = {};
+          if (elemFontSize !== baseStyle.fontSize) runBaseOptions.fontSize = elemFontSize;
+          if (elemFontFace !== baseStyle.fontFace) runBaseOptions.fontFace = elemFontFace;
+          if (elemColor !== baseStyle.color) runBaseOptions.color = elemColor;
+          if (elemBold && !shouldSkipBold(tComputed.fontFamily)) runBaseOptions.bold = true;
+          if (elemItalic) runBaseOptions.italic = true;
+          if (elemUnderline) runBaseOptions.underline = true;
+
+          const hasInline = textEl.querySelector('b, i, u, strong, em, span, br');
+          let runs;
+          if (hasInline) {
+            runs = parseInlineFormatting(
+              textEl,
+              runBaseOptions,
+              [],
+              (str) => applyTextTransform(str, transformStr)
+            );
+          } else {
+            const txt = applyTextTransform(textEl.textContent.trim(), transformStr);
+            if (!txt) return;
+            runs = [{ text: txt, options: { ...runBaseOptions } }];
+          }
+
+          if (runs.length > 0 && !isLast) {
+            runs[runs.length - 1].options.breakLine = true;
+          }
+          mergedRuns.push(...runs);
+          processed.add(textEl);
+        });
+
+        if (mergedRuns.length === 0) {
+          processed.add(el);
+          return;
+        }
+
+        elements.push({
+          type: 'merged-text',
+          items: mergedRuns,
+          position: {
+            x: pxToInch(containerRect.left),
+            y: pxToInch(containerRect.top),
+            w: pxToInch(containerRect.width),
+            h: pxToInch(containerRect.height)
+          },
+          style: baseStyle
+        });
+
+        processed.add(el);
+        return;
+      }
+
       // Validate text elements don't have backgrounds, borders, or shadows
       if (textTags.includes(el.tagName)) {
         const computed = window.getComputedStyle(el);