2 ay önce · 43ec5b9ad4
--- a/SKILL.md
+++ b/SKILL.md
@@ -253,18 +253,18 @@ spawn subagent时，用以下结构给任务（以Agent 1著作为例）：
 
				 #### 工具辅助（如可用）
			
 
				 - 书籍：Z-Library/LibGen搜索下载 → 存入 `sources/books/`
			
 
				 - 视频字幕获取（已提供脚本，直接调用）：
			
 
				-  - **Step 1 下载字幕**：`bash [skill目录]/references/download_subtitles.sh <YouTube_URL> [输出目录]`
			
 
				+  - **Step 1 下载字幕**：`bash [skill目录]/scripts/download_subtitles.sh <YouTube_URL> [输出目录]`
			
 
				     - 自动优先人工字幕 → 中文 → 英文 → 自动生成字幕
			
 
				     - 输出SRT/VTT文件到指定目录
			
 
				-  - **Step 2 清洗为纯文本**：`python3 [skill目录]/references/srt_to_transcript.py <input.srt> [output.txt]`
			
 
				+  - **Step 2 清洗为纯文本**：`python3 [skill目录]/scripts/srt_to_transcript.py <input.srt> [output.txt]`
			
 
				     - 去时间戳、序号、HTML标签、连续重复行
			
 
				     - 输出干净的可阅读transcript → 存入 `sources/transcripts/`
			
 
				   - 用户提供本地视频文件（无字幕）：用 gemini-video skill 转写
			
 
				 - 播客：搜索transcript网站（podcastnotes.org等）
			
 
				-- 调研摘要生成（Phase 1.5用）：`python3 [skill目录]/references/merge_research.py <skill目录>`
			
 
				+- 调研摘要生成（Phase 1.5用）：`python3 [skill目录]/scripts/merge_research.py <skill目录>`
			
 
				   - 自动扫描 `references/research/01-06.md`，统计来源数、一手/二手占比、关键发现
			
 
				   - 输出Phase 1.5检查点的摘要表格（纯文本框线表格），无需手动统计
			
 
				-- 质量自检（Phase 4用）：`python3 [skill目录]/references/quality_check.py <SKILL.md路径>`
			
 
				+- 质量自检（Phase 4用）：`python3 [skill目录]/scripts/quality_check.py <SKILL.md路径>`
			
 
				   - 自动检查6项通过标准：心智模型数量、局限性、表达DNA、诚实边界、内在张力、一手来源占比
			
 
				   - 输出逐项PASS/FAIL和总结
			
 
				 
			
--- a/scripts/download_subtitles.sh
+++ b/scripts/download_subtitles.sh
@@ -0,0 +1,55 @@
 
				+#!/bin/bash
			
 
				+# 从YouTube视频下载字幕
			
 
				+# 用法: ./download_subtitles.sh <YouTube_URL> [输出目录]
			
 
				+# 优先下载人工字幕，无人工字幕则下载自动生成字幕
			
 
				+# 语言优先级：中文 > 英文 > 其他
			
 
				+
			
 
				+set -e
			
 
				+
			
 
				+URL="$1"
			
 
				+OUTPUT_DIR="${2:-.}"
			
 
				+
			
 
				+if [ -z "$URL" ]; then
			
 
				+    echo "用法: ./download_subtitles.sh <YouTube_URL> [输出目录]"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+mkdir -p "$OUTPUT_DIR"
			
 
				+
			
 
				+echo ">>> 检查可用字幕..."
			
 
				+yt-dlp --list-subs --no-download "$URL" 2>/dev/null | tail -20
			
 
				+
			
 
				+echo ""
			
 
				+echo ">>> 尝试下载人工字幕（中文优先）..."
			
 
				+
			
 
				+# 尝试1: 人工中文字幕
			
 
				+if yt-dlp --write-subs --sub-langs "zh-Hans,zh-Hant,zh,zh-CN,zh-TW" --sub-format srt --skip-download -o "$OUTPUT_DIR/%(title)s" "$URL" 2>/dev/null; then
			
 
				+    FOUND=$(find "$OUTPUT_DIR" -name "*.srt" -newer /tmp/.ytdlp_marker 2>/dev/null | head -1)
			
 
				+    if [ -n "$FOUND" ]; then
			
 
				+        echo "✅ 下载成功: $FOUND"
			
 
				+        exit 0
			
 
				+    fi
			
 
				+fi
			
 
				+
			
 
				+# 尝试2: 人工英文字幕
			
 
				+echo ">>> 无中文人工字幕，尝试英文..."
			
 
				+if yt-dlp --write-subs --sub-langs "en,en-US,en-GB" --sub-format srt --skip-download -o "$OUTPUT_DIR/%(title)s" "$URL" 2>/dev/null; then
			
 
				+    FOUND=$(find "$OUTPUT_DIR" -name "*.srt" -mmin -1 2>/dev/null | head -1)
			
 
				+    if [ -n "$FOUND" ]; then
			
 
				+        echo "✅ 下载成功: $FOUND"
			
 
				+        exit 0
			
 
				+    fi
			
 
				+fi
			
 
				+
			
 
				+# 尝试3: 自动生成字幕（中文优先）
			
 
				+echo ">>> 无人工字幕，尝试自动生成字幕..."
			
 
				+if yt-dlp --write-auto-subs --sub-langs "zh-Hans,zh,en" --sub-format srt --skip-download -o "$OUTPUT_DIR/%(title)s" "$URL" 2>/dev/null; then
			
 
				+    FOUND=$(find "$OUTPUT_DIR" -name "*.srt" -o -name "*.vtt" 2>/dev/null | head -1)
			
 
				+    if [ -n "$FOUND" ]; then
			
 
				+        echo "✅ 自动字幕下载成功: $FOUND"
			
 
				+        exit 0
			
 
				+    fi
			
 
				+fi
			
 
				+
			
 
				+echo "❌ 未找到任何可用字幕"
			
 
				+exit 1
			
--- a/scripts/merge_research.py
+++ b/scripts/merge_research.py
@@ -0,0 +1,150 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+合并6个Agent的调研结果，生成Phase 1.5调研Review检查点的摘要表格。
			
 
				+扫描 references/research/ 目录下的01-06 md文件，统计每个维度的来源数量、
			
 
				+一手/二手占比、关键发现。
			
 
				+
			
 
				+用法:
			
 
				+    python3 merge_research.py <skill目录路径>
			
 
				+
			
 
				+示例:
			
 
				+    python3 merge_research.py .claude/skills/elon-musk-perspective
			
 
				+
			
 
				+输出: 打印markdown格式的摘要表格到stdout
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+
			
 
				+AGENTS = {
			
 
				+    '01-writings': '著作',
			
 
				+    '02-conversations': '对话',
			
 
				+    '03-expression-dna': '表达',
			
 
				+    '04-external-views': '他者',
			
 
				+    '05-decisions': '决策',
			
 
				+    '06-timeline': '时间线',
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def count_sources(content: str) -> dict:
			
 
				+    """统计来源数量和一手/二手占比"""
			
 
				+    # 计算URL数量作为来源数
			
 
				+    urls = re.findall(r'https?://[^\s\)]+', content)
			
 
				+
			
 
				+    # 检测一手/二手标记
			
 
				+    primary_markers = len(re.findall(r'一手|primary|本人|原文|原始|直接引用', content, re.IGNORECASE))
			
 
				+    secondary_markers = len(re.findall(r'二手|secondary|转述|总结|评论|分析', content, re.IGNORECASE))
			
 
				+
			
 
				+    return {
			
 
				+        'url_count': len(urls),
			
 
				+        'unique_urls': len(set(urls)),
			
 
				+        'primary_markers': primary_markers,
			
 
				+        'secondary_markers': secondary_markers,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def extract_key_findings(content: str, max_items: int = 3) -> list[str]:
			
 
				+    """提取关键发现（取前几个二级标题或加粗项）"""
			
 
				+    # 尝试提取##标题
			
 
				+    headings = re.findall(r'^##\s+(.+)$', content, re.MULTILINE)
			
 
				+    if headings:
			
 
				+        return headings[:max_items]
			
 
				+
			
 
				+    # fallback: 提取加粗项
			
 
				+    bolds = re.findall(r'\*\*(.+?)\*\*', content)
			
 
				+    if bolds:
			
 
				+        return bolds[:max_items]
			
 
				+
			
 
				+    # fallback: 取前3个非空行
			
 
				+    lines = [l.strip() for l in content.split('\n') if l.strip() and not l.startswith('#')]
			
 
				+    return [l[:50] + '...' if len(l) > 50 else l for l in lines[:max_items]]
			
 
				+
			
 
				+
			
 
				+def find_contradictions(files: dict[str, str]) -> list[str]:
			
 
				+    """简单检测跨文件矛盾（同一关键词出现不同判断）"""
			
 
				+    contradictions = []
			
 
				+    # 检测「但是」「然而」「相反」「矛盾」等矛盾标记
			
 
				+    for name, content in files.items():
			
 
				+        matches = re.findall(r'(?:矛盾|相反|但实际上|然而.*?不同|争议).{0,100}', content)
			
 
				+        for m in matches:
			
 
				+            contradictions.append(f"{AGENTS.get(name, name)}: {m[:80]}")
			
 
				+    return contradictions[:5]  # 最多5条
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    if len(sys.argv) < 2:
			
 
				+        print("用法: python3 merge_research.py <skill目录路径>")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    skill_dir = Path(sys.argv[1])
			
 
				+    research_dir = skill_dir / 'references' / 'research'
			
 
				+
			
 
				+    if not research_dir.exists():
			
 
				+        print(f"❌ 目录不存在: {research_dir}")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    files = {}
			
 
				+    rows = []
			
 
				+    total_sources = 0
			
 
				+    total_primary = 0
			
 
				+    total_secondary = 0
			
 
				+    missing = []
			
 
				+
			
 
				+    for key, label in AGENTS.items():
			
 
				+        md_file = research_dir / f"{key}.md"
			
 
				+        if not md_file.exists():
			
 
				+            missing.append(label)
			
 
				+            rows.append(f"│ {label:<12} │ {'❌ 缺失':<8} │ {'—':<24} │")
			
 
				+            continue
			
 
				+
			
 
				+        content = md_file.read_text(encoding='utf-8')
			
 
				+        files[key] = content
			
 
				+        stats = count_sources(content)
			
 
				+        findings = extract_key_findings(content)
			
 
				+
			
 
				+        total_sources += stats['unique_urls']
			
 
				+        total_primary += stats['primary_markers']
			
 
				+        total_secondary += stats['secondary_markers']
			
 
				+
			
 
				+        findings_str = ', '.join(findings) if findings else '—'
			
 
				+        if len(findings_str) > 40:
			
 
				+            findings_str = findings_str[:37] + '...'
			
 
				+
			
 
				+        rows.append(f"│ {label:<12} │ {stats['unique_urls']:<8} │ {findings_str:<24} │")
			
 
				+
			
 
				+    # 矛盾检测
			
 
				+    contradictions = find_contradictions(files)
			
 
				+
			
 
				+    # 输出
			
 
				+    print("┌──────────────┬──────────┬──────────────────────────┐")
			
 
				+    print("│ Agent        │ 来源数量  │ 关键发现                  │")
			
 
				+    print("├──────────────┼──────────┼──────────────────────────┤")
			
 
				+    for row in rows:
			
 
				+        print(row)
			
 
				+    print("├──────────────┼──────────┼──────────────────────────┤")
			
 
				+
			
 
				+    primary_ratio = f"{total_primary}/{total_primary + total_secondary}" if (total_primary + total_secondary) > 0 else "未标记"
			
 
				+    print(f"│ 总来源数      │ {total_sources:<8} │ 一手占比: {primary_ratio:<15} │")
			
 
				+
			
 
				+    if contradictions:
			
 
				+        print(f"│ 矛盾点        │ {len(contradictions)}处      │ {contradictions[0][:24]:<24} │")
			
 
				+    else:
			
 
				+        print(f"│ 矛盾点        │ 0处      │ {'—':<24} │")
			
 
				+
			
 
				+    if missing:
			
 
				+        print(f"│ 信息不足维度   │ {len(missing)}个      │ {', '.join(missing):<24} │")
			
 
				+    else:
			
 
				+        print(f"│ 信息不足维度   │ 无       │ {'—':<24} │")
			
 
				+
			
 
				+    print("└──────────────┴──────────┴──────────────────────────┘")
			
 
				+
			
 
				+    # 总结
			
 
				+    if total_sources < 10:
			
 
				+        print("\n⚠️ 总来源数 <10，建议降低期望或补充调研")
			
 
				+    if missing:
			
 
				+        print(f"\n⚠️ 缺失维度: {', '.join(missing)}，建议补充或在诚实边界中标注")
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/scripts/quality_check.py
+++ b/scripts/quality_check.py
@@ -0,0 +1,152 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+自动检查生成的SKILL.md是否通过Phase 4质量标准。
			
 
				+对照通过标准表格逐项检查，输出通过/不通过和具体原因。
			
 
				+
			
 
				+用法:
			
 
				+    python3 quality_check.py <SKILL.md路径>
			
 
				+
			
 
				+示例:
			
 
				+    python3 quality_check.py .claude/skills/elon-musk-perspective/SKILL.md
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+
			
 
				+
			
 
				+def check_mental_models(content: str) -> tuple[bool, str]:
			
 
				+    """检查心智模型数量（3-7个）"""
			
 
				+    # 匹配 ### 模型N: 或 ### N. 等模式
			
 
				+    models = re.findall(r'^###\s+(?:模型|Model|心智模型)\s*\d', content, re.MULTILINE)
			
 
				+    if not models:
			
 
				+        # fallback: 数「### 」开头的行在心智模型section中
			
 
				+        in_section = False
			
 
				+        count = 0
			
 
				+        for line in content.split('\n'):
			
 
				+            if re.match(r'^##\s+.*心智模型|Mental Model', line, re.IGNORECASE):
			
 
				+                in_section = True
			
 
				+                continue
			
 
				+            if in_section and re.match(r'^##\s+', line) and '心智模型' not in line:
			
 
				+                break
			
 
				+            if in_section and re.match(r'^###\s+', line):
			
 
				+                count += 1
			
 
				+        if count > 0:
			
 
				+            passed = 3 <= count <= 7
			
 
				+            return passed, f"{count}个心智模型 {'✅' if passed else '❌ (应为3-7个)'}"
			
 
				+
			
 
				+    count = len(models)
			
 
				+    if count == 0:
			
 
				+        return False, "未检测到心智模型section"
			
 
				+    passed = 3 <= count <= 7
			
 
				+    return passed, f"{count}个心智模型 {'✅' if passed else '❌ (应为3-7个)'}"
			
 
				+
			
 
				+
			
 
				+def check_limitations(content: str) -> tuple[bool, str]:
			
 
				+    """检查每个模型是否有局限性"""
			
 
				+    has_limitation = bool(re.search(r'局限|失效|不适用|盲区|limitation|blind spot', content, re.IGNORECASE))
			
 
				+    return has_limitation, "有局限性标注 ✅" if has_limitation else "❌ 未找到局限性描述"
			
 
				+
			
 
				+
			
 
				+def check_expression_dna(content: str) -> tuple[bool, str]:
			
 
				+    """检查表达DNA辨识度"""
			
 
				+    dna_section = bool(re.search(r'表达DNA|Expression DNA|表达风格', content, re.IGNORECASE))
			
 
				+    if not dna_section:
			
 
				+        return False, "❌ 未找到表达DNA section"
			
 
				+
			
 
				+    # 检查是否有具体的风格描述（句式、词汇等）
			
 
				+    style_markers = len(re.findall(r'句式|词汇|语气|幽默|节奏|确定性|引用|口头禅', content))
			
 
				+    passed = style_markers >= 3
			
 
				+    return passed, f"表达DNA特征: {style_markers}项 {'✅' if passed else '❌ (应≥3项)'}"
			
 
				+
			
 
				+
			
 
				+def check_honest_boundary(content: str) -> tuple[bool, str]:
			
 
				+    """检查诚实边界（至少3条）"""
			
 
				+    # 找诚实边界section
			
 
				+    boundary_match = re.search(r'(?:##\s+.*诚实边界|## Honest Boundary)(.*?)(?=\n##\s|\Z)', content, re.DOTALL | re.IGNORECASE)
			
 
				+    if not boundary_match:
			
 
				+        return False, "❌ 未找到诚实边界section"
			
 
				+
			
 
				+    boundary_text = boundary_match.group(1)
			
 
				+    # 计算列表项
			
 
				+    items = re.findall(r'^[-*]\s+', boundary_text, re.MULTILINE)
			
 
				+    count = len(items)
			
 
				+    passed = count >= 3
			
 
				+    return passed, f"诚实边界: {count}条 {'✅' if passed else '❌ (应≥3条)'}"
			
 
				+
			
 
				+
			
 
				+def check_tensions(content: str) -> tuple[bool, str]:
			
 
				+    """检查内在张力（至少2对）"""
			
 
				+    tension_markers = len(re.findall(r'张力|矛盾|tension|paradox|一方面.*另一方面|既.*又', content, re.IGNORECASE))
			
 
				+    passed = tension_markers >= 2
			
 
				+    return passed, f"内在张力: {tension_markers}处 {'✅' if passed else '❌ (应≥2处)'}"
			
 
				+
			
 
				+
			
 
				+def check_primary_sources(content: str) -> tuple[bool, str]:
			
 
				+    """检查一手来源占比"""
			
 
				+    # 找调研来源section
			
 
				+    source_section = re.search(r'(?:##\s+.*来源|## Source|## Reference)(.*?)(?=\n##\s|\Z)', content, re.DOTALL | re.IGNORECASE)
			
 
				+    if not source_section:
			
 
				+        return True, "未找到来源section（跳过检查）"
			
 
				+
			
 
				+    source_text = source_section.group(1)
			
 
				+    primary = len(re.findall(r'一手|primary|本人著作|原始', source_text, re.IGNORECASE))
			
 
				+    secondary = len(re.findall(r'二手|secondary|转述|评论', source_text, re.IGNORECASE))
			
 
				+    total = primary + secondary
			
 
				+    if total == 0:
			
 
				+        return True, "未标记来源类型（跳过检查）"
			
 
				+
			
 
				+    ratio = primary / total
			
 
				+    passed = ratio > 0.5
			
 
				+    return passed, f"一手来源占比: {primary}/{total} ({ratio:.0%}) {'✅' if passed else '❌ (应>50%)'}"
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    if len(sys.argv) < 2:
			
 
				+        print("用法: python3 quality_check.py <SKILL.md路径>")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    skill_path = Path(sys.argv[1])
			
 
				+    if not skill_path.exists():
			
 
				+        print(f"❌ 文件不存在: {skill_path}")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    content = skill_path.read_text(encoding='utf-8')
			
 
				+
			
 
				+    checks = [
			
 
				+        ("心智模型数量", check_mental_models),
			
 
				+        ("模型局限性", check_limitations),
			
 
				+        ("表达DNA辨识度", check_expression_dna),
			
 
				+        ("诚实边界", check_honest_boundary),
			
 
				+        ("内在张力", check_tensions),
			
 
				+        ("一手来源占比", check_primary_sources),
			
 
				+    ]
			
 
				+
			
 
				+    print(f"质量检查: {skill_path.name}")
			
 
				+    print("=" * 50)
			
 
				+
			
 
				+    passed_count = 0
			
 
				+    total = len(checks)
			
 
				+
			
 
				+    for name, check_fn in checks:
			
 
				+        passed, detail = check_fn(content)
			
 
				+        status = "✅ PASS" if passed else "❌ FAIL"
			
 
				+        print(f"  {name:<12} {status}  {detail}")
			
 
				+        if passed:
			
 
				+            passed_count += 1
			
 
				+
			
 
				+    print("=" * 50)
			
 
				+    print(f"结果: {passed_count}/{total} 通过")
			
 
				+
			
 
				+    if passed_count == total:
			
 
				+        print("🎉 全部通过，可以交付")
			
 
				+    elif passed_count >= total - 1:
			
 
				+        print("⚠️ 基本通过，建议修复不通过项后交付")
			
 
				+    else:
			
 
				+        print("❌ 多项不通过，建议回到Phase 2迭代")
			
 
				+
			
 
				+    sys.exit(0 if passed_count == total else 1)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/scripts/srt_to_transcript.py
+++ b/scripts/srt_to_transcript.py
@@ -0,0 +1,108 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+将SRT/VTT字幕文件清洗为干净的纯文本transcript。
			
 
				+去除时间戳、序号、重复行、HTML标签，输出可直接阅读的文本。
			
 
				+
			
 
				+用法:
			
 
				+    python3 srt_to_transcript.py input.srt [output.txt]
			
 
				+    python3 srt_to_transcript.py input.vtt [output.txt]
			
 
				+
			
 
				+如果不指定输出文件，默认输出到 input_transcript.txt
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+
			
 
				+
			
 
				+def clean_srt(content: str) -> str:
			
 
				+    """清洗SRT格式字幕"""
			
 
				+    lines = content.strip().split('\n')
			
 
				+    texts = []
			
 
				+
			
 
				+    for line in lines:
			
 
				+        line = line.strip()
			
 
				+        # 跳过序号行（纯数字）
			
 
				+        if re.match(r'^\d+$', line):
			
 
				+            continue
			
 
				+        # 跳过时间戳行
			
 
				+        if re.match(r'\d{2}:\d{2}:\d{2}', line):
			
 
				+            continue
			
 
				+        # 跳过空行
			
 
				+        if not line:
			
 
				+            continue
			
 
				+        # 去除HTML标签
			
 
				+        line = re.sub(r'<[^>]+>', '', line)
			
 
				+        # 去除VTT的position标记
			
 
				+        line = re.sub(r'align:.*$|position:.*$', '', line).strip()
			
 
				+        if line:
			
 
				+            texts.append(line)
			
 
				+
			
 
				+    # 去重（自动字幕常有连续重复行）
			
 
				+    deduped = []
			
 
				+    for text in texts:
			
 
				+        if not deduped or text != deduped[-1]:
			
 
				+            deduped.append(text)
			
 
				+
			
 
				+    # 合并成段落：连续的短句合并，遇到句末标点或长停顿换行
			
 
				+    result = []
			
 
				+    current = []
			
 
				+
			
 
				+    for text in deduped:
			
 
				+        current.append(text)
			
 
				+        # 如果当前累积文本够长或遇到句末标点，形成一个段落
			
 
				+        joined = ' '.join(current)
			
 
				+        if len(joined) > 200 or re.search(r'[。！？.!?]$', text):
			
 
				+            result.append(joined)
			
 
				+            current = []
			
 
				+
			
 
				+    if current:
			
 
				+        result.append(' '.join(current))
			
 
				+
			
 
				+    return '\n\n'.join(result)
			
 
				+
			
 
				+
			
 
				+def clean_vtt(content: str) -> str:
			
 
				+    """清洗VTT格式字幕（先去掉VTT头部，然后按SRT逻辑处理）"""
			
 
				+    # 去掉WEBVTT头部
			
 
				+    content = re.sub(r'^WEBVTT.*?\n\n', '', content, flags=re.DOTALL)
			
 
				+    # 去掉NOTE块
			
 
				+    content = re.sub(r'NOTE.*?\n\n', '', content, flags=re.DOTALL)
			
 
				+    return clean_srt(content)
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    if len(sys.argv) < 2:
			
 
				+        print("用法: python3 srt_to_transcript.py <input.srt|input.vtt> [output.txt]")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    input_path = Path(sys.argv[1])
			
 
				+    if not input_path.exists():
			
 
				+        print(f"❌ 文件不存在: {input_path}")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    # 默认输出文件名
			
 
				+    if len(sys.argv) >= 3:
			
 
				+        output_path = Path(sys.argv[2])
			
 
				+    else:
			
 
				+        output_path = input_path.parent / f"{input_path.stem}_transcript.txt"
			
 
				+
			
 
				+    # 读取并检测格式
			
 
				+    content = input_path.read_text(encoding='utf-8')
			
 
				+
			
 
				+    if input_path.suffix.lower() == '.vtt' or content.startswith('WEBVTT'):
			
 
				+        transcript = clean_vtt(content)
			
 
				+    else:
			
 
				+        transcript = clean_srt(content)
			
 
				+
			
 
				+    output_path.write_text(transcript, encoding='utf-8')
			
 
				+
			
 
				+    # 统计
			
 
				+    word_count = len(transcript)
			
 
				+    line_count = transcript.count('\n') + 1
			
 
				+    print(f"✅ 转换完成: {output_path}")
			
 
				+    print(f"   字数: {word_count}  段落数: {line_count}")
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()