haiany
/
webnovel-writer
镜像来自 https://github.com/lingfengQAQ/webnovel-writer.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
							#!/usr/bin/env python3
"""
extract_chapter_context.py - 提取章节创作所需的精简上下文

功能：
- 提取当前章节的大纲片段（~500字）
- 提取前2章的摘要（优先 .webnovel/summaries）
- 提取 state.json 关键字段（~300字）

用法：
    python extract_chapter_context.py --chapter 7
    python extract_chapter_context.py --chapter 7 --project-root ./webnovel-project
"""

import argparse
import json
import re
import sys
from pathlib import Path

try:
    from chapter_paths import find_chapter_file
except ImportError:  # pragma: no cover
    from scripts.chapter_paths import find_chapter_file


def find_project_root(start_path: Path = None) -> Path:
    """查找包含 .webnovel 目录的项目根目录"""
    if start_path is None:
        start_path = Path.cwd()

    search_paths = [
        start_path,
        start_path / "webnovel-project",
        start_path.parent,
    ]

    for path in search_paths:
        if (path / ".webnovel").exists():
            return path

    raise FileNotFoundError("未找到 .webnovel 目录，请确认项目路径")


def extract_chapter_outline(project_root: Path, chapter_num: int) -> str:
    """从大纲文件中提取指定章节的大纲片段"""
    volume_num = (chapter_num - 1) // 50 + 1
    outline_file = project_root / "大纲" / f"第{volume_num}卷-详细大纲.md"

    if not outline_file.exists():
        return f"⚠️ 大纲文件不存在: {outline_file}"

    content = outline_file.read_text(encoding="utf-8")

    # 匹配章节大纲块
    # 格式：### 第 N 章：标题 或 ### 第 N 章: 标题
    pattern = rf"###\s*第\s*{chapter_num}\s*章[：:]\s*(.+?)(?=###\s*第\s*\d+\s*章|##\s|$)"
    match = re.search(pattern, content, re.DOTALL)

    if match:
        outline = match.group(0).strip()
        # 限制长度
        if len(outline) > 1500:
            outline = outline[:1500] + "\n...(已截断)"
        return outline

    # 尝试另一种格式：### 第 1 章：标题（无空格）
    pattern2 = rf"###\s*第{chapter_num}章[：:]\s*(.+?)(?=###\s*第\d+章|##\s|$)"
    match2 = re.search(pattern2, content, re.DOTALL)

    if match2:
        outline = match2.group(0).strip()
        if len(outline) > 1500:
            outline = outline[:1500] + "\n...(已截断)"
        return outline

    return f"⚠️ 未找到第 {chapter_num} 章的大纲"


def _load_summary_file(project_root: Path, chapter_num: int) -> str:
    """从 .webnovel/summaries/chNNNN.md 提取剧情摘要"""
    summary_path = project_root / ".webnovel" / "summaries" / f"ch{chapter_num:04d}.md"
    if not summary_path.exists():
        return ""

    text = summary_path.read_text(encoding="utf-8")
    summary_match = re.search(r"##\s*剧情摘要\s*\r?\n(.+?)(?=\r?\n##|$)", text, re.DOTALL)
    if summary_match:
        return summary_match.group(1).strip()
    return ""


def extract_chapter_summary(project_root: Path, chapter_num: int) -> str:
    """提取指定章节摘要（优先 summaries/，再降级正文）"""
    summary = _load_summary_file(project_root, chapter_num)
    if summary:
        return summary

    chapter_file = find_chapter_file(project_root, chapter_num)
    if not chapter_file or not chapter_file.exists():
        return f"⚠️ 第 {chapter_num} 章文件不存在"

    content = chapter_file.read_text(encoding="utf-8")

    # 兼容旧格式：尝试提取"本章摘要"部分
    summary_match = re.search(r"##\s*本章摘要\s*\r?\n(.+?)(?=\r?\n##|$)", content, re.DOTALL)
    if summary_match:
        return summary_match.group(1).strip()

    # 如果没有摘要，提取"本章统计"部分
    stats_match = re.search(r"##\s*本章统计\s*\r?\n(.+?)(?=\r?\n##|$)", content, re.DOTALL)
    if stats_match:
        return f"[无摘要，仅统计]\n{stats_match.group(1).strip()}"

    # 最后降级：提取前500字作为摘要
    lines = content.split("\n")
    text_lines = [l for l in lines if not l.startswith("#") and l.strip()]
    text = "\n".join(text_lines)[:500]
    return f"[自动截取前500字]\n{text}..."


def extract_state_summary(project_root: Path) -> str:
    """提取 state.json 的关键字段"""
    state_file = project_root / ".webnovel" / "state.json"

    if not state_file.exists():
        return "⚠️ state.json 不存在"

    state = json.loads(state_file.read_text(encoding="utf-8"))

    # 提取关键字段
    summary_parts = []

    # 进度
    if "progress" in state:
        p = state["progress"]
        summary_parts.append(f"**进度**: 第 {p.get('current_chapter', '?')} 章 / {p.get('total_words', '?')} 字")

    # 主角状态
    if "protagonist_state" in state:
        ps = state["protagonist_state"]
        power = ps.get("power", {})
        summary_parts.append(f"**主角实力**: {power.get('realm', '?')} {power.get('layer', '?')}层")
        summary_parts.append(f"**当前位置**: {ps.get('location', '?')}")

        gf = ps.get("golden_finger", {})
        summary_parts.append(f"**金手指**: {gf.get('name', '?')} Lv.{gf.get('level', '?')}")

    # Strand 追踪
    if "strand_tracker" in state:
        st = state["strand_tracker"]
        history = st.get("history", [])[-5:]  # 最近5章
        if history:
            strand_str = ", ".join([f"Ch{h['chapter']}:{h['strand']}" for h in history])
            summary_parts.append(f"**近5章Strand**: {strand_str}")

    # 活跃伏笔（只显示紧急的）
    plot_threads = state.get("plot_threads", {}) if isinstance(state.get("plot_threads"), dict) else {}
    foreshadowing = plot_threads.get("foreshadowing", [])
    if isinstance(foreshadowing, list) and foreshadowing:
        active = [f for f in foreshadowing if f.get("status") in {"active", "未回收"}]
        urgent = [f for f in active if f.get("urgency", 0) > 50]
        if urgent:
            urgent_list = [f"{f.get('content', '?')[:30]}... (紧急度:{f.get('urgency')})" for f in urgent[:3]]
            summary_parts.append(f"**紧急伏笔**: {'; '.join(urgent_list)}")

    return "\n".join(summary_parts)


def main():
    parser = argparse.ArgumentParser(description="提取章节创作所需的精简上下文")
    parser.add_argument("--chapter", type=int, required=True, help="目标章节号")
    parser.add_argument("--project-root", type=str, help="项目根目录")
    parser.add_argument("--format", choices=["text", "json"], default="text", help="输出格式")

    args = parser.parse_args()

    try:
        if args.project_root:
            project_root = Path(args.project_root)
        else:
            project_root = find_project_root()

        chapter_num = args.chapter

        # 提取各部分
        outline = extract_chapter_outline(project_root, chapter_num)

        # 提取前2章摘要
        prev_summaries = []
        for prev_ch in range(max(1, chapter_num - 2), chapter_num):
            summary = extract_chapter_summary(project_root, prev_ch)
            prev_summaries.append(f"### 第 {prev_ch} 章摘要\n{summary}")

        state_summary = extract_state_summary(project_root)

        if args.format == "json":
            result = {
                "chapter": chapter_num,
                "outline": outline,
                "previous_summaries": prev_summaries,
                "state_summary": state_summary,
            }
            print(json.dumps(result, ensure_ascii=False, indent=2))
        else:
            print(f"# 第 {chapter_num} 章创作上下文\n")
            print("## 本章大纲\n")
            print(outline)
            print("\n---\n")
            print("## 前文摘要\n")
            for s in prev_summaries:
                print(s)
                print()
            print("---\n")
            print("## 当前状态\n")
            print(state_summary)

    except Exception as e:
        print(f"❌ 错误: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()