haiany
/
webnovel-writer
spegling av https://github.com/lingfengQAQ/webnovel-writer.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
							#!/usr/bin/env python3
"""
Context Pack Builder v5.1

为章节写作生成结构化上下文包，取代直接读取 state.json。

v5.1 变更:
- 使用 v5.1 index_manager schema (entities.id, aliases, current_json)
- 移除对 entity_kv 表的依赖，改用 current_json 字段
- 移除对 entity_aliases 表的依赖，改用 aliases 表

输出 Schema:
{
  "core": {
    "chapter_outline": "本章大纲内容",
    "protagonist_snapshot": {...},
    "recent_summaries": [{...}, ...]
  },
  "scene": {
    "location_context": {...},
    "appearing_characters": [{entity_id, name, snapshot}, ...],
    "urgent_foreshadowing": [{...}, ...]
  },
  "global": {
    "worldview_skeleton": "...",
    "power_system_skeleton": "...",
    "style_contract_ref": "..."
  }
}

使用方式:
  python context_pack_builder.py --chapter 45 --project-root /path/to/project
  python context_pack_builder.py --chapter 45 --output /tmp/context_pack.json
"""

import json
import os
import sys
import argparse
import re
import sqlite3
from pathlib import Path
from typing import Optional, Dict, List, Any

# 导入项目工具
from project_locator import resolve_project_root
from chapter_paths import find_chapter_file

# 导入配置
try:
    from data_modules.config import get_config, DataModulesConfig
except ImportError:
    from scripts.data_modules.config import get_config, DataModulesConfig


class ContextPackBuilder:
    """上下文包构建器"""

    def __init__(self, project_root: Path = None):
        if project_root is None:
            try:
                project_root = resolve_project_root()
            except FileNotFoundError:
                project_root = Path.cwd()
        else:
            project_root = Path(project_root)

        self.project_root = project_root
        self.config = get_config(project_root)
        self.state_file = project_root / ".webnovel" / "state.json"
        self.index_db = project_root / ".webnovel" / "index.db"
        self.outline_dir = project_root / "大纲"
        self.settings_dir = project_root / "设定集"
        self.chapters_dir = project_root / "正文"

        self._conn: Optional[sqlite3.Connection] = None

    def _conn_index(self) -> Optional[sqlite3.Connection]:
        if self._conn is not None:
            return self._conn
        if not self.index_db.exists():
            return None
        conn = sqlite3.connect(str(self.index_db))
        conn.row_factory = sqlite3.Row
        self._conn = conn
        return conn

    def build(self, chapter_num: int) -> Dict[str, Any]:
        """构建完整上下文包"""
        state = self._load_state()

        return {
            "meta": {
                "chapter": chapter_num,
                "project_root": str(self.project_root),
                "version": "5.1"
            },
            "core": self._build_core(chapter_num),
            "scene": self._build_scene(chapter_num),
            "global": self._build_global(),
            "alerts": self._build_alerts(state)
        }

    def _build_core(self, chapter_num: int) -> Dict[str, Any]:
        """核心上下文：大纲、主角状态、近期摘要"""
        state = self._load_state()

        return {
            "chapter_outline": self._get_chapter_outline(chapter_num),
            "protagonist_snapshot": self._get_protagonist_snapshot(state),
            "recent_summaries": self._get_recent_summaries(
                chapter_num, window=self.config.context_recent_summaries_window
            )
        }

    def _build_scene(self, chapter_num: int) -> Dict[str, Any]:
        """场景上下文：地点、出场角色、紧急伏笔"""
        state = self._load_state()

        # 从大纲推断本章地点和角色
        outline = self._get_chapter_outline(chapter_num)
        predicted_location = self._predict_location(outline, state)
        predicted_characters = self._predict_characters(outline, state)

        return {
            "location_context": predicted_location,
            "appearing_characters": predicted_characters,
            "urgent_foreshadowing": self._get_urgent_foreshadowing(state, chapter_num)
        }

    def _build_global(self) -> Dict[str, Any]:
        """全局上下文：世界观、力量体系、风格契约"""
        return {
            "worldview_skeleton": self._load_skeleton("世界观"),
            "power_system_skeleton": self._load_skeleton("力量体系"),
            "style_contract_ref": self._get_style_contract_ref()
        }

    def _build_alerts(self, state: Dict) -> Dict[str, Any]:
        """风险提示：消歧警告、待确认项（v5.0）"""
        slice_size = self.config.context_alerts_slice
        return {
            "disambiguation_warnings": state.get("disambiguation_warnings", [])[-slice_size:],
            "disambiguation_pending": state.get("disambiguation_pending", [])[-slice_size:]
        }

    # ================== 辅助方法 ==================

    def _load_state(self) -> Dict:
        """加载 state.json"""
        if not self.state_file.exists():
            return {}
        with open(self.state_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _get_chapter_outline(self, chapter_num: int) -> str:
        """获取本章大纲"""
        # 尝试多种大纲文件格式
        patterns = [
            f"第{chapter_num}章*.md",
            f"第{chapter_num:02d}章*.md",
            f"第{chapter_num:03d}章*.md",
            f"第{chapter_num:04d}章*.md",
            f"章纲/第{chapter_num}章*.md",
            f"章纲/第{chapter_num:02d}章*.md",
        ]

        for pattern in patterns:
            matches = list(self.outline_dir.glob(pattern))
            if matches:
                with open(matches[0], 'r', encoding='utf-8') as f:
                    return f.read()

        # 尝试从卷纲中提取
        volume_outline = self._extract_from_volume_outline(chapter_num)
        if volume_outline:
            return volume_outline

        return f"[大纲未找到: 第{chapter_num}章]"

    def _extract_from_volume_outline(self, chapter_num: int) -> Optional[str]:
        """从卷纲中提取章节大纲"""
        volume_files = list(self.outline_dir.glob("卷纲*.md")) + list(self.outline_dir.glob("*卷*.md"))

        for vf in volume_files:
            with open(vf, 'r', encoding='utf-8') as f:
                content = f.read()

            # 查找章节标记
            pattern = rf'第{chapter_num}章[^\n]*\n(.*?)(?=第\d+章|$)'
            match = re.search(pattern, content, re.DOTALL)
            if match:
                return match.group(0).strip()

        return None

    def _get_protagonist_snapshot(self, state: Dict) -> Dict:
        """获取主角状态快照"""
        protagonist = state.get("protagonist_state", {}) or {}
        power = protagonist.get("power", {}) or {}
        location = protagonist.get("location", {}) or {}

        snapshot: Dict[str, Any] = {
            "entity_id": str(protagonist.get("entity_id", "") or "").strip(),
            "name": str(protagonist.get("name", "") or "").strip() or "主角",
            "realm": str(power.get("realm", "") or "").strip(),
            "layer": power.get("layer", 0),
            "bottleneck": str(power.get("bottleneck", "") or "").strip(),
            "golden_finger": protagonist.get("golden_finger", {}) or {},
            "location": str(location.get("current", "") or "").strip(),
        }

        # 可选：从 index.db 补齐（以 entity_id 为准）
        protagonist_id = snapshot.get("entity_id", "")
        conn = self._conn_index()
        if protagonist_id and conn is not None:
            # v5.1 schema: entities 表使用 id 字段，current_json 存储状态
            row = conn.execute(
                "SELECT canonical_name, current_json FROM entities WHERE id = ? LIMIT 1",
                (protagonist_id,),
            ).fetchone()
            if row:
                if row["canonical_name"]:
                    snapshot["name"] = row["canonical_name"]
                # 从 current_json 解析状态
                if row["current_json"]:
                    try:
                        current = json.loads(row["current_json"])
                        if isinstance(current.get("realm"), str) and current.get("realm"):
                            snapshot["realm"] = current["realm"]
                        if current.get("layer") is not None and current.get("layer") != "":
                            snapshot["layer"] = current["layer"]
                        if isinstance(current.get("bottleneck"), str) and current.get("bottleneck"):
                            snapshot["bottleneck"] = current["bottleneck"]
                        if isinstance(current.get("location"), str) and current.get("location"):
                            snapshot["location"] = current["location"]
                    except (json.JSONDecodeError, TypeError):
                        pass

        return snapshot

    def _get_recent_summaries(self, chapter_num: int, window: int = 5) -> List[Dict]:
        """获取最近 N 章的摘要"""
        summaries = []
        start = max(1, chapter_num - window)

        for ch in range(start, chapter_num):
            chapter_file = find_chapter_file(self.project_root, ch)
            if chapter_file and chapter_file.exists():
                summary = self._extract_summary_from_chapter(chapter_file, ch)
                if summary:
                    summaries.append(summary)

        return summaries

    def _extract_summary_from_chapter(self, chapter_file: Path, chapter_num: int) -> Optional[Dict]:
        """从章节文件中提取摘要"""
        with open(chapter_file, 'r', encoding='utf-8') as f:
            content = f.read()

        # 查找摘要区块
        summary_match = re.search(r'## 本章摘要\s*\n(.*?)(?=\n##|$)', content, re.DOTALL)
        if summary_match:
            summary_text = summary_match.group(1).strip()
            return {
                "chapter": chapter_num,
                "summary": summary_text
            }

        # 没有摘要，返回章节标题
        title_match = re.match(r'^#\s*(.+)', content)
        title = title_match.group(1).strip() if title_match else f"第{chapter_num}章"

        return {
            "chapter": chapter_num,
            "title": title,
            "summary": None
        }

    def _predict_location(self, outline: str, state: Dict) -> Dict:
        """从大纲推断地点（优先使用 index.db 别名表）"""
        conn = self._conn_index()
        if conn is None:
            return {"name": "未知地点", "desc": ""}

        # v5.1 schema: 使用 aliases 表（替代 entity_aliases）
        rows = conn.execute(
            "SELECT alias, entity_id FROM aliases WHERE entity_type = ?",
            ("地点",),
        ).fetchall()
        if not rows:
            return {"name": "未知地点", "desc": ""}

        # 先匹配更长的别名，降低误命中
        candidates = sorted(
            ((r["alias"], r["entity_id"]) for r in rows if r["alias"]),
            key=lambda x: len(x[0]),
            reverse=True,
        )
        for alias, entity_id in candidates:
            if len(alias) < 2:
                continue
            if alias not in outline:
                continue

            # v5.1 schema: entities 表使用 id 字段
            e = conn.execute(
                "SELECT canonical_name, desc FROM entities WHERE id = ? LIMIT 1",
                (entity_id,),
            ).fetchone()
            return {
                "entity_id": entity_id,
                "name": (e["canonical_name"] if e else "") or alias,
                "desc": (e["desc"] if e else "") or "",
                "match": alias,
            }

        return {"name": "未知地点", "desc": ""}

    def _predict_characters(self, outline: str, state: Dict) -> List[Dict]:
        """从大纲推断出场角色（优先使用 index.db 别名表）"""
        conn = self._conn_index()
        if conn is None:
            return []

        # v5.1 schema: 使用 aliases 表（替代 entity_aliases）
        rows = conn.execute(
            "SELECT alias, entity_id FROM aliases WHERE entity_type = ?",
            ("角色",),
        ).fetchall()
        if not rows:
            return []

        matched_ids: set[str] = set()
        for r in rows:
            alias = r["alias"] or ""
            if len(alias) < 2:
                continue
            if alias in outline:
                matched_ids.add(r["entity_id"])

        if not matched_ids:
            return []

        tier_order = {"核心": 0, "支线": 1, "装饰": 2, "": 3}
        matched: List[Dict[str, Any]] = []
        for entity_id in matched_ids:
            # v5.1 schema: entities 表使用 id 字段，current_json 存储状态
            e = conn.execute(
                "SELECT canonical_name, tier, current_json FROM entities WHERE id = ? LIMIT 1",
                (entity_id,),
            ).fetchone()
            if not e:
                continue

            # 从 current_json 解析快照
            snapshot = {}
            if e["current_json"]:
                try:
                    snapshot = json.loads(e["current_json"])
                except (json.JSONDecodeError, TypeError):
                    pass

            matched.append(
                {
                    "entity_id": entity_id,
                    "name": e["canonical_name"] or entity_id,
                    "tier": e["tier"] or "",
                    "snapshot": snapshot,
                }
            )

        matched.sort(key=lambda x: tier_order.get(x.get("tier", ""), 3))
        return matched[:self.config.context_max_appearing_characters]

    def _get_urgent_foreshadowing(self, state: Dict, chapter_num: int) -> List[Dict]:
        """获取紧急伏笔（优先使用 index.db 伏笔索引）"""
        conn = self._conn_index()
        if conn is not None:
            try:
                rows = conn.execute(
                    "SELECT content, introduced_chapter, resolved_chapter, status, urgency, location "
                    "FROM foreshadowing_index WHERE status = '未回收' ORDER BY urgency DESC LIMIT 5"
                ).fetchall()
                return [dict(r) for r in rows] if rows else []
            except sqlite3.Error:
                pass

        # fallback：项目未建索引时直接读取 state.json
        plot_threads = state.get("plot_threads", {}) or {}
        items = plot_threads.get("foreshadowing", []) or []
        urgent: List[Dict[str, Any]] = []

        for fs in items:
            if not isinstance(fs, dict):
                continue
            status = str(fs.get("status", "")).strip()
            if status in {"已回收"}:
                continue

            planted_chapter = fs.get("planted_chapter") or fs.get("introduced_chapter") or 0
            target_chapter = fs.get("target_chapter") or fs.get("target") or 0
            try:
                planted_chapter = int(planted_chapter)
            except (TypeError, ValueError):
                planted_chapter = 0
            try:
                target_chapter = int(target_chapter) if target_chapter else 0
            except (TypeError, ValueError):
                target_chapter = 0

            chapters_pending = chapter_num - planted_chapter if planted_chapter else 0

            # 使用配置的紧急度阈值
            cfg = self.config
            if chapters_pending > cfg.foreshadowing_urgency_pending_high:
                urgency = cfg.foreshadowing_urgency_score_high
            elif chapters_pending > cfg.foreshadowing_urgency_pending_medium:
                urgency = cfg.foreshadowing_urgency_score_medium
            elif target_chapter and chapter_num >= target_chapter - cfg.foreshadowing_urgency_target_proximity:
                urgency = cfg.foreshadowing_urgency_score_target
            else:
                urgency = cfg.foreshadowing_urgency_score_low

            if urgency >= cfg.foreshadowing_urgency_threshold_show:
                urgent.append(
                    {
                        "content": fs.get("content") or fs.get("description") or "",
                        "planted_chapter": planted_chapter,
                        "target_chapter": target_chapter,
                        "tier": fs.get("tier", ""),
                        "urgency": urgency,
                    }
                )

        urgent.sort(key=lambda x: x.get("urgency", 0), reverse=True)
        return urgent[:self.config.context_max_urgent_foreshadowing]

    def _load_skeleton(self, setting_type: str) -> str:
        """加载设定骨架"""
        patterns = [
            f"{setting_type}.md",
            f"{setting_type}/*.md",
            f"*{setting_type}*.md"
        ]

        for pattern in patterns:
            matches = list(self.settings_dir.glob(pattern))
            if matches:
                # 如果是目录，合并所有文件
                if matches[0].is_dir():
                    content = []
                    for f in sorted(matches[0].glob("*.md")):
                        with open(f, 'r', encoding='utf-8') as file:
                            content.append(f"## {f.stem}\n{file.read()}")
                    return "\n\n".join(content)
                else:
                    with open(matches[0], 'r', encoding='utf-8') as f:
                        return f.read()

        return f"[{setting_type}设定未找到]"

    def _get_style_contract_ref(self) -> str:
        """获取风格契约引用"""
        style_file = self.settings_dir / "风格契约.md"
        if style_file.exists():
            with open(style_file, 'r', encoding='utf-8') as f:
                return f.read()

        # 检查其他可能的位置
        for pattern in ["风格*.md", "写作风格*.md", "style*.md"]:
            matches = list(self.settings_dir.glob(pattern))
            if matches:
                with open(matches[0], 'r', encoding='utf-8') as f:
                    return f.read()

        return "[风格契约未定义]"


def main():
    parser = argparse.ArgumentParser(description="Context Pack Builder v5.1")
    parser.add_argument("--chapter", type=int, required=True, help="章节编号")
    parser.add_argument("--project-root", metavar="PATH", help="项目根目录")
    parser.add_argument("--output", metavar="FILE", help="输出文件路径（默认输出到 stdout）")
    parser.add_argument("--pretty", action="store_true", help="格式化 JSON 输出")

    args = parser.parse_args()

    # 构建上下文包
    builder = ContextPackBuilder(project_root=args.project_root)
    context_pack = builder.build(args.chapter)

    # 输出
    if args.pretty:
        output = json.dumps(context_pack, ensure_ascii=False, indent=2)
    else:
        output = json.dumps(context_pack, ensure_ascii=False)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"✅ 上下文包已保存到: {args.output}")
    else:
        print(output)


if __name__ == "__main__":
    # Windows UTF-8 编码修复
    if sys.platform == 'win32':
        import io
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

    main()