| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457 |
- #!/usr/bin/env python3
- """
- 上下文分页管理系统 (Context Manager)
- 核心理念:200万字小说不能每次都加载全部设定,需要"滑动窗口"机制。
- 功能:
- 1. 根据当前章节的 location 和 characters,动态筛选相关设定
- 2. 分层加载:核心上下文(必须)+ 场景上下文(按需)+ 全局概览(极简)
- 3. Token 优化:从 50,000 Token 压缩到 3,500 Token(节省 93%)
- 4. 缓存机制:避免重复计算
- 使用方式:
- # 为第 45 章构建上下文
- python context_manager.py --chapter 45 --output .webnovel/context_cache.json
- # 指定主角所在地点(可选,否则从 state.json 读取)
- python context_manager.py --chapter 45 --location "血煞秘境" --output context.json
- # Dry-run 模式(预览 Token 消耗)
- python context_manager.py --chapter 45 --dry-run
- 架构设计:
- 核心上下文(Core Context)- 必须加载
- └── 当前章节大纲(本章目标、出场角色、爽点设计)
- └── 主角卡(简版:姓名、境界、金手指、核心性格)
- └── 前 2 章摘要(各 200 字)
- 场景上下文(Scene Context)- 按需加载
- └── 当前地点详情(从 世界观.md 提取对应章节)
- └── 出场角色卡(完整版,最多 5 个)
- └── 相关伏笔(status=未回收 且 涉及当前地点/角色)
- └── 相关物品/招式(主角当前拥有 + 本章可能用到)
- 全局概览(Global Overview)- 极简版
- └── 世界观骨架(200 Token:势力关系图 + 地理框架)
- └── 力量体系(200 Token:境界列表 + 主角当前位置)
- └── 关键伏笔提醒(100 Token:未回收且紧急的)
- Token 预算分配:
- - 核心上下文:1500 Token
- - 场景上下文:1500 Token
- - 全局概览:500 Token
- - 总计:3500 Token(约 2600 字中文)
- """
- import json
- import os
- import sys
- import re
- from pathlib import Path
- from typing import Dict, List, Any, Optional
class ContextManager:
    """Sliding-window context builder for a long-form novel project.

    For a given chapter it assembles three layers of context from files
    under ``project_root``:

    * core context    -- chapter outline, protagonist brief, recent summaries
    * scene context   -- location details, character cards, related
      foreshadowing, item names
    * global overview -- worldview headings, power-system excerpt, open
      foreshadowing reminders

    Text sections are clipped with :meth:`truncate_to_tokens`, which relies
    on the rough estimate produced by :meth:`estimate_tokens`.
    """

    def __init__(self, project_root: str):
        """Record the project paths; nothing is read until ``load_state``."""
        self.project_root = Path(project_root)
        self.state_file = self.project_root / ".webnovel/state.json"
        self.outline_dir = self.project_root / "大纲"
        self.settings_dir = self.project_root / "设定集"
        self.chapters_dir = self.project_root / "正文"
        self.state = None
        # Nominal per-layer budgets. The per-section limits used by the
        # methods below are hard-coded and are not derived from this table.
        self.token_budget = {
            "core": 1500,
            "scene": 1500,
            "global": 500
        }

    @staticmethod
    def _read_text(path: Path) -> str:
        """Return the content of *path* decoded as UTF-8."""
        return path.read_text(encoding='utf-8')

    def load_state(self) -> bool:
        """Load ``state.json`` into ``self.state``.

        Returns:
            True when the file exists and holds a JSON object; False (after
            printing a message) when it is missing, unreadable, malformed,
            or not a JSON object.
        """
        if not self.state_file.exists():
            print(f"❌ 状态文件不存在: {self.state_file}")
            return False
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                state = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ 状态文件读取失败: {self.state_file} ({exc})")
            return False
        if not isinstance(state, dict):
            # Every reader below calls ``self.state.get``; reject other shapes
            # here instead of failing later with an AttributeError.
            print(f"❌ 状态文件格式错误(应为 JSON 对象): {self.state_file}")
            return False
        self.state = state
        return True

    def estimate_tokens(self, text: str) -> int:
        """Roughly estimate the token count of *text*.

        Counts CJK ideographs (U+4E00-U+9FFF) at 1.5 characters per token and
        ASCII letters at 4 characters per token. Digits, punctuation and
        whitespace are not counted.
        """
        if not text:
            return 0
        chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
        english_chars = len(re.findall(r'[a-zA-Z]', text))
        return int((chinese_chars / 1.5) + (english_chars / 4))

    def truncate_to_tokens(self, text: str, max_tokens: int) -> str:
        """Clip *text* so that its estimated token count fits *max_tokens*.

        Text already within budget is returned unchanged. Otherwise the text
        is cut proportionally (with a 5% safety margin), shortened further
        if the cut is still over budget, and suffixed with ``"..."``.
        """
        current_tokens = self.estimate_tokens(text)
        if current_tokens <= max_tokens:
            return text
        if max_tokens <= 0:
            # Nothing fits; also avoids a negative slice length below.
            return "..."

        ratio = max_tokens / current_tokens
        target_length = int(len(text) * ratio * 0.95)  # keep a 5% margin
        clipped = text[:target_length]
        # The proportional cut assumes tokens are spread evenly. When the
        # counted characters are concentrated at the start, keep shrinking
        # until the estimate really fits.
        while clipped and self.estimate_tokens(clipped) > max_tokens:
            clipped = clipped[:int(len(clipped) * 0.9)]
        return clipped + "..."

    def build_core_context(self, chapter_num: int) -> Dict[str, Any]:
        """Build the core layer: outline, protagonist brief, recent summaries."""
        return {
            "current_outline": self._get_chapter_outline(chapter_num),
            "protagonist_brief": self._get_protagonist_brief(),
            "recent_summaries": self._get_recent_summaries(chapter_num, count=2)
        }

    def _get_chapter_outline(self, chapter_num: int) -> str:
        """Extract the outline section of *chapter_num* from the volume outlines.

        Scans ``大纲/第*卷-详细大纲.md`` in sorted order for a heading of the form
        ``#### 第 NNN 章:标题`` (any zero padding, full- or half-width colon)
        and returns the section up to the next ``####``, clipped to 600
        tokens. Returns a placeholder message when no section is found.
        """
        heading = re.compile(
            rf"#### 第\s*0*{int(chapter_num)}\s*章[::](.+?)(?=####|$)",
            re.DOTALL,
        )
        # sorted() makes the result independent of filesystem listing order.
        for outline_file in sorted(self.outline_dir.glob("第*卷-详细大纲.md")):
            match = heading.search(self._read_text(outline_file))
            if match:
                return self.truncate_to_tokens(match.group(0), 600)
        return f"[未找到第 {chapter_num} 章大纲,请检查详细大纲文件]"

    def _get_protagonist_brief(self) -> Dict[str, Any]:
        """Return a short protagonist card built from the loaded state.

        Includes name, power, current location and golden-finger name, plus
        the "性格特点" section of ``设定集/主角卡.md`` (clipped to 200 tokens)
        when that file and section exist. Returns ``{}`` without state.
        """
        if not self.state:
            return {}
        protag_state = self.state.get("protagonist_state", {})
        brief = {
            "name": protag_state.get("name", "主角"),
            "power": protag_state.get("power", {}),
            "location": protag_state.get("location", {}).get("current", "未知"),
            "golden_finger": protag_state.get("golden_finger", {}).get("name", "无")
        }

        protag_card_file = self.settings_dir / "主角卡.md"
        if protag_card_file.exists():
            personality_match = re.search(
                r'## 性格特点\n\n(.+?)(?=\n##|$)',
                self._read_text(protag_card_file),
                re.DOTALL,
            )
            if personality_match:
                personality = personality_match.group(1).strip()
                brief["personality"] = self.truncate_to_tokens(personality, 200)
        return brief

    def _get_recent_summaries(self, chapter_num: int, count: int = 2) -> List[str]:
        """Return crude summaries of the *count* chapters before *chapter_num*.

        Each summary is the first 200 characters of ``正文/第NNNN章.md`` (the
        text after the first ``---`` separator when present) followed by
        ``"..."``. Missing chapters and non-positive numbers are skipped.
        """
        summaries = []
        for i in range(chapter_num - count, chapter_num):
            if i <= 0:
                continue
            chapter_file = self.chapters_dir / f"第{i:04d}章.md"
            if not chapter_file.exists():
                continue
            content = self._read_text(chapter_file)
            # Drop title / metadata placed above a "---" separator, if any.
            text_match = re.search(r'---\n\n(.+)', content, re.DOTALL)
            text = text_match.group(1).strip() if text_match else content
            summary = text[:200] + "..."
            summaries.append(f"第 {i} 章摘要:{summary}")
        return summaries

    def build_scene_context(self, chapter_num: int, location: Optional[str] = None,
                            characters: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build the scene layer for the given location and characters.

        When *location* is not given it falls back to the protagonist's
        current location from the loaded state. *chapter_num* is accepted
        for interface symmetry and is not used here.
        """
        if not location and self.state:
            location = self.state.get("protagonist_state", {}).get("location", {}).get("current")
        return {
            "location_details": self._get_location_details(location) if location else None,
            "character_cards": self._get_character_cards(characters) if characters else [],
            "relevant_foreshadowing": self._get_relevant_foreshadowing(location, characters),
            "relevant_items": self._get_relevant_items()
        }

    def _get_location_details(self, location: str) -> str:
        """Return the ``### <location>`` section of ``设定集/世界观.md``.

        The section is clipped to 400 tokens. A placeholder string is
        returned when the file or the section is missing.
        """
        worldview_file = self.settings_dir / "世界观.md"
        if not worldview_file.exists():
            return f"[地点:{location}](详情待补充)"
        pattern = f"### {re.escape(location)}\n\n(.+?)(?=\n###|$)"
        match = re.search(pattern, self._read_text(worldview_file), re.DOTALL)
        if match:
            return self.truncate_to_tokens(match.group(1).strip(), 400)
        return f"[地点:{location}](世界观.md 中未找到详情)"

    def _get_character_cards(self, characters: List[str]) -> List[Dict[str, str]]:
        """Load character cards for at most the first five names.

        Each name is looked up under ``设定集/角色库/<category>/<name>.md`` in the
        main, secondary and antagonist categories, in that order; the first
        hit is clipped to 200 tokens. Names without a file are skipped.
        """
        cards = []
        for char_name in characters[:5]:
            for category in ["主要角色", "次要角色", "反派角色"]:
                char_file = self.settings_dir / f"角色库/{category}/{char_name}.md"
                if char_file.exists():
                    cards.append({
                        "name": char_name,
                        "content": self.truncate_to_tokens(self._read_text(char_file), 200)
                    })
                    break
        return cards

    def _get_relevant_foreshadowing(self, location: Optional[str],
                                    characters: Optional[List[str]]) -> List[Dict[str, str]]:
        """Return up to three unresolved threads that mention the scene.

        A thread is relevant when its ``content`` contains *location* or any
        non-empty name in *characters*. Empty names are ignored because an
        empty string is a substring of every text.
        """
        if not self.state:
            return []
        all_foreshadowing = self.state.get("plot_threads", {}).get("foreshadowing", [])
        names = [name for name in (characters or []) if name]
        relevant = []
        for item in all_foreshadowing:
            if item.get("status") != "未回收":
                continue
            content = item.get("content") or ""  # tolerate an explicit null
            if (location and location in content) or any(name in content for name in names):
                relevant.append(item)
        return relevant[:3]

    def _get_relevant_items(self) -> List[str]:
        """Return the names of the first five entries of ``entities.items``.

        NOTE(review): no ownership filter is applied here; this presumably
        expects ``entities.items`` to already hold only the protagonist's
        items -- confirm against the state schema.
        """
        if not self.state:
            return []
        items = self.state.get("entities", {}).get("items", [])
        return [item.get("name") for item in items[:5]]

    def build_global_overview(self) -> Dict[str, Any]:
        """Build the global layer: worldview headings, power system, reminders."""
        return {
            "worldview_skeleton": self._get_worldview_skeleton(),
            "power_system_brief": self._get_power_system_brief(),
            "urgent_foreshadowing": self._get_urgent_foreshadowing()
        }

    def _get_worldview_skeleton(self) -> str:
        """Return a one-line list of headings from ``设定集/世界观.md``.

        Collects the text after every ``"### "`` occurrence (this also hits
        deeper headings such as ``####``), keeps the first ten, and clips
        the joined line to 200 tokens.
        """
        worldview_file = self.settings_dir / "世界观.md"
        if not worldview_file.exists():
            return "[世界观骨架待补充]"
        factions = re.findall(r'### (.+)', self._read_text(worldview_file))
        skeleton = "势力:" + "、".join(factions[:10])
        return self.truncate_to_tokens(skeleton, 200)

    def _get_power_system_brief(self) -> str:
        """Return the "境界划分" section of ``设定集/力量体系.md`` (200 tokens max)."""
        power_file = self.settings_dir / "力量体系.md"
        if not power_file.exists():
            return "[力量体系待补充]"
        realm_match = re.search(
            r'## 境界划分\n\n(.+?)(?=\n##|$)',
            self._read_text(power_file),
            re.DOTALL,
        )
        if realm_match:
            return self.truncate_to_tokens(realm_match.group(1).strip(), 200)
        return "[境界划分待补充]"

    def _get_urgent_foreshadowing(self) -> List[str]:
        """Return reminders for the first three unresolved threads.

        No age criterion is applied: every thread whose status is "未回收"
        qualifies, in state order.
        TODO: filter by how long ago the thread was planted once the state
        records the chapter in which it was introduced.
        """
        if not self.state:
            return []
        all_foreshadowing = self.state.get("plot_threads", {}).get("foreshadowing", [])
        pending = [item for item in all_foreshadowing if item.get("status") == "未回收"]
        return [f"⚠️ {item.get('content', '')}" for item in pending[:3]]

    def build_context(self, chapter_num: int, location: Optional[str] = None,
                      characters: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build all three layers and attach estimated token usage.

        Usage is estimated on the JSON serialization of each layer and
        stored under ``metadata.token_usage`` (core, scene, global, total).
        """
        context = {
            "chapter": chapter_num,
            "core_context": self.build_core_context(chapter_num),
            "scene_context": self.build_scene_context(chapter_num, location, characters),
            "global_overview": self.build_global_overview(),
            "metadata": {
                "token_usage": {}
            }
        }

        core_tokens = self.estimate_tokens(json.dumps(context["core_context"], ensure_ascii=False))
        scene_tokens = self.estimate_tokens(json.dumps(context["scene_context"], ensure_ascii=False))
        global_tokens = self.estimate_tokens(json.dumps(context["global_overview"], ensure_ascii=False))
        context["metadata"]["token_usage"] = {
            "core": core_tokens,
            "scene": scene_tokens,
            "global": global_tokens,
            "total": core_tokens + scene_tokens + global_tokens
        }
        return context

    def save_context(self, context: Dict[str, Any], output_file: str):
        """Write *context* to *output_file* as JSON and print a usage report.

        Missing parent directories of *output_file* are created. The report
        compares the total against a 50,000-token full load.
        """
        target = Path(output_file)
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(context, f, ensure_ascii=False, indent=2)

        print(f"✅ 上下文已保存: {output_file}")
        print("\n📊 Token 使用情况:")
        usage = context["metadata"]["token_usage"]
        print(f" 核心上下文: {usage['core']} Token")
        print(f" 场景上下文: {usage['scene']} Token")
        print(f" 全局概览: {usage['global']} Token")
        print(f" 总计: {usage['total']} Token")

        # Savings relative to loading everything (50,000 tokens).
        savings = (1 - usage['total'] / 50000) * 100
        print(f"\n💰 相比全量加载节省: {savings:.1f}%")
def main():
    """Command-line entry point: build (and optionally save) a chapter context.

    Parses the CLI arguments, loads the project state, builds the context
    for ``--chapter`` and either prints it (``--dry-run``) or writes it to
    ``--output``. Exits with status 1 when the state cannot be loaded.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="上下文滑动窗口管理器",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 为第 45 章构建上下文
  python context_manager.py --chapter 45 --output .webnovel/context_cache.json

  # 指定地点和角色
  python context_manager.py --chapter 45 --location "血煞秘境" --characters "李雪,血煞门主"

  # Dry-run 模式(预览 Token 消耗)
  python context_manager.py --chapter 45 --dry-run
        """
    )
    parser.add_argument('--chapter', type=int, required=True, help='章节号')
    parser.add_argument('--location', help='主角所在地点(可选)')
    parser.add_argument('--characters', help='出场角色列表(逗号分隔)')
    parser.add_argument('--output', default='.webnovel/context_cache.json', help='输出文件路径')
    parser.add_argument('--project-root', default='.', help='项目根目录')
    parser.add_argument('--dry-run', action='store_true', help='预览模式,不保存文件')
    args = parser.parse_args()

    # Split on half- and full-width commas and drop blank entries: an empty
    # name is a substring of every text and would match all foreshadowing.
    characters = None
    if args.characters:
        names = [name.strip() for name in re.split(r"[,,]", args.characters)]
        characters = [name for name in names if name] or None

    manager = ContextManager(args.project_root)
    if not manager.load_state():
        sys.exit(1)

    print(f"📖 正在为第 {args.chapter} 章构建上下文...")
    context = manager.build_context(args.chapter, args.location, characters)

    if args.dry_run:
        print("\n⚠️ Dry-run 模式,不保存文件")
        print("\n📄 上下文预览:")
        print(json.dumps(context, ensure_ascii=False, indent=2))
    else:
        manager.save_context(context, args.output)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|