config.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Data Modules - 配置文件
  5. API 配置通过环境变量读取(支持 .env 文件):
  6. - EMBED_BASE_URL, EMBED_MODEL, EMBED_API_KEY
  7. - RERANK_BASE_URL, RERANK_MODEL, RERANK_API_KEY
  8. """
  9. import os
  10. from pathlib import Path
  11. from dataclasses import dataclass, field
  12. from typing import Optional
  13. from runtime_compat import normalize_windows_path
  14. from .context_weights import TEMPLATE_WEIGHTS_DYNAMIC_DEFAULT
  15. def _get_user_claude_root() -> Path:
  16. raw = os.environ.get("WEBNOVEL_CLAUDE_HOME") or os.environ.get("CLAUDE_HOME")
  17. if raw:
  18. try:
  19. return normalize_windows_path(raw).expanduser().resolve()
  20. except Exception:
  21. return normalize_windows_path(raw).expanduser()
  22. return (Path.home() / ".claude").resolve()
  23. def _load_dotenv_file(env_path: Path, *, override: bool = False) -> bool:
  24. if not env_path.exists():
  25. return False
  26. try:
  27. with open(env_path, "r", encoding="utf-8") as f:
  28. for line in f:
  29. line = line.strip()
  30. if line and not line.startswith("#") and "=" in line:
  31. key, _, value = line.partition("=")
  32. key = key.strip()
  33. value = value.strip()
  34. if not key:
  35. continue
  36. # 默认不覆盖已有环境变量(保持“显式 > .env”优先级)
  37. if override or key not in os.environ:
  38. os.environ[key] = value
  39. return True
  40. except Exception:
  41. return False
  42. def _load_dotenv():
  43. """
  44. 加载 .env 文件(best-effort)。
  45. 约定:
  46. - 项目级 `.env`(当前工作目录下)优先;
  47. - 全局 `.env` 作为兜底:`~/.claude/webnovel-writer/.env`
  48. """
  49. # 1) 当前目录(常见:用户从项目根目录执行)
  50. _load_dotenv_file(Path.cwd() / ".env", override=False)
  51. # 2) 用户级全局(常见:skills/agents 全局安装,API key 放这里最省心)
  52. global_env = _get_user_claude_root() / "webnovel-writer" / ".env"
  53. _load_dotenv_file(global_env, override=False)
  54. def _load_project_dotenv(project_root: Path) -> None:
  55. """
  56. 加载某个项目根目录下的 `.env`(best-effort)。
  57. 注意:不覆盖已存在环境变量,避免意外串台。
  58. """
  59. try:
  60. _load_dotenv_file(Path(project_root) / ".env", override=False)
  61. except Exception:
  62. return
# Import-time side effect: load the working-directory `.env` and then the
# user-level `.env` so later os.getenv() lookups can see their values.
_load_dotenv()
  64. def _default_context_template_weights_dynamic() -> dict[str, dict[str, dict[str, float]]]:
  65. return {
  66. stage: {
  67. template: dict(weights)
  68. for template, weights in templates.items()
  69. }
  70. for stage, templates in TEMPLATE_WEIGHTS_DYNAMIC_DEFAULT.items()
  71. }
@dataclass
class DataModulesConfig:
    """Data modules configuration.

    Holds the project root (all storage paths are derived from it through
    read-only properties), the embedding / rerank API settings whose
    defaults are read from environment variables when an instance is
    created, and a flat list of tunable limits, weights and thresholds.
    """
    # ================= Project paths =================
    # Defaults to the current working directory at instantiation time.
    project_root: Path = field(default_factory=lambda: Path.cwd())

    @property
    def webnovel_dir(self) -> Path:
        """Return the `.webnovel` directory under the project root."""
        return self.project_root / ".webnovel"

    @property
    def state_file(self) -> Path:
        """Return the path of `state.json` inside `webnovel_dir`."""
        return self.webnovel_dir / "state.json"

    @property
    def scratchpad_file(self) -> Path:
        """Return the path of `memory_scratchpad.json` inside `webnovel_dir`."""
        return self.webnovel_dir / "memory_scratchpad.json"

    @property
    def index_db(self) -> Path:
        """Return the path of `index.db` inside `webnovel_dir`."""
        return self.webnovel_dir / "index.db"

    # Since v5.1: alias_index_file is deprecated; aliases are stored in the
    # `aliases` table of index.db.
    @property
    def chapters_dir(self) -> Path:
        """Return the chapter text directory under the project root."""
        return self.project_root / "正文"

    @property
    def settings_dir(self) -> Path:
        """Return the setting collection directory under the project root."""
        return self.project_root / "设定集"

    @property
    def outline_dir(self) -> Path:
        """Return the outline directory under the project root."""
        return self.project_root / "大纲"

    @property
    def story_system_dir(self) -> Path:
        """Return the `.story-system` directory under the project root."""
        return self.project_root / ".story-system"

    @property
    def story_system_chapters_dir(self) -> Path:
        """Return the `chapters` directory inside `story_system_dir`."""
        return self.story_system_dir / "chapters"

    @property
    def story_system_master_json(self) -> Path:
        """Return the path of `MASTER_SETTING.json` inside `story_system_dir`."""
        return self.story_system_dir / "MASTER_SETTING.json"

    @property
    def story_system_anti_patterns_json(self) -> Path:
        """Return the path of `anti_patterns.json` inside `story_system_dir`."""
        return self.story_system_dir / "anti_patterns.json"

    # ================= Embedding API configuration =================
    embed_api_type: str = "openai"
    # The three fields below read EMBED_BASE_URL / EMBED_MODEL / EMBED_API_KEY
    # from the environment each time an instance is created.
    embed_base_url: str = field(default_factory=lambda: os.getenv("EMBED_BASE_URL", "https://api-inference.modelscope.cn/v1"))
    embed_model: str = field(default_factory=lambda: os.getenv("EMBED_MODEL", "Qwen/Qwen3-Embedding-8B"))
    embed_api_key: str = field(default_factory=lambda: os.getenv("EMBED_API_KEY", ""))

    @property
    def embed_url(self) -> str:
        """Return the embedding endpoint URL (currently `embed_base_url` unchanged)."""
        return self.embed_base_url

    # ================= Rerank API configuration =================
    rerank_api_type: str = "openai"
    # The three fields below read RERANK_BASE_URL / RERANK_MODEL /
    # RERANK_API_KEY from the environment each time an instance is created.
    rerank_base_url: str = field(default_factory=lambda: os.getenv("RERANK_BASE_URL", "https://api.jina.ai/v1"))
    rerank_model: str = field(default_factory=lambda: os.getenv("RERANK_MODEL", "jina-reranker-v3"))
    rerank_api_key: str = field(default_factory=lambda: os.getenv("RERANK_API_KEY", ""))

    @property
    def rerank_url(self) -> str:
        """Return the rerank endpoint URL (currently `rerank_base_url` unchanged)."""
        return self.rerank_base_url

    # ================= Concurrency configuration =================
    embed_concurrency: int = 64
    rerank_concurrency: int = 32
    embed_batch_size: int = 64

    # ================= Timeout configuration =================
    # NOTE(review): units are not stated here; presumably seconds - confirm.
    cold_start_timeout: int = 300
    normal_timeout: int = 180

    # ================= Retry configuration =================
    api_max_retries: int = 3  # maximum number of retries
    api_retry_delay: float = 1.0  # initial retry delay in seconds; exponential backoff is used

    # ================= Retrieval configuration =================
    vector_top_k: int = 30
    bm25_top_k: int = 20
    rerank_top_n: int = 10
    rrf_k: int = 60
    vector_full_scan_max_vectors: int = 500
    vector_prefilter_bm25_candidates: int = 200
    vector_prefilter_recent_candidates: int = 200

    # ================= Graph-RAG configuration =================
    graph_rag_enabled: bool = False
    graph_rag_expand_hops: int = 1
    graph_rag_max_expanded_entities: int = 30
    graph_rag_candidate_limit: int = 150
    graph_rag_boost_same_entity: float = 0.2
    graph_rag_boost_related_entity: float = 0.1
    graph_rag_boost_recency: float = 0.05
    relationship_graph_from_index_enabled: bool = True

    # ================= Entity extraction configuration =================
    extraction_confidence_high: float = 0.8
    extraction_confidence_medium: float = 0.5

    # ================= List truncation limits =================
    max_disambiguation_warnings: int = 500
    max_disambiguation_pending: int = 1000
    max_state_changes: int = 2000
    context_recent_summaries_window: int = 3
    context_recent_meta_window: int = 3
    context_alerts_slice: int = 10
    context_max_appearing_characters: int = 10
    context_max_urgent_foreshadowing: int = 5
    context_story_skeleton_interval: int = 20
    context_story_skeleton_max_samples: int = 5
    context_story_skeleton_snippet_chars: int = 400
    context_extra_section_budget: int = 800
    context_ranker_enabled: bool = True
    context_ranker_recency_weight: float = 0.7
    context_ranker_frequency_weight: float = 0.3
    context_ranker_hook_bonus: float = 0.2
    context_ranker_length_bonus_cap: float = 0.2
    # Runtime keyword list (mixed Chinese / English); the literals must stay as-is.
    context_ranker_alert_critical_keywords: tuple[str, ...] = (
        "冲突",
        "矛盾",
        "critical",
        "break",
        "违规",
        "断裂",
    )
    context_ranker_debug: bool = False
    context_reader_signal_enabled: bool = True
    context_reader_signal_recent_limit: int = 5
    context_reader_signal_window_chapters: int = 20
    context_reader_signal_review_window: int = 5
    context_reader_signal_include_debt: bool = False
    context_genre_profile_enabled: bool = True
    context_genre_profile_max_refs: int = 8
    context_genre_profile_fallback: str = "shuangwen"
    context_compact_text_enabled: bool = True
    context_compact_min_budget: int = 120
    context_compact_head_ratio: float = 0.65
    context_writing_guidance_enabled: bool = True
    context_writing_guidance_max_items: int = 6
    context_writing_guidance_low_score_threshold: float = 75.0
    context_writing_guidance_hook_diversify: bool = True
    context_methodology_enabled: bool = True
    context_methodology_genre_whitelist: tuple[str, ...] = ("*",)
    context_methodology_label: str = "digital-serial-v1"
    context_writing_checklist_enabled: bool = True
    context_writing_checklist_min_items: int = 3
    context_writing_checklist_max_items: int = 6
    context_writing_checklist_default_weight: float = 1.0
    context_writing_score_persist_enabled: bool = True
    context_writing_score_include_reader_trend: bool = True
    context_writing_score_trend_window: int = 10
    context_rag_assist_enabled: bool = True
    context_rag_assist_top_k: int = 4
    context_rag_assist_min_outline_chars: int = 40
    context_rag_assist_max_query_chars: int = 120
    context_dynamic_budget_enabled: bool = True
    context_dynamic_budget_early_chapter: int = 30
    context_dynamic_budget_late_chapter: int = 120
    context_dynamic_budget_early_core_bonus: float = 0.08
    context_dynamic_budget_early_scene_bonus: float = 0.04
    context_dynamic_budget_late_global_bonus: float = 0.08
    context_dynamic_budget_late_scene_penalty: float = 0.06
    # A factory is used so every instance gets its own nested dict.
    context_template_weights_dynamic: dict[str, dict[str, dict[str, float]]] = field(
        default_factory=_default_context_template_weights_dynamic
    )
    context_genre_profile_support_composite: bool = True
    context_genre_profile_max_genres: int = 2
    # Separator characters for composite genre strings; includes both the
    # ASCII comma and full-width / CJK punctuation variants.
    context_genre_profile_separators: tuple[str, ...] = (
        "+",
        "/",
        "|",
        ",",
        ",",
        "、",
    )
    context_use_memory_orchestrator: bool = False
    memory_orchestrator_max_items: int = 30
    memory_orchestrator_recent_changes_limit: int = 10
    memory_orchestrator_source_window: int = 20
    memory_compactor_enabled: bool = True
    memory_compactor_threshold: int = 500
    export_recent_changes_slice: int = 20
    export_disambiguation_slice: int = 20

    # ================= Default query limits =================
    query_recent_chapters_limit: int = 10
    query_scenes_by_location_limit: int = 20
    query_entity_appearances_limit: int = 50
    query_recent_appearances_limit: int = 20

    # ================= Foreshadowing urgency =================
    foreshadowing_urgency_pending_high: int = 100
    foreshadowing_urgency_pending_medium: int = 50
    foreshadowing_urgency_target_proximity: int = 5
    foreshadowing_urgency_score_high: int = 100
    foreshadowing_urgency_score_medium: int = 60
    foreshadowing_urgency_score_target: int = 80
    foreshadowing_urgency_score_low: int = 20
    foreshadowing_urgency_threshold_show: int = 60
    foreshadowing_tier_weight_core: float = 3.0
    foreshadowing_tier_weight_sub: float = 2.0
    foreshadowing_tier_weight_decor: float = 1.0

    # ================= Character activity =================
    character_absence_warning: int = 30
    character_absence_critical: int = 100
    character_candidates_limit: int = 800

    # ================= Strand Weave pacing =================
    strand_quest_max_consecutive: int = 5
    strand_fire_max_gap: int = 10
    strand_constellation_max_gap: int = 15
    strand_quest_ratio_min: int = 55
    strand_quest_ratio_max: int = 65
    strand_fire_ratio_min: int = 20
    strand_fire_ratio_max: int = 30
    strand_constellation_ratio_min: int = 10
    strand_constellation_ratio_max: int = 20

    # ================= Payoff ("cool point") pacing =================
    pacing_segment_size: int = 100
    pacing_words_per_point_excellent: int = 1000
    pacing_words_per_point_good: int = 1500
    pacing_words_per_point_acceptable: int = 2000

    # ================= RAG storage =================
    @property
    def rag_db(self) -> Path:
        """Return the path of `rag.db` inside `webnovel_dir`."""
        return self.webnovel_dir / "rag.db"

    @property
    def vector_db(self) -> Path:
        """Return the path of `vectors.db` inside `webnovel_dir`."""
        return self.webnovel_dir / "vectors.db"

    def ensure_dirs(self) -> None:
        """Create `webnovel_dir` (and missing parents) if it does not exist."""
        self.webnovel_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_project_root(cls, project_root: str | Path) -> "DataModulesConfig":
        """Create a config bound to the given project root.

        The path is passed through ``normalize_windows_path``, has ``~``
        expanded and is resolved before use.

        Args:
            project_root: Project root directory as a string or ``Path``.

        Returns:
            A new ``DataModulesConfig`` whose ``project_root`` is the
            resolved path.
        """
        root = normalize_windows_path(project_root).expanduser().resolve()
        # Load the project-level `.env` before constructing the config so
        # that EMBED_*/RERANK_* and similar fields can take effect.
        _load_project_dotenv(root)
        return cls(project_root=root)
# Module-level cached default configuration; created lazily by get_config()
# and replaced by set_project_root().
_default_config: Optional[DataModulesConfig] = None
  293. def get_config(project_root: Optional[Path] = None) -> DataModulesConfig:
  294. global _default_config
  295. if project_root is not None:
  296. return DataModulesConfig.from_project_root(project_root)
  297. if _default_config is None:
  298. # 默认不要盲目以 CWD 作为 project_root(很容易写到错误目录)。
  299. # 使用统一的 project_locator 自动探测:
  300. # - 支持 WEBNOVEL_PROJECT_ROOT
  301. # - 支持 `.claude/.webnovel-current-project` 指针文件
  302. # - 支持从当前目录/父目录寻找 `.webnovel/state.json`
  303. from project_locator import resolve_project_root
  304. root = resolve_project_root()
  305. _default_config = DataModulesConfig.from_project_root(root)
  306. return _default_config
  307. def set_project_root(project_root: str | Path):
  308. global _default_config
  309. _default_config = DataModulesConfig.from_project_root(project_root)