#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data Modules - configuration.

API settings are read from environment variables:
- EMBED_BASE_URL, EMBED_MODEL, EMBED_API_KEY
- RERANK_BASE_URL, RERANK_MODEL, RERANK_API_KEY
"""
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
  13. @dataclass
  14. class DataModulesConfig:
  15. """数据模块配置"""
  16. # ================= 项目路径 =================
  17. project_root: Path = field(default_factory=lambda: Path.cwd())
  18. @property
  19. def webnovel_dir(self) -> Path:
  20. return self.project_root / ".webnovel"
  21. @property
  22. def state_file(self) -> Path:
  23. return self.webnovel_dir / "state.json"
  24. @property
  25. def index_db(self) -> Path:
  26. return self.webnovel_dir / "index.db"
  27. @property
  28. def alias_index_file(self) -> Path:
  29. return self.webnovel_dir / "alias_index.json"
  30. @property
  31. def chapters_dir(self) -> Path:
  32. return self.project_root / "正文"
  33. @property
  34. def settings_dir(self) -> Path:
  35. return self.project_root / "设定集"
  36. @property
  37. def outline_dir(self) -> Path:
  38. return self.project_root / "大纲"
  39. # ================= Embedding API 配置 =================
  40. embed_api_type: str = "openai"
  41. embed_base_url: str = field(default_factory=lambda: os.getenv("EMBED_BASE_URL", "https://api-inference.modelscope.cn/v1"))
  42. embed_model: str = field(default_factory=lambda: os.getenv("EMBED_MODEL", "Qwen/Qwen3-Embedding-8B"))
  43. embed_api_key: str = field(default_factory=lambda: os.getenv("EMBED_API_KEY", ""))
  44. @property
  45. def embed_url(self) -> str:
  46. return self.embed_base_url
  47. # ================= Rerank API 配置 =================
  48. rerank_api_type: str = "openai"
  49. rerank_base_url: str = field(default_factory=lambda: os.getenv("RERANK_BASE_URL", "https://api.jina.ai/v1"))
  50. rerank_model: str = field(default_factory=lambda: os.getenv("RERANK_MODEL", "jina-reranker-v3"))
  51. rerank_api_key: str = field(default_factory=lambda: os.getenv("RERANK_API_KEY", ""))
  52. @property
  53. def rerank_url(self) -> str:
  54. return self.rerank_base_url
  55. # ================= 并发配置 =================
  56. embed_concurrency: int = 64
  57. rerank_concurrency: int = 32
  58. embed_batch_size: int = 64
  59. # ================= 超时配置 =================
  60. cold_start_timeout: int = 300
  61. normal_timeout: int = 180
  62. # ================= 检索配置 =================
  63. vector_top_k: int = 30
  64. bm25_top_k: int = 20
  65. rerank_top_n: int = 10
  66. rrf_k: int = 60
  67. vector_full_scan_max_vectors: int = 500
  68. vector_prefilter_bm25_candidates: int = 200
  69. vector_prefilter_recent_candidates: int = 200
  70. # ================= 实体提取配置 =================
  71. extraction_confidence_high: float = 0.8
  72. extraction_confidence_medium: float = 0.5
  73. # ================= 列表截断限制 =================
  74. max_disambiguation_warnings: int = 500
  75. max_disambiguation_pending: int = 1000
  76. max_state_changes: int = 2000
  77. context_recent_summaries_window: int = 5
  78. context_alerts_slice: int = 10
  79. context_max_appearing_characters: int = 10
  80. context_max_urgent_foreshadowing: int = 5
  81. export_recent_changes_slice: int = 20
  82. export_disambiguation_slice: int = 20
  83. # ================= 查询默认限制 =================
  84. query_recent_chapters_limit: int = 10
  85. query_scenes_by_location_limit: int = 20
  86. query_entity_appearances_limit: int = 50
  87. query_recent_appearances_limit: int = 20
  88. # ================= 伏笔紧急度 =================
  89. foreshadowing_urgency_pending_high: int = 100
  90. foreshadowing_urgency_pending_medium: int = 50
  91. foreshadowing_urgency_target_proximity: int = 5
  92. foreshadowing_urgency_score_high: int = 100
  93. foreshadowing_urgency_score_medium: int = 60
  94. foreshadowing_urgency_score_target: int = 80
  95. foreshadowing_urgency_score_low: int = 20
  96. foreshadowing_urgency_threshold_show: int = 60
  97. foreshadowing_tier_weight_core: float = 3.0
  98. foreshadowing_tier_weight_sub: float = 2.0
  99. foreshadowing_tier_weight_decor: float = 1.0
  100. # ================= 角色活跃度 =================
  101. character_absence_warning: int = 30
  102. character_absence_critical: int = 100
  103. character_candidates_limit: int = 800
  104. # ================= Strand Weave 节奏 =================
  105. strand_quest_max_consecutive: int = 5
  106. strand_fire_max_gap: int = 10
  107. strand_constellation_max_gap: int = 15
  108. strand_quest_ratio_min: int = 55
  109. strand_quest_ratio_max: int = 65
  110. strand_fire_ratio_min: int = 20
  111. strand_fire_ratio_max: int = 30
  112. strand_constellation_ratio_min: int = 10
  113. strand_constellation_ratio_max: int = 20
  114. # ================= 爽点节奏 =================
  115. pacing_segment_size: int = 100
  116. pacing_words_per_point_excellent: int = 1000
  117. pacing_words_per_point_good: int = 1500
  118. pacing_words_per_point_acceptable: int = 2000
  119. # ================= RAG 存储 =================
  120. @property
  121. def rag_db(self) -> Path:
  122. return self.webnovel_dir / "rag.db"
  123. @property
  124. def vector_db(self) -> Path:
  125. return self.webnovel_dir / "vectors.db"
  126. def ensure_dirs(self):
  127. self.webnovel_dir.mkdir(parents=True, exist_ok=True)
  128. @classmethod
  129. def from_project_root(cls, project_root: str | Path) -> "DataModulesConfig":
  130. return cls(project_root=Path(project_root))
  131. _default_config: Optional[DataModulesConfig] = None
  132. def get_config(project_root: Optional[Path] = None) -> DataModulesConfig:
  133. global _default_config
  134. if project_root is not None:
  135. return DataModulesConfig.from_project_root(project_root)
  136. if _default_config is None:
  137. _default_config = DataModulesConfig()
  138. return _default_config
  139. def set_project_root(project_root: str | Path):
  140. global _default_config
  141. _default_config = DataModulesConfig.from_project_root(project_root)