context_ranker.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Context ranker for Context Contract v2.
  5. Goals:
  6. - Prefer recency while keeping frequent entities stable.
  7. - Prioritize high-signal hook/alert items.
  8. - Keep output shape backward compatible (same keys, re-ordered lists).
  9. """
  10. from __future__ import annotations
  11. import math
  12. from typing import Any, Dict, List, Optional
  13. from .config import get_config
  14. class ContextRanker:
  15. """Rank context-pack sections with lightweight deterministic heuristics."""
  16. SUMMARY_HOOK_HINTS = ("?", "?", "悬念", "钩子", "反转", "冲突")
  17. def __init__(self, config=None):
  18. self.config = config or get_config()
  19. def rank_pack(self, pack: Dict[str, Any], chapter: int) -> Dict[str, Any]:
  20. ranked = dict(pack)
  21. core = dict(ranked.get("core") or {})
  22. core["recent_summaries"] = self.rank_recent_summaries(core.get("recent_summaries") or [], chapter)
  23. core["recent_meta"] = self.rank_recent_meta(core.get("recent_meta") or [], chapter)
  24. ranked["core"] = core
  25. scene = dict(ranked.get("scene") or {})
  26. scene["appearing_characters"] = self.rank_appearances(scene.get("appearing_characters") or [], chapter)
  27. ranked["scene"] = scene
  28. ranked["story_skeleton"] = self.rank_story_skeleton(ranked.get("story_skeleton") or [], chapter)
  29. alerts = dict(ranked.get("alerts") or {})
  30. alerts["disambiguation_warnings"] = self.rank_alerts(alerts.get("disambiguation_warnings") or [], chapter)
  31. alerts["disambiguation_pending"] = self.rank_alerts(alerts.get("disambiguation_pending") or [], chapter)
  32. ranked["alerts"] = alerts
  33. meta = dict(ranked.get("meta") or {})
  34. meta.setdefault("context_contract_version", "v2")
  35. meta["ranker"] = {
  36. "enabled": True,
  37. "recency_weight": float(self.config.context_ranker_recency_weight),
  38. "frequency_weight": float(self.config.context_ranker_frequency_weight),
  39. "hook_bonus": float(self.config.context_ranker_hook_bonus),
  40. }
  41. ranked["meta"] = meta
  42. return ranked
  43. def rank_recent_summaries(self, items: List[Dict[str, Any]], current_chapter: int) -> List[Dict[str, Any]]:
  44. scored = []
  45. for raw in items:
  46. item = dict(raw)
  47. chapter = self._as_int(item.get("chapter"))
  48. summary = str(item.get("summary") or "")
  49. recency = self._recency_score(chapter, current_chapter)
  50. frequency = self._length_score(summary)
  51. hook_bonus = float(self.config.context_ranker_hook_bonus) if self._has_hook_hint(summary) else 0.0
  52. score = self._combine_score(recency, frequency, hook_bonus)
  53. scored.append(self._with_debug_score(item, score, recency, frequency, hook_bonus))
  54. scored.sort(key=lambda row: row[0], reverse=True)
  55. return [row[1] for row in scored]
  56. def rank_recent_meta(self, items: List[Dict[str, Any]], current_chapter: int) -> List[Dict[str, Any]]:
  57. scored = []
  58. for raw in items:
  59. item = dict(raw)
  60. chapter = self._as_int(item.get("chapter"))
  61. hook = str(item.get("hook") or "")
  62. hook_bonus = float(self.config.context_ranker_hook_bonus) if hook else 0.0
  63. recency = self._recency_score(chapter, current_chapter)
  64. frequency = self._length_score(hook)
  65. score = self._combine_score(recency, frequency, hook_bonus)
  66. scored.append(self._with_debug_score(item, score, recency, frequency, hook_bonus))
  67. scored.sort(key=lambda row: row[0], reverse=True)
  68. return [row[1] for row in scored]
  69. def rank_appearances(self, items: List[Dict[str, Any]], current_chapter: int) -> List[Dict[str, Any]]:
  70. scored = []
  71. for raw in items:
  72. item = dict(raw)
  73. last_chapter = self._as_int(item.get("last_chapter") or item.get("chapter"))
  74. total = self._as_int(item.get("total")) or 0
  75. warning_penalty = 0.15 if item.get("warning") else 0.0
  76. recency = self._recency_score(last_chapter, current_chapter)
  77. frequency = self._frequency_score(total)
  78. score = self._combine_score(recency, frequency, 0.0) - warning_penalty
  79. scored.append(self._with_debug_score(item, score, recency, frequency, -warning_penalty))
  80. scored.sort(key=lambda row: row[0], reverse=True)
  81. return [row[1] for row in scored]
  82. def rank_story_skeleton(self, items: List[Dict[str, Any]], current_chapter: int) -> List[Dict[str, Any]]:
  83. scored = []
  84. for raw in items:
  85. item = dict(raw)
  86. chapter = self._as_int(item.get("chapter"))
  87. summary = str(item.get("summary") or "")
  88. recency = self._recency_score(chapter, current_chapter)
  89. frequency = self._length_score(summary)
  90. score = self._combine_score(recency, frequency, 0.0)
  91. scored.append(self._with_debug_score(item, score, recency, frequency, 0.0))
  92. scored.sort(key=lambda row: row[0], reverse=True)
  93. return [row[1] for row in scored]
  94. def rank_alerts(self, alerts: List[Any], current_chapter: int) -> List[Any]:
  95. scored = []
  96. keywords = tuple(self.config.context_ranker_alert_critical_keywords)
  97. for raw in alerts:
  98. if isinstance(raw, dict):
  99. item: Any = dict(raw)
  100. chapter = self._as_int(item.get("chapter"))
  101. text = str(item.get("message") or item.get("content") or json_safe(item))
  102. severity = str(item.get("severity") or "").lower()
  103. critical_bonus = 0.3 if severity in {"critical", "high"} else 0.0
  104. else:
  105. item = raw
  106. chapter = None
  107. text = str(raw)
  108. critical_bonus = 0.0
  109. recency = self._recency_score(chapter, current_chapter)
  110. keyword_bonus = 0.3 if any(word and word in text for word in keywords) else 0.0
  111. score = recency + critical_bonus + keyword_bonus
  112. if isinstance(item, dict):
  113. scored.append(self._with_debug_score(item, score, recency, critical_bonus, keyword_bonus))
  114. else:
  115. scored.append((score, item))
  116. scored.sort(key=lambda row: row[0], reverse=True)
  117. return [row[1] for row in scored]
  118. def _combine_score(self, recency: float, frequency: float, bonus: float) -> float:
  119. return (
  120. recency * float(self.config.context_ranker_recency_weight)
  121. + frequency * float(self.config.context_ranker_frequency_weight)
  122. + bonus
  123. )
  124. def _recency_score(self, source_chapter: Optional[int], current_chapter: int) -> float:
  125. if source_chapter is None:
  126. return 0.0
  127. gap = max(0, int(current_chapter) - int(source_chapter))
  128. return 1.0 / (1.0 + gap)
  129. def _frequency_score(self, total: int) -> float:
  130. if total <= 0:
  131. return 0.0
  132. # log scale to avoid over-favoring very frequent entities
  133. return min(1.0, math.log(1.0 + float(total)) / math.log(11.0))
  134. def _length_score(self, text: str) -> float:
  135. if not text:
  136. return 0.0
  137. ratio = min(len(text) / 1200.0, 1.0)
  138. cap = float(self.config.context_ranker_length_bonus_cap)
  139. return ratio * cap
  140. def _has_hook_hint(self, text: str) -> bool:
  141. return any(token in text for token in self.SUMMARY_HOOK_HINTS)
  142. def _as_int(self, value: Any) -> Optional[int]:
  143. if value is None:
  144. return None
  145. try:
  146. return int(value)
  147. except (TypeError, ValueError):
  148. return None
  149. def _with_debug_score(
  150. self,
  151. item: Dict[str, Any],
  152. score: float,
  153. recency: float,
  154. frequency: float,
  155. bonus: float,
  156. ) -> tuple[float, Dict[str, Any]]:
  157. if getattr(self.config, "context_ranker_debug", False):
  158. item["_context_score"] = round(score, 6)
  159. item["_context_score_detail"] = {
  160. "recency": round(recency, 6),
  161. "frequency": round(frequency, 6),
  162. "bonus": round(bonus, 6),
  163. }
  164. return score, item
  165. def json_safe(value: Any) -> str:
  166. try:
  167. import json
  168. return json.dumps(value, ensure_ascii=False)
  169. except Exception:
  170. return str(value)