| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Context ranker for Context Contract v2.
- Goals:
- - Prefer recency while keeping frequent entities stable.
- - Prioritize high-signal hook/alert items.
- - Keep output shape backward compatible (same keys, re-ordered lists).
- """
- from __future__ import annotations
- import math
- from typing import Any, Dict, List, Optional
- from .config import get_config
class ContextRanker:
    """Rank context-pack sections with lightweight deterministic heuristics.

    Every ``rank_*`` method works on shallow copies of the incoming dict
    items, so the caller's dicts are never mutated, and returns a new list
    ordered from highest to lowest score. Items with equal scores keep
    their input order because the underlying sort is stable.

    The weights and bonuses are read from ``self.config`` attributes named
    ``context_ranker_*``.
    """

    # Substrings that mark a summary as carrying a hook. The tuple holds the
    # ASCII question mark, the full-width question mark (U+FF1F), and the
    # Chinese words for suspense, hook, reversal and conflict. The full-width
    # mark is written as an escape so it survives encoding round-trips.
    SUMMARY_HOOK_HINTS = ("?", "\uff1f", "悬念", "钩子", "反转", "冲突")

    def __init__(self, config=None):
        """Store *config*, falling back to ``get_config()`` when it is falsy."""
        self.config = config or get_config()

    def rank_pack(self, pack: Dict[str, Any], chapter: int) -> Dict[str, Any]:
        """Return a shallow copy of *pack* with its list sections re-ordered.

        Re-ranked sections: ``core.recent_summaries``, ``core.recent_meta``,
        ``scene.appearing_characters``, ``story_skeleton`` and the two
        ``alerts.disambiguation_*`` lists. Missing or falsy sections are
        treated as empty. ``meta.ranker`` is overwritten with the active
        weights, and ``meta.context_contract_version`` defaults to ``"v2"``.

        Args:
            pack: The context pack to rank; it is not modified.
            chapter: The current chapter number used for recency scoring.

        Returns:
            A new dict with the same keys as *pack* plus the filled-in
            ``core``, ``scene``, ``story_skeleton``, ``alerts`` and ``meta``.
        """
        ranked = dict(pack)

        core = dict(ranked.get("core") or {})
        core["recent_summaries"] = self.rank_recent_summaries(
            core.get("recent_summaries") or [], chapter
        )
        core["recent_meta"] = self.rank_recent_meta(core.get("recent_meta") or [], chapter)
        ranked["core"] = core

        scene = dict(ranked.get("scene") or {})
        scene["appearing_characters"] = self.rank_appearances(
            scene.get("appearing_characters") or [], chapter
        )
        ranked["scene"] = scene

        ranked["story_skeleton"] = self.rank_story_skeleton(
            ranked.get("story_skeleton") or [], chapter
        )

        alerts = dict(ranked.get("alerts") or {})
        alerts["disambiguation_warnings"] = self.rank_alerts(
            alerts.get("disambiguation_warnings") or [], chapter
        )
        alerts["disambiguation_pending"] = self.rank_alerts(
            alerts.get("disambiguation_pending") or [], chapter
        )
        ranked["alerts"] = alerts

        meta = dict(ranked.get("meta") or {})
        meta.setdefault("context_contract_version", "v2")
        meta["ranker"] = {
            "enabled": True,
            "recency_weight": float(self.config.context_ranker_recency_weight),
            "frequency_weight": float(self.config.context_ranker_frequency_weight),
            "hook_bonus": float(self.config.context_ranker_hook_bonus),
        }
        ranked["meta"] = meta
        return ranked

    def rank_recent_summaries(
        self, items: List[Dict[str, Any]], current_chapter: int
    ) -> List[Dict[str, Any]]:
        """Order summaries by recency, summary length, and a hook bonus.

        The hook bonus is granted when the ``summary`` text contains any of
        ``SUMMARY_HOOK_HINTS``.
        """
        scored = []
        for raw in items:
            item = dict(raw)
            chapter = self._as_int(item.get("chapter"))
            summary = str(item.get("summary") or "")
            recency = self._recency_score(chapter, current_chapter)
            frequency = self._length_score(summary)
            hook_bonus = (
                float(self.config.context_ranker_hook_bonus)
                if self._has_hook_hint(summary)
                else 0.0
            )
            score = self._combine_score(recency, frequency, hook_bonus)
            scored.append(self._with_debug_score(item, score, recency, frequency, hook_bonus))
        return self._sorted_by_score(scored)

    def rank_recent_meta(
        self, items: List[Dict[str, Any]], current_chapter: int
    ) -> List[Dict[str, Any]]:
        """Order chapter meta entries by recency, hook length, and a hook bonus.

        The hook bonus is granted whenever the ``hook`` field is non-empty.
        """
        scored = []
        for raw in items:
            item = dict(raw)
            chapter = self._as_int(item.get("chapter"))
            hook = str(item.get("hook") or "")
            hook_bonus = float(self.config.context_ranker_hook_bonus) if hook else 0.0
            recency = self._recency_score(chapter, current_chapter)
            frequency = self._length_score(hook)
            score = self._combine_score(recency, frequency, hook_bonus)
            scored.append(self._with_debug_score(item, score, recency, frequency, hook_bonus))
        return self._sorted_by_score(scored)

    def rank_appearances(
        self, items: List[Dict[str, Any]], current_chapter: int
    ) -> List[Dict[str, Any]]:
        """Order character appearances by recency and appearance count.

        The chapter is taken from ``last_chapter``, falling back to
        ``chapter`` when that is falsy. Entries with a truthy ``warning``
        field are demoted by a fixed 0.15 penalty.
        """
        scored = []
        for raw in items:
            item = dict(raw)
            last_chapter = self._as_int(item.get("last_chapter") or item.get("chapter"))
            total = self._as_int(item.get("total")) or 0
            warning_penalty = 0.15 if item.get("warning") else 0.0
            recency = self._recency_score(last_chapter, current_chapter)
            frequency = self._frequency_score(total)
            score = self._combine_score(recency, frequency, 0.0) - warning_penalty
            # The penalty is reported as a negative bonus in the debug detail.
            scored.append(
                self._with_debug_score(item, score, recency, frequency, -warning_penalty)
            )
        return self._sorted_by_score(scored)

    def rank_story_skeleton(
        self, items: List[Dict[str, Any]], current_chapter: int
    ) -> List[Dict[str, Any]]:
        """Order story-skeleton entries by recency and summary length."""
        scored = []
        for raw in items:
            item = dict(raw)
            chapter = self._as_int(item.get("chapter"))
            summary = str(item.get("summary") or "")
            recency = self._recency_score(chapter, current_chapter)
            frequency = self._length_score(summary)
            score = self._combine_score(recency, frequency, 0.0)
            scored.append(self._with_debug_score(item, score, recency, frequency, 0.0))
        return self._sorted_by_score(scored)

    def rank_alerts(self, alerts: List[Any], current_chapter: int) -> List[Any]:
        """Order alerts by recency, severity, and critical keywords.

        Dict alerts are copied and scored on their ``chapter``, their
        ``severity`` (0.3 bonus for ``critical``/``high``, case-insensitive)
        and their text, taken from ``message``, then ``content``, then a
        JSON dump of the whole item. Non-dict alerts are passed through
        unchanged; they have no chapter, so only the keyword bonus applies.
        Unlike the other rankers, the recency term is not weighted here.

        Args:
            alerts: Alert entries, either dicts or arbitrary values.
            current_chapter: The current chapter number used for recency.

        Returns:
            The alerts, highest score first.
        """
        # Tolerate an unset (None) keyword list instead of failing in tuple().
        keywords = tuple(self.config.context_ranker_alert_critical_keywords or ())
        scored = []
        for raw in alerts:
            if isinstance(raw, dict):
                item: Any = dict(raw)
                chapter = self._as_int(item.get("chapter"))
                text = str(item.get("message") or item.get("content") or json_safe(item))
                severity = str(item.get("severity") or "").lower()
                critical_bonus = 0.3 if severity in {"critical", "high"} else 0.0
            else:
                item = raw
                chapter = None
                text = str(raw)
                critical_bonus = 0.0

            recency = self._recency_score(chapter, current_chapter)
            # Empty keywords are skipped: "" is a substring of every text.
            keyword_bonus = 0.3 if any(word and word in text for word in keywords) else 0.0
            score = recency + critical_bonus + keyword_bonus

            if isinstance(item, dict):
                # In the debug detail the "frequency" slot carries the
                # severity bonus and the "bonus" slot the keyword bonus.
                scored.append(
                    self._with_debug_score(item, score, recency, critical_bonus, keyword_bonus)
                )
            else:
                scored.append((score, item))
        return self._sorted_by_score(scored)

    @staticmethod
    def _sorted_by_score(scored: List[Any]) -> List[Any]:
        """Return the items of ``(score, item)`` pairs, highest score first.

        The sort is stable, also with ``reverse=True``, so pairs with equal
        scores keep their relative input order.
        """
        ordered = sorted(scored, key=lambda row: row[0], reverse=True)
        return [row[1] for row in ordered]

    def _combine_score(self, recency: float, frequency: float, bonus: float) -> float:
        """Return the configured weighted sum of the parts plus *bonus*."""
        return (
            recency * float(self.config.context_ranker_recency_weight)
            + frequency * float(self.config.context_ranker_frequency_weight)
            + bonus
        )

    def _recency_score(self, source_chapter: Optional[int], current_chapter: int) -> float:
        """Return ``1 / (1 + gap)``, where gap is the chapter distance.

        An unknown chapter (``None``) scores 0.0. A source chapter at or
        after the current one has its gap clamped to 0 and scores 1.0.
        """
        if source_chapter is None:
            return 0.0
        gap = max(0, int(current_chapter) - int(source_chapter))
        return 1.0 / (1.0 + gap)

    def _frequency_score(self, total: int) -> float:
        """Return a log-scaled score in [0, 1] that saturates at 10 appearances."""
        if total <= 0:
            return 0.0
        # log scale to avoid over-favoring very frequent entities
        return min(1.0, math.log(1.0 + float(total)) / math.log(11.0))

    def _length_score(self, text: str) -> float:
        """Return a score proportional to ``len(text)``, capped at 1200 chars.

        The result is scaled by ``config.context_ranker_length_bonus_cap``.
        """
        if not text:
            return 0.0
        ratio = min(len(text) / 1200.0, 1.0)
        cap = float(self.config.context_ranker_length_bonus_cap)
        return ratio * cap

    def _has_hook_hint(self, text: str) -> bool:
        """Return True when *text* contains any of ``SUMMARY_HOOK_HINTS``."""
        return any(token in text for token in self.SUMMARY_HOOK_HINTS)

    def _as_int(self, value: Any) -> Optional[int]:
        """Return ``int(value)``, or ``None`` when it cannot be converted."""
        if value is None:
            return None
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers float infinities, which int() rejects.
            return None

    def _with_debug_score(
        self,
        item: Dict[str, Any],
        score: float,
        recency: float,
        frequency: float,
        bonus: float,
    ) -> tuple[float, Dict[str, Any]]:
        """Return ``(score, item)``, annotating *item* when debug is enabled.

        When ``config.context_ranker_debug`` is truthy, *item* is mutated in
        place to carry ``_context_score`` and ``_context_score_detail``.
        Callers pass a copy, so the caller's original dict is untouched.
        """
        if getattr(self.config, "context_ranker_debug", False):
            item["_context_score"] = round(score, 6)
            item["_context_score_detail"] = {
                "recency": round(recency, 6),
                "frequency": round(frequency, 6),
                "bonus": round(bonus, 6),
            }
        return score, item
def json_safe(value: Any) -> str:
    """Return *value* as JSON text, or ``str(value)`` if it cannot be encoded.

    Non-ASCII characters are emitted as-is (``ensure_ascii=False``) rather
    than as ``\\uXXXX`` escapes.

    Args:
        value: Any object.

    Returns:
        The JSON encoding of *value*, or its ``str()`` form as a fallback.
    """
    import json

    try:
        return json.dumps(value, ensure_ascii=False)
    except Exception:
        # Deliberate best-effort: any encoding failure (e.g. unsupported
        # types, circular references) falls back to the plain string form
        # instead of raising.
        return str(value)
|