Просмотр исходного кода

feat: add runtime state validator for update and reporting

lingfengQAQ 4 месяцев назад
Родитель
Коммит
782a694e15

+ 249 - 0
.claude/scripts/data_modules/state_validator.py

@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Runtime validators/normalizers for state.json sections.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Dict, List, Mapping, Optional, Sequence
+
+
+# Canonical status labels that normalize_foreshadowing_status() returns.
+FORESHADOWING_STATUS_PENDING = "未回收"
+FORESHADOWING_STATUS_RESOLVED = "已回收"
+
+# Canonical tier labels that normalize_foreshadowing_tier() returns;
+# the SUB tier is that function's default.
+FORESHADOWING_TIER_CORE = "核心"
+FORESHADOWING_TIER_SUB = "支线"
+FORESHADOWING_TIER_DECOR = "装饰"
+
+# Candidate keys, tried in this order, for the chapter stored as
+# "planted_chapter" by normalize_foreshadowing_item().
+FORESHADOWING_PLANTED_KEYS = [
+    "planted_chapter",
+    "added_chapter",
+    "source_chapter",
+    "start_chapter",
+    "chapter",
+]
+
+# Candidate keys, tried in this order, for the chapter stored as
+# "target_chapter" by normalize_foreshadowing_item().
+FORESHADOWING_TARGET_KEYS = [
+    "target_chapter",
+    "due_chapter",
+    "deadline_chapter",
+    "resolve_by_chapter",
+    "target",
+]
+
+# Accepted spellings of each status. The ASCII entries are lowercase because
+# they are compared against both the raw text and its lowercased form.
+_PENDING_STATUS_TEXT = {"未回收", "待回收", "进行中", "未解决", "pending", "active"}
+_RESOLVED_STATUS_TEXT = {"已回收", "已完成", "已解决", "完成", "resolved", "done", "complete"}
+
+# Accepted spellings of the core and decor tiers; anything else maps to the
+# caller-supplied default tier.
+_TIER_CORE_TEXT = {"核心", "主线", "core", "main"}
+_TIER_DECOR_TEXT = {"装饰", "次要", "decor", "decoration"}
+
+# chapter_meta entry fields whose values are merged, in this order, into
+# "coolpoint_patterns" by normalize_chapter_meta_entry().
+_PATTERN_FIELDS = [
+    "coolpoint_patterns",
+    "coolpoint_pattern",
+    "cool_point_patterns",
+    "cool_point_pattern",
+    "patterns",
+    "pattern",
+]
+
+# One or more of these delimiter characters separate pattern names inside a
+# string value (see split_patterns()).
+# NOTE(review): "," and ";" each appear twice in the class. The second of each
+# was presumably the full-width form (U+FF0C / U+FF1B) before this text was
+# normalized - confirm against the repository copy.
+_PATTERN_SPLIT_RE = re.compile(r"[、,,/|+;;。]+")
+
+
+def to_positive_int(value: Any) -> Optional[int]:
+    """Parse ``value`` as a strictly positive integer.
+
+    ``None`` and booleans are rejected outright. Anything ``int()`` accepts
+    is converted directly. A string that ``int()`` rejects falls back to its
+    first run of digits, so ``"ch-18"`` yields 18.
+
+    Args:
+        value: Raw value read from state.json (number, string, or other).
+
+    Returns:
+        The parsed integer when it is greater than zero, otherwise ``None``.
+    """
+    if value is None or isinstance(value, bool):
+        return None
+
+    try:
+        number = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError is raised by int(float("inf")); ValueError covers NaN
+        # and non-numeric strings. Only strings get the digit-run fallback.
+        if not isinstance(value, str):
+            return None
+        matched = re.search(r"\d+", value)
+        if matched is None:
+            return None
+        number = int(matched.group(0))
+
+    return number if number > 0 else None
+
+
+def resolve_chapter_field(item: Mapping[str, Any], keys: Sequence[str]) -> Optional[int]:
+    """Return the first positive chapter number found under ``keys``.
+
+    Keys are tried in the given order. A key that is absent, or whose value
+    does not parse to a positive integer, is skipped. Returns ``None`` when
+    no key yields a chapter.
+    """
+    for candidate in keys:
+        if candidate not in item:
+            continue
+        parsed = to_positive_int(item.get(candidate))
+        if parsed is not None:
+            return parsed
+    return None
+
+
+def normalize_foreshadowing_status(
+    raw_status: Any,
+    default: str = FORESHADOWING_STATUS_PENDING,
+) -> str:
+    """Map a free-form status value onto one of the two canonical labels.
+
+    The value is stringified and stripped. It counts as resolved when the
+    text (as written or lowercased) is a known resolved spelling, or when it
+    merely contains the canonical resolved label. It counts as pending when
+    it is a known pending spelling. Empty or unrecognized input yields
+    ``default``.
+    """
+    text = str(raw_status or "").strip()
+    if not text:
+        return default
+
+    spellings = (text, text.lower())
+    if FORESHADOWING_STATUS_RESOLVED in text or any(
+        spelling in _RESOLVED_STATUS_TEXT for spelling in spellings
+    ):
+        return FORESHADOWING_STATUS_RESOLVED
+    if any(spelling in _PENDING_STATUS_TEXT for spelling in spellings):
+        return FORESHADOWING_STATUS_PENDING
+    return default
+
+
+def is_resolved_foreshadowing_status(raw_status: Any) -> bool:
+    """Return True when ``raw_status`` normalizes to the resolved label."""
+    return normalize_foreshadowing_status(raw_status) == FORESHADOWING_STATUS_RESOLVED
+
+
+def normalize_foreshadowing_tier(
+    raw_tier: Any,
+    default: str = FORESHADOWING_TIER_SUB,
+) -> str:
+    """Map a free-form tier value onto a canonical tier label.
+
+    The value is stringified and stripped, then matched (as written or
+    lowercased) against the core spellings first and the decor spellings
+    second. Empty or unrecognized input yields ``default``.
+    """
+    label = str(raw_tier or "").strip()
+    if label:
+        spellings = {label, label.lower()}
+        if spellings & _TIER_CORE_TEXT:
+            return FORESHADOWING_TIER_CORE
+        if spellings & _TIER_DECOR_TEXT:
+            return FORESHADOWING_TIER_DECOR
+    return default
+
+
+def split_patterns(raw_value: Any) -> List[str]:
+    """Turn a list or delimited string into an ordered list of unique names.
+
+    List items are stringified and stripped. A string is stripped and split
+    on the module's delimiter pattern, and each part is stripped. Empty
+    tokens are dropped and duplicates keep only their first occurrence.
+    ``None`` and any other type yield an empty list.
+    """
+    if isinstance(raw_value, list):
+        candidates = [str(item).strip() for item in raw_value]
+    elif isinstance(raw_value, str):
+        candidates = [part.strip() for part in _PATTERN_SPLIT_RE.split(raw_value.strip())]
+    else:
+        # None and every non-list, non-string value carry no patterns.
+        return []
+
+    # dict.fromkeys drops duplicates while keeping first-seen order.
+    return list(dict.fromkeys(token for token in candidates if token))
+
+
+def count_patterns(raw_value: Any) -> Optional[int]:
+    """Return how many distinct patterns ``raw_value`` holds, or None if none."""
+    total = len(split_patterns(raw_value))
+    # A count of zero is reported as None rather than 0.
+    return total or None
+
+
+def normalize_foreshadowing_item(item: Mapping[str, Any]) -> Dict[str, Any]:
+    """Return a copy of ``item`` with canonical status, tier and chapter keys.
+
+    ``status`` and ``tier`` are always rewritten to canonical labels.
+    ``content`` is replaced by its stripped text only when that text is
+    non-empty. ``planted_chapter``, ``target_chapter`` and
+    ``resolved_chapter`` are set only when one of their candidate keys
+    parses to a positive chapter number. All other keys are carried over
+    unchanged, and the input mapping itself is not modified.
+    """
+    result = dict(item)
+    result["status"] = normalize_foreshadowing_status(item.get("status"))
+    result["tier"] = normalize_foreshadowing_tier(item.get("tier"))
+
+    stripped = str(item.get("content") or "").strip()
+    if stripped:
+        result["content"] = stripped
+
+    # Canonical key -> candidate keys it may be read from, in lookup order.
+    chapter_sources = (
+        ("planted_chapter", FORESHADOWING_PLANTED_KEYS),
+        ("target_chapter", FORESHADOWING_TARGET_KEYS),
+        ("resolved_chapter", ["resolved_chapter", "resolved_at_chapter", "resolved"]),
+    )
+    for canonical_key, candidate_keys in chapter_sources:
+        chapter = resolve_chapter_field(item, candidate_keys)
+        if chapter is not None:
+            result[canonical_key] = chapter
+
+    return result
+
+
+def normalize_foreshadowing_list(raw_items: Any) -> List[Dict[str, Any]]:
+    """Normalize every mapping in ``raw_items``; drop everything else.
+
+    A non-list input yields an empty list. Non-mapping elements are skipped.
+    """
+    if not isinstance(raw_items, list):
+        return []
+    return [
+        normalize_foreshadowing_item(entry)
+        for entry in raw_items
+        if isinstance(entry, Mapping)
+    ]
+
+
+def normalize_chapter_meta_entry(entry: Mapping[str, Any]) -> Dict[str, Any]:
+    """Return a copy of ``entry`` with all pattern fields merged into one list.
+
+    Each field named in ``_PATTERN_FIELDS`` is split into pattern names. The
+    names are merged in field order without duplicates and stored under
+    ``"coolpoint_patterns"``. When no field yields a pattern the copy is
+    returned unchanged.
+    """
+    result = dict(entry)
+
+    # Dict keys give an insertion-ordered set of pattern names.
+    collected: Dict[str, None] = {}
+    for field_name in _PATTERN_FIELDS:
+        for name in split_patterns(entry.get(field_name)):
+            collected.setdefault(name, None)
+
+    if collected:
+        result["coolpoint_patterns"] = list(collected)
+    return result
+
+
+def normalize_chapter_meta(raw_chapter_meta: Any) -> Dict[str, Dict[str, Any]]:
+    """Normalize a chapter_meta mapping into ``{str(key): normalized entry}``.
+
+    A non-mapping input yields an empty dict. Entries whose value is not a
+    mapping are dropped. Keys are converted with ``str()``.
+    """
+    if not isinstance(raw_chapter_meta, Mapping):
+        return {}
+    return {
+        str(key): normalize_chapter_meta_entry(entry)
+        for key, entry in raw_chapter_meta.items()
+        if isinstance(entry, Mapping)
+    }
+
+
+def get_chapter_meta_entry(state: Mapping[str, Any], chapter: int) -> Dict[str, Any]:
+    """Look up and normalize the chapter_meta entry for ``chapter``.
+
+    The zero-padded key (``"0003"``) is tried first, then the plain decimal
+    key (``"3"``). If neither holds a mapping, every key is parsed as a
+    positive integer and the first one equal to ``chapter`` with a mapping
+    value is used. Returns an empty dict when nothing matches or when
+    ``chapter_meta`` is not a mapping.
+    """
+    chapter_meta = state.get("chapter_meta", {})
+    if not isinstance(chapter_meta, Mapping):
+        return {}
+
+    # Direct lookups under the two expected key spellings.
+    for lookup_key in (f"{chapter:04d}", str(chapter)):
+        direct = chapter_meta.get(lookup_key)
+        if isinstance(direct, Mapping):
+            return normalize_chapter_meta_entry(direct)
+
+    # Fallback scan for any other key that parses to the same chapter number.
+    fallback = next(
+        (
+            candidate
+            for key, candidate in chapter_meta.items()
+            if to_positive_int(key) == chapter and isinstance(candidate, Mapping)
+        ),
+        None,
+    )
+    return {} if fallback is None else normalize_chapter_meta_entry(fallback)
+
+
+def normalize_state_runtime_sections(state: Dict[str, Any]) -> Dict[str, Any]:
+    """Normalize the foreshadowing list and chapter_meta of ``state`` in place.
+
+    ``state["plot_threads"]`` is replaced by an empty dict when it is not a
+    dict, its ``"foreshadowing"`` list is normalized, and
+    ``state["chapter_meta"]`` is rebuilt with string keys. The same ``state``
+    object is returned. A non-dict input returns a new empty dict.
+    """
+    if not isinstance(state, dict):
+        return {}
+
+    threads = state.get("plot_threads")
+    if not isinstance(threads, dict):
+        threads = state["plot_threads"] = {}
+
+    threads["foreshadowing"] = normalize_foreshadowing_list(threads.get("foreshadowing"))
+    state["chapter_meta"] = normalize_chapter_meta(state.get("chapter_meta", {}))
+    return state
+

+ 107 - 0
.claude/scripts/data_modules/tests/test_state_validator.py

@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from data_modules.state_validator import (
+    FORESHADOWING_STATUS_PENDING,
+    FORESHADOWING_STATUS_RESOLVED,
+    FORESHADOWING_TIER_CORE,
+    FORESHADOWING_TIER_DECOR,
+    FORESHADOWING_TIER_SUB,
+    count_patterns,
+    get_chapter_meta_entry,
+    is_resolved_foreshadowing_status,
+    normalize_chapter_meta,
+    normalize_foreshadowing_item,
+    normalize_foreshadowing_status,
+    normalize_foreshadowing_tier,
+    normalize_state_runtime_sections,
+    resolve_chapter_field,
+    split_patterns,
+    to_positive_int,
+)
+
+
+def test_to_positive_int_and_resolve_chapter_field():
+    """Check integer parsing and ordered key lookup of chapter numbers."""
+    assert to_positive_int(12) == 12
+    assert to_positive_int("ch-18") == 18
+    assert to_positive_int(0) is None
+    assert to_positive_int("no number") is None
+
+    # The first key is absent in both lookups, so the second key is used.
+    item = {"added_chapter": "第15章", "target": "200"}
+    assert resolve_chapter_field(item, ["planted_chapter", "added_chapter"]) == 15
+    assert resolve_chapter_field(item, ["target_chapter", "target"]) == 200
+
+
+def test_status_and_tier_normalization():
+    """Check that status and tier synonyms map to the canonical labels."""
+    assert normalize_foreshadowing_status("pending") == FORESHADOWING_STATUS_PENDING
+    assert normalize_foreshadowing_status("resolved") == FORESHADOWING_STATUS_RESOLVED
+    # Empty input falls back to the default status.
+    assert normalize_foreshadowing_status("") == FORESHADOWING_STATUS_PENDING
+    assert is_resolved_foreshadowing_status("已回收") is True
+    assert is_resolved_foreshadowing_status("active") is False
+
+    assert normalize_foreshadowing_tier("core") == FORESHADOWING_TIER_CORE
+    assert normalize_foreshadowing_tier("decoration") == FORESHADOWING_TIER_DECOR
+    # Unrecognized tiers fall back to the default tier.
+    assert normalize_foreshadowing_tier("unknown") == FORESHADOWING_TIER_SUB
+
+
+def test_pattern_split_and_count():
+    """Check splitting, stripping, de-duplication and counting of patterns."""
+    assert split_patterns(["A", " A ", "B", ""]) == ["A", "B"]
+    assert split_patterns("A, B / C|A") == ["A", "B", "C"]
+    assert count_patterns("A,B,C") == 3
+    # Values that are neither list nor string hold no patterns.
+    assert count_patterns(123) is None
+
+
+def test_normalize_foreshadowing_item_and_chapter_meta_entry():
+    """Check item normalization and chapter_meta lookup under both key forms."""
+    item = {
+        "content": "  遗迹钥匙  ",
+        "status": "pending",
+        "tier": "main",
+        "added_chapter": "第30章",
+        "target": "120",
+    }
+    normalized_item = normalize_foreshadowing_item(item)
+    assert normalized_item["content"] == "遗迹钥匙"
+    assert normalized_item["status"] == FORESHADOWING_STATUS_PENDING
+    assert normalized_item["tier"] == FORESHADOWING_TIER_CORE
+    assert normalized_item["planted_chapter"] == 30
+    assert normalized_item["target_chapter"] == 120
+
+    # One entry uses a zero-padded key, the other a plain decimal key.
+    state = {
+        "chapter_meta": {
+            "0003": {"coolpoint_pattern": "反杀, 掉马"},
+            "7": {"patterns": ["翻车", "反杀"]},
+        }
+    }
+    meta3 = get_chapter_meta_entry(state, 3)
+    assert meta3["coolpoint_patterns"] == ["反杀", "掉马"]
+
+    meta7 = get_chapter_meta_entry(state, 7)
+    assert meta7["coolpoint_patterns"] == ["翻车", "反杀"]
+
+
+def test_normalize_state_runtime_sections():
+    """Check whole-state normalization, including dropping invalid entries."""
+    state = {
+        "plot_threads": {
+            "foreshadowing": [
+                {"content": "伏笔A", "status": "active", "tier": "decor", "chapter": 11, "target": 99},
+                "invalid",
+            ]
+        },
+        "chapter_meta": {
+            1: {"cool_point_pattern": "打脸|翻车"},
+            "bad": "invalid",
+        },
+    }
+
+    normalized = normalize_state_runtime_sections(state)
+    # The non-mapping "invalid" element is dropped from the list.
+    assert len(normalized["plot_threads"]["foreshadowing"]) == 1
+    first = normalized["plot_threads"]["foreshadowing"][0]
+    assert first["status"] == FORESHADOWING_STATUS_PENDING
+    assert first["tier"] == FORESHADOWING_TIER_DECOR
+    assert first["planted_chapter"] == 11
+    assert first["target_chapter"] == 99
+
+    # The integer key 1 is stored under the string key "1".
+    chapter_meta = normalize_chapter_meta(normalized["chapter_meta"])
+    assert "1" in chapter_meta
+    assert chapter_meta["1"]["coolpoint_patterns"] == ["打脸", "翻车"]
+

+ 26 - 47
.claude/scripts/status_reporter.py

@@ -93,27 +93,29 @@ from chapter_paths import extract_chapter_num_from_filename
 try:
     from data_modules.config import get_config, DataModulesConfig
     from data_modules.index_manager import IndexManager
+    from data_modules.state_validator import (
+        get_chapter_meta_entry,
+        is_resolved_foreshadowing_status,
+        normalize_foreshadowing_tier,
+        normalize_state_runtime_sections,
+        resolve_chapter_field,
+        to_positive_int,
+    )
 except ImportError:
     from scripts.data_modules.config import get_config, DataModulesConfig
     from scripts.data_modules.index_manager import IndexManager
+    from scripts.data_modules.state_validator import (
+        get_chapter_meta_entry,
+        is_resolved_foreshadowing_status,
+        normalize_foreshadowing_tier,
+        normalize_state_runtime_sections,
+        resolve_chapter_field,
+        to_positive_int,
+    )
 
 def _is_resolved_foreshadowing_status(raw_status: Any) -> bool:
     """判断伏笔是否已回收(兼容历史字段与同义词)。"""
-    if raw_status is None:
-        return False
-
-    status = str(raw_status).strip()
-    if not status:
-        return False
-
-    status_lower = status.lower()
-    if status in {"已回收", "已完成", "已解决", "完成"}:
-        return True
-    if status_lower in {"resolved", "done", "complete"}:
-        return True
-    if "已回收" in status:
-        return True
-    return False
+    return is_resolved_foreshadowing_status(raw_status)
 
 def _enable_windows_utf8_stdio() -> None:
     """在 Windows 下启用 UTF-8 输出;pytest 环境跳过以避免捕获冲突。"""
@@ -172,43 +174,28 @@ class StatusReporter:
         with open(self.state_file, 'r', encoding='utf-8') as f:
             self.state = json.load(f)
 
+        if isinstance(self.state, dict):
+            self.state = normalize_state_runtime_sections(self.state)
+
         return True
 
     def _to_positive_int(self, value: Any) -> Optional[int]:
         """将输入解析为正整数;解析失败返回 None。"""
-        if value is None or isinstance(value, bool):
-            return None
-
-        try:
-            number = int(value)
-            return number if number > 0 else None
-        except (TypeError, ValueError):
-            if isinstance(value, str):
-                match = re.search(r"\d+", value)
-                if match:
-                    number = int(match.group(0))
-                    return number if number > 0 else None
-        return None
+        return to_positive_int(value)
 
     def _normalize_foreshadowing_tier(self, raw_tier: Any) -> Tuple[str, float]:
         """标准化伏笔层级并返回对应权重。"""
-        text = str(raw_tier or "").strip()
-        lower = text.lower()
+        tier = normalize_foreshadowing_tier(raw_tier)
 
-        if text in {"核心", "主线"} or lower in {"core", "main"}:
+        if tier == "核心":
             return "核心", self.config.foreshadowing_tier_weight_core
-        if text in {"装饰", "次要"} or lower in {"decor", "decoration"}:
+        if tier == "装饰":
             return "装饰", self.config.foreshadowing_tier_weight_decor
         return "支线", self.config.foreshadowing_tier_weight_sub
 
     def _resolve_chapter_field(self, item: Dict[str, Any], keys: List[str]) -> Optional[int]:
         """按候选键顺序读取章节号。"""
-        for key in keys:
-            if key in item:
-                chapter = self._to_positive_int(item.get(key))
-                if chapter is not None:
-                    return chapter
-        return None
+        return resolve_chapter_field(item, keys)
 
     def _collect_foreshadowing_records(self) -> List[Dict[str, Any]]:
         """收集未回收伏笔,并基于真实字段构建分析记录。"""
@@ -311,15 +298,7 @@ class StatusReporter:
         """读取指定章节的 chapter_meta(支持 0001/1 两种键)。"""
         if not self.state:
             return {}
-        chapter_meta = self.state.get("chapter_meta", {})
-        if not isinstance(chapter_meta, dict):
-            return {}
-
-        for key in (f"{chapter:04d}", str(chapter)):
-            value = chapter_meta.get(key)
-            if isinstance(value, dict):
-                return value
-        return {}
+        return get_chapter_meta_entry(self.state, chapter)
 
     def _parse_pattern_count(self, raw_value: Any) -> Optional[int]:
         """解析爽点模式数量,解析失败返回 None。"""

+ 7 - 12
.claude/scripts/update_state.py

@@ -57,6 +57,10 @@ from typing import Dict, Any, Optional
 # ============================================================================
 from security_utils import create_secure_directory, atomic_write_json, restore_from_backup
 from project_locator import resolve_state_file
+from data_modules.state_validator import (
+    normalize_foreshadowing_status,
+    normalize_state_runtime_sections,
+)
 
 # Windows 编码兼容性修复
 if sys.platform == 'win32':
@@ -129,6 +133,7 @@ class StateUpdater:
             tracker.setdefault("chapters_since_switch", 0)
             tracker.setdefault("history", [])
 
+        normalize_state_runtime_sections(state)
         return True
 
     def load(self) -> bool:
@@ -261,18 +266,7 @@ class StateUpdater:
                 return
 
         # 归一化状态,避免 "待回收/进行中/active/pending" 等混用导致下游过滤漏掉
-        raw_status = "" if status is None else str(status).strip()
-        raw_status_lower = raw_status.lower()
-        if raw_status in {"已回收", "已完成", "已解决", "完成"} or raw_status_lower in {"resolved", "done", "complete"}:
-            status = "已回收"
-        elif (
-            raw_status in {"未回收", "待回收", "进行中", "未解决"}
-            or raw_status_lower in {"active", "pending"}
-            or not raw_status
-        ):
-            status = "未回收"
-        else:
-            status = "未回收"
+        status = normalize_foreshadowing_status(status)
 
         planted_chapter = int(self.state.get("progress", {}).get("current_chapter", 0) or 0)
         if planted_chapter <= 0:
@@ -302,6 +296,7 @@ class StateUpdater:
                 item["status"] = "已回收"
                 item["resolved_chapter"] = chapter
                 item["resolved_at"] = datetime.now().strftime("%Y-%m-%d")
+                normalize_state_runtime_sections(self.state)
                 print(f"📝 回收伏笔: {content}(第{chapter}章)")
                 return