Преглед на файлове

feat: add GENRE_CANONICAL constants and resolve_genre() to reference_search

lingfengQAQ преди 2 месеца
родител
ревизия
a0880cf03d
променени са 2 файла, в които са добавени 100 реда и са изтрити 0 реда
  1. 68 0
      webnovel-writer/scripts/reference_search.py
  2. 32 0
      webnovel-writer/scripts/tests/test_reference_search.py

+ 68 - 0
webnovel-writer/scripts/reference_search.py

@@ -82,6 +82,74 @@ def _genre_matches(row: Dict[str, str], genre: Optional[str]) -> bool:
     return genre in _split_multi_value(cell)
     return genre in _split_multi_value(cell)
 
 
 
 
+# ---------------------------------------------------------------------------
+# Genre canonical list & platform tag mapping
+# ---------------------------------------------------------------------------
+
+# The 15 canonical genre names. resolve_genre() returns a (stripped) input
+# as-is when it is already a member of this set.
+GENRE_CANONICAL: set[str] = {
+    "都市", "玄幻", "仙侠", "奇幻", "科幻",
+    "历史", "悬疑", "游戏", "古言", "现言",
+    "幻言", "年代", "种田", "快穿", "衍生",
+}
+
+# Platform tag -> canonical genre. Every value is a member of
+# GENRE_CANONICAL; a few tags ("年代", "种田", "快穿") map to themselves.
+# resolve_genre() consults this table before _LEGACY_GENRE_MAP.
+PLATFORM_TO_CANONICAL: Dict[str, str] = {
+    # Male-oriented channel tags
+    "都市日常": "都市", "都市修真": "都市", "都市高武": "都市",
+    "战神赘婿": "都市", "都市种田": "都市", "都市脑洞": "都市",
+    "传统玄幻": "玄幻", "玄幻脑洞": "玄幻",
+    "东方仙侠": "仙侠",
+    "西方奇幻": "奇幻",
+    "科幻末世": "科幻",
+    "历史古代": "历史", "历史脑洞": "历史", "抗战谍战": "历史",
+    "悬疑脑洞": "悬疑", "悬疑灵异": "悬疑",
+    "游戏体育": "游戏",
+    "动漫衍生": "衍生", "男频衍生": "衍生",
+    # Female-oriented channel tags
+    "古风世情": "古言", "宫斗宅斗": "古言", "古言脑洞": "古言",
+    "现言脑洞": "现言", "青春甜宠": "现言", "星光璀璨": "现言",
+    "职场婚恋": "现言", "豪门总裁": "现言",
+    "玄幻言情": "幻言",
+    "年代": "年代", "民国言情": "年代",
+    "种田": "种田",
+    "快穿": "快穿",
+    "女频悬疑": "悬疑",
+    "女频衍生": "衍生",
+}
+
+# Legacy values that appeared in old CSV data → canonical mapping.
+# Used by resolve_genre() during the migration period.
+# NOTE: the keys on the first three rows (东方仙侠, 西方奇幻, 科幻末世,
+# 都市日常, 都市修真, 都市高武, 历史古代) are also keys of
+# PLATFORM_TO_CANONICAL with the same targets; because resolve_genre()
+# checks PLATFORM_TO_CANONICAL first, those entries are never reached
+# through it.
+_LEGACY_GENRE_MAP: Dict[str, str] = {
+    "东方仙侠": "仙侠", "西方奇幻": "奇幻", "科幻末世": "科幻",
+    "都市日常": "都市", "都市修真": "都市", "都市高武": "都市",
+    "历史古代": "历史",
+    "谍战": "历史", "军事": "历史", "武侠": "历史",
+    "刑侦": "悬疑", "惊悚": "悬疑", "推理": "悬疑", "规则怪谈": "悬疑",
+    "末世": "科幻", "赛博朋克": "科幻",
+    "网游": "游戏", "电竞": "游戏", "竞技": "游戏", "体育": "游戏",
+    "轻小说": "衍生", "同人": "衍生",
+    "校园": "现言", "青春": "现言", "娱乐圈": "现言", "职场": "现言",
+    "高武": "都市",
+}
+
+
+def resolve_genre(genre: Optional[str]) -> Optional[str]:
+    """Map a genre label onto its canonical genre name.
+
+    The input is stripped of surrounding whitespace and then looked up, in
+    order, among the canonical genres (plus the literal "全部"), the
+    platform tags, and the legacy values.
+
+    Returns None when *genre* is None; otherwise the canonical genre, or
+    the stripped input unchanged when none of the tables recognises it.
+    """
+    if genre is None:
+        return None
+    label = genre.strip()
+    # Already canonical (or the "全部" literal): nothing to translate.
+    if label == "全部" or label in GENRE_CANONICAL:
+        return label
+    # Platform tags take precedence over legacy values.
+    for table in (PLATFORM_TO_CANONICAL, _LEGACY_GENRE_MAP):
+        if label in table:
+            return table[label]
+    # Unknown label: hand it back (stripped) rather than failing.
+    return label
+
+
 # ---------------------------------------------------------------------------
 # CSV_CONFIG – per-table metadata registry
 # ---------------------------------------------------------------------------

+ 32 - 0
webnovel-writer/scripts/tests/test_reference_search.py

@@ -251,3 +251,35 @@ class TestPerTableSearchCols:
         assert out2["status"] == "success"
         assert out2["status"] == "success"
         assert out1["data"]["total"] >= 1
         assert out1["data"]["total"] >= 1
         assert out2["data"]["total"] >= 1
         assert out2["data"]["total"] >= 1
+
+
+class TestGenreCanonical:
+    """Checks for the canonical genre set and the platform-tag mapping."""
+
+    def test_canonical_genres_has_15_entries(self):
+        """GENRE_CANONICAL holds exactly the 15 expected genre names."""
+        from reference_search import GENRE_CANONICAL
+
+        wanted = {
+            "都市", "玄幻", "仙侠", "奇幻", "科幻",
+            "历史", "悬疑", "游戏", "古言", "现言",
+            "幻言", "年代", "种田", "快穿", "衍生",
+        }
+        assert len(GENRE_CANONICAL) == 15
+        assert GENRE_CANONICAL == wanted
+
+    def test_platform_to_canonical_maps_all_tags(self):
+        """The mapping has 34 keys and only canonical genres as values."""
+        from reference_search import GENRE_CANONICAL, PLATFORM_TO_CANONICAL
+
+        # 34 distinct platform tags are expected. (Per the original note,
+        # tags such as 科幻末世, 悬疑脑洞 and 游戏体育 are listed for both
+        # the male and female channels, so they count once.)
+        assert len(PLATFORM_TO_CANONICAL) == 34
+        # No tag may point outside the canonical set.
+        for tag, canonical in PLATFORM_TO_CANONICAL.items():
+            assert canonical in GENRE_CANONICAL, f"{tag} -> {canonical} not in GENRE_CANONICAL"
+
+    def test_platform_to_canonical_spot_checks(self):
+        """A handful of representative tags resolve to the expected genre."""
+        from reference_search import PLATFORM_TO_CANONICAL
+
+        spot_checks = {
+            "都市日常": "都市",
+            "战神赘婿": "都市",
+            "东方仙侠": "仙侠",
+            "西方奇幻": "奇幻",
+            "古风世情": "古言",
+            "豪门总裁": "现言",
+            "快穿": "快穿",
+            "科幻末世": "科幻",
+        }
+        for tag, canonical in spot_checks.items():
+            assert PLATFORM_TO_CANONICAL[tag] == canonical