Kaynağa Gözat

feat(genres): add taxonomy resolver and migrate search

lingfengQAQ 2 hafta önce
ebeveyn
işleme
d8c474452d

+ 57 - 0
webnovel-writer/references/taxonomy/genre-index.csv

@@ -0,0 +1,57 @@
+label,canonical_genre,label_type,template_file,route_tags,trope_tags,format_tags,aliases,notes
+都市,都市,canonical,,,,,,
+都市日常,都市,platform,都市日常.md,都市日常,,,,
+都市异能,都市,platform,都市异能.md,都市异能,,,都市修真;现代异能;超凡都市,
+高武,都市,platform,高武.md,高武,,,都市高武;全球高武,
+都市脑洞,都市,platform,都市脑洞.md,都市脑洞,,,都市奇闻,
+都市种田,都市,platform,,都市种田,,,,
+战神赘婿,都市,platform,,都市赘婿流,,,赘婿逆袭;上门女婿;废婿翻身;都市赘婿,
+现实题材,都市,preset,现实题材.md,,,,现实主义;现实向,
+直播文,都市,format,直播文.md,,,直播文,直播;直播带货;主播,
+黑暗题材,玄幻,trope,黑暗题材.md,,黑暗题材,,黑暗流;反派流,
+玄幻,玄幻,canonical,修仙.md,,,,玄幻修仙;修仙/玄幻,
+传统玄幻,玄幻,platform,,传统玄幻,,,,
+玄幻脑洞,玄幻,platform,,玄幻脑洞,,,,
+系统流,玄幻,trope,系统流.md,,系统流,,系统;系统文,
+多子多福,玄幻,trope,多子多福.md,,多子多福,,多子多福系统;后宫系统,
+学院流,玄幻,route,,学院流,,,入学考核;同届竞争;校园成长,
+仙侠,仙侠,canonical,修仙.md,传统修真,,,修仙;修真;传统修真;宗门流;东方仙侠,
+奇幻,奇幻,canonical,西幻.md,,,,西方奇幻;魔法;骑士,
+西幻,奇幻,platform,西幻.md,西幻冒险,,,西幻冒险,
+科幻,科幻,canonical,科幻.md,,,,赛博朋克,
+末世,科幻,platform,末世.md,末世求生,,,科幻末世;丧尸;废土;灾变生存;末世基地,
+历史,历史,canonical,,,,,,
+历史古代,历史,platform,历史古代.md,历史古代,,,古代历史;争霸权谋,
+历史脑洞,历史,platform,历史脑洞.md,历史脑洞,,,,
+抗战谍战,历史,platform,抗战谍战.md,抗战谍战,,,谍战;军事,
+穿越流,历史,route,,穿越流,,,穿越;穿书;异世重开;历史权谋;官场,
+武侠,历史,legacy,,,,,,
+悬疑,悬疑,canonical,,,,,,
+悬疑脑洞,悬疑,platform,悬疑脑洞.md,悬疑脑洞,,,,
+悬疑灵异,悬疑,platform,悬疑灵异.md,悬疑灵异,,,惊悚;灵异,
+女频悬疑,悬疑,platform,女频悬疑.md,女频悬疑,,,情感悬疑,
+规则怪谈,悬疑,route,规则怪谈.md,规则怪谈,,,规则动物园;规则怪谈动物园;规则类,
+克苏鲁,悬疑,preset,克苏鲁.md,克苏鲁诡秘,,,克系;克系悬疑;诡秘;不可名状,
+无限流,悬疑,route,无限流.md,副本流,,,无限副本;副本流;关卡挑战;多世界任务,
+悬疑推理,悬疑,route,,悬疑推理,,,刑侦;本格;探案;推理,
+游戏,游戏,canonical,,,,,,
+游戏体育,游戏,platform,游戏体育.md,游戏体育,,,网游;竞技;体育;体育竞技,
+电竞,游戏,platform,电竞.md,游戏电竞,,,电竞文;游戏电竞;电子竞技,
+古言,古言,canonical,古言.md,,,,古风世情;古言脑洞,
+宫斗宅斗,古言,platform,宫斗宅斗.md,宫斗宅斗,,,宫斗;宅斗;嫡庶,
+现言,现言,canonical,,,,,,
+现言脑洞,现言,platform,现言脑洞.md,现言脑洞,,,,
+青春甜宠,现言,platform,青春甜宠.md,青春甜宠,,,青春;校园;甜宠;校园甜文;轻甜,
+豪门总裁,现言,platform,豪门总裁.md,豪门总裁,,,豪门;总裁,
+职场婚恋,现言,platform,职场婚恋.md,职场婚恋,,,职场;办公室恋情,
+狗血言情,现言,trope,狗血言情.md,,狗血言情,,虐文;狗血,
+替身文,现言,trope,替身文.md,,替身文,,替身;白月光,
+知乎短篇,现言,format,知乎短篇.md,,,知乎短篇,知乎体;知乎盐选;第一人称短篇;小程序短篇,
+娱乐圈,现言,platform,,娱乐圈,,,明星文;星光璀璨;综艺文,
+幻言,幻言,canonical,幻想言情.md,,,,玄幻言情;幻想言情;仙侠言情;宿命恋,
+年代,年代,canonical,年代.md,,,,年代文;四合院;工厂,
+民国言情,年代,platform,民国言情.md,年代民国,,,年代民国;近现代,
+种田,种田,canonical,种田.md,种田经营,,,基建;经营;慢生活,
+快穿,快穿,canonical,,快穿任务,,,攻略系统;小世界;穿梭任务;任务者;原主,
+衍生,衍生,canonical,,,,,,
+同人衍生,衍生,platform,,同人衍生,,,同人;二创;动漫衍生;原作;OOC;轻小说;男频衍生;女频衍生,

+ 30 - 0
webnovel-writer/scripts/data_modules/tests/test_story_system_engine.py

@@ -8,6 +8,7 @@ from pathlib import Path
 import pytest
 
 from data_modules.story_system_engine import StorySystemEngine, StorySystemRoutingError
+from reference_search import GENRE_CANONICAL
 
 
 def _write_csv(path, headers, rows):
@@ -249,6 +250,35 @@ def test_story_system_routes_chinese_rules_mystery_to_canonical_suspense():
     assert route["route_source"] != "default_seed_fallback"
 
 
+def test_story_system_routes_every_real_route_row():
+    csv_dir = Path(__file__).resolve().parents[3] / "references" / "csv"
+    engine = StorySystemEngine(csv_dir=csv_dir)
+    route_rows = engine._load_csv_rows("题材与调性推理")
+
+    assert route_rows
+    for row in route_rows:
+        aliases = (
+            engine._split_multi_value(row.get("关键词"))
+            + engine._split_multi_value(row.get("意图与同义词"))
+            + engine._split_multi_value(row.get("题材别名"))
+        )
+        query = next((value for value in aliases if value), row.get("题材/流派") or row.get("canonical_genre") or "")
+        contract = engine.build(query=query, genre=None, chapter=None)
+        route = contract["master_setting"]["route"]
+
+        canonical = route["canonical_genre"]
+        assert canonical in GENRE_CANONICAL or canonical == "全部"
+        if canonical == "全部":
+            assert route["genre_filter"] == ""
+        else:
+            assert route["genre_filter"] == canonical
+        assert route["route_source"] in {
+            "keyword_or_alias_match",
+            "explicit_genre_fallback",
+            "inferred_genre_fallback",
+        }
+
+
 def test_story_system_rejects_english_explicit_genre_even_when_query_routes():
     csv_dir = Path(__file__).resolve().parents[3] / "references" / "csv"
 

+ 268 - 0
webnovel-writer/scripts/genre_taxonomy.py

@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Shared genre taxonomy resolver.
+
+The taxonomy intentionally separates two namespaces:
+
+- canonical_genre: stable 15-value enum for CSV filtering and Story System.
+- template_files: init-only preset templates under templates/genres/.
+"""
+
+from __future__ import annotations
+
+import csv
+import re
+import unicodedata
+from dataclasses import dataclass, field
+from functools import lru_cache
+from pathlib import Path
+from typing import Iterable, Optional
+
+
+GENRE_CANONICAL: set[str] = {
+    "都市", "玄幻", "仙侠", "奇幻", "科幻",
+    "历史", "悬疑", "游戏", "古言", "现言",
+    "幻言", "年代", "种田", "快穿", "衍生",
+}
+
+_INPUT_SPLIT_RE = re.compile(r"[++/、,,|]+|与")
+_VALUE_SPLIT_RE = re.compile(r"[;;|]+")
+_HIGH_PRIORITY_TYPES = {"route", "platform", "canonical", "preset", "legacy"}
+_TYPE_PRIORITY = {
+    "route": 0,
+    "platform": 1,
+    "canonical": 2,
+    "preset": 3,
+    "legacy": 4,
+    "format": 5,
+    "trope": 6,
+}
+
+
+@dataclass(frozen=True)
+class GenreEntry:
+    """One immutable row of the genre taxonomy index."""
+
+    # Primary label of the row.
+    label: str
+    # Canonical genre the label maps to.
+    canonical_genre: str
+    # Kind of label; used as the key into the type-priority table.
+    label_type: str
+    # Template file name associated with the label, "" when there is none.
+    template_file: str = ""
+    # Multi-value cells of the row, already split into tuples.
+    route_tags: tuple[str, ...] = ()
+    trope_tags: tuple[str, ...] = ()
+    format_tags: tuple[str, ...] = ()
+    aliases: tuple[str, ...] = ()
+    # Free-form notes column.
+    notes: str = ""
+
+    @property
+    def lookup_labels(self) -> tuple[str, ...]:
+        """Return the label followed by all of its aliases."""
+        return (self.label, *self.aliases)
+
+
+@dataclass
+class GenreResolution:
+    """Mutable result of resolving one free-form genre input."""
+
+    # The stripped input text.
+    raw_label: str
+    # Chosen canonical genre; stays "" when nothing matched.
+    canonical_genre: str = ""
+    # The text that matched each entry (input/token text, or the entry label
+    # for substring matches), in match order.
+    matched_labels: list[str] = field(default_factory=list)
+    # Template files and tags collected from the matched entries, de-duplicated
+    # in match order.
+    template_files: list[str] = field(default_factory=list)
+    route_tags: list[str] = field(default_factory=list)
+    trope_tags: list[str] = field(default_factory=list)
+    format_tags: list[str] = field(default_factory=list)
+    # Filled only when no entry matched at all.
+    unresolved: list[str] = field(default_factory=list)
+    # Diagnostic flags such as "ambiguous_canonical".
+    warnings: list[str] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class GenreTaxonomy:
+    """Parsed taxonomy: the rows plus a normalised label/alias index."""
+
+    # All rows in file order.
+    entries: tuple[GenreEntry, ...]
+    # Normalised label or alias -> owning entry.
+    lookup: dict[str, GenreEntry]
+
+
+def default_taxonomy_path() -> Path:
+    return Path(__file__).resolve().parent.parent / "references" / "taxonomy" / "genre-index.csv"
+
+
+def _split_list(value: object) -> tuple[str, ...]:
+    text = str(value or "").strip()
+    if not text:
+        return ()
+    return tuple(part.strip() for part in _VALUE_SPLIT_RE.split(text) if part.strip())
+
+
+def _normalize_lookup_key(value: object) -> str:
+    text = unicodedata.normalize("NFKC", str(value or "")).strip().lower()
+    return re.sub(r"\s+", "", text)
+
+
+def _read_taxonomy(path: Path) -> tuple[GenreEntry, ...]:
+    with path.open("r", encoding="utf-8-sig", newline="") as f:
+        reader = csv.DictReader(f)
+        rows = list(reader)
+
+    entries: list[GenreEntry] = []
+    for line_no, row in enumerate(rows, start=2):
+        label = str(row.get("label") or "").strip()
+        canonical = str(row.get("canonical_genre") or "").strip()
+        if not label:
+            raise ValueError(f"{path}: line {line_no} missing label")
+        if canonical not in GENRE_CANONICAL and canonical != "全部":
+            raise ValueError(f"{path}: line {line_no} invalid canonical_genre {canonical!r}")
+        entries.append(
+            GenreEntry(
+                label=label,
+                canonical_genre=canonical,
+                label_type=str(row.get("label_type") or "").strip(),
+                template_file=str(row.get("template_file") or "").strip(),
+                route_tags=_split_list(row.get("route_tags")),
+                trope_tags=_split_list(row.get("trope_tags")),
+                format_tags=_split_list(row.get("format_tags")),
+                aliases=_split_list(row.get("aliases")),
+                notes=str(row.get("notes") or "").strip(),
+            )
+        )
+    return tuple(entries)
+
+
+def _build_lookup(entries: Iterable[GenreEntry]) -> dict[str, GenreEntry]:
+    lookup: dict[str, GenreEntry] = {}
+    for entry in entries:
+        seen_for_entry: set[str] = set()
+        for label in entry.lookup_labels:
+            key = _normalize_lookup_key(label)
+            if not key or key in seen_for_entry:
+                continue
+            seen_for_entry.add(key)
+            existing = lookup.get(key)
+            if existing is not None and existing != entry:
+                raise ValueError(
+                    f"genre taxonomy duplicate label/alias {label!r}: "
+                    f"{existing.label!r} vs {entry.label!r}"
+                )
+            lookup[key] = entry
+    return lookup
+
+
+@lru_cache(maxsize=8)
+def load_genre_taxonomy(index_path: Optional[str] = None) -> GenreTaxonomy:
+    path = Path(index_path) if index_path else default_taxonomy_path()
+    entries = _read_taxonomy(path)
+    return GenreTaxonomy(entries=entries, lookup=_build_lookup(entries))
+
+
+def split_genre_input(raw: str) -> list[str]:
+    text = str(raw or "").strip()
+    if not text:
+        return []
+    tokens = [part.strip() for part in _INPUT_SPLIT_RE.split(text) if part.strip()]
+    return tokens or [text]
+
+
+def _append_unique(values: list[str], additions: Iterable[str]) -> None:
+    seen = set(values)
+    for value in additions:
+        if value and value not in seen:
+            seen.add(value)
+            values.append(value)
+
+
+def _choose_canonical(entries: list[GenreEntry], warnings: list[str]) -> str:
+    if not entries:
+        return ""
+    high = [entry for entry in entries if entry.label_type in _HIGH_PRIORITY_TYPES]
+    candidates = high or entries
+    candidates = sorted(candidates, key=lambda entry: _TYPE_PRIORITY.get(entry.label_type, 99))
+    canonical = candidates[0].canonical_genre
+    high_canonicals = {entry.canonical_genre for entry in high if entry.canonical_genre != canonical}
+    if high_canonicals:
+        warnings.append("ambiguous_canonical")
+    return canonical
+
+
+def resolve_genre_input(raw_label: Optional[str], *, index_path: Optional[str] = None) -> GenreResolution:
+    raw = str(raw_label or "").strip()
+    resolution = GenreResolution(raw_label=raw)
+    if not raw:
+        return resolution
+    if raw == "全部":
+        resolution.canonical_genre = "全部"
+        resolution.matched_labels.append("全部")
+        return resolution
+
+    taxonomy = load_genre_taxonomy(index_path)
+    matched: list[GenreEntry] = []
+    matched_entry_ids: set[tuple[str, str, str]] = set()
+
+    def add_match(entry: GenreEntry, matched_label: str) -> None:
+        identity = (entry.label, entry.canonical_genre, entry.template_file)
+        if identity in matched_entry_ids:
+            return
+        matched_entry_ids.add(identity)
+        matched.append(entry)
+        resolution.matched_labels.append(matched_label)
+
+    raw_key = _normalize_lookup_key(raw)
+    exact = taxonomy.lookup.get(raw_key)
+    if exact is not None:
+        add_match(exact, raw)
+    else:
+        unresolved_tokens: list[str] = []
+        for token in split_genre_input(raw):
+            token_key = _normalize_lookup_key(token)
+            entry = taxonomy.lookup.get(token_key)
+            if entry is None:
+                unresolved_tokens.append(token)
+                continue
+            add_match(entry, token)
+
+        if not matched:
+            lookup_items = sorted(taxonomy.lookup.items(), key=lambda item: len(item[0]), reverse=True)
+            consumed: set[str] = set()
+            for key, entry in lookup_items:
+                if len(key) < 2 or key in consumed:
+                    continue
+                if key in raw_key:
+                    add_match(entry, entry.label)
+                    consumed.add(key)
+        if not matched:
+            resolution.unresolved = unresolved_tokens or [raw]
+
+    resolution.canonical_genre = _choose_canonical(matched, resolution.warnings)
+    for entry in matched:
+        _append_unique(resolution.route_tags, entry.route_tags)
+        _append_unique(resolution.trope_tags, entry.trope_tags)
+        _append_unique(resolution.format_tags, entry.format_tags)
+        if entry.template_file:
+            _append_unique(resolution.template_files, [entry.template_file])
+    return resolution
+
+
+def resolve_canonical_genre(genre: Optional[str], *, index_path: Optional[str] = None) -> Optional[str]:
+    if genre is None:
+        return None
+    raw = str(genre).strip()
+    if not raw:
+        return raw
+    resolved = resolve_genre_input(raw, index_path=index_path)
+    return resolved.canonical_genre or raw
+
+
+def resolve_template_files(genre: Optional[str], *, index_path: Optional[str] = None) -> list[str]:
+    return resolve_genre_input(genre, index_path=index_path).template_files
+
+
+def resolve_template_stems(genre: Optional[str], *, index_path: Optional[str] = None) -> list[str]:
+    stems: list[str] = []
+    for template_file in resolve_template_files(genre, index_path=index_path):
+        stem = Path(template_file).stem
+        if stem and stem not in stems:
+            stems.append(stem)
+    return stems
+
+
+def normalize_genre_label_for_profile(genre: str, *, index_path: Optional[str] = None) -> str:
+    raw = str(genre or "").strip()
+    if not raw:
+        return ""
+    resolved = resolve_genre_input(raw, index_path=index_path)
+    if resolved.template_files:
+        return Path(resolved.template_files[0]).stem
+    if resolved.matched_labels:
+        return resolved.matched_labels[0]
+    return raw

+ 5 - 57
webnovel-writer/scripts/reference_search.py

@@ -21,6 +21,8 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
+from genre_taxonomy import GENRE_CANONICAL, resolve_canonical_genre
+
 
 # ---------------------------------------------------------------------------
 # CSV loading
@@ -107,54 +109,9 @@ def _table_visible_for_search(table_name: str, skill: str, explicit_table: bool)
 
 
 # ---------------------------------------------------------------------------
-# Genre canonical list & platform tag mapping
+# Genre canonical resolution
 # ---------------------------------------------------------------------------
 
-GENRE_CANONICAL: set[str] = {
-    "都市", "玄幻", "仙侠", "奇幻", "科幻",
-    "历史", "悬疑", "游戏", "古言", "现言",
-    "幻言", "年代", "种田", "快穿", "衍生",
-}
-
-PLATFORM_TO_CANONICAL: Dict[str, str] = {
-    # 男频
-    "都市日常": "都市", "都市修真": "都市", "都市高武": "都市",
-    "战神赘婿": "都市", "都市种田": "都市", "都市脑洞": "都市",
-    "传统玄幻": "玄幻", "玄幻脑洞": "玄幻",
-    "东方仙侠": "仙侠",
-    "西方奇幻": "奇幻",
-    "科幻末世": "科幻",
-    "历史古代": "历史", "历史脑洞": "历史", "抗战谍战": "历史",
-    "悬疑脑洞": "悬疑", "悬疑灵异": "悬疑",
-    "游戏体育": "游戏",
-    "动漫衍生": "衍生", "男频衍生": "衍生",
-    # 女频
-    "古风世情": "古言", "宫斗宅斗": "古言", "古言脑洞": "古言",
-    "现言脑洞": "现言", "青春甜宠": "现言", "星光璀璨": "现言",
-    "职场婚恋": "现言", "豪门总裁": "现言",
-    "玄幻言情": "幻言",
-    "年代": "年代", "民国言情": "年代",
-    "种田": "种田",
-    "快穿": "快穿",
-    "女频悬疑": "悬疑",
-    "女频衍生": "衍生",
-}
-
-# Legacy values that appeared in old CSV data → canonical mapping.
-# Used by resolve_genre() during the migration period.
-_LEGACY_GENRE_MAP: Dict[str, str] = {
-    "东方仙侠": "仙侠", "西方奇幻": "奇幻", "科幻末世": "科幻",
-    "都市日常": "都市", "都市修真": "都市", "都市高武": "都市",
-    "历史古代": "历史",
-    "谍战": "历史", "军事": "历史", "武侠": "历史",
-    "刑侦": "悬疑", "惊悚": "悬疑", "推理": "悬疑", "规则怪谈": "悬疑",
-    "末世": "科幻", "赛博朋克": "科幻",
-    "网游": "游戏", "电竞": "游戏", "竞技": "游戏", "体育": "游戏",
-    "轻小说": "衍生", "同人": "衍生",
-    "校园": "现言", "青春": "现言", "娱乐圈": "现言", "职场": "现言",
-    "高武": "都市",
-}
-
 
 def resolve_genre(genre: Optional[str]) -> Optional[str]:
     """Resolve a user-facing genre string to its canonical form.
@@ -164,14 +121,7 @@ def resolve_genre(genre: Optional[str]) -> Optional[str]:
     """
     if genre is None:
         return None
-    g = genre.strip()
-    if g in GENRE_CANONICAL or g == "全部":
-        return g
-    if g in PLATFORM_TO_CANONICAL:
-        return PLATFORM_TO_CANONICAL[g]
-    if g in _LEGACY_GENRE_MAP:
-        return _LEGACY_GENRE_MAP[g]
-    return g  # unresolvable — pass through
+    return resolve_canonical_genre(genre)
 
 
 # ---------------------------------------------------------------------------
@@ -423,8 +373,6 @@ def search(
 
     Returns a result dict suitable for JSON serialisation.
     """
-    resolved = resolve_genre(genre)
-
     if not csv_dir.is_dir():
         return {
             "status": "error",
@@ -454,7 +402,7 @@ def search(
         if not _table_visible_for_search(tbl_name, skill, explicit_table=table is not None):
             continue
         for row in rows:
-            if _skill_matches(row, skill) and _genre_matches(row, resolved):
+            if _skill_matches(row, skill) and _genre_matches(row, genre):
                 candidates.append((tbl_name, row))
 
     if not candidates:

+ 40 - 19
webnovel-writer/scripts/tests/test_reference_search.py

@@ -292,25 +292,28 @@ class TestGenreCanonical:
         }
         assert GENRE_CANONICAL == expected
 
-    def test_platform_to_canonical_maps_all_tags(self):
-        from reference_search import PLATFORM_TO_CANONICAL
-        # 34 unique tags (some tags like 科幻末世, 悬疑脑洞, 游戏体育 appear in both male/female)
-        assert len(PLATFORM_TO_CANONICAL) == 34
-        # Every value must be a canonical genre
-        from reference_search import GENRE_CANONICAL
-        for tag, canonical in PLATFORM_TO_CANONICAL.items():
-            assert canonical in GENRE_CANONICAL, f"{tag} -> {canonical} not in GENRE_CANONICAL"
-
-    def test_platform_to_canonical_spot_checks(self):
-        from reference_search import PLATFORM_TO_CANONICAL
-        assert PLATFORM_TO_CANONICAL["都市日常"] == "都市"
-        assert PLATFORM_TO_CANONICAL["战神赘婿"] == "都市"
-        assert PLATFORM_TO_CANONICAL["东方仙侠"] == "仙侠"
-        assert PLATFORM_TO_CANONICAL["西方奇幻"] == "奇幻"
-        assert PLATFORM_TO_CANONICAL["古风世情"] == "古言"
-        assert PLATFORM_TO_CANONICAL["豪门总裁"] == "现言"
-        assert PLATFORM_TO_CANONICAL["快穿"] == "快穿"
-        assert PLATFORM_TO_CANONICAL["科幻末世"] == "科幻"
+    def test_taxonomy_index_covers_genre_templates(self):
+        from genre_taxonomy import load_genre_taxonomy
+        templates_dir = Path(__file__).resolve().parents[2] / "templates" / "genres"
+        template_files = {path.name for path in templates_dir.glob("*.md")}
+        referenced = {
+            entry.template_file
+            for entry in load_genre_taxonomy().entries
+            if entry.template_file
+        }
+        assert len(template_files) == 37
+        assert template_files <= referenced
+
+    def test_taxonomy_spot_checks_platform_and_legacy_inputs(self):
+        from reference_search import resolve_genre
+        assert resolve_genre("都市日常") == "都市"
+        assert resolve_genre("战神赘婿") == "都市"
+        assert resolve_genre("东方仙侠") == "仙侠"
+        assert resolve_genre("西方奇幻") == "奇幻"
+        assert resolve_genre("古风世情") == "古言"
+        assert resolve_genre("豪门总裁") == "现言"
+        assert resolve_genre("快穿") == "快穿"
+        assert resolve_genre("科幻末世") == "科幻"
 
     def test_resolve_genre_canonical_passthrough(self):
         from reference_search import resolve_genre
@@ -330,6 +333,24 @@ class TestGenreCanonical:
         assert resolve_genre("刑侦") == "悬疑"
         assert resolve_genre("网游") == "游戏"
 
+    def test_resolve_genre_keeps_template_namespace_separate(self):
+        from genre_taxonomy import resolve_genre_input
+        xuanhuan = resolve_genre_input("玄幻")
+        assert xuanhuan.canonical_genre == "玄幻"
+        assert xuanhuan.template_files == ["修仙.md"]
+
+        xianxia = resolve_genre_input("修仙")
+        assert xianxia.canonical_genre == "仙侠"
+        assert xianxia.template_files == ["修仙.md"]
+
+    def test_resolve_composite_natural_language_genre(self):
+        from genre_taxonomy import resolve_genre_input
+        resolved = resolve_genre_input("知乎短篇风的规则怪谈")
+        assert resolved.canonical_genre == "悬疑"
+        assert resolved.template_files == ["规则怪谈.md", "知乎短篇.md"]
+        assert "规则怪谈" in resolved.route_tags
+        assert "知乎短篇" in resolved.format_tags
+
     def test_search_with_platform_tag_genre(self):
         """--genre 都市日常 should match rows with 适用题材=都市."""
         out = run_search(

+ 36 - 0
webnovel-writer/scripts/validate_csv.py

@@ -18,6 +18,7 @@ from typing import Any, Dict, List, Optional
 
 sys.path.insert(0, str(Path(__file__).resolve().parent))
 from reference_search import CSV_CONFIG, GENRE_CANONICAL, split_multi_value
+from genre_taxonomy import default_taxonomy_path, load_genre_taxonomy
 
 
 _CHINESE_COMMA_RE = re.compile(r"\uFF0C")  # full-width comma, escaped so it cannot be folded to ASCII ","
@@ -46,12 +47,47 @@ def _read_csv(path: Path) -> tuple[List[str], List[Dict[str, Any]]]:
     return headers, rows
 
 
+def _validate_genre_taxonomy(errors: List[str], warnings: List[str]) -> None:
+    taxonomy_path = default_taxonomy_path()
+    if not taxonomy_path.exists():
+        errors.append(f"[genre-index] 文件不存在: {taxonomy_path}")
+        return
+    try:
+        taxonomy = load_genre_taxonomy(str(taxonomy_path))
+    except Exception as exc:
+        errors.append(f"[genre-index] 加载失败: {exc}")
+        return
+
+    templates_dir = Path(__file__).resolve().parent.parent / "templates" / "genres"
+    template_files = {path.name for path in templates_dir.glob("*.md")}
+    referenced_files = {entry.template_file for entry in taxonomy.entries if entry.template_file}
+
+    missing_templates = sorted(referenced_files - template_files)
+    if missing_templates:
+        errors.append(f"[genre-index] template_file 不存在: {', '.join(missing_templates)}")
+
+    unreferenced_templates = sorted(template_files - referenced_files)
+    if unreferenced_templates:
+        errors.append(f"[genre-index] 模板未被 index 覆盖: {', '.join(unreferenced_templates)}")
+
+    for entry in taxonomy.entries:
+        if entry.template_file and not entry.template_file.endswith(".md"):
+            errors.append(f"[genre-index] {entry.label} template_file 应以 .md 结尾: {entry.template_file}")
+        if entry.canonical_genre != "全部" and entry.canonical_genre not in GENRE_CANONICAL:
+            errors.append(f"[genre-index] {entry.label} canonical_genre 不合法: {entry.canonical_genre}")
+
+    if len(template_files) != 37:
+        warnings.append(f"[genre-index] 当前模板数量为 {len(template_files)},预期 37")
+
+
 def validate(csv_dir: Path) -> Dict[str, List[str]]:
     errors: List[str] = []
     warnings: List[str] = []
     all_ids: Dict[str, str] = {}
     valid_genres = GENRE_CANONICAL | {"全部"}
 
+    _validate_genre_taxonomy(errors, warnings)
+
     for table_name, config in CSV_CONFIG.items():
         csv_path = csv_dir / config["file"]
         if not csv_path.exists():