Browse Source

fix: support composite genre filters

lingfengQAQ 1 month ago
parent
commit
ebb49d7713

+ 42 - 9
webnovel-writer/scripts/data_modules/story_system_engine.py

@@ -7,7 +7,13 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-from reference_search import CSV_CONFIG, GENRE_CANONICAL, resolve_genre, search as search_reference
+from reference_search import (
+    CSV_CONFIG,
+    GENRE_CANONICAL,
+    resolve_genre,
+    search as search_reference,
+    split_multi_value,
+)
 
 from .story_contracts import merge_anti_patterns
 
@@ -88,7 +94,7 @@ class StorySystemEngine:
         canonical_genre = str(route.get("meta", {}).get("canonical_genre", "") or "").strip()
         reasoning = self._load_reasoning(canonical_genre)
         if not reasoning and genre:
-            fallback_genre = resolve_genre(genre) or genre
+            fallback_genre = self._primary_resolved_genre(genre) or genre
             if fallback_genre != canonical_genre:
                 reasoning = self._load_reasoning(fallback_genre)
         ranked = self._apply_reasoning(reasoning, base_context, dynamic_context, chapter_directive)
@@ -184,11 +190,11 @@ class StorySystemEngine:
             raise self._routing_error(query=query, genre=genre, route_rows=route_rows)
 
         primary_genre = str(matched.get("题材/流派") or genre or "").strip()
-        explicit_canonical = resolve_genre(genre)
+        explicit_canonical = self._primary_resolved_genre(genre)
         canonical_genre = str(matched.get("canonical_genre") or "").strip()
         row_canonicals = [
             resolved
-            for raw in self._split_multi_value(matched.get("适用题材"))
+            for raw in self._split_genre_value(matched.get("适用题材"))
             for resolved in [resolve_genre(raw) or str(raw or "").strip()]
             if resolved and resolved != "全部"
         ]
@@ -301,6 +307,24 @@ class StorySystemEngine:
     def _split_multi_value(self, raw: Any) -> List[str]:
         return [item.strip() for item in re.split(r"[|;;]+", str(raw or "")) if item.strip()]
 
+    def _split_genre_value(self, raw: Any) -> List[str]:
+        return split_multi_value(raw)
+
+    def _resolve_genre_values(self, raw: Any) -> List[str]:
+        return [
+            resolved
+            for token in self._split_genre_value(raw)
+            for resolved in [resolve_genre(token)]
+            if resolved
+        ]
+
+    def _primary_resolved_genre(self, raw: Any) -> Optional[str]:
+        resolved_values = self._resolve_genre_values(raw)
+        for value in resolved_values:
+            if value in GENRE_CANONICAL or value == "全部":
+                return value
+        return resolved_values[0] if resolved_values else None
+
     def _expand_query(self, query: str, default_query: str, chapter_query: str = "") -> str:
         items: List[str] = []
         for candidate in [query, chapter_query, *self._split_multi_value(default_query)]:
@@ -323,14 +347,23 @@ class StorySystemEngine:
         return " ".join(parts)
 
     def _fallback_row_for_genre(self, rows: List[Dict[str, Any]], genre: str) -> Dict[str, Any] | None:
+        """Return the first row whose genre columns overlap the requested genre(s).
+
+        ``genre`` may name several genres at once; it is split and resolved via
+        ``_resolve_genre_values`` and each resolved value is normalized with
+        ``_normalize_text``.  For every row the values of the ``适用题材``,
+        ``题材/流派`` and ``canonical_genre`` columns are split, resolved (keeping
+        the raw token when ``resolve_genre`` yields nothing) and normalized the
+        same way.  The first row sharing at least one value with the requested
+        set is returned; ``None`` is returned when no row matches.
+        """
-        genre_text = self._normalize_text(resolve_genre(genre) or genre)
+        # Normalized, resolved forms of every token in the requested genre.
+        genre_texts = {
+            self._normalize_text(value)
+            for value in self._resolve_genre_values(genre)
+            if value
+        }
         for row in rows:
             candidates = (
-                self._split_multi_value(row.get("适用题材"))
-                + self._split_multi_value(row.get("题材/流派"))
-                + self._split_multi_value(row.get("canonical_genre"))
+                self._split_genre_value(row.get("适用题材"))
+                + self._split_genre_value(row.get("题材/流派"))
+                + self._split_genre_value(row.get("canonical_genre"))
             )
-            if any(self._normalize_text(candidate) == genre_text for candidate in candidates):
+            # Resolve row-side tokens too, so both sides are compared in the
+            # same form; unresolved tokens fall back to their raw text.
+            resolved_candidates = {
+                self._normalize_text(resolve_genre(candidate) or candidate)
+                for candidate in candidates
+                if candidate
+            }
+            if genre_texts.intersection(resolved_candidates):
                 return row
         return None
 

+ 50 - 0
webnovel-writer/scripts/data_modules/tests/test_story_system_engine.py

@@ -673,3 +673,53 @@ def test_story_system_reference_matching_combines_priority_and_chapter_keywords(
     assert [row["编号"] for row in selected[:2]] == ["FIN-001", "TR-001"]
     trace_by_id = {row["id"]: row for row in contract["chapter_brief"]["source_trace"]}
     assert trace_by_id["FIN-001"]["combined_rank_score"] > trace_by_id["TR-001"]["combined_rank_score"]
+
+
+def test_story_system_composite_platform_genre_filters_dynamic_context():
+    """A comma-separated genre string must route and filter by its resolvable genre.
+
+    Builds a one-row route table for the 都市 genre and a dynamic table holding
+    one 都市 row and one 仙侠 row, then calls ``build`` with a composite genre
+    and checks that only the 都市 row reaches the chapter's dynamic context.
+    """
+    csv_dir = _make_local_tmp_path() / "csv"
+    csv_dir.mkdir()
+    # Route table: a single row whose canonical genre is 都市 and which
+    # recommends the 场景写法 table as its dynamic lookup table.
+    _write_csv(
+        csv_dir / "题材与调性推理.csv",
+        [
+            "编号", "适用技能", "分类", "层级", "关键词", "意图与同义词", "适用题材",
+            "大模型指令", "核心摘要", "详细展开", "题材/流派", "canonical_genre", "题材别名", "核心调性",
+            "节奏策略", "毒点", "推荐基础检索表", "推荐动态检索表", "默认查询词",
+        ],
+        [
+            {
+                "编号": "GR-CITY", "适用技能": "story-system", "分类": "题材路由", "层级": "知识补充",
+                "关键词": "", "意图与同义词": "", "适用题材": "都市", "大模型指令": "",
+                "核心摘要": "", "详细展开": "", "题材/流派": "都市脑洞", "canonical_genre": "都市",
+                "题材别名": "", "核心调性": "", "节奏策略": "", "毒点": "",
+                "推荐基础检索表": "", "推荐动态检索表": "场景写法", "默认查询词": "",
+            }
+        ],
+    )
+    # Dynamic table: both rows share the keyword 美食, so only the genre
+    # filter can tell them apart.
+    _write_csv(
+        csv_dir / "场景写法.csv",
+        ["编号", "适用技能", "分类", "层级", "关键词", "适用题材", "适用场景", "核心摘要", "毒点"],
+        [
+            {
+                "编号": "FOOD-001", "适用技能": "write", "分类": "场景", "层级": "知识补充",
+                "关键词": "美食|店铺", "适用题材": "都市", "适用场景": "深夜食堂",
+                "核心摘要": "都市美食场景要写烟火气。", "毒点": "",
+            },
+            {
+                "编号": "ELIXIR-001", "适用技能": "write", "分类": "场景", "层级": "知识补充",
+                "关键词": "美食|丹药", "适用题材": "仙侠", "适用场景": "炼丹",
+                "核心摘要": "仙侠炼丹场景强调灵材。", "毒点": "",
+            },
+        ],
+    )
+
+    # Composite genre argument: several genre labels joined by commas.
+    contract = StorySystemEngine(csv_dir).build(
+        query="美食 店铺",
+        genre="都市脑洞,美食,甜宠",
+        chapter=1,
+    )
+
+    route = contract["master_setting"]["route"]
+    assert route["canonical_genre"] == "都市"
+    assert route["route_source"] == "explicit_genre_fallback"
+    # The 仙侠 row must be filtered out even though its keywords match the query.
+    selected_ids = [row["编号"] for row in contract["chapter_brief"]["dynamic_context"]]
+    assert selected_ids == ["FOOD-001"]

+ 14 - 5
webnovel-writer/scripts/reference_search.py

@@ -57,15 +57,19 @@ def load_tables(csv_dir: Path, table: Optional[str] = None) -> Dict[str, List[Di
 # Filtering
 # ---------------------------------------------------------------------------
 
-_MULTI_VALUE_SPLIT_RE = re.compile(r"[|,,]+")
+_MULTI_VALUE_SPLIT_RE = re.compile(r"[|,,、;;]+")
 _INTERNAL_TABLE_ROLES = {"route", "reasoning"}
 
 
-def _split_multi_value(cell: str) -> List[str]:
+def split_multi_value(cell: Any) -> List[str]:
     """Split list-like cells while remaining compatible with legacy comma data."""
     if not cell:
         return []
-    return [part.strip() for part in _MULTI_VALUE_SPLIT_RE.split(cell) if part.strip()]
+    # Stringify first so non-string cell values can be split as well.
+    return [part.strip() for part in _MULTI_VALUE_SPLIT_RE.split(str(cell)) if part.strip()]
+
+
+def _split_multi_value(cell: Any) -> List[str]:
+    """Private alias of :func:`split_multi_value`, kept for callers in this module."""
+    return split_multi_value(cell)
 
 
 def _skill_matches(row: Dict[str, str], skill: str) -> bool:
@@ -84,9 +88,14 @@ def _genre_matches(row: Dict[str, str], genre: Optional[str]) -> bool:
     cell = row.get("适用题材", "")
     if cell.strip() == "全部":
         return True
-    resolved_genre = resolve_genre(genre)
+    requested_genres = [
+        resolved
+        for raw in _split_multi_value(genre)
+        for resolved in [resolve_genre(raw)]
+        if resolved
+    ]
     cell_genres = [resolve_genre(v) for v in _split_multi_value(cell)]
-    return resolved_genre in cell_genres
+    return any(resolved in cell_genres for resolved in requested_genres)
 
 
 def _table_visible_for_search(table_name: str, skill: str, explicit_table: bool) -> bool:

+ 10 - 0
webnovel-writer/scripts/tests/test_reference_search.py

@@ -187,6 +187,16 @@ class TestSkillAndGenreFiltering:
         ids = [r["编号"] for r in out["data"]["results"]]
         assert "TS-001" in ids
 
+        out = run_search(
+            "--csv-dir", str(temp_dir),
+            "--skill", "write",
+            "--table", "兼容测试",
+            "--query", "旧格式查询",
+            "--genre", "东方仙侠,都市脑洞",
+        )
+        ids = [r["编号"] for r in out["data"]["results"]]
+        assert "TS-001" in ids
+
 
 class TestErrorHandling:
     """Test error cases."""

+ 2 - 5
webnovel-writer/scripts/validate_csv.py

@@ -17,10 +17,9 @@ from typing import Any, Dict, List, Optional
 
 
 sys.path.insert(0, str(Path(__file__).resolve().parent))
-from reference_search import CSV_CONFIG, GENRE_CANONICAL
+from reference_search import CSV_CONFIG, GENRE_CANONICAL, split_multi_value
 
 
-_MULTI_SPLIT_RE = re.compile(r"[|,,]+")
 _CHINESE_COMMA_RE = re.compile(r",")
 _MULTI_VALUE_COLUMNS = ("适用技能", "关键词", "意图与同义词", "适用题材")
 _ROUTE_TABLE = "题材与调性推理"
@@ -32,9 +31,7 @@ _VALID_LEVELS = {"提醒", "缺陷补偿", "知识补充"}
 
 
 def _split_multi_value(cell: str) -> List[str]:
+    """Split a multi-value cell via ``split_multi_value`` from ``reference_search``."""
-    if not cell:
-        return []
-    return [part.strip() for part in _MULTI_SPLIT_RE.split(cell) if part.strip()]
+    return split_multi_value(cell)
 
 
 def _default_csv_dir() -> Path: