| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Genre profile parsing helpers for ContextManager.
- """
- from __future__ import annotations
- import re
- from typing import List
- from .genre_aliases import normalize_genre_token
def parse_genre_tokens(
    genre_raw: str,
    *,
    support_composite: bool,
    separators: tuple[str, ...],
) -> List[str]:
    """Split a raw genre string into normalized, de-duplicated genre tokens.

    Args:
        genre_raw: Raw genre text. Falsy values are treated as an empty string.
        support_composite: When false, the whole text is handled as one genre
            and ``separators`` is never consulted.
        separators: Literal separator strings (regex-escaped before use).
            Entries whose string form is empty are ignored.

    Returns:
        ``[]`` for blank input. Otherwise the normalized tokens in first-seen
        order, de-duplicated case-insensitively. When splitting is disabled,
        no usable separator exists, or every token normalizes to a falsy
        value, a one-element list holding the normalized whole text (or the
        stripped raw text if normalization yields a falsy value).
    """
    cleaned = str(genre_raw or "").strip()
    if not cleaned:
        return []

    def _whole_text() -> List[str]:
        # Treat the entire input as a single genre; keep the raw text when
        # normalize_genre_token returns something falsy.
        canonical_whole = normalize_genre_token(cleaned)
        return [canonical_whole or cleaned]

    splitter = ""
    if support_composite:
        splitter = "|".join(
            re.escape(str(sep)) for sep in separators if str(sep)
        )
    if not splitter:
        return _whole_text()

    unique: List[str] = []
    seen_keys = set()
    for piece in re.split(splitter, cleaned):
        piece = piece.strip()
        if not piece:
            continue
        canonical = normalize_genre_token(piece)
        if not canonical:
            continue
        # De-duplicate on the lowercased normalized form, keeping the first
        # occurrence's spelling.
        key = canonical.lower()
        if key not in seen_keys:
            seen_keys.add(key)
            unique.append(canonical)

    return unique if unique else _whole_text()
def extract_genre_section(text: str, genre: str) -> str:
    """Return the markdown section whose heading mentions ``genre``.

    Only ``## `` and ``### `` headings are treated as section boundaries.
    The first such heading that contains ``genre`` (case-insensitive
    substring match) starts the capture. Capture ends at the next heading of
    the same or a shallower level, so ``### `` sub-headings nested under a
    matched ``## `` heading stay part of the returned section.

    Args:
        text: Markdown document to search.
        genre: Genre name to look for in headings. ``None`` or blank means
            "no specific genre" and triggers the fallback below.

    Returns:
        ``""`` when ``text`` is empty. Otherwise the matched section
        (heading line included) with surrounding whitespace stripped, or the
        first 80 lines of ``text`` when no heading matches or ``genre`` is
        blank.
    """
    if not text:
        return ""

    lines = text.splitlines()
    target = str(genre or "").strip().lower()
    capture: List[str] = []
    active_level = 0  # 0 = not capturing; otherwise 2 or 3 (matched heading depth)

    # An empty target would be a substring of every heading and would select
    # the first section arbitrarily, so skip scanning and use the fallback.
    if target:
        for line in lines:
            normalized = line.strip().lower()
            if normalized.startswith("## "):
                level = 2
            elif normalized.startswith("### "):
                level = 3
            else:
                level = 0

            if level:
                if active_level:
                    # A sibling or parent heading closes the section; a
                    # deeper heading is a sub-section and is kept.
                    if level <= active_level:
                        break
                elif target in normalized:
                    active_level = level

            if active_level:
                capture.append(line)

    if capture:
        return "\n".join(capture).strip()
    # No matching heading: fall back to the head of the document.
    return "\n".join(lines[:80]).strip()
def extract_markdown_refs(text: str, max_items: int = 8) -> List[str]:
    """Collect the leading non-heading lines of a markdown text.

    Each line is stripped of surrounding whitespace and of any leading
    ``-`` / ``*`` characters. Lines that end up empty or that start with
    ``#`` are skipped.

    Args:
        text: Markdown text to scan.
        max_items: Maximum number of lines to return. Values below 1 are
            treated as 1.

    Returns:
        The cleaned lines in document order, at most ``max(1, max_items)``
        of them. ``[]`` when ``text`` is empty.
    """
    if not text:
        return []

    limit = max(1, max_items)
    collected: List[str] = []
    for raw_line in text.splitlines():
        if len(collected) >= limit:
            break
        without_bullet = raw_line.strip().lstrip("-*")
        entry = without_bullet.strip()
        if entry and not entry.startswith("#"):
            collected.append(entry)
    return collected
def build_composite_genre_hints(genres: List[str], refs: List[str]) -> List[str]:
    """Build writing hints for a story that blends several genres.

    Args:
        genres: Genre names; the first is the primary genre and the rest are
            secondary genres.
        refs: Reference lines; only the first one is used, if present.

    Returns:
        ``[]`` when fewer than two genres are given. Otherwise a list with
        the primary/secondary guidance line, an optional line quoting
        ``refs[0]``, and a closing conflict-resolution line.
    """
    if len(genres) < 2:
        return []

    lead_genre, *supporting = genres
    supporting_label = "/".join(supporting)

    result: List[str] = [
        f"以“{lead_genre}”作为主引擎推进主线,每章至少保留1处“{supporting_label}”特征表达。",
        "主辅题材冲突时,优先保证主题材读者承诺,辅题材用于制造新鲜感。",
    ]
    if refs:
        # The reference line sits between the guidance line and the closing line.
        result.insert(1, f"复合题材执行参考:{refs[0]}")
    return result
|