#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Repository: haiany/webnovel-writer
# Mirror of https://github.com/lingfengQAQ/webnovel-writer.git
"""
Genre profile parsing helpers for ContextManager.
"""

from __future__ import annotations

import re
from typing import List

from .genre_aliases import normalize_genre_token


def parse_genre_tokens(
    genre_raw: str,
    *,
    support_composite: bool,
    separators: tuple[str, ...],
) -> List[str]:
    """Turn a raw genre string into a list of normalized genre tokens.

    Args:
        genre_raw: Raw genre text. Falsy values are treated as an empty
            string; anything else is coerced with ``str()`` and stripped.
        support_composite: When false, the whole text is treated as a single
            genre and no splitting is attempted.
        separators: Literal separator strings used to split composite genres.
            Each entry is regex-escaped; entries whose ``str()`` is empty are
            ignored.

    Returns:
        An empty list for blank input. Otherwise the normalized tokens in
        first-seen order, de-duplicated case-insensitively. Whenever no token
        survives (or splitting is disabled / no usable separator exists), a
        one-element list holding the normalized whole text, or the stripped
        raw text if normalization returns a falsy value.

    Note:
        Normalization is delegated to ``normalize_genre_token`` (imported from
        ``.genre_aliases``); its exact mapping rules are not visible here.
    """
    cleaned = str(genre_raw or "").strip()
    if cleaned == "":
        return []

    def as_single_genre() -> List[str]:
        # Whole-text fallback: prefer the normalized form, keep the raw text
        # when normalization yields nothing truthy.
        canonical_whole = normalize_genre_token(cleaned)
        return [canonical_whole or cleaned]

    if not support_composite:
        return as_single_genre()

    # Build an alternation of the literal separators.
    escaped_separators = [re.escape(str(sep)) for sep in separators if str(sep)]
    splitter = "|".join(escaped_separators)
    if not splitter:
        return as_single_genre()

    unique_tokens: List[str] = []
    seen_keys = set()
    for piece in re.split(splitter, cleaned):
        piece = piece.strip()
        if not piece:
            continue
        canonical = normalize_genre_token(piece)
        if not canonical:
            continue
        key = canonical.lower()
        if key not in seen_keys:
            seen_keys.add(key)
            unique_tokens.append(canonical)

    return unique_tokens if unique_tokens else as_single_genre()


def extract_genre_section(text: str, genre: str) -> str:
    """Return the markdown section whose heading mentions *genre*.

    The text is scanned line by line. The first line that (after stripping
    and lowercasing) starts with ``"## "`` or ``"### "`` and contains the
    stripped, lowercased *genre* opens the section. The section runs until
    the next ``## ``/``### `` heading of either level, or the end of the text.

    Args:
        text: Markdown source to search. Falsy input yields ``""``.
        genre: Genre name to look for inside heading lines
            (case-insensitive substring match). ``None`` or a blank string
            is treated as "no genre requested".

    Returns:
        The matched section (heading line included), stripped. If no heading
        matches, or *genre* is blank, the first 80 lines of *text* joined
        with newlines and stripped.
    """
    if not text:
        return ""

    lines = text.splitlines()
    target = str(genre or "").strip().lower()

    # An empty target is a substring of every heading, so without this guard
    # a blank genre would silently select the first section instead of
    # falling through to the generic "first 80 lines" fallback.
    if target:
        capture: List[str] = []
        for line in lines:
            normalized = line.strip().lower()
            if normalized.startswith(("## ", "### ")):
                if capture:
                    # Next heading after the matched one ends the section.
                    break
                if target in normalized:
                    capture.append(line)
                continue
            if capture:
                capture.append(line)

        if capture:
            return "\n".join(capture).strip()

    return "\n".join(lines[:80]).strip()


def extract_markdown_refs(text: str, max_items: int = 8) -> List[str]:
    """Collect the first few non-heading content lines from markdown text.

    Each line is stripped, has any leading ``-``/``*`` characters removed,
    and is stripped again. Lines that end up empty or that start with ``#``
    are skipped.

    Args:
        text: Markdown source. Falsy input yields an empty list.
        max_items: Maximum number of entries to return; values below 1 are
            treated as 1.

    Returns:
        The cleaned lines in document order, at most ``max(1, max_items)``.
    """
    collected: List[str] = []
    if not text:
        return collected

    # Lazily clean each line so scanning stops as soon as the limit is hit.
    cleaned_rows = (raw.strip().lstrip("-*").strip() for raw in text.splitlines())
    for entry in cleaned_rows:
        if entry == "" or entry.startswith("#"):
            continue
        collected.append(entry)
        if len(collected) >= max(1, max_items):
            break
    return collected


def build_composite_genre_hints(genres: List[str], refs: List[str]) -> List[str]:
    """Build writing hints for a composite (multi-genre) profile.

    Args:
        genres: Genre names; the first is treated as the primary genre and
            the rest as secondary genres.
        refs: Reference lines; only the first one is used, and only when the
            list is non-empty.

    Returns:
        An empty list when fewer than two genres are given. Otherwise the
        hint strings (emitted in Chinese): one naming the primary genre and
        the ``/``-joined secondary genres, an optional one quoting
        ``refs[0]``, and a fixed closing hint.
    """
    if len(genres) <= 1:
        return []

    primary, *secondaries = genres
    lead_hint = (
        f"以“{primary}”作为主引擎推进主线，每章至少保留1处“{'/'.join(secondaries)}”特征表达。"
    )
    reference_hints = [f"复合题材执行参考：{refs[0]}"] if refs else []
    closing_hint = "主辅题材冲突时，优先保证主题材读者承诺，辅题材用于制造新鲜感。"
    return [lead_hint, *reference_hints, closing_hint]