Просмотр исходного кода

feat: add vector_projection_writer for event/entity embedding

lingfengQAQ 2 месяцев назад
Родитель
Commit
29c8ac1f4a

+ 3 - 0
webnovel-writer/scripts/data_modules/chapter_commit_service.py

@@ -71,6 +71,7 @@ class ChapterCommitService:
                 "index": "pending",
                 "summary": "pending",
                 "memory": "pending",
+                "vector": "pending",
             },
         }
 
@@ -100,12 +101,14 @@ class ChapterCommitService:
         from .memory_projection_writer import MemoryProjectionWriter
         from .state_projection_writer import StateProjectionWriter
         from .summary_projection_writer import SummaryProjectionWriter
+        from .vector_projection_writer import VectorProjectionWriter
 
         writers = {
             "state": StateProjectionWriter(self.project_root),
             "index": IndexProjectionWriter(self.project_root),
             "summary": SummaryProjectionWriter(self.project_root),
             "memory": MemoryProjectionWriter(self.project_root),
+            "vector": VectorProjectionWriter(self.project_root),
         }
         required_writers = set(EventProjectionRouter().required_writers(payload))
         for name, writer in writers.items():

+ 67 - 0
webnovel-writer/scripts/data_modules/tests/test_vector_projection_writer.py

@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""VectorProjectionWriter 单元测试。"""
+from data_modules.vector_projection_writer import VectorProjectionWriter
+
+
+def test_event_to_text_formats_power_breakthrough():
+    """A power_breakthrough event renders its chapter, subject and new value."""
+    breakthrough = dict(
+        event_type="power_breakthrough",
+        chapter=47,
+        subject="韩立",
+        payload=dict(field="realm", new="筑基初期"),
+    )
+    # __new__ builds the writer without running __init__.
+    rendered = VectorProjectionWriter.__new__(VectorProjectionWriter)._event_to_text(breakthrough)
+    for fragment in ("第47章", "韩立", "筑基初期"):
+        assert fragment in rendered
+
+
+def test_delta_to_text_formats_relationship():
+    """A relationship delta renders its chapter, both entities and the relation."""
+    relationship = dict(
+        from_entity="韩立",
+        to_entity="陈巧倩",
+        relationship_type="合作",
+        chapter=47,
+    )
+    # __new__ builds the writer without running __init__.
+    rendered = VectorProjectionWriter.__new__(VectorProjectionWriter)._delta_to_text(relationship)
+    for fragment in ("第47章", "韩立", "陈巧倩", "合作"):
+        assert fragment in rendered
+
+
+def test_collect_chunks_from_commit():
+    """One accepted event plus one entity delta yield two chunks, event first."""
+    breakthrough = {
+        "event_type": "power_breakthrough",
+        "chapter": 47,
+        "subject": "韩立",
+        "payload": {"field": "realm", "new": "筑基初期"},
+    }
+    relationship = {
+        "from_entity": "韩立",
+        "to_entity": "陈巧倩",
+        "relationship_type": "合作",
+        "chapter": 47,
+    }
+    commit = {
+        "meta": {"chapter": 47, "status": "accepted"},
+        "accepted_events": [breakthrough],
+        "entity_deltas": [relationship],
+    }
+    # __new__ builds the writer without running __init__.
+    collected = VectorProjectionWriter.__new__(VectorProjectionWriter)._collect_chunks(commit)
+    # Checks the count and the order of the chunk types in one comparison.
+    assert [chunk["chunk_type"] for chunk in collected] == ["event", "entity_delta"]
+
+
+def test_rejected_commit_returns_not_applied():
+    """apply() reports applied=False when the commit status is not accepted."""
+    rejected_payload = {"meta": {"status": "rejected", "chapter": 1}}
+    # __new__ builds the writer without running __init__, so project_root
+    # is assigned by hand.
+    subject_under_test = VectorProjectionWriter.__new__(VectorProjectionWriter)
+    subject_under_test.project_root = None
+    outcome = subject_under_test.apply(rejected_payload)
+    assert outcome["applied"] is False

+ 121 - 0
webnovel-writer/scripts/data_modules/vector_projection_writer.py

@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from __future__ import annotations
+
+import asyncio
+import logging
+from pathlib import Path
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+
+class VectorProjectionWriter:
+    """Project accepted commit facts into the vector store as text chunks.
+
+    ``apply`` is the entry point: it renders every accepted event and entity
+    delta of a commit payload into a one-sentence chunk and passes the chunks
+    to ``RAGAdapter.store_chunks``.
+    """
+
+    def __init__(self, project_root: Path):
+        """Remember the project root used to load ``DataModulesConfig``."""
+        self.project_root = Path(project_root)
+
+    def apply(self, commit_payload: dict) -> dict:
+        """Store the chunks derived from ``commit_payload``.
+
+        Args:
+            commit_payload: Commit dict; ``meta.status`` must equal
+                ``"accepted"`` for anything to be stored.
+
+        Returns:
+            A dict with ``applied`` and ``writer``. ``reason`` is set when
+            nothing was stored: ``commit_rejected`` (status is not accepted
+            or ``meta`` is missing), ``no_chunks`` (nothing could be rendered)
+            or ``error:<message>`` (storing failed). ``stored`` carries the
+            count reported by ``_store_chunks``.
+        """
+        meta = commit_payload.get("meta") if isinstance(commit_payload, dict) else None
+        # A payload without a usable ``meta.status`` is handled like a
+        # rejected commit instead of raising KeyError.
+        if not isinstance(meta, dict) or meta.get("status") != "accepted":
+            return {"applied": False, "writer": "vector", "reason": "commit_rejected"}
+
+        chunks = self._collect_chunks(commit_payload)
+        if not chunks:
+            return {"applied": False, "writer": "vector", "reason": "no_chunks"}
+
+        try:
+            stored = int(self._store_chunks(chunks) or 0)
+        except Exception as exc:
+            # Storage failures are reported in the result, not raised, so the
+            # caller can tell "failed" apart from "stored nothing".
+            logger.warning("vector_projection_failed: %s", exc)
+            return {
+                "applied": False,
+                "writer": "vector",
+                "stored": 0,
+                "reason": f"error:{exc}",
+            }
+        return {"applied": stored > 0, "writer": "vector", "stored": stored}
+
+    @staticmethod
+    def _coerce_chapter(value: Any, default: int = 0) -> int:
+        """Return ``value`` as an int, or ``default`` when it is empty or not numeric."""
+        if not value:
+            return default
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            return default
+
+    @staticmethod
+    def _make_chunk(chapter: int, text: str, chunk_type: str) -> Dict[str, Any]:
+        """Build the chunk record handed to the vector store."""
+        return {
+            "chapter": chapter,
+            "scene_index": 0,
+            "content": text,
+            "chunk_type": chunk_type,
+            "parent_chunk_id": f"ch{chapter:04d}_summary",
+            "source_file": f"commit:chapter_{chapter:03d}",
+        }
+
+    def _collect_chunks(self, commit_payload: dict) -> List[Dict[str, Any]]:
+        """Render accepted events, then entity deltas, into chunk records.
+
+        Items that are not dicts or that render to an empty text are skipped.
+        An item without its own ``chapter`` inherits ``meta.chapter``; the
+        same chapter is used for the chunk metadata and for the rendered text.
+        """
+        meta = commit_payload.get("meta")
+        commit_chapter = self._coerce_chapter(
+            meta.get("chapter") if isinstance(meta, dict) else None
+        )
+        sources = (
+            ("accepted_events", "event", self._event_to_text),
+            ("entity_deltas", "entity_delta", self._delta_to_text),
+        )
+
+        chunks: List[Dict[str, Any]] = []
+        for key, chunk_type, render in sources:
+            for item in commit_payload.get(key) or []:
+                if not isinstance(item, dict):
+                    continue
+                chapter = self._coerce_chapter(item.get("chapter"), commit_chapter)
+                text = render(item, chapter)
+                if text:
+                    chunks.append(self._make_chunk(chapter, text, chunk_type))
+        return chunks
+
+    def _event_to_text(self, event: dict, default_chapter: int = 0) -> str:
+        """Render one event as a sentence, or ``""`` when it cannot be rendered.
+
+        Args:
+            event: Event dict with ``event_type``, ``subject``, ``payload``
+                and optionally ``chapter``.
+            default_chapter: Chapter to print when the event has none.
+
+        Returns:
+            The sentence, or ``""`` for unknown event types and for events
+            missing the fields the sentence needs.
+        """
+        chapter = self._coerce_chapter(event.get("chapter"), default_chapter)
+        subject = str(event.get("subject") or "").strip()
+        event_type = str(event.get("event_type") or "").strip()
+        payload = event.get("payload")
+        if not isinstance(payload, dict):
+            # A malformed payload renders nothing instead of raising.
+            payload = {}
+
+        if event_type == "power_breakthrough":
+            new_val = str(payload.get("new") or payload.get("to") or "").strip()
+            return f"第{chapter}章:{subject}突破至{new_val}" if new_val else ""
+        if event_type == "character_state_changed":
+            field = str(payload.get("field") or "").strip()
+            new_val = str(payload.get("new") or payload.get("to") or "").strip()
+            return f"第{chapter}章:{subject}的{field}变为{new_val}" if field and new_val else ""
+        if event_type == "relationship_changed":
+            to_entity = str(payload.get("to_entity") or payload.get("to") or "").strip()
+            rel_type = str(payload.get("relationship_type") or payload.get("type") or "").strip()
+            # Require the relation type too, as _delta_to_text does, so the
+            # sentence never ends on a dangling "关系变为".
+            if to_entity and rel_type:
+                return f"第{chapter}章:{subject}与{to_entity}关系变为{rel_type}"
+            return ""
+        if event_type in ("world_rule_revealed", "world_rule_broken"):
+            desc = str(payload.get("description") or payload.get("rule") or "").strip()
+            action = "揭示" if "revealed" in event_type else "打破"
+            return f"第{chapter}章:{action}世界规则——{desc}" if desc else ""
+        if event_type == "artifact_obtained":
+            name = str(payload.get("name") or subject or "").strip()
+            if not name:
+                # Without an artifact name the sentence would carry no content.
+                return ""
+            owner = str(payload.get("owner") or payload.get("holder") or "").strip()
+            return f"第{chapter}章:{owner}获得{name}" if owner else f"第{chapter}章:获得{name}"
+        return ""
+
+    def _delta_to_text(self, delta: dict, default_chapter: int = 0) -> str:
+        """Render one entity delta as a sentence, or ``""`` when it has no usable fields.
+
+        A delta with ``from_entity``, ``to_entity`` and ``relationship_type``
+        is rendered as a relationship change; otherwise a delta with an
+        ``entity_id`` is rendered as a generic entity change.
+
+        Args:
+            delta: Entity delta dict.
+            default_chapter: Chapter to print when the delta has none.
+        """
+        chapter = self._coerce_chapter(delta.get("chapter"), default_chapter)
+        from_e = str(delta.get("from_entity") or "").strip()
+        to_e = str(delta.get("to_entity") or "").strip()
+        rel = str(delta.get("relationship_type") or "").strip()
+
+        if from_e and to_e and rel:
+            return f"第{chapter}章:{from_e}与{to_e}关系变为{rel}"
+
+        entity_id = str(delta.get("entity_id") or "").strip()
+        canonical = str(delta.get("canonical_name") or entity_id).strip()
+        if entity_id:
+            return f"第{chapter}章:实体变更——{canonical}"
+        return ""
+
+    def _store_chunks(self, chunks: List[Dict[str, Any]]) -> int:
+        """Send ``chunks`` to the RAG adapter and return its result.
+
+        Exceptions propagate to the caller; ``apply`` logs and reports them.
+        """
+        # Imported here rather than at module top, as in the original code.
+        from .config import DataModulesConfig
+        from .rag_adapter import RAGAdapter
+
+        config = DataModulesConfig.from_project_root(self.project_root)
+        adapter = RAGAdapter(config)
+        # NOTE(review): asyncio.run() raises RuntimeError when an event loop is
+        # already running in this thread; assumes apply() is only called from
+        # synchronous code - confirm against callers.
+        # NOTE(review): store_chunks is presumably a coroutine returning the
+        # number of stored chunks - confirm against RAGAdapter.
+        return asyncio.run(adapter.store_chunks(chunks))