فهرست منبع

feat: project summaries and scenes to vectors safely

lingfengQAQ 3 هفته پیش
والد
کامیت
9907ae78a8

+ 37 - 0
webnovel-writer/scripts/data_modules/tests/test_vector_projection_writer.py

@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """VectorProjectionWriter 单元测试。"""
+import pytest
+
 from data_modules.vector_projection_writer import VectorProjectionWriter
 
 
@@ -123,3 +125,38 @@ def test_rejected_commit_returns_not_applied():
     writer.project_root = None
     result = writer.apply({"meta": {"status": "rejected", "chapter": 1}})
     assert result["applied"] is False
+
+
+def test_collect_chunks_includes_summary_and_scenes():
+    """Check that ``_collect_chunks`` emits a summary chunk and scene chunks.
+
+    The payload carries a chapter summary and two scenes: one keyed by
+    ``index``/``summary`` with a location, one keyed by
+    ``scene_index``/``content``.  Scene chunks are expected to name the
+    summary chunk as their parent.
+    """
+    # Bypass __init__ so no project root or configuration is needed.
+    projector = VectorProjectionWriter.__new__(VectorProjectionWriter)
+    first_scene = {"index": 1, "summary": "韩立入坊市观察摊位", "location": "坊市"}
+    second_scene = {"scene_index": 2, "content": "陈巧倩暗中提醒韩立有人跟踪。"}
+    commit = {
+        "meta": {"chapter": 47, "status": "accepted"},
+        "summary_text": "韩立在坊市发现丹方线索。",
+        "scenes": [first_scene, second_scene],
+        "accepted_events": [],
+        "entity_deltas": [],
+    }
+
+    # Group the produced chunks by their type, keeping emission order.
+    grouped = {}
+    for item in projector._collect_chunks(commit):
+        kind = item["chunk_type"]
+        if kind not in grouped:
+            grouped[kind] = []
+        grouped[kind].append(item)
+
+    summary_chunk = grouped["summary"][0]
+    assert summary_chunk["chunk_id"] == "ch0047_summary"
+    assert summary_chunk["parent_chunk_id"] is None
+
+    scene_chunks = grouped["scene"]
+    assert scene_chunks[0]["parent_chunk_id"] == "ch0047_summary"
+    assert scene_chunks[0]["content"].startswith("坊市:")
+    assert any(entry["scene_index"] == 2 for entry in scene_chunks)
+
+
+@pytest.mark.asyncio
+async def test_run_store_coro_works_inside_active_event_loop():
+    """Check ``_run_store_coro`` returns the result under a running loop.
+
+    The test body is itself a coroutine, so the call below happens while
+    an event loop is already active in the calling thread.
+    """
+    # Bypass __init__ so no project root or configuration is needed.
+    projector = VectorProjectionWriter.__new__(VectorProjectionWriter)
+
+    async def fake_store():
+        return 3
+
+    stored = projector._run_store_coro(fake_store())
+    assert stored == 3

+ 58 - 2
webnovel-writer/scripts/data_modules/vector_projection_writer.py

@@ -5,6 +5,8 @@ from __future__ import annotations
 import asyncio
 import hashlib
 import logging
+import threading
+from collections.abc import Coroutine
 from pathlib import Path
 from typing import Any, Dict, List
 
@@ -36,6 +38,19 @@ class VectorProjectionWriter:
 
         chunk_counts: Dict[str, int] = {}
 
+        summary_text = str(commit_payload.get("summary_text") or "").strip()
+        summary_chunk_id = f"ch{chapter:04d}_summary" if chapter > 0 else ""
+        if chapter > 0 and summary_text:
+            chunks.append({
+                "chunk_id": summary_chunk_id,
+                "chapter": chapter,
+                "scene_index": 0,
+                "content": summary_text,
+                "chunk_type": "summary",
+                "parent_chunk_id": None,
+                "source_file": f"commit:chapter_{chapter:03d}",
+            })
+
         for event in commit_payload.get("accepted_events") or []:
             if not isinstance(event, dict):
                 continue
@@ -72,6 +87,27 @@ class VectorProjectionWriter:
                     "source_file": f"commit:chapter_{d_chapter:03d}",
                 })
 
+        for idx, scene in enumerate(commit_payload.get("scenes") or [], start=1):
+            if not isinstance(scene, dict):
+                continue
+            scene_index = int(scene.get("scene_index") or scene.get("index") or idx)
+            text = str(scene.get("summary") or scene.get("content") or "").strip()
+            location = str(scene.get("location") or "").strip()
+            if location and text:
+                text = f"{location}:{text}"
+            if not text:
+                continue
+            chunk_id = self._chunk_id("scene", chapter, scene_index)
+            chunks.append({
+                "chunk_id": chunk_id,
+                "chapter": chapter,
+                "scene_index": scene_index,
+                "content": text,
+                "chunk_type": "scene",
+                "parent_chunk_id": summary_chunk_id or None,
+                "source_file": f"commit:chapter_{chapter:03d}",
+            })
+
         return chunks
 
     def _unique_chunk_id(
@@ -167,6 +203,27 @@ class VectorProjectionWriter:
             return f"第{chapter}章:实体变更——{canonical}"
         return ""
 
+    def _run_store_coro(self, coro: Coroutine[Any, Any, int]) -> int:
+        """Run ``coro`` to completion from synchronous code.
+
+        When no event loop is running in the calling thread, the coroutine
+        is driven directly with ``asyncio.run``.  When a loop is already
+        running, ``asyncio.run`` cannot be used in this thread, so the
+        coroutine is run on a fresh loop in a helper thread and this call
+        blocks until that thread finishes.
+
+        Args:
+            coro: Coroutine object to execute.
+
+        Returns:
+            The coroutine's result coerced with ``int``; a falsy result
+            (``None``, ``0``) becomes ``0``.
+
+        Raises:
+            BaseException: Whatever the coroutine raised, re-raised in the
+                calling thread on both execution paths.
+        """
+        try:
+            asyncio.get_running_loop()
+        except RuntimeError:
+            loop_running = False
+        else:
+            loop_running = True
+
+        if not loop_running:
+            # Run outside the ``except`` handler so a coroutine failure is
+            # not chained to the unrelated "no running event loop" error.
+            return int(asyncio.run(coro) or 0)
+
+        outcome: Dict[str, Any] = {}
+
+        def runner() -> None:
+            # Catch BaseException, not just Exception: CancelledError and
+            # SystemExit would otherwise die inside the thread and the
+            # caller would see a misleading 0 instead of the failure.
+            try:
+                outcome["value"] = asyncio.run(coro)
+            except BaseException as exc:
+                outcome["error"] = exc
+
+        worker = threading.Thread(target=runner, daemon=True)
+        worker.start()
+        worker.join()
+        if "error" in outcome:
+            raise outcome["error"]
+        return int(outcome.get("value") or 0)
+
     def _store_chunks(self, chunks: List[Dict[str, Any]]) -> int:
         from .config import DataModulesConfig
         from .rag_adapter import RAGAdapter
@@ -174,8 +231,7 @@ class VectorProjectionWriter:
         config = DataModulesConfig.from_project_root(self.project_root)
         adapter = RAGAdapter(config)
         try:
-            stored = asyncio.run(adapter.store_chunks(chunks))
-            return stored
+            return self._run_store_coro(adapter.store_chunks(chunks))
         except Exception as exc:
             logger.warning("vector_store_failed: %s", exc)
             return 0