Sfoglia il codice sorgente

fix: 修复review发现的关键问题

- snapshot_manager: version check移入lock内,防止竞态;catch JSONDecodeError/OSError 并返回 None
- store.py: upsert_item/mark_status加FileLock保护read-modify-write原子性
- writer.py: ID哈希从SHA1/12字符升级为SHA256/16字符降低碰撞风险
- writer.py: timeline subject统一截断至64字符,与其他category一致
- schemas.py: normalize_data_agent_output操作副本,不再修改调用方原始dict
- index_manager: debt_events FK加ON DELETE CASCADE防止孤儿记录
- test: CLI测试确保state.json存在以通过project root校验
lingfengQAQ 2 mesi fa
parent
commit
2e465d97f1

+ 1 - 1
webnovel-writer/scripts/data_modules/index_manager.py

@@ -499,7 +499,7 @@ class IndexManager(IndexChapterMixin, IndexEntityMixin, IndexDebtMixin, IndexRea
                     chapter INTEGER NOT NULL,
                     note TEXT,
                     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    FOREIGN KEY (debt_id) REFERENCES chase_debt(id)
+                    FOREIGN KEY (debt_id) REFERENCES chase_debt(id) ON DELETE CASCADE
                 )
             """)
 

+ 41 - 36
webnovel-writer/scripts/data_modules/memory/store.py

@@ -27,11 +27,14 @@ try:
 except ImportError:  # pragma: no cover
     from scripts.security_utils import atomic_write_json, read_json_safe
 
+from filelock import FileLock
+
 
 class ScratchpadManager:
     def __init__(self, config: DataModulesConfig | None = None):
         self.config = config or get_config()
         self.path = Path(self.config.scratchpad_file)
+        self._lock = FileLock(str(self.path) + ".lock", timeout=30)
 
     def load(self) -> ScratchpadData:
         if not self.path.exists():
@@ -41,7 +44,7 @@ class ScratchpadManager:
             return ScratchpadData.empty()
         return ScratchpadData.from_dict(payload)
 
-    def save(self, data: ScratchpadData) -> None:
+    def save(self, data: ScratchpadData, _use_lock: bool = True) -> None:
         self.config.ensure_dirs()
         if bool(getattr(self.config, "memory_compactor_enabled", True)):
             threshold = max(1, int(getattr(self.config, "memory_compactor_threshold", 500)))
@@ -53,7 +56,7 @@ class ScratchpadManager:
         payload.setdefault("meta", {})
         payload["meta"]["last_updated"] = now_iso()
         payload["meta"]["total_items"] = data.count_items()
-        atomic_write_json(self.path, payload, use_lock=True, backup=True)
+        atomic_write_json(self.path, payload, use_lock=_use_lock, backup=True)
 
     def _key_for(self, item: MemoryItem) -> tuple[Any, ...]:
         rule = CATEGORY_KEY_RULES.get(item.category)
@@ -66,31 +69,32 @@ class ScratchpadManager:
 
     def upsert_item(self, item: MemoryItem) -> Dict[str, int]:
         normalized = item.normalized()
-        data = self.load()
-        bucket = CATEGORY_TO_BUCKET[normalized.category]
-        rows: List[MemoryItem] = list(getattr(data, bucket))
-        target_key = self._key_for(normalized)
-
-        outdated = 0
-        replaced_existing = False
-        new_rows: List[MemoryItem] = []
-        for row in rows:
-            row_key = self._key_for(row)
-            if row_key == target_key and row.id != normalized.id:
-                # 同 key 旧值降级为 outdated,保留审计轨迹
-                if row.status != "outdated":
-                    row = MemoryItem(**{**asdict(row), "status": "outdated", "updated_at": now_iso()})
-                    outdated += 1
-                replaced_existing = True
-            elif row.id == normalized.id:
-                replaced_existing = True
-                continue
-            new_rows.append(row)
+        with self._lock:
+            data = self.load()
+            bucket = CATEGORY_TO_BUCKET[normalized.category]
+            rows: List[MemoryItem] = list(getattr(data, bucket))
+            target_key = self._key_for(normalized)
+
+            outdated = 0
+            replaced_existing = False
+            new_rows: List[MemoryItem] = []
+            for row in rows:
+                row_key = self._key_for(row)
+                if row_key == target_key and row.id != normalized.id:
+                    # 同 key 旧值降级为 outdated,保留审计轨迹
+                    if row.status != "outdated":
+                        row = MemoryItem(**{**asdict(row), "status": "outdated", "updated_at": now_iso()})
+                        outdated += 1
+                    replaced_existing = True
+                elif row.id == normalized.id:
+                    replaced_existing = True
+                    continue
+                new_rows.append(row)
 
-        normalized.updated_at = normalized.updated_at or now_iso()
-        new_rows.append(normalized)
-        setattr(data, bucket, new_rows)
-        self.save(data)
+            normalized.updated_at = normalized.updated_at or now_iso()
+            new_rows.append(normalized)
+            setattr(data, bucket, new_rows)
+            self.save(data, _use_lock=False)
 
         return {
             "added": 0 if replaced_existing else 1,
@@ -101,16 +105,17 @@ class ScratchpadManager:
     def mark_status(self, item_id: str, status: str) -> bool:
         if not item_id:
             return False
-        data = self.load()
-        updated = False
-        for bucket in BUCKET_TO_CATEGORY:
-            rows: List[MemoryItem] = getattr(data, bucket)
-            for i, row in enumerate(rows):
-                if row.id == item_id:
-                    rows[i] = MemoryItem(**{**asdict(row), "status": status, "updated_at": now_iso()})
-                    updated = True
-        if updated:
-            self.save(data)
+        with self._lock:
+            data = self.load()
+            updated = False
+            for bucket in BUCKET_TO_CATEGORY:
+                rows: List[MemoryItem] = getattr(data, bucket)
+                for i, row in enumerate(rows):
+                    if row.id == item_id:
+                        rows[i] = MemoryItem(**{**asdict(row), "status": status, "updated_at": now_iso()})
+                        updated = True
+            if updated:
+                self.save(data, _use_lock=False)
         return updated
 
     def query(

+ 2 - 2
webnovel-writer/scripts/data_modules/memory/writer.py

@@ -20,7 +20,7 @@ class MemoryWriter:
 
     def _item_id(self, category: str, subject: str, field: str, chapter: int) -> str:
         raw = f"{category}|{subject}|{field}|{chapter}"
-        digest = hashlib.sha1(raw.encode("utf-8")).hexdigest()[:12]
+        digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
         return f"mem-{category}-{digest}"
 
     def _upsert(self, item: MemoryItem, stats: Dict[str, Any]) -> None:
@@ -145,7 +145,7 @@ class MemoryWriter:
                 id=self._item_id("timeline", event, str(source_chapter), chapter),
                 layer="semantic",
                 category="timeline",
-                subject=event,
+                subject=event[:64],
                 field="event",
                 value=event,
                 payload={"time_hint": row.get("time_hint"), "event_type": row.get("event_type")},

+ 5 - 1
webnovel-writer/scripts/data_modules/schemas.py

@@ -150,6 +150,9 @@ def normalize_data_agent_output(payload: Dict[str, Any]) -> Dict[str, Any]:
     if not isinstance(payload, dict):
         return {}
 
+    # 操作副本,避免修改调用方原始数据
+    payload = dict(payload)
+
     def _ensure_list(key: str):
         value = payload.get(key)
         if value is None:
@@ -175,6 +178,8 @@ def normalize_data_agent_output(payload: Dict[str, Any]) -> Dict[str, Any]:
     elif not isinstance(memory_facts, dict):
         payload["memory_facts"] = {}
     else:
+        memory_facts = dict(memory_facts)
+        payload["memory_facts"] = memory_facts
         for key in ["timeline_events", "world_rules", "open_loops", "reader_promises"]:
             value = memory_facts.get(key)
             if value is None:
@@ -184,5 +189,4 @@ def normalize_data_agent_output(payload: Dict[str, Any]) -> Dict[str, Any]:
 
     payload.setdefault("scenes_chunked", 0)
 
-
     return payload

+ 8 - 5
webnovel-writer/scripts/data_modules/snapshot_manager.py

@@ -73,11 +73,14 @@ class SnapshotManager:
         with lock:
             if not path.exists():
                 return None
-            data = json.loads(path.read_text(encoding="utf-8"))
-        version = str(data.get("version", ""))
-        if version != self.version:
-            raise SnapshotVersionMismatch(self.version, version)
-        return data
+            try:
+                data = json.loads(path.read_text(encoding="utf-8"))
+            except (json.JSONDecodeError, OSError):
+                return None
+            version = str(data.get("version", ""))
+            if version != self.version:
+                raise SnapshotVersionMismatch(self.version, version)
+            return data
 
     def delete_snapshot(self, chapter: int) -> bool:
         path = self._snapshot_path(chapter)

+ 4 - 0
webnovel-writer/scripts/data_modules/tests/test_state_manager_extra.py

@@ -498,6 +498,10 @@ def test_sync_protagonist_from_string_and_empty_updates(temp_project):
 
 
 def test_state_manager_cli_commands(temp_project, monkeypatch, capsys):
+    # CLI 的 resolve_project_root 需要 state.json 存在
+    if not temp_project.state_file.exists():
+        temp_project.state_file.write_text("{}", encoding="utf-8")
+
     idx = IndexManager(temp_project)
     idx.upsert_entity(
         EntityMeta(