Просмотр исходного кода

fix: harden status merge and json arg boundaries

lingfengQAQ 3 недель назад
Родитель
Сommit
e85a352a2a

+ 12 - 11
webnovel-writer/scripts/data_modules/index_manager.py

@@ -970,6 +970,7 @@ def main():
         config = DataModulesConfig.from_project_root(resolved_root)
 
     manager = IndexManager(config)
+    json_base_dir = manager.config.project_root
     tool_name = f"index_manager:{args.command or 'unknown'}"
 
     def _append_timing(
@@ -1030,8 +1031,8 @@ def main():
         emit_success(scenes, message="scenes")
 
     elif args.command == "process-chapter":
-        entities = load_json_arg(args.entities, base_dir=args.project_root)
-        scenes = load_json_arg(args.scenes, base_dir=args.project_root)
+        entities = load_json_arg(args.entities, base_dir=json_base_dir)
+        scenes = load_json_arg(args.scenes, base_dir=json_base_dir)
         stats = manager.process_chapter_data(
             chapter=args.chapter,
             title=args.title,
@@ -1132,7 +1133,7 @@ def main():
 
     elif args.command == "record-relationship-event":
         try:
-            data = load_json_arg(args.data, base_dir=args.project_root)
+            data = load_json_arg(args.data, base_dir=json_base_dir)
         except (TypeError, ValueError, json.JSONDecodeError):
             emit_error("INVALID_RELATIONSHIP_EVENT", "关系事件 JSON 无效")
         else:
@@ -1156,7 +1157,7 @@ def main():
                 emit_error("INVALID_RELATIONSHIP_EVENT", "关系事件参数无效,未写入")
 
     elif args.command == "upsert-entity":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         entity = EntityMeta(
             id=data["id"],
             type=data["type"],
@@ -1173,7 +1174,7 @@ def main():
         emit_success({"id": entity.id, "created": is_new}, message="entity_upserted")
 
     elif args.command == "upsert-relationship":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         rel = RelationshipMeta(
             from_entity=data["from_entity"],
             to_entity=data["to_entity"],
@@ -1188,7 +1189,7 @@ def main():
         )
 
     elif args.command == "record-state-change":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         change = StateChangeMeta(
             entity_id=data["entity_id"],
             field=data["field"],
@@ -1224,7 +1225,7 @@ def main():
         emit_success(rows, message="invalid_list")
 
     elif args.command == "save-review-metrics":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         metrics = ReviewMetrics(
             start_chapter=data["start_chapter"],
             end_chapter=data["end_chapter"],
@@ -1250,7 +1251,7 @@ def main():
         emit_success(stats, message="review_trend_stats")
 
     elif args.command == "save-writing-checklist-score":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         metrics = WritingChecklistScoreMeta(
             chapter=data["chapter"],
             template=data.get("template", "plot"),
@@ -1346,7 +1347,7 @@ def main():
             emit_success(result, message="debt_payment", chapter=args.chapter)
 
     elif args.command == "create-override-contract":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         contract = OverrideContractMeta(
             chapter=data["chapter"],
             constraint_type=data["constraint_type"],
@@ -1361,7 +1362,7 @@ def main():
         emit_success({"id": contract_id}, message="override_contract_created")
 
     elif args.command == "create-debt":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         debt = ChaseDebtMeta(
             debt_type=data["debt_type"],
             original_amount=data.get("original_amount", 1.0),
@@ -1383,7 +1384,7 @@ def main():
             emit_error("NOT_FOUND", f"未找到 Override Contract #{args.contract_id}")
 
     elif args.command == "save-chapter-reading-power":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         meta = ChapterReadingPowerMeta(
             chapter=data["chapter"],
             hook_type=data.get("hook_type", ""),

+ 36 - 8
webnovel-writer/scripts/data_modules/state_manager.py

@@ -111,6 +111,7 @@ class StateManager:
 
     # 章节状态机(单调递进)
     CHAPTER_STATUS_ORDER = ["chapter_drafted", "chapter_reviewed", "chapter_committed"]
+    REJECTED_CHAPTER_STATUS = "chapter_rejected"
 
     def __init__(self, config=None, enable_sqlite_sync: bool = True):
         """
@@ -294,7 +295,22 @@ class StateManager:
                     if not isinstance(chapter_status, dict):
                         chapter_status = {}
                         progress["chapter_status"] = chapter_status
-                    chapter_status.update(self._pending_chapter_status)
+
+                    def _status_rank(value: str) -> int:
+                        if value == self.REJECTED_CHAPTER_STATUS:
+                            return -1
+                        try:
+                            return self.CHAPTER_STATUS_ORDER.index(value)
+                        except ValueError:
+                            return -1
+
+                    for chapter, pending_status in self._pending_chapter_status.items():
+                        current_status = str(chapter_status.get(chapter) or "")
+                        if current_status == pending_status:
+                            continue
+                        if current_status and _status_rank(pending_status) < _status_rank(current_status):
+                            continue
+                        chapter_status[chapter] = pending_status
                     progress["last_updated"] = self._now_progress_timestamp()
 
                 # v5.1 引入: 强制使用 SQLite 模式,移除大数据字段
@@ -658,13 +674,16 @@ class StateManager:
 
     def set_chapter_status(self, chapter: int, status: str) -> None:
         """设置章节状态(单调递进,不可回退)。"""
-        if status not in self.CHAPTER_STATUS_ORDER:
-            raise ValueError(f"无效状态: {status},有效值: {self.CHAPTER_STATUS_ORDER}")
+        valid_statuses = set(self.CHAPTER_STATUS_ORDER + [self.REJECTED_CHAPTER_STATUS])
+        if status not in valid_statuses:
+            raise ValueError(
+                f"无效状态: {status},有效值: {self.CHAPTER_STATUS_ORDER + [self.REJECTED_CHAPTER_STATUS]}"
+            )
 
         current = self.get_chapter_status(chapter)
         if current is not None:
-            current_idx = self.CHAPTER_STATUS_ORDER.index(current)
-            new_idx = self.CHAPTER_STATUS_ORDER.index(status)
+            current_idx = self._chapter_status_rank(current)
+            new_idx = self._chapter_status_rank(status)
             if new_idx < current_idx:
                 raise ValueError(
                     f"章节 {chapter} 状态不可回退: {current} -> {status}"
@@ -678,6 +697,14 @@ class StateManager:
         self._pending_chapter_status[str(chapter)] = status
         self.save_state()
 
+    def _chapter_status_rank(self, status: str) -> int:
+        if status == self.REJECTED_CHAPTER_STATUS:
+            return -1
+        try:
+            return self.CHAPTER_STATUS_ORDER.index(status)
+        except ValueError:
+            return -1
+
     def _save_state(self) -> None:
         """直接持久化当前内存状态到 state.json(轻量写入,不走 pending 合并)。"""
         self.config.ensure_dirs()
@@ -1344,7 +1371,7 @@ def main():
     status_set_parser = subparsers.add_parser("set-chapter-status")
     status_set_parser.add_argument("--chapter", type=int, required=True)
     status_set_parser.add_argument("--status", required=True,
-        choices=["chapter_drafted", "chapter_reviewed", "chapter_committed"])
+        choices=["chapter_rejected", "chapter_drafted", "chapter_reviewed", "chapter_committed"])
 
     argv = normalize_global_project_root(sys.argv[1:])
     args = parser.parse_args(argv)
@@ -1364,12 +1391,13 @@ def main():
                 "INVALID_PROJECT_ROOT",
                 str(exc),
                 suggestion="请传入包含 .webnovel/state.json 的书项目根目录,或先通过 webnovel.py 解析 project_root。",
-            )
+                )
             raise SystemExit(1) from exc
         config = DataModulesConfig.from_project_root(resolved_root)
 
     manager = StateManager(config)
     logger = IndexManager(config)
+    json_base_dir = manager.config.project_root
     tool_name = f"state_manager:{args.command or 'unknown'}"
 
     def _append_timing(success: bool, *, error_code: str | None = None, error_message: str | None = None, chapter: int | None = None):
@@ -1422,7 +1450,7 @@ def main():
         emit_success(payload, message="entities")
 
     elif args.command == "process-chapter":
-        data = load_json_arg(args.data, base_dir=args.project_root)
+        data = load_json_arg(args.data, base_dir=json_base_dir)
         validated = None
         last_exc = None
         for _ in range(3):

+ 78 - 47
webnovel-writer/scripts/data_modules/state_projection_writer.py

@@ -7,17 +7,49 @@ from datetime import datetime
 from pathlib import Path
 from typing import Any
 
-from .story_contracts import read_json_if_exists, write_json
+import filelock
+
+from .story_contracts import read_json_if_exists
 
 try:
     from chapter_paths import find_chapter_file
 except ImportError:  # pragma: no cover
     from scripts.chapter_paths import find_chapter_file
 
+try:
+    from security_utils import atomic_write_json
+except ImportError:  # pragma: no cover
+    from scripts.security_utils import atomic_write_json
+
+
+class _LockedState:
+    def __init__(self, state_path: Path, lock_path: Path):
+        self.state_path = state_path
+        self.lock_path = lock_path
+        self.state: dict[str, Any] = {}
+        self._lock: filelock.FileLock | None = None
+
+    def __enter__(self) -> dict[str, Any]:
+        self._lock = filelock.FileLock(str(self.lock_path), timeout=10)
+        self._lock.acquire()
+        self.state = read_json_if_exists(self.state_path) or {}
+        return self.state
+
+    def __exit__(self, exc_type, exc, tb) -> bool:
+        try:
+            if exc_type is None:
+                atomic_write_json(self.state_path, self.state, use_lock=False, backup=True)
+        finally:
+            if self._lock is not None:
+                self._lock.release()
+        return False
+
 
 class StateProjectionWriter:
     def __init__(self, project_root: Path):
         self.project_root = Path(project_root)
+        self.state_path = self.project_root / ".webnovel" / "state.json"
+        self.lock_path = self.state_path.with_suffix(self.state_path.suffix + ".lock")
 
     def apply(self, commit_payload: dict) -> dict:
         chapter = int(commit_payload.get("meta", {}).get("chapter") or 0)
@@ -25,62 +57,58 @@ class StateProjectionWriter:
 
         if status == "rejected":
             if chapter > 0:
-                state_path = self.project_root / ".webnovel" / "state.json"
-                state = read_json_if_exists(state_path) or {}
-                progress = state.setdefault("progress", {})
-                chapter_status = progress.setdefault("chapter_status", {})
-                chapter_status[str(chapter)] = "chapter_rejected"
-                write_json(state_path, state)
+                with self._locked_state() as state:
+                    progress = state.setdefault("progress", {})
+                    chapter_status = progress.setdefault("chapter_status", {})
+                    chapter_status[str(chapter)] = "chapter_rejected"
             return {"applied": True, "writer": "state", "reason": "commit_rejected_status_updated"}
 
         if status != "accepted":
             return {"applied": False, "writer": "state", "reason": f"unknown_status:{status}"}
 
-        state_path = self.project_root / ".webnovel" / "state.json"
-        state = read_json_if_exists(state_path) or {}
-        entity_state = state.setdefault("entity_state", {})
-        progress = state.setdefault("progress", {})
-        chapter_status = progress.setdefault("chapter_status", {})
+        with self._locked_state() as state:
+            entity_state = state.setdefault("entity_state", {})
+            progress = state.setdefault("progress", {})
+            chapter_status = progress.setdefault("chapter_status", {})
+
+            protagonist_ids = self._collect_protagonist_ids(commit_payload, state)
+
+            applied_count = 0
+            for delta in self._collect_state_deltas(commit_payload):
+                entity_id = str(delta.get("entity_id") or "").strip()
+                field = str(delta.get("field") or "").strip()
+                if not entity_id or not field:
+                    continue
+                new_value = delta.get("new")
+                entity_bucket = entity_state.setdefault(entity_id, {})
+                self._set_path(entity_bucket, field, new_value)
+                if entity_id in protagonist_ids:
+                    self._set_path(state.setdefault("protagonist_state", {}), field, new_value)
+                applied_count += 1
 
-        protagonist_ids = self._collect_protagonist_ids(commit_payload, state)
+            if chapter > 0:
+                old_current = self._safe_int(progress.get("current_chapter"))
+                old_total = self._safe_int(progress.get("total_words"))
+                old_status = chapter_status.get(str(chapter))
 
-        applied_count = 0
-        for delta in self._collect_state_deltas(commit_payload):
-            entity_id = str(delta.get("entity_id") or "").strip()
-            field = str(delta.get("field") or "").strip()
-            if not entity_id or not field:
-                continue
-            new_value = delta.get("new")
-            entity_bucket = entity_state.setdefault(entity_id, {})
-            self._set_path(entity_bucket, field, new_value)
-            if entity_id in protagonist_ids:
-                self._set_path(state.setdefault("protagonist_state", {}), field, new_value)
-            applied_count += 1
-
-        if chapter > 0:
-            old_current = self._safe_int(progress.get("current_chapter"))
-            old_total = self._safe_int(progress.get("total_words"))
-            old_status = chapter_status.get(str(chapter))
-
-            chapter_status[str(chapter)] = "chapter_committed"
-            progress["current_chapter"] = max(old_current, chapter)
-
-            projected_total = self._project_total_words(chapter_status)
-            if projected_total > 0:
-                progress["total_words"] = projected_total
-            else:
-                progress["total_words"] = old_total
+                chapter_status[str(chapter)] = "chapter_committed"
+                progress["current_chapter"] = max(old_current, chapter)
 
-            if (
-                old_status != "chapter_committed"
-                or progress.get("current_chapter") != old_current
-                or progress.get("total_words") != old_total
-            ):
-                progress["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                projected_total = self._project_total_words(chapter_status)
+                if projected_total > 0:
+                    progress["total_words"] = projected_total
+                else:
+                    progress["total_words"] = old_total
 
-        strand_applied = self._apply_strand_tracker(state, chapter, commit_payload)
+                if (
+                    old_status != "chapter_committed"
+                    or progress.get("current_chapter") != old_current
+                    or progress.get("total_words") != old_total
+                ):
+                    progress["last_updated"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+            strand_applied = self._apply_strand_tracker(state, chapter, commit_payload)
 
-        write_json(state_path, state)
         return {
             "applied": applied_count > 0 or chapter > 0,
             "writer": "state",
@@ -88,6 +116,9 @@ class StateProjectionWriter:
             "strand_tracker": strand_applied,
         }
 
+    def _locked_state(self):
+        return _LockedState(self.state_path, self.lock_path)
+
     def _collect_state_deltas(self, commit_payload: dict) -> list[dict]:
         deltas = [
             self._normalize_state_delta(delta)

+ 14 - 0
webnovel-writer/scripts/data_modules/tests/test_chapter_status.py

@@ -51,6 +51,20 @@ def test_set_chapter_status_monotonic(state_project):
         sm.set_chapter_status(5, "chapter_drafted")
 
 
+def test_set_chapter_status_allows_rework_after_rejected(state_project):
+    state_file = state_project / ".webnovel" / "state.json"
+    state_file.write_text(
+        json.dumps({"progress": {"chapter_status": {"5": "chapter_rejected"}}}, ensure_ascii=False),
+        encoding="utf-8",
+    )
+    sm = _make_manager(state_project)
+    sm._load_state()
+
+    sm.set_chapter_status(5, "chapter_drafted")
+
+    assert sm.get_chapter_status(5) == "chapter_drafted"
+
+
 def test_set_chapter_status_progression(state_project):
     sm = _make_manager(state_project)
     sm._load_state()

+ 12 - 0
webnovel-writer/scripts/data_modules/tests/test_coverage_boost.py

@@ -83,6 +83,18 @@ def test_load_json_arg_rejects_file_outside_base_dir(tmp_path):
         load_json_arg(f"@{outside}", base_dir=project)
 
 
+def test_load_json_arg_rejects_relative_escape_outside_base_dir(tmp_path):
+    project = tmp_path / "project"
+    project.mkdir()
+    outside_dir = tmp_path / "outside"
+    outside_dir.mkdir()
+    secret = outside_dir / "secret.json"
+    secret.write_text('{"secret": true}', encoding="utf-8")
+
+    with pytest.raises(ValueError, match="outside allowed directory"):
+        load_json_arg("@../outside/secret.json", base_dir=project)
+
+
 def test_load_json_arg_allows_file_inside_base_dir(tmp_path):
     project = tmp_path / "project"
     project.mkdir()

+ 45 - 0
webnovel-writer/scripts/data_modules/tests/test_data_modules.py

@@ -34,6 +34,7 @@ from data_modules.index_manager import (
     ReviewMetrics,
     WritingChecklistScoreMeta,
 )
+from project_locator import write_current_project_pointer
 
 
 @pytest.fixture
@@ -1338,6 +1339,50 @@ class TestIndexManager:
 
         capsys.readouterr()
 
+    def test_index_manager_cli_rejects_json_file_outside_resolved_book_root(
+        self, tmp_path, monkeypatch
+    ):
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+        (workspace / ".git").mkdir()
+        (workspace / ".claude").mkdir()
+        project_root = workspace / "book"
+        config = DataModulesConfig.from_project_root(project_root)
+        config.ensure_dirs()
+        _ensure_project_state(config)
+        write_current_project_pointer(project_root, workspace_root=workspace)
+
+        entities = project_root / "entities.json"
+        outside_scenes = workspace / "scenes.json"
+        entities.write_text("[]", encoding="utf-8")
+        outside_scenes.write_text("[]", encoding="utf-8")
+
+        monkeypatch.setattr(
+            sys,
+            "argv",
+            [
+                "index_manager",
+                "--project-root",
+                str(workspace),
+                "process-chapter",
+                "--chapter",
+                "2",
+                "--title",
+                "试炼",
+                "--location",
+                "秘境",
+                "--word-count",
+                "1200",
+                "--entities",
+                f"@{entities}",
+                "--scenes",
+                f"@{outside_scenes}",
+            ],
+        )
+
+        with pytest.raises(ValueError, match="outside allowed directory"):
+            index_manager_module.main()
+
 
 class TestStyleSampler:
     """风格样本测试"""

+ 61 - 0
webnovel-writer/scripts/data_modules/tests/test_state_manager_extra.py

@@ -13,6 +13,7 @@ import pytest
 
 from data_modules.state_manager import StateManager, EntityState
 from data_modules.index_manager import IndexManager, EntityMeta
+from project_locator import write_current_project_pointer
 
 
 @pytest.fixture
@@ -425,6 +426,28 @@ def test_set_chapter_status_preserves_existing_disk_state(temp_project):
     assert saved["disambiguation_warnings"] == [{"chapter": 4, "mention": "宗主"}]
 
 
+def test_set_chapter_status_does_not_rollback_same_chapter_disk_state(temp_project):
+    temp_project.state_file.write_text(
+        json.dumps({"progress": {"chapter_status": {}}}, ensure_ascii=False),
+        encoding="utf-8",
+    )
+    manager = StateManager(temp_project, enable_sqlite_sync=False)
+
+    temp_project.state_file.write_text(
+        json.dumps(
+            {"progress": {"chapter_status": {"5": "chapter_reviewed"}}},
+            ensure_ascii=False,
+        ),
+        encoding="utf-8",
+    )
+
+    manager.set_chapter_status(5, "chapter_drafted")
+
+    saved = json.loads(temp_project.state_file.read_text(encoding="utf-8"))
+    assert saved["progress"]["chapter_status"]["5"] == "chapter_reviewed"
+    assert manager.get_chapter_status(5) == "chapter_reviewed"
+
+
 def test_sync_to_sqlite_exceptions_and_no_sql_manager(temp_project, monkeypatch):
     manager = StateManager(temp_project)
     manager._pending_progress_chapter = 1
@@ -589,6 +612,44 @@ def test_state_manager_cli_commands(temp_project, monkeypatch, capsys):
     assert out["status"] == "success"
 
 
+def test_state_manager_cli_rejects_json_file_outside_resolved_book_root(tmp_path, monkeypatch):
+    from data_modules.config import DataModulesConfig
+
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    (workspace / ".git").mkdir()
+    (workspace / ".claude").mkdir()
+    project_root = workspace / "book"
+    cfg = DataModulesConfig.from_project_root(project_root)
+    cfg.ensure_dirs()
+    if not cfg.state_file.exists():
+        cfg.state_file.write_text("{}", encoding="utf-8")
+    write_current_project_pointer(project_root, workspace_root=workspace)
+
+    outside = workspace / "outside.json"
+    outside.write_text('{"entities_appeared": []}', encoding="utf-8")
+
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        [
+            "state_manager",
+            "--project-root",
+            str(workspace),
+            "process-chapter",
+            "--chapter",
+            "1",
+            "--data",
+            f"@{outside}",
+        ],
+    )
+
+    from data_modules import state_manager as sm
+
+    with pytest.raises(ValueError, match="outside allowed directory"):
+        sm.main()
+
+
 def test_state_manager_cli_rejects_invalid_project_root(monkeypatch, tmp_path, capsys):
     monkeypatch.delenv("WEBNOVEL_PROJECT_ROOT", raising=False)
     monkeypatch.delenv("CLAUDE_PROJECT_DIR", raising=False)