ソースを参照

feat: 强化RAG迁移与测试工具链

lingfengQAQ 4 ヶ月 前
コミット
55621197dd

+ 157 - 57
.claude/scripts/data_modules/rag_adapter.py

@@ -15,6 +15,7 @@ import sqlite3
 import json
 import json
 import math
 import math
 import logging
 import logging
+import shutil
 from pathlib import Path
 from pathlib import Path
 
 
 from runtime_compat import enable_windows_utf8_stdio
 from runtime_compat import enable_windows_utf8_stdio
@@ -25,6 +26,7 @@ import re
 from contextlib import contextmanager
 from contextlib import contextmanager
 import itertools
 import itertools
 import time
 import time
+from datetime import datetime
 
 
 from .config import get_config
 from .config import get_config
 from .api_client import get_client
 from .api_client import get_client
@@ -34,6 +36,19 @@ from .observability import safe_log_tool_call
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
+# Schema version written to rag_schema_meta and embedded in backup file names.
+RAG_SCHEMA_VERSION = "2"
+# Columns the vectors table must have; a table missing any of them is migrated.
+VECTOR_REQUIRED_COLUMNS = (
+    "chunk_id",
+    "chapter",
+    "scene_index",
+    "content",
+    "embedding",
+    "parent_chunk_id",
+    "chunk_type",
+    "source_file",
+    "created_at",
+)
+
 
 
 @dataclass
 @dataclass
 class SearchResult:
 class SearchResult:
@@ -73,73 +88,158 @@ class RAGAdapter:
     def _init_db(self):
     def _init_db(self):
         """初始化向量数据库"""
         """初始化向量数据库"""
         self.config.ensure_dirs()
         self.config.ensure_dirs()
+        needs_migration, existing_cols = self._inspect_vectors_schema()
+        if needs_migration:
+            backup_path = self._backup_vector_db(reason="schema_migration")
+            try:
+                with self._get_conn() as conn:
+                    cursor = conn.cursor()
+                    self._rebuild_vectors_table(cursor, existing_cols)
+                    conn.commit()
+                logger.warning(
+                    "vectors 表结构已迁移(备份: %s)",
+                    str(backup_path),
+                )
+            except Exception:
+                try:
+                    self._restore_vector_db_from_backup(backup_path)
+                    logger.error("vectors 表迁移失败,已从备份恢复: %s", str(backup_path))
+                except Exception as restore_exc:
+                    logger.exception("vectors 表迁移失败,且恢复备份失败: %s", restore_exc)
+                raise
 
 
         with self._get_conn() as conn:
         with self._get_conn() as conn:
             cursor = conn.cursor()
             cursor = conn.cursor()
+            self._ensure_schema_meta(cursor)
+            self._ensure_tables(cursor)
+            conn.commit()
 
 
-            def _table_columns(table_name: str) -> set[str]:
-                cursor.execute(f"PRAGMA table_info({table_name})")
-                return {row[1] for row in cursor.fetchall()}
-
-            required_cols = {
-                "chunk_id",
-                "chapter",
-                "scene_index",
-                "content",
-                "embedding",
-                "parent_chunk_id",
-                "chunk_type",
-                "source_file",
-                "created_at",
-            }
+    def _table_exists(self, cursor, table_name: str) -> bool:
+        """Return True when a table named *table_name* is listed in sqlite_master."""
+        lookup_sql = "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?"
+        cursor.execute(lookup_sql, (table_name,))
+        match = cursor.fetchone()
+        return match is not None
 
 
-            if "vectors" in {r[0] for r in cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")}:  # type: ignore
-                cols = _table_columns("vectors")
-                if not required_cols.issubset(cols):
-                    cursor.execute("DROP TABLE IF EXISTS vectors")
-                    cursor.execute("DROP TABLE IF EXISTS bm25_index")
-                    cursor.execute("DROP TABLE IF EXISTS doc_stats")
+    def _table_columns(self, cursor, table_name: str) -> set[str]:
+        """Return the column names that ``PRAGMA table_info`` reports for *table_name*.
+
+        The table name is interpolated into the statement text, so callers must
+        pass a trusted identifier.
+        """
+        cursor.execute(f"PRAGMA table_info({table_name})")
+        names = set()
+        for info_row in cursor.fetchall():
+            # Index 1 of each table_info row holds the column name.
+            names.add(info_row[1])
+        return names
 
 
-            # 向量存储表
-            cursor.execute("""
-                CREATE TABLE IF NOT EXISTS vectors (
-                    chunk_id TEXT PRIMARY KEY,
-                    chapter INTEGER,
-                    scene_index INTEGER,
-                    content TEXT,
-                    embedding BLOB,
-                    parent_chunk_id TEXT,
-                    chunk_type TEXT DEFAULT 'scene',
-                    source_file TEXT,
-                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-                )
-            """)
+    def _inspect_vectors_schema(self) -> tuple[bool, set[str]]:
+        """Report whether the ``vectors`` table needs migrating, plus its current columns.
+
+        Returns ``(False, set())`` when the table does not exist. Otherwise the
+        flag is True when any name in ``VECTOR_REQUIRED_COLUMNS`` is missing.
+        """
+        with self._get_conn() as conn:
+            cursor = conn.cursor()
+            if self._table_exists(cursor, "vectors"):
+                present = self._table_columns(cursor, "vectors")
+                missing = set(VECTOR_REQUIRED_COLUMNS) - present
+                return bool(missing), present
+            return False, set()
+
+    def _backup_vector_db(self, reason: str) -> Path:
+        """Copy the vector DB file into ``<webnovel_dir>/backups`` and return the copy's path.
+
+        The backup file name embeds *reason*, ``RAG_SCHEMA_VERSION`` and a
+        second-resolution local timestamp.
+
+        Raises:
+            FileNotFoundError: if the configured vector DB file does not exist.
+        """
+        source = Path(self.config.vector_db)
+        if not source.exists():
+            raise FileNotFoundError(f"vectors.db 不存在: {source}")
+
+        target_dir = self.config.webnovel_dir / "backups"
+        target_dir.mkdir(parents=True, exist_ok=True)
+
+        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        file_name = f"vectors.db.{reason}.v{RAG_SCHEMA_VERSION}.{stamp}.bak"
+        target = target_dir / file_name
+        shutil.copy2(source, target)
+        return target
+
+    def _restore_vector_db_from_backup(self, backup_path: Path) -> None:
+        """Overwrite the configured vector DB file with the copy at *backup_path*."""
+        destination = Path(self.config.vector_db)
+        shutil.copy2(backup_path, destination)
+
+    def _rebuild_vectors_table(self, cursor, existing_cols: set[str]) -> None:
+        if not self._table_exists(cursor, "vectors"):
+            return
+
+        cursor.execute("DROP TABLE IF EXISTS vectors_migrating")
+        cursor.execute("""
+            CREATE TABLE vectors_migrating (
+                chunk_id TEXT PRIMARY KEY,
+                chapter INTEGER,
+                scene_index INTEGER,
+                content TEXT,
+                embedding BLOB,
+                parent_chunk_id TEXT,
+                chunk_type TEXT DEFAULT 'scene',
+                source_file TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
 
 
-            # BM25 倒排索引表
-            cursor.execute("""
-                CREATE TABLE IF NOT EXISTS bm25_index (
-                    term TEXT,
-                    chunk_id TEXT,
-                    tf REAL,
-                    PRIMARY KEY (term, chunk_id)
-                )
-            """)
+        copy_columns = [
+            col
+            for col in VECTOR_REQUIRED_COLUMNS
+            if col in existing_cols
+        ]
+        if copy_columns:
+            cols_sql = ", ".join(copy_columns)
+            cursor.execute(
+                f"INSERT OR REPLACE INTO vectors_migrating ({cols_sql}) SELECT {cols_sql} FROM vectors"
+            )
 
 
-            # 文档统计表
-            cursor.execute("""
-                CREATE TABLE IF NOT EXISTS doc_stats (
-                    chunk_id TEXT PRIMARY KEY,
-                    doc_length INTEGER
-                )
-            """)
+        cursor.execute("DROP TABLE vectors")
+        cursor.execute("ALTER TABLE vectors_migrating RENAME TO vectors")
 
 
-            # 创建索引
-            cursor.execute("CREATE INDEX IF NOT EXISTS idx_vectors_chapter ON vectors(chapter)")
-            cursor.execute("CREATE INDEX IF NOT EXISTS idx_vectors_parent ON vectors(parent_chunk_id)")
-            cursor.execute("CREATE INDEX IF NOT EXISTS idx_vectors_type ON vectors(chunk_type)")
-            cursor.execute("CREATE INDEX IF NOT EXISTS idx_bm25_term ON bm25_index(term)")
+    def _ensure_schema_meta(self, cursor) -> None:
+        """Create ``rag_schema_meta`` if needed and upsert the current schema version.
+
+        The ``schema_version`` row is inserted, or updated on key conflict, with
+        ``RAG_SCHEMA_VERSION`` and a fresh ``updated_at`` timestamp.
+        """
+        create_sql = """
+            CREATE TABLE IF NOT EXISTS rag_schema_meta (
+                key TEXT PRIMARY KEY,
+                value TEXT NOT NULL,
+                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """
+        upsert_sql = """
+            INSERT INTO rag_schema_meta (key, value, updated_at)
+            VALUES ('schema_version', ?, CURRENT_TIMESTAMP)
+            ON CONFLICT(key) DO UPDATE SET
+                value = excluded.value,
+                updated_at = CURRENT_TIMESTAMP
+            """
+        cursor.execute(create_sql)
+        cursor.execute(upsert_sql, (RAG_SCHEMA_VERSION,))
 
 
-            conn.commit()
+    def _ensure_tables(self, cursor) -> None:
+        """Create the vectors, bm25_index and doc_stats tables and their indexes if absent.
+
+        Every statement uses ``IF NOT EXISTS``, so repeated calls are harmless.
+        """
+        table_statements = (
+            # Vector storage table.
+            """
+            CREATE TABLE IF NOT EXISTS vectors (
+                chunk_id TEXT PRIMARY KEY,
+                chapter INTEGER,
+                scene_index INTEGER,
+                content TEXT,
+                embedding BLOB,
+                parent_chunk_id TEXT,
+                chunk_type TEXT DEFAULT 'scene',
+                source_file TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """,
+            # BM25 inverted index table.
+            """
+            CREATE TABLE IF NOT EXISTS bm25_index (
+                term TEXT,
+                chunk_id TEXT,
+                tf REAL,
+                PRIMARY KEY (term, chunk_id)
+            )
+        """,
+            # Per-document statistics table.
+            """
+            CREATE TABLE IF NOT EXISTS doc_stats (
+                chunk_id TEXT PRIMARY KEY,
+                doc_length INTEGER
+            )
+        """,
+        )
+        index_statements = (
+            "CREATE INDEX IF NOT EXISTS idx_vectors_chapter ON vectors(chapter)",
+            "CREATE INDEX IF NOT EXISTS idx_vectors_parent ON vectors(parent_chunk_id)",
+            "CREATE INDEX IF NOT EXISTS idx_vectors_type ON vectors(chunk_type)",
+            "CREATE INDEX IF NOT EXISTS idx_bm25_term ON bm25_index(term)",
+        )
+
+        # Tables first, then the indexes that reference them.
+        for statement in table_statements + index_statements:
+            cursor.execute(statement)
 
 
     @contextmanager
     @contextmanager
     def _get_conn(self):
     def _get_conn(self):

+ 16 - 0
.claude/scripts/data_modules/tests/test_extract_chapter_context.py

@@ -37,6 +37,22 @@ def test_extract_state_summary_accepts_dominant_key(tmp_path):
     assert "Ch11:fire" in text
     assert "Ch11:fire" in text
 
 
 
 
+def test_extract_chapter_outline_supports_hyphen_filename(tmp_path):
+    """The outline loader must find a volume file named with a hyphen separator."""
+    scripts_root = str(Path(__file__).resolve().parents[2])
+    if scripts_root not in sys.path:
+        sys.path.insert(0, scripts_root)
+
+    from extract_chapter_context import extract_chapter_outline
+
+    outline_file = tmp_path / "大纲" / "第1卷-详细大纲.md"
+    outline_file.parent.mkdir(parents=True, exist_ok=True)
+    outline_file.write_text("### 第1章:测试标题\n测试大纲", encoding="utf-8")
+
+    result = extract_chapter_outline(tmp_path, 1)
+    for expected in ("### 第1章:测试标题", "测试大纲"):
+        assert expected in result
+
+
 def test_build_chapter_context_payload_includes_contract_sections(tmp_path):
 def test_build_chapter_context_payload_includes_contract_sections(tmp_path):
     scripts_dir = Path(__file__).resolve().parents[2]
     scripts_dir = Path(__file__).resolve().parents[2]
     if str(scripts_dir) not in sys.path:
     if str(scripts_dir) not in sys.path:

+ 48 - 0
.claude/scripts/data_modules/tests/test_rag_adapter.py

@@ -8,6 +8,7 @@ import sys
 import json
 import json
 import asyncio
 import asyncio
 import logging
 import logging
+import sqlite3
 
 
 import pytest
 import pytest
 
 
@@ -172,6 +173,53 @@ def test_recent_and_fetch_vectors(temp_project):
     assert len(rows) == 1
     assert len(rows) == 1
 
 
 
 
+def test_init_db_migrates_legacy_vectors_schema(tmp_path, monkeypatch):
+    """A legacy ``vectors`` table is upgraded in place, keeps its row and leaves a backup."""
+    cfg = DataModulesConfig.from_project_root(tmp_path)
+    cfg.ensure_dirs()
+    monkeypatch.setattr(rag_module, "get_client", lambda config: StubClient())
+
+    # Legacy layout: no parent_chunk_id / chunk_type / source_file / created_at columns.
+    legacy_row = ("ch0001_s1", 1, 1, "旧数据", b"")
+    with sqlite3.connect(str(cfg.vector_db)) as legacy_conn:
+        legacy_cursor = legacy_conn.cursor()
+        legacy_cursor.execute(
+            """
+            CREATE TABLE vectors (
+                chunk_id TEXT PRIMARY KEY,
+                chapter INTEGER,
+                scene_index INTEGER,
+                content TEXT,
+                embedding BLOB
+            )
+            """
+        )
+        legacy_cursor.execute(
+            """
+            INSERT INTO vectors (chunk_id, chapter, scene_index, content, embedding)
+            VALUES (?, ?, ?, ?, ?)
+            """,
+            legacy_row,
+        )
+        legacy_conn.commit()
+
+    # Constructing the adapter is what triggers the schema inspection and migration.
+    adapter = RAGAdapter(cfg)
+
+    new_columns = {"parent_chunk_id", "chunk_type", "source_file", "created_at"}
+    with adapter._get_conn() as conn:
+        cursor = conn.cursor()
+        cursor.execute("PRAGMA table_info(vectors)")
+        columns = {info[1] for info in cursor.fetchall()}
+        assert new_columns.issubset(columns)
+
+        cursor.execute("SELECT COUNT(*) FROM vectors")
+        assert cursor.fetchone()[0] == 1
+
+        cursor.execute("SELECT chunk_type FROM vectors WHERE chunk_id = ?", ("ch0001_s1",))
+        migrated = cursor.fetchone()
+        assert migrated is not None
+        assert migrated[0] == "scene"
+
+    backups = list((cfg.webnovel_dir / "backups").glob("vectors.db.schema_migration.v*.bak"))
+    assert backups
+
+
 def test_rag_adapter_cli(temp_project, monkeypatch, capsys):
 def test_rag_adapter_cli(temp_project, monkeypatch, capsys):
     # stats
     # stats
     def run_cli(args):
     def run_cli(args):

+ 9 - 3
.claude/scripts/extract_chapter_context.py

@@ -54,10 +54,16 @@ def find_project_root(start_path: Path | None = None) -> Path:
 def extract_chapter_outline(project_root: Path, chapter_num: int) -> str:
 def extract_chapter_outline(project_root: Path, chapter_num: int) -> str:
     """Extract chapter outline segment from volume outline file."""
     """Extract chapter outline segment from volume outline file."""
     volume_num = (chapter_num - 1) // 50 + 1
     volume_num = (chapter_num - 1) // 50 + 1
-    outline_file = project_root / "大纲" / f"第{volume_num}卷 详细大纲.md"
+    outline_candidates = [
+        project_root / "大纲" / f"第{volume_num}卷-详细大纲.md",
+        project_root / "大纲" / f"第{volume_num}卷 详细大纲.md",
+        project_root / "大纲" / f"第{volume_num}卷详细大纲.md",
+    ]
+    outline_file = next((p for p in outline_candidates if p.exists()), None)
 
 
-    if not outline_file.exists():
-        return f"⚠️ 大纲文件不存在: {outline_file}"
+    if outline_file is None:
+        tried = " / ".join(str(p) for p in outline_candidates)
+        return f"⚠️ 大纲文件不存在,已尝试: {tried}"
 
 
     content = outline_file.read_text(encoding="utf-8")
     content = outline_file.read_text(encoding="utf-8")
 
 

+ 240 - 0
.claude/scripts/quality_trend_report.py

@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+quality_trend_report.py - 生成章节质量趋势报告(离线)
+
+数据来源:
+- index.db.review_metrics
+- index.db.writing_checklist_scores
+"""
+
+from __future__ import annotations
+
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List
+
+from runtime_compat import enable_windows_utf8_stdio
+
+try:
+    from project_locator import resolve_project_root
+except ImportError:  # pragma: no cover
+    from scripts.project_locator import resolve_project_root
+
+try:
+    from data_modules.config import DataModulesConfig
+    from data_modules.index_manager import IndexManager
+except ImportError:  # pragma: no cover
+    from scripts.data_modules.config import DataModulesConfig
+    from scripts.data_modules.index_manager import IndexManager
+
+
+def _to_float(value: Any, default: float = 0.0) -> float:
+    """Coerce *value* to ``float``, returning *default* when conversion fails.
+
+    ``OverflowError`` is handled alongside ``TypeError``/``ValueError`` because
+    ``float()`` raises it for integers too large to represent.
+    """
+    try:
+        return float(value)
+    except (TypeError, ValueError, OverflowError):
+        return default
+
+
+def _to_int(value: Any, default: int = 0) -> int:
+    """Coerce *value* to ``int``, returning *default* when conversion fails.
+
+    ``OverflowError`` is handled alongside ``TypeError``/``ValueError`` because
+    ``int()`` raises it for infinite floats.
+    """
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        return default
+
+
+def _percent(value: float) -> str:
+    """Format a ratio as a percentage string with one decimal place."""
+    scaled = value * 100
+    return f"{scaled:.1f}%"
+
+
+def _build_review_rows(records: List[Dict[str, Any]]) -> List[str]:
+    """Render review-metric records as Markdown table rows.
+
+    Rows are ordered by ``end_chapter`` and then ``start_chapter``. Each row holds
+    the chapter range, the overall score (one decimal place) and the
+    critical/high/medium/low counts read from ``severity_counts``.
+
+    Returns exactly one placeholder row when *records* is empty, matching
+    ``_build_checklist_rows``.
+    """
+    if not records:
+        # A single placeholder row keeps the table well-formed without duplication.
+        return ["| - | - | - | - | - | - |"]
+
+    rows: List[str] = []
+    sorted_records = sorted(
+        records,
+        key=lambda x: (_to_int(x.get("end_chapter")), _to_int(x.get("start_chapter"))),
+    )
+    for row in sorted_records:
+        # A missing or null severity_counts is treated as "no issues recorded".
+        severities = row.get("severity_counts") or {}
+        critical = _to_int(severities.get("critical"))
+        high = _to_int(severities.get("high"))
+        medium = _to_int(severities.get("medium"))
+        low = _to_int(severities.get("low"))
+        range_text = f"{_to_int(row.get('start_chapter'))}-{_to_int(row.get('end_chapter'))}"
+        score = _to_float(row.get("overall_score"))
+        rows.append(
+            f"| {range_text} | {score:.1f} | {critical} | {high} | {medium} | {low} |"
+        )
+    return rows
+
+
+def _build_checklist_rows(records: List[Dict[str, Any]]) -> List[str]:
+    """Render writing-checklist records as Markdown table rows ordered by chapter.
+
+    Returns a single placeholder row when *records* is empty. The required-item
+    completion rate is reported as 100% when a record has no required items.
+    """
+    if not records:
+        return ["| - | - | - | - |"]
+
+    def _render(record: Dict[str, Any]) -> str:
+        chapter_no = _to_int(record.get("chapter"))
+        score_value = _to_float(record.get("score"))
+        completion_rate = _to_float(record.get("completion_rate"))
+        required_total = _to_int(record.get("required_items"))
+        required_done = _to_int(record.get("completed_required"))
+        # Avoid dividing by zero: no required items counts as fully complete.
+        required_rate = required_done / required_total if required_total > 0 else 1.0
+        return (
+            f"| {chapter_no} | {score_value:.1f} "
+            f"| {_percent(completion_rate)} | {_percent(required_rate)} |"
+        )
+
+    ordered = sorted(records, key=lambda item: _to_int(item.get("chapter")))
+    return [_render(record) for record in ordered]
+
+
+def _build_risk_flags(
+    review_trend: Dict[str, Any],
+    checklist_trend: Dict[str, Any],
+) -> List[str]:
+    """Derive human-readable risk notes from the review and checklist trend stats.
+
+    Flags are emitted, in order, for: a review average below 75; any critical
+    issues (or, failing that, five or more high issues); a checklist score
+    average below 80; and a checklist completion average below 0.7. Averages
+    are only judged when the matching record count is positive. When nothing
+    is flagged, a single "all stable" note is returned.
+    """
+    flags: List[str] = []
+
+    # Counts go through _to_int so a None or non-numeric "count" cannot raise
+    # on comparison; build_quality_report normalises the same field this way.
+    review_count = _to_int(review_trend.get("count"))
+    checklist_count = _to_int(checklist_trend.get("count"))
+
+    overall_avg = _to_float(review_trend.get("overall_avg"))
+    if overall_avg < 75 and review_count > 0:
+        flags.append(f"审查均分偏低({overall_avg:.1f}),建议优先回看低分区间。")
+
+    severity_totals = review_trend.get("severity_totals") or {}
+    critical_total = _to_int(severity_totals.get("critical"))
+    high_total = _to_int(severity_totals.get("high"))
+    if critical_total > 0:
+        flags.append(f"存在 {critical_total} 个 critical 问题,建议设为最高修复优先级。")
+    elif high_total >= 5:
+        # High-severity volume is only reported when there are no criticals.
+        flags.append(f"high 问题累计 {high_total} 个,建议做批量修复专项。")
+
+    score_avg = _to_float(checklist_trend.get("score_avg"))
+    if checklist_count > 0 and score_avg < 80:
+        flags.append(f"写作清单平均分偏低({score_avg:.1f}),建议加强执行清单落地。")
+
+    completion_avg = _to_float(checklist_trend.get("completion_avg"))
+    if checklist_count > 0 and completion_avg < 0.7:
+        flags.append(f"写作清单完成率仅 {_percent(completion_avg)},建议减少每章可选项数量。")
+
+    if not flags:
+        flags.append("近期质量指标整体稳定,暂无高优先级风险。")
+
+    return flags
+
+
+def build_quality_report(
+    project_root: Path,
+    manager: IndexManager,
+    *,
+    limit: int,
+) -> str:
+    """Assemble the Markdown quality trend report from the index manager's data.
+
+    Reads the most recent *limit* review metrics and writing-checklist scores,
+    together with their trend statistics, from *manager*. Returns the report
+    as one newline-joined string with these sections: overview, review range
+    trend, per-dimension averages, severity totals, checklist trend and risk
+    notes.
+    """
+    review_records = manager.get_recent_review_metrics(limit=limit)
+    review_trend = manager.get_review_trend_stats(last_n=limit)
+    checklist_records = manager.get_recent_writing_checklist_scores(limit=limit)
+    checklist_trend = manager.get_writing_checklist_score_trend(last_n=limit)
+
+    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    review_avg = _to_float(review_trend.get("overall_avg"))
+    review_total = _to_int(review_trend.get("count"))
+    checklist_total = _to_int(checklist_trend.get("count"))
+    checklist_avg = _to_float(checklist_trend.get("score_avg"))
+    checklist_completion = _to_float(checklist_trend.get("completion_avg"))
+
+    per_dimension = review_trend.get("dimension_avg") or {}
+    per_severity = review_trend.get("severity_totals") or {}
+    risk_notes = _build_risk_flags(review_trend, checklist_trend)
+
+    # Header and overview.
+    report: List[str] = [
+        "# 质量趋势报告",
+        "",
+        f"- 生成时间: {generated_at}",
+        f"- 项目路径: `{project_root}`",
+        f"- 统计窗口: 最近 {limit} 条记录",
+        "",
+        "## 总览",
+        "",
+        f"- 审查记录数: {review_total}",
+        f"- 审查均分: {review_avg:.1f}",
+        f"- 清单评分记录数: {checklist_total}",
+        f"- 清单平均分: {checklist_avg:.1f}",
+        f"- 清单平均完成率: {_percent(checklist_completion)}",
+        "",
+    ]
+
+    # Review range trend table.
+    report += [
+        "## 审查区间趋势",
+        "",
+        "| 区间 | 总分 | Critical | High | Medium | Low |",
+        "|---|---:|---:|---:|---:|---:|",
+    ]
+    report += _build_review_rows(review_records)
+    report.append("")
+
+    # Per-dimension averages, or a placeholder row when none are available.
+    report += ["## 维度均分", "", "| 维度 | 平均分 |", "|---|---:|"]
+    if per_dimension:
+        report += [
+            f"| {name} | {_to_float(per_dimension.get(name)):.1f} |"
+            for name in sorted(per_dimension.keys())
+        ]
+    else:
+        report.append("| - | - |")
+    report.append("")
+
+    # Severity totals, always in fixed critical-to-low order.
+    report += ["## 严重级别汇总", "", "| 等级 | 数量 |", "|---|---:|"]
+    report += [
+        f"| {level} | {_to_int(per_severity.get(level))} |"
+        for level in ("critical", "high", "medium", "low")
+    ]
+    report.append("")
+
+    # Writing checklist trend table.
+    report += [
+        "## 写作清单趋势",
+        "",
+        "| 章节 | 分数 | 完成率 | 必做完成率 |",
+        "|---:|---:|---:|---:|",
+    ]
+    report += _build_checklist_rows(checklist_records)
+    report.append("")
+
+    # Risk notes as a bullet list.
+    report += ["## 风险提示", ""]
+    report += [f"- {note}" for note in risk_notes]
+    report.append("")
+
+    return "\n".join(report)
+
+
+def main() -> None:
+    """Parse CLI arguments, build the quality trend report and write it to disk.
+
+    The project root is taken from ``--project-root`` or resolved automatically.
+    The report goes to ``--output`` or to ``reports/quality-trend.md`` under the
+    configured webnovel directory. ``--limit`` is clamped to at least 1.
+    """
+    parser = argparse.ArgumentParser(description="生成离线质量趋势报告(基于 index.db)")
+    parser.add_argument("--project-root", type=str, help="项目根目录(可选,不传则自动探测)")
+    parser.add_argument("--limit", type=int, default=20, help="统计最近 N 条记录(默认 20)")
+    parser.add_argument("--output", type=str, help="输出文件路径(默认 .webnovel/reports/quality-trend.md)")
+    args = parser.parse_args()
+
+    project_root = (
+        Path(args.project_root).expanduser().resolve()
+        if args.project_root
+        else resolve_project_root()
+    )
+
+    cfg = DataModulesConfig.from_project_root(project_root)
+    manager = IndexManager(cfg)
+
+    limit = max(1, int(args.limit))
+    if args.output:
+        output_path = Path(args.output).expanduser().resolve()
+    else:
+        output_path = cfg.webnovel_dir / "reports" / "quality-trend.md"
+    # Make sure the destination directory exists before writing.
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    report = build_quality_report(project_root, manager, limit=limit)
+    output_path.write_text(report, encoding="utf-8")
+    print(f"✅ 已生成质量趋势报告: {output_path}")
+
+
+# Script entry point: on Windows, call enable_windows_utf8_stdio() before main().
+if __name__ == "__main__":
+    import sys
+    if sys.platform == "win32":
+        enable_windows_utf8_stdio()
+    main()

+ 44 - 0
.claude/scripts/run_tests.ps1

@@ -0,0 +1,44 @@
+# Runs the data_modules pytest suite in "smoke" (two test files) or "full" mode.
+# -Mode        : "smoke" (default) or "full".
+# -ProjectRoot : repository root; defaults to two levels above this script.
+param(
+    [ValidateSet("smoke", "full")]
+    [string]$Mode = "smoke",
+    [string]$ProjectRoot = ""
+)
+
+# Treat every error as terminating so a failed setup step stops the script.
+$ErrorActionPreference = "Stop"
+
+if ([string]::IsNullOrWhiteSpace($ProjectRoot)) {
+    $ProjectRoot = (Resolve-Path (Join-Path $PSScriptRoot "..\\..")).Path
+} else {
+    $ProjectRoot = (Resolve-Path $ProjectRoot).Path
+}
+
+# The pytest paths below are relative to the project root.
+Set-Location $ProjectRoot
+
+# Use a temp directory inside the project instead of the system default.
+$tmpRoot = Join-Path $ProjectRoot ".tmp\\pytest"
+New-Item -ItemType Directory -Path $tmpRoot -Force | Out-Null
+
+$env:TMP = $tmpRoot
+$env:TEMP = $tmpRoot
+$env:PYTHONPATH = ".claude/scripts"
+
+# Each mode gets its own pytest --basetemp directory.
+$baseTemp = Join-Path $tmpRoot ("run-" + $Mode)
+
+Write-Host "ProjectRoot: $ProjectRoot"
+Write-Host "TMP/TEMP: $tmpRoot"
+Write-Host "Mode: $Mode"
+
+# Smoke mode: run only the two listed test files, with coverage disabled.
+if ($Mode -eq "smoke") {
+    python -m pytest -q `
+        .claude/scripts/data_modules/tests/test_extract_chapter_context.py `
+        .claude/scripts/data_modules/tests/test_rag_adapter.py `
+        --basetemp $baseTemp `
+        --no-cov `
+        -p no:cacheprovider
+    exit $LASTEXITCODE
+}
+
+# Full mode: run the whole data_modules test directory and return pytest's exit code.
+python -m pytest -q `
+    .claude/scripts/data_modules/tests `
+    --basetemp $baseTemp `
+    -p no:cacheprovider
+exit $LASTEXITCODE

+ 18 - 1
README.md

@@ -698,6 +698,23 @@ python -m data_modules.index_manager get-recent-review-metrics --limit 5 --proje
 python -m data_modules.index_manager get-review-trend-stats --last-n 5 --project-root "."
 python -m data_modules.index_manager get-review-trend-stats --last-n 5 --project-root "."
 ```
 ```
 
 
+### 质量趋势看板(离线报告)
+
+```bash
+# 生成最近20条记录的质量趋势报告
+python .claude/scripts/quality_trend_report.py --project-root "." --limit 20
+```
+
+### 测试入口脚本
+
+```bash
+# 快速回归(推荐)
+pwsh .claude/scripts/run_tests.ps1 -Mode smoke
+
+# 全量 data_modules 测试
+pwsh .claude/scripts/run_tests.ps1 -Mode full
+```
+
 ### 健康报告(status_reporter)
 ### 健康报告(status_reporter)
 
 
 ```bash
 ```bash
@@ -803,7 +820,7 @@ git checkout ch0045
 - **webnovel-learn skill**:从会话提取成功模式写入 project_memory.json
 - **webnovel-learn skill**:从会话提取成功模式写入 project_memory.json
 - **CLI 统一输出**:CLIResponse 标准化 JSON 输出格式
 - **CLI 统一输出**:CLIResponse 标准化 JSON 输出格式
 - **Pydantic Schema**:DataAgentOutput 等结构化验证
 - **Pydantic Schema**:DataAgentOutput 等结构化验证
-- **不向前兼容**:vectors.db 表结构变更时自动 DROP+CREATE
+- **向量库安全迁移**:vectors.db 表结构变更时自动备份并执行事务迁移,失败可回滚
 
 
 ### Context Contract v2(阶段 E)
 ### Context Contract v2(阶段 E)