artifact_validator.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. from __future__ import annotations
  4. import json
  5. from pathlib import Path
  6. from typing import Any
  7. from pydantic import ValidationError
  8. from .chapter_commit_schema import (
  9. DisambiguationResult,
  10. ExtractionResult,
  11. FulfillmentResult,
  12. ReviewResult,
  13. )
  14. SCHEMA_VERSION = "webnovel-artifact-validator/v1"
  15. ERROR_SCHEMA = "schema_error"
  16. ERROR_MISSING = "missing_artifact"
  17. ERROR_BLOCKING_REVIEW = "blocking_review"
  18. ERROR_MISSED_OUTLINE_NODE = "missed_outline_node"
  19. ERROR_PENDING_DISAMBIGUATION = "pending_disambiguation"
  20. ERROR_PROJECTION_FAILURE = "projection_failure"
  21. ARTIFACT_SCHEMAS = {
  22. "review_result": ReviewResult,
  23. "fulfillment_result": FulfillmentResult,
  24. "disambiguation_result": DisambiguationResult,
  25. "extraction_result": ExtractionResult,
  26. }
  27. def _issue(
  28. issue_type: str,
  29. *,
  30. message: str,
  31. severity: str = "blocker",
  32. path: str = "",
  33. field: str = "",
  34. impact: str = "",
  35. repair: str = "",
  36. ) -> dict[str, str]:
  37. return {
  38. "type": issue_type,
  39. "severity": severity,
  40. "message": message,
  41. "path": path,
  42. "field": field,
  43. "impact": impact,
  44. "repair": repair,
  45. }
  46. def _empty_report(artifact: str, path: str = "") -> dict[str, Any]:
  47. return {
  48. "schema_version": SCHEMA_VERSION,
  49. "artifact": artifact,
  50. "path": path,
  51. "ok": True,
  52. "errors": [],
  53. "warnings": [],
  54. "payload": None,
  55. }
  56. def _read_json_artifact(path: str | Path) -> tuple[Any, dict[str, Any] | None]:
  57. artifact_path = Path(path)
  58. if not artifact_path.is_file():
  59. return None, _issue(
  60. ERROR_MISSING,
  61. message=f"artifact missing: {artifact_path}",
  62. path=str(artifact_path),
  63. impact="提交前 artifact 不完整,无法可靠生成 chapter commit。",
  64. repair="重新运行 reviewer/data-agent,或按 schema 补齐该 JSON 文件。",
  65. )
  66. try:
  67. return json.loads(artifact_path.read_text(encoding="utf-8")), None
  68. except json.JSONDecodeError as exc:
  69. return None, _issue(
  70. ERROR_SCHEMA,
  71. message=f"invalid JSON: {exc}",
  72. path=str(artifact_path),
  73. impact="artifact 无法被 runtime 读取。",
  74. repair="修复 JSON 格式,确保文件为 UTF-8。",
  75. )
  76. except OSError as exc:
  77. return None, _issue(
  78. ERROR_SCHEMA,
  79. message=f"artifact read failed: {exc}",
  80. path=str(artifact_path),
  81. impact="artifact 无法被 runtime 读取。",
  82. repair="检查文件权限和路径是否正确。",
  83. )
  84. def _schema_error_message(exc: Exception) -> str:
  85. if isinstance(exc, ValidationError):
  86. return "; ".join(str(error.get("msg") or "") for error in exc.errors()) or str(exc)
  87. return str(exc)
  88. def _policy_issues(artifact: str, payload: dict[str, Any], path: str) -> list[dict[str, str]]:
  89. issues: list[dict[str, str]] = []
  90. if artifact == "review_result":
  91. blocking_count = int(payload.get("blocking_count") or 0)
  92. if blocking_count > 0:
  93. issues.append(
  94. _issue(
  95. ERROR_BLOCKING_REVIEW,
  96. message=f"review_result has {blocking_count} blocking issue(s)",
  97. path=path,
  98. field="blocking_count",
  99. impact="存在阻断级审查问题时不应进入提交。",
  100. repair="先定点修复 blocking issue,或让用户明确裁决后再继续。",
  101. )
  102. )
  103. elif artifact == "fulfillment_result":
  104. missed = payload.get("missed_nodes") or []
  105. if missed:
  106. issues.append(
  107. _issue(
  108. ERROR_MISSED_OUTLINE_NODE,
  109. message=f"fulfillment_result missed {len(missed)} planned node(s)",
  110. path=path,
  111. field="missed_nodes",
  112. impact="大纲必须节点未覆盖,提交会把偏离章节固化为事实。",
  113. repair="补写遗漏节点,或经用户裁决修改本章规划。",
  114. )
  115. )
  116. elif artifact == "disambiguation_result":
  117. pending = payload.get("pending") or []
  118. if pending:
  119. issues.append(
  120. _issue(
  121. ERROR_PENDING_DISAMBIGUATION,
  122. message=f"disambiguation_result has {len(pending)} pending item(s)",
  123. path=path,
  124. field="pending",
  125. impact="未消歧实体会污染角色、关系和事件投影。",
  126. repair="人工确认 pending 项,或把低置信实体从 extraction 中移除。",
  127. )
  128. )
  129. return issues
  130. def validate_artifact_payload(artifact: str, payload: Any, *, path: str = "") -> dict[str, Any]:
  131. if artifact not in ARTIFACT_SCHEMAS:
  132. raise ValueError(f"unknown artifact: {artifact}")
  133. report = _empty_report(artifact, path)
  134. schema = ARTIFACT_SCHEMAS[artifact]
  135. try:
  136. model = schema.model_validate(payload)
  137. except Exception as exc:
  138. report["errors"].append(
  139. _issue(
  140. ERROR_SCHEMA,
  141. message=_schema_error_message(exc),
  142. path=path,
  143. impact="artifact 字段形状不符合 chapter commit 权威 schema。",
  144. repair="按 chapter_commit_schema.py 的顶层字段要求修正,不要包 fulfillment/disambiguation/extraction 外层。",
  145. )
  146. )
  147. report["ok"] = False
  148. return report
  149. normalized = model.model_dump()
  150. report["payload"] = normalized
  151. report["errors"].extend(_policy_issues(artifact, normalized, path))
  152. report["ok"] = not any(item.get("severity") == "blocker" for item in report["errors"])
  153. return report
  154. def validate_artifact_file(artifact: str, path: str | Path) -> dict[str, Any]:
  155. report = _empty_report(artifact, str(path))
  156. payload, error = _read_json_artifact(path)
  157. if error:
  158. report["errors"].append(error)
  159. report["ok"] = False
  160. return report
  161. return validate_artifact_payload(artifact, payload, path=str(path))
  162. def validate_review_result(path: str | Path) -> dict[str, Any]:
  163. return validate_artifact_file("review_result", path)
  164. def validate_fulfillment_result(path: str | Path) -> dict[str, Any]:
  165. return validate_artifact_file("fulfillment_result", path)
  166. def validate_disambiguation_result(path: str | Path) -> dict[str, Any]:
  167. return validate_artifact_file("disambiguation_result", path)
  168. def validate_extraction_result(path: str | Path) -> dict[str, Any]:
  169. return validate_artifact_file("extraction_result", path)
  170. def merge_reports(reports: list[dict[str, Any]], *, artifact: str = "chapter_commit_inputs") -> dict[str, Any]:
  171. errors: list[dict[str, Any]] = []
  172. warnings: list[dict[str, Any]] = []
  173. payloads: dict[str, Any] = {}
  174. for report in reports:
  175. errors.extend(report.get("errors") or [])
  176. warnings.extend(report.get("warnings") or [])
  177. if report.get("payload") is not None:
  178. payloads[str(report.get("artifact"))] = report.get("payload")
  179. return {
  180. "schema_version": SCHEMA_VERSION,
  181. "artifact": artifact,
  182. "ok": not any(item.get("severity") == "blocker" for item in errors),
  183. "errors": errors,
  184. "warnings": warnings,
  185. "payloads": payloads,
  186. "reports": reports,
  187. }
  188. def validate_commit_artifact_files(
  189. *,
  190. review_result: str | Path,
  191. fulfillment_result: str | Path,
  192. disambiguation_result: str | Path,
  193. extraction_result: str | Path,
  194. ) -> dict[str, Any]:
  195. return merge_reports(
  196. [
  197. validate_review_result(review_result),
  198. validate_fulfillment_result(fulfillment_result),
  199. validate_disambiguation_result(disambiguation_result),
  200. validate_extraction_result(extraction_result),
  201. ]
  202. )
  203. def validate_chapter_commit(path: str | Path) -> dict[str, Any]:
  204. commit_path = Path(path)
  205. report = _empty_report("chapter_commit", str(commit_path))
  206. payload, error = _read_json_artifact(commit_path)
  207. if error:
  208. report["errors"].append(error)
  209. report["ok"] = False
  210. return report
  211. if not isinstance(payload, dict):
  212. report["errors"].append(
  213. _issue(
  214. ERROR_SCHEMA,
  215. message="chapter_commit must be a JSON object",
  216. path=str(commit_path),
  217. impact="commit 文件无法作为事实主链读取。",
  218. repair="从备份恢复 commit,或重新执行 chapter-commit。",
  219. )
  220. )
  221. report["ok"] = False
  222. return report
  223. nested_reports = []
  224. for artifact in ARTIFACT_SCHEMAS:
  225. if artifact not in payload:
  226. report["errors"].append(
  227. _issue(
  228. ERROR_SCHEMA,
  229. message=f"chapter_commit missing {artifact}",
  230. path=str(commit_path),
  231. field=artifact,
  232. impact="commit 文件缺少提交 artifact 快照。",
  233. repair="重新执行 chapter-commit 生成完整 commit。",
  234. )
  235. )
  236. continue
  237. nested_reports.append(validate_artifact_payload(artifact, payload.get(artifact), path=str(commit_path)))
  238. projection_status = payload.get("projection_status") or {}
  239. if isinstance(projection_status, dict):
  240. for writer, status in projection_status.items():
  241. if str(status).startswith("failed:"):
  242. report["errors"].append(
  243. _issue(
  244. ERROR_PROJECTION_FAILURE,
  245. message=f"projection {writer} failed: {status}",
  246. path=str(commit_path),
  247. field=f"projection_status.{writer}",
  248. impact="提交事实已生成,但 read-model 投影不完整。",
  249. repair="修复失败原因后补跑 projection retry/replay。",
  250. )
  251. )
  252. merged = merge_reports(nested_reports, artifact="chapter_commit_nested")
  253. report["errors"].extend(merged["errors"])
  254. report["warnings"].extend(merged["warnings"])
  255. report["payload"] = payload
  256. report["ok"] = not any(item.get("severity") == "blocker" for item in report["errors"])
  257. return report