test_validate_csv.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """Tests for validate_csv.py."""
  4. import subprocess
  5. import sys
  6. from pathlib import Path
  7. import csv
  8. import uuid
  9. SCRIPT = str(Path(__file__).resolve().parents[1] / "validate_csv.py")
  10. CSV_DIR = str(Path(__file__).resolve().parents[2] / "references" / "csv")
  11. def _make_local_tmp_path() -> Path:
  12. base_dir = Path.home() / ".codex" / "memories" / "validate_csv_cases"
  13. base_dir.mkdir(parents=True, exist_ok=True)
  14. tmp_dir = base_dir / f"case_{uuid.uuid4().hex}"
  15. tmp_dir.mkdir()
  16. return tmp_dir
  17. def run_validate(*args: str) -> subprocess.CompletedProcess:
  18. return subprocess.run(
  19. [sys.executable, SCRIPT, "--csv-dir", CSV_DIR, *args],
  20. capture_output=True,
  21. text=True,
  22. )
  23. class TestValidateCsvRuns:
  24. def test_script_runs_without_crash(self):
  25. result = run_validate()
  26. assert result.returncode in (0, 1)
  27. assert "Traceback" not in result.stderr
  28. def test_json_output_mode(self):
  29. import json
  30. result = run_validate("--format", "json")
  31. assert result.returncode in (0, 1)
  32. data = json.loads(result.stdout)
  33. assert "errors" in data
  34. assert "warnings" in data
  35. def test_current_csv_data_has_no_errors_or_warnings(self):
  36. import json
  37. result = run_validate("--format", "json")
  38. assert result.returncode == 0, result.stderr
  39. data = json.loads(result.stdout)
  40. assert data["errors"] == []
  41. assert data["warnings"] == []
  42. def test_phase2_row_count_thresholds(self):
  43. csv_dir = Path(CSV_DIR)
  44. with open(csv_dir / "题材与调性推理.csv", "r", encoding="utf-8-sig", newline="") as f:
  45. route_rows = list(csv.DictReader(f))
  46. with open(csv_dir / "裁决规则.csv", "r", encoding="utf-8-sig", newline="") as f:
  47. reasoning_rows = list(csv.DictReader(f))
  48. assert len(route_rows) >= 16
  49. assert len(reasoning_rows) >= 14
  50. def test_detects_extra_csv_fields(self):
  51. tmp_path = _make_local_tmp_path()
  52. (tmp_path / "命名规则.csv").write_text(
  53. "\n".join(
  54. [
  55. "编号,适用技能,分类,层级,关键词,意图与同义词,适用题材,大模型指令,核心摘要,详细展开,命名对象,规则,正例,反例,毒点",
  56. "NR-999,write,测试,知识补充,角色命名,,玄幻,指令,摘要,详细,人名,规则,正例,反例,毒点,EXTRA",
  57. ]
  58. ),
  59. encoding="utf-8-sig",
  60. )
  61. result = subprocess.run(
  62. [sys.executable, SCRIPT, "--csv-dir", str(tmp_path), "--format", "json"],
  63. capture_output=True,
  64. text=True,
  65. )
  66. import json
  67. data = json.loads(result.stdout)
  68. assert any("字段数超过表头" in error for error in data["errors"])
  69. def test_detects_invalid_skill_and_level(self):
  70. tmp_path = _make_local_tmp_path()
  71. (tmp_path / "命名规则.csv").write_text(
  72. "\n".join(
  73. [
  74. "编号,适用技能,分类,层级,关键词,意图与同义词,适用题材,大模型指令,核心摘要,详细展开,命名对象,规则,正例,反例,毒点",
  75. "NR-998,bogus,测试,推理层,角色命名,,玄幻,指令,摘要,详细,人名,规则,正例,反例,毒点",
  76. ]
  77. ),
  78. encoding="utf-8-sig",
  79. )
  80. result = subprocess.run(
  81. [sys.executable, SCRIPT, "--csv-dir", str(tmp_path), "--format", "json"],
  82. capture_output=True,
  83. text=True,
  84. )
  85. import json
  86. data = json.loads(result.stdout)
  87. assert any("适用技能值 'bogus' 不合法" in error for error in data["errors"])
  88. assert any("层级值 '推理层' 不合法" in error for error in data["errors"])