#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
stress_test_500chapters.py

500-chapter writing sandbox simulation - data-chain stability stress test.

Test targets:
1. state.json growth curve (file size as the chapter count grows)
2. entities_v3 entity-count growth
3. alias_index alias-index bloat
4. Foreshadowing tracking (planted / resolved ratio)
5. Atomic write performance
6. index.db query performance

Simulation parameters (based on a typical web novel):
- 500 chapters, about 3,500 characters per chapter
- On average 0.8 new characters per chapter (dense in the first 100 chapters, sparse later)
- On average 0.3 new locations per chapter
- On average 0.5 foreshadowing threads planted and 0.3 resolved per chapter
- The protagonist's cultivation realm advances once every 10 chapters
- Relationships are updated once every 5 chapters
"""
  20. import json
  21. import os
  22. import sys
  23. import time
  24. import random
  25. import shutil
  26. import tempfile
  27. from pathlib import Path
  28. from datetime import datetime
  29. from typing import Dict, Any, List
  30. # 添加脚本目录到路径
  31. script_dir = Path(__file__).resolve().parent
  32. sys.path.insert(0, str(script_dir))
  33. from security_utils import atomic_write_json, read_json_safe
  34. # Windows 编码修复
  35. if sys.platform == 'win32':
  36. import io
  37. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  38. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  39. # ============================================================================
  40. # 模拟配置
  41. # ============================================================================
  42. CONFIG = {
  43. "total_chapters": 500,
  44. "words_per_chapter": 3500,
  45. # 实体生成概率(随章节递减)
  46. "new_character_base_rate": 0.8, # 前50章
  47. "new_character_decay": 0.95, # 每50章衰减
  48. "new_location_rate": 0.3,
  49. "new_item_rate": 0.2,
  50. "new_faction_rate": 0.1,
  51. "new_technique_rate": 0.15,
  52. # 伏笔
  53. "foreshadow_plant_rate": 0.5,
  54. "foreshadow_resolve_rate": 0.3,
  55. "foreshadow_tiers": ["核心", "支线", "装饰"],
  56. "foreshadow_tier_weights": [0.1, 0.3, 0.6],
  57. # 主角升级
  58. "protagonist_upgrade_interval": 10,
  59. "realms": ["练气", "筑基", "金丹", "元婴", "化神", "炼虚", "合体", "大乘", "渡劫"],
  60. "layers_per_realm": 9,
  61. # 关系更新
  62. "relationship_update_interval": 5,
  63. "relationship_types": ["ally", "enemy", "romance", "mentor", "rival", "family"],
  64. # 别名生成
  65. "alias_per_character": 2.5, # 平均每个角色的别名数
  66. }
  67. # 随机名字池
  68. SURNAME_POOL = ["林", "陈", "王", "李", "张", "刘", "赵", "黄", "周", "吴", "徐", "孙", "马", "朱", "胡", "郭", "何", "高", "罗", "郑"]
  69. NAME_POOL = ["天", "云", "风", "雷", "火", "水", "月", "星", "龙", "凤", "虎", "鹤", "剑", "刀", "枪", "棍", "拳", "掌", "指", "心"]
  70. LOCATION_PREFIX = ["天", "云", "龙", "凤", "青", "白", "黑", "红", "金", "玉"]
  71. LOCATION_SUFFIX = ["山", "谷", "城", "峰", "洞", "海", "林", "湖", "殿", "宗"]
  72. class SimulationMetrics:
  73. """模拟指标收集器"""
  74. def __init__(self):
  75. self.checkpoints: List[Dict] = []
  76. self.write_times: List[float] = []
  77. self.errors: List[str] = []
  78. def record_checkpoint(self, chapter: int, state: Dict, state_file: Path):
  79. """记录检查点"""
  80. file_size = state_file.stat().st_size if state_file.exists() else 0
  81. entities_v3 = state.get("entities_v3", {})
  82. entity_counts = {
  83. etype: len(entities)
  84. for etype, entities in entities_v3.items()
  85. }
  86. total_entities = sum(entity_counts.values())
  87. alias_count = len(state.get("alias_index", {}))
  88. foreshadowing = state.get("foreshadowing", [])
  89. active_foreshadow = len([f for f in foreshadowing if f.get("status") == "未回收"])
  90. resolved_foreshadow = len([f for f in foreshadowing if f.get("status") == "已回收"])
  91. relationships = state.get("relationships", [])
  92. if isinstance(relationships, dict):
  93. relationships = list(relationships.values())
  94. self.checkpoints.append({
  95. "chapter": chapter,
  96. "file_size_kb": file_size / 1024,
  97. "total_entities": total_entities,
  98. "entity_counts": entity_counts,
  99. "alias_count": alias_count,
  100. "active_foreshadow": active_foreshadow,
  101. "resolved_foreshadow": resolved_foreshadow,
  102. "relationship_count": len(relationships) if isinstance(relationships, list) else 0,
  103. "avg_write_time_ms": sum(self.write_times[-10:]) / max(len(self.write_times[-10:]), 1) * 1000,
  104. })
  105. def record_write_time(self, duration: float):
  106. self.write_times.append(duration)
  107. def record_error(self, error: str):
  108. self.errors.append(error)
  109. def generate_report(self) -> str:
  110. """生成测试报告"""
  111. if not self.checkpoints:
  112. return "No data collected"
  113. final = self.checkpoints[-1]
  114. first = self.checkpoints[0]
  115. lines = [
  116. "=" * 60,
  117. "📊 500章沙盘模拟测试报告",
  118. "=" * 60,
  119. "",
  120. "## 基础指标",
  121. f"- 总章节数: {final['chapter']}",
  122. f"- 总字数: {final['chapter'] * CONFIG['words_per_chapter']:,}",
  123. "",
  124. "## state.json 增长",
  125. f"- 初始大小: {first['file_size_kb']:.2f} KB",
  126. f"- 最终大小: {final['file_size_kb']:.2f} KB",
  127. f"- 增长倍数: {final['file_size_kb'] / max(first['file_size_kb'], 0.1):.1f}x",
  128. "",
  129. "## 实体统计",
  130. f"- 总实体数: {final['total_entities']}",
  131. ]
  132. for etype, count in final['entity_counts'].items():
  133. lines.append(f" - {etype}: {count}")
  134. lines.extend([
  135. f"- 别名索引条目: {final['alias_count']}",
  136. "",
  137. "## 伏笔统计",
  138. f"- 活跃伏笔: {final['active_foreshadow']}",
  139. f"- 已回收伏笔: {final['resolved_foreshadow']}",
  140. f"- 回收率: {final['resolved_foreshadow'] / max(final['active_foreshadow'] + final['resolved_foreshadow'], 1) * 100:.1f}%",
  141. "",
  142. "## 性能指标",
  143. f"- 平均写入时间: {sum(self.write_times) / max(len(self.write_times), 1) * 1000:.2f} ms",
  144. f"- 最大写入时间: {max(self.write_times) * 1000:.2f} ms" if self.write_times else "N/A",
  145. f"- 最小写入时间: {min(self.write_times) * 1000:.2f} ms" if self.write_times else "N/A",
  146. "",
  147. "## 错误统计",
  148. f"- 错误数: {len(self.errors)}",
  149. ])
  150. if self.errors:
  151. lines.append("- 错误详情:")
  152. for err in self.errors[:5]:
  153. lines.append(f" - {err}")
  154. # 增长曲线(每100章采样)
  155. lines.extend([
  156. "",
  157. "## 增长曲线(每100章)",
  158. "| 章节 | 文件大小(KB) | 实体数 | 别名数 | 活跃伏笔 | 写入时间(ms) |",
  159. "|------|-------------|-------|-------|---------|-------------|",
  160. ])
  161. for cp in self.checkpoints:
  162. if cp['chapter'] % 100 == 0 or cp['chapter'] == final['chapter']:
  163. lines.append(
  164. f"| {cp['chapter']} | {cp['file_size_kb']:.1f} | "
  165. f"{cp['total_entities']} | {cp['alias_count']} | "
  166. f"{cp['active_foreshadow']} | {cp['avg_write_time_ms']:.1f} |"
  167. )
  168. # 稳定性评估
  169. lines.extend([
  170. "",
  171. "## 稳定性评估",
  172. ])
  173. # 检查文件大小是否在合理范围
  174. if final['file_size_kb'] < 500:
  175. lines.append("✅ 文件大小合理 (< 500KB)")
  176. elif final['file_size_kb'] < 1024:
  177. lines.append("⚠️ 文件大小偏大 (500KB-1MB),建议启用归档")
  178. else:
  179. lines.append("❌ 文件过大 (> 1MB),需要优化")
  180. # 检查写入性能
  181. avg_write = sum(self.write_times) / max(len(self.write_times), 1) * 1000
  182. if avg_write < 50:
  183. lines.append("✅ 写入性能良好 (< 50ms)")
  184. elif avg_write < 200:
  185. lines.append("⚠️ 写入性能一般 (50-200ms)")
  186. else:
  187. lines.append("❌ 写入性能差 (> 200ms)")
  188. # 检查错误率
  189. if not self.errors:
  190. lines.append("✅ 无错误")
  191. else:
  192. lines.append(f"❌ 有 {len(self.errors)} 个错误")
  193. lines.append("")
  194. lines.append("=" * 60)
  195. return "\n".join(lines)
  196. class ChapterSimulator:
  197. """章节模拟器"""
  198. def __init__(self, project_root: Path):
  199. self.project_root = project_root
  200. self.state_file = project_root / ".webnovel" / "state.json"
  201. self.metrics = SimulationMetrics()
  202. self.generated_names = set()
  203. self.entity_id_counter = 0
  204. def _generate_id(self, prefix: str) -> str:
  205. self.entity_id_counter += 1
  206. return f"{prefix}_{self.entity_id_counter:05d}"
  207. def _generate_character_name(self) -> str:
  208. for _ in range(100):
  209. name = random.choice(SURNAME_POOL) + random.choice(NAME_POOL) + random.choice(NAME_POOL)
  210. if name not in self.generated_names:
  211. self.generated_names.add(name)
  212. return name
  213. return f"角色_{len(self.generated_names)}"
  214. def _generate_location_name(self) -> str:
  215. return random.choice(LOCATION_PREFIX) + random.choice(LOCATION_SUFFIX)
  216. def _get_character_rate(self, chapter: int) -> float:
  217. """根据章节获取角色生成概率(递减)"""
  218. decay_periods = chapter // 50
  219. rate = CONFIG["new_character_base_rate"] * (CONFIG["new_character_decay"] ** decay_periods)
  220. return max(rate, 0.1) # 最低 10%
  221. def init_project(self):
  222. """初始化模拟项目"""
  223. self.project_root.mkdir(parents=True, exist_ok=True)
  224. (self.project_root / ".webnovel").mkdir(exist_ok=True)
  225. (self.project_root / "正文").mkdir(exist_ok=True)
  226. # 初始 state.json
  227. initial_state = {
  228. "project_info": {
  229. "title": "模拟测试小说",
  230. "genre": "玄幻",
  231. "created_at": datetime.now().strftime("%Y-%m-%d"),
  232. "target_chapters": 500,
  233. },
  234. "progress": {
  235. "current_chapter": 0,
  236. "total_words": 0,
  237. },
  238. "protagonist_state": {
  239. "name": "林天",
  240. "realm": "练气",
  241. "layer": 1,
  242. "golden_finger": {"name": "混沌珠", "level": 1},
  243. },
  244. "entities_v3": {
  245. "角色": {},
  246. "地点": {},
  247. "物品": {},
  248. "势力": {},
  249. "招式": {},
  250. },
  251. "alias_index": {},
  252. "foreshadowing": [],
  253. "relationships": [],
  254. }
  255. # 添加主角到实体
  256. protagonist_id = "protagonist_lintian"
  257. initial_state["entities_v3"]["角色"][protagonist_id] = {
  258. "canonical_name": "林天",
  259. "desc": "主角,拥有混沌珠",
  260. "tier": "核心",
  261. "aliases": ["林天", "天哥", "林少侠"],
  262. "current": {"realm": "练气", "layer": 1},
  263. "history": [],
  264. }
  265. initial_state["alias_index"]["林天"] = [{"type": "角色", "id": protagonist_id}]
  266. initial_state["alias_index"]["天哥"] = [{"type": "角色", "id": protagonist_id}]
  267. atomic_write_json(self.state_file, initial_state, backup=False)
  268. return initial_state
  269. def simulate_chapter(self, chapter: int, state: Dict) -> Dict:
  270. """模拟一章的数据变化"""
  271. # 1. 更新进度
  272. state["progress"]["current_chapter"] = chapter
  273. state["progress"]["total_words"] += CONFIG["words_per_chapter"]
  274. entities_v3 = state["entities_v3"]
  275. alias_index = state["alias_index"]
  276. # 2. 新增角色(概率递减)
  277. if random.random() < self._get_character_rate(chapter):
  278. char_name = self._generate_character_name()
  279. char_id = self._generate_id("char")
  280. tier = random.choices(
  281. ["核心", "支线", "装饰"],
  282. weights=[0.1, 0.3, 0.6]
  283. )[0]
  284. entities_v3["角色"][char_id] = {
  285. "canonical_name": char_name,
  286. "desc": f"第{chapter}章出场的{tier}角色",
  287. "tier": tier,
  288. "aliases": [char_name],
  289. "current": {"first_appearance": chapter},
  290. "history": [],
  291. }
  292. alias_index[char_name] = [{"type": "角色", "id": char_id}]
  293. # 生成额外别名
  294. if random.random() < 0.5:
  295. alias = char_name[0] + "兄" if random.random() < 0.5 else char_name + "前辈"
  296. entities_v3["角色"][char_id]["aliases"].append(alias)
  297. if alias not in alias_index:
  298. alias_index[alias] = []
  299. alias_index[alias].append({"type": "角色", "id": char_id})
  300. # 3. 新增地点
  301. if random.random() < CONFIG["new_location_rate"]:
  302. loc_name = self._generate_location_name()
  303. loc_id = self._generate_id("loc")
  304. entities_v3["地点"][loc_id] = {
  305. "canonical_name": loc_name,
  306. "desc": f"第{chapter}章出现的地点",
  307. "tier": "装饰",
  308. "aliases": [loc_name],
  309. "current": {},
  310. "history": [],
  311. }
  312. alias_index[loc_name] = [{"type": "地点", "id": loc_id}]
  313. # 4. 新增物品
  314. if random.random() < CONFIG["new_item_rate"]:
  315. item_name = random.choice(["灵", "仙", "神", "圣"]) + random.choice(["剑", "丹", "符", "器"])
  316. item_id = self._generate_id("item")
  317. entities_v3["物品"][item_id] = {
  318. "canonical_name": item_name,
  319. "desc": f"第{chapter}章获得的物品",
  320. "tier": "装饰",
  321. "aliases": [item_name],
  322. "current": {},
  323. "history": [],
  324. }
  325. if item_name not in alias_index:
  326. alias_index[item_name] = []
  327. alias_index[item_name].append({"type": "物品", "id": item_id})
  328. # 5. 埋设伏笔
  329. if random.random() < CONFIG["foreshadow_plant_rate"]:
  330. tier = random.choices(
  331. CONFIG["foreshadow_tiers"],
  332. weights=CONFIG["foreshadow_tier_weights"]
  333. )[0]
  334. target = chapter + random.randint(10, 100)
  335. state["foreshadowing"].append({
  336. "id": f"fs_{chapter}_{random.randint(1000, 9999)}",
  337. "content": f"第{chapter}章埋设的{tier}伏笔",
  338. "tier": tier,
  339. "status": "未回收",
  340. "planted_chapter": chapter,
  341. "target_chapter": target,
  342. })
  343. # 6. 回收伏笔
  344. active_foreshadows = [
  345. f for f in state["foreshadowing"]
  346. if f.get("status") == "未回收" and f.get("target_chapter", 999) <= chapter
  347. ]
  348. for fs in active_foreshadows:
  349. if random.random() < CONFIG["foreshadow_resolve_rate"]:
  350. fs["status"] = "已回收"
  351. fs["resolved_chapter"] = chapter
  352. # 7. 主角升级
  353. if chapter % CONFIG["protagonist_upgrade_interval"] == 0:
  354. ps = state["protagonist_state"]
  355. current_layer = ps.get("layer", 1)
  356. current_realm_idx = CONFIG["realms"].index(ps.get("realm", "练气"))
  357. if current_layer < CONFIG["layers_per_realm"]:
  358. ps["layer"] = current_layer + 1
  359. elif current_realm_idx < len(CONFIG["realms"]) - 1:
  360. ps["realm"] = CONFIG["realms"][current_realm_idx + 1]
  361. ps["layer"] = 1
  362. # 8. 更新关系
  363. if chapter % CONFIG["relationship_update_interval"] == 0:
  364. char_ids = list(entities_v3["角色"].keys())
  365. if len(char_ids) >= 2:
  366. char1, char2 = random.sample(char_ids, 2)
  367. rel_type = random.choice(CONFIG["relationship_types"])
  368. state["relationships"].append({
  369. "char1_id": char1,
  370. "char2_id": char2,
  371. "type": rel_type,
  372. "intensity": random.randint(30, 100),
  373. "established_chapter": chapter,
  374. })
  375. return state
  376. def run_simulation(self, checkpoint_interval: int = 10):
  377. """运行完整模拟"""
  378. print("🚀 开始500章沙盘模拟...")
  379. print(f"📁 测试目录: {self.project_root}")
  380. print()
  381. state = self.init_project()
  382. self.metrics.record_checkpoint(0, state, self.state_file)
  383. start_time = time.time()
  384. for chapter in range(1, CONFIG["total_chapters"] + 1):
  385. try:
  386. # 模拟章节
  387. state = self.simulate_chapter(chapter, state)
  388. # 原子写入
  389. write_start = time.time()
  390. atomic_write_json(self.state_file, state, use_lock=True, backup=False)
  391. write_duration = time.time() - write_start
  392. self.metrics.record_write_time(write_duration)
  393. # 记录检查点
  394. if chapter % checkpoint_interval == 0:
  395. self.metrics.record_checkpoint(chapter, state, self.state_file)
  396. elapsed = time.time() - start_time
  397. eta = elapsed / chapter * (CONFIG["total_chapters"] - chapter)
  398. print(f" 第 {chapter:3d} 章完成 | "
  399. f"文件 {self.state_file.stat().st_size / 1024:.1f}KB | "
  400. f"实体 {sum(len(e) for e in state['entities_v3'].values())} | "
  401. f"写入 {write_duration*1000:.1f}ms | "
  402. f"ETA {eta:.0f}s")
  403. except Exception as e:
  404. self.metrics.record_error(f"Chapter {chapter}: {str(e)}")
  405. print(f" ❌ 第 {chapter} 章错误: {e}")
  406. # 最终检查点
  407. self.metrics.record_checkpoint(CONFIG["total_chapters"], state, self.state_file)
  408. total_time = time.time() - start_time
  409. print()
  410. print(f"✅ 模拟完成!总耗时: {total_time:.1f}s")
  411. print()
  412. return self.metrics.generate_report()
  413. def main():
  414. """主函数"""
  415. # 创建临时测试目录
  416. test_dir = Path(tempfile.mkdtemp(prefix="webnovel_stress_test_"))
  417. try:
  418. simulator = ChapterSimulator(test_dir)
  419. report = simulator.run_simulation(checkpoint_interval=10)
  420. print(report)
  421. # 保存报告
  422. report_file = test_dir / "stress_test_report.md"
  423. report_file.write_text(report, encoding="utf-8")
  424. print(f"\n📄 报告已保存: {report_file}")
  425. # 询问是否保留测试数据
  426. print(f"\n测试数据目录: {test_dir}")
  427. print("(测试完成后可手动删除)")
  428. except KeyboardInterrupt:
  429. print("\n⚠️ 测试被中断")
  430. except Exception as e:
  431. print(f"\n❌ 测试失败: {e}")
  432. raise
  433. if __name__ == "__main__":
  434. main()