community_check.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. #!/usr/bin/env python3
  2. """COMMUNITY.md 收录PR的半自动检查。
  3. 机器检查(本脚本):
  4. 1. PR只改动 COMMUNITY.md(动了SKILL.md等核心文件直接判❌)
  5. 2. 新增行里的GitHub仓库真实存在且公开
  6. 3. 若目标仓库含SKILL.md(人物/主题skill类)→ 额外要求:
  7. - FIDELITY.md 存在且总分≥70(B级)
  8. - SKILL.md 含「诚实边界」章节
  9. - 有 references/ 调研底稿目录
  10. 若不含SKILL.md(合集/工具类)→ 仅存在性检查,标注请人工确认类别
  11. 4. 结果贴成PR评论
  12. 人工检查(维护者):伦理红线 + 内容质量抽查 + 合并。
  13. 本地测试:python3 community_check.py --check-repo owner/repo
  14. """
  15. import json
  16. import os
  17. import re
  18. import sys
  19. import urllib.request
  20. API = "https://api.github.com"
  21. def gh(path, token, raw=False):
  22. req = urllib.request.Request(API + path)
  23. if token:
  24. req.add_header("Authorization", f"Bearer {token}")
  25. if raw:
  26. req.add_header("Accept", "application/vnd.github.raw+json")
  27. try:
  28. with urllib.request.urlopen(req, timeout=15) as r:
  29. data = r.read().decode("utf-8", "replace")
  30. return data if raw else json.loads(data)
  31. except Exception:
  32. return None
  33. def check_target_repo(slug, token):
  34. """检查被收录的仓库,返回 (是否通过, 检查项列表)。"""
  35. items = []
  36. repo = gh(f"/repos/{slug}", token)
  37. if not repo:
  38. return False, [("❌", f"`{slug}` 仓库不存在或不可访问")]
  39. items.append(("✅", f"[`{slug}`](https://github.com/{slug}) 存在(★{repo.get('stargazers_count', 0)})"))
  40. skill_md = gh(f"/repos/{slug}/contents/SKILL.md", token, raw=True)
  41. if skill_md is None:
  42. items.append(("ℹ️", "无根目录SKILL.md → 按「合集/工具类」处理,请人工确认类别与内容"))
  43. return True, items
  44. items.append(("✅", "含 SKILL.md(按skill类审核)"))
  45. if "诚实边界" in skill_md or "Honest" in skill_md or "honest-limits" in skill_md.lower():
  46. items.append(("✅", "SKILL.md 含诚实边界章节"))
  47. else:
  48. items.append(("❌", "SKILL.md 缺「诚实边界」章节(收录门槛之一)"))
  49. refs = gh(f"/repos/{slug}/contents/references", token)
  50. if isinstance(refs, list) and refs:
  51. items.append(("✅", "含 references/ 调研底稿"))
  52. else:
  53. items.append(("❌", "缺 references/ 调研底稿(skill需自包含可溯源)"))
  54. fidelity = gh(f"/repos/{slug}/contents/FIDELITY.md", token, raw=True)
  55. if fidelity is None:
  56. items.append(("❌", "缺 FIDELITY.md 保真度评分卡(见 references/fidelity-scorecard.md)"))
  57. else:
  58. m = re.search(r"总分[::]\s*(\d+)\s*/\s*100", fidelity)
  59. if not m:
  60. items.append(("❌", "FIDELITY.md 存在但未解析到「总分:NN/100」"))
  61. elif int(m.group(1)) >= 70:
  62. items.append(("✅", f"保真度 {m.group(1)}/100 ≥ 70(B级门槛)"))
  63. else:
  64. items.append(("❌", f"保真度 {m.group(1)}/100 未达B级门槛(70)"))
  65. ok = all(mark != "❌" for mark, _ in items)
  66. return ok, items
  67. def main():
  68. if len(sys.argv) == 3 and sys.argv[1] == "--check-repo":
  69. ok, items = check_target_repo(sys.argv[2], os.environ.get("GITHUB_TOKEN", ""))
  70. for mark, text in items:
  71. print(mark, text)
  72. sys.exit(0 if ok else 1)
  73. token = os.environ["GITHUB_TOKEN"]
  74. repo = os.environ["GITHUB_REPOSITORY"]
  75. pr = os.environ["PR_NUMBER"]
  76. files = gh(f"/repos/{repo}/pulls/{pr}/files?per_page=100", token) or []
  77. names = [f["filename"] for f in files]
  78. lines = ["## 🤖 社区收录检查\n"]
  79. all_ok = True
  80. core_touched = [n for n in names if n != "COMMUNITY.md"]
  81. if core_touched:
  82. all_ok = False
  83. lines.append(f"❌ PR改动了 COMMUNITY.md 以外的文件:`{'`, `'.join(core_touched[:10])}`")
  84. if any(n == "SKILL.md" for n in core_touched):
  85. lines.append("  ⚠️ SKILL.md 是核心资产,不接受外部PR改动(见 CONTRIBUTING.md),请从PR中移除")
  86. else:
  87. lines.append("✅ 只改动 COMMUNITY.md")
  88. added = []
  89. for f in files:
  90. if f["filename"] == "COMMUNITY.md":
  91. for ln in (f.get("patch") or "").splitlines():
  92. if ln.startswith("+") and not ln.startswith("+++"):
  93. added += re.findall(r"github\.com/([\w.-]+/[\w.-]+)", ln)
  94. added = list(dict.fromkeys(s.rstrip(")/") for s in added))
  95. if not added:
  96. all_ok = False
  97. lines.append("❌ 未在新增行中检测到GitHub仓库链接")
  98. for slug in added[:5]:
  99. ok, items = check_target_repo(slug, token)
  100. all_ok = all_ok and ok
  101. lines.append(f"\n**{slug}**")
  102. lines += [f"- {mark} {text}" for mark, text in items]
  103. lines.append("\n---")
  104. lines.append(("✅ **机器检查通过**。" if all_ok else "❌ **机器检查未通过**,请按上述项修改后推送更新(会自动重跑)。"))
  105. lines.append("最终合并前维护者还会人工确认:伦理红线(CONTRIBUTING.md)+ 内容质量抽查。")
  106. body = "\n".join(lines)
  107. req = urllib.request.Request(
  108. f"{API}/repos/{repo}/issues/{pr}/comments",
  109. data=json.dumps({"body": body}).encode(),
  110. headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
  111. method="POST",
  112. )
  113. urllib.request.urlopen(req, timeout=15)
  114. sys.exit(0 if all_ok else 1)
  115. if __name__ == "__main__":
  116. main()