1
0

showcase.html 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. <!DOCTYPE html>
  2. <html lang="zh-CN">
  3. <head>
  4. <meta charset="UTF-8">
  5. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6. <title>自主技能优化系统</title>
  7. <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
  8. <style>
  9. :root {
  10. --accent: #D4532B;
  11. --black: #111111;
  12. --dark: #1a1a1a;
  13. --mid: #666666;
  14. --light: #999999;
  15. --border: #d0d0d0;
  16. --bg: #fafafa;
  17. --white: #ffffff;
  18. --col: calc((100% - 11 * 24px) / 12);
  19. }
  20. * { margin: 0; padding: 0; box-sizing: border-box; }
  21. body {
  22. font-family: 'Inter', -apple-system, sans-serif;
  23. background: var(--bg);
  24. color: var(--black);
  25. font-size: 15px;
  26. line-height: 1.6;
  27. -webkit-font-smoothing: antialiased;
  28. }
  29. .container {
  30. max-width: 1200px;
  31. margin: 0 auto;
  32. padding: 0 48px;
  33. }
  34. /* ═══════ HERO ═══════ */
  35. .hero {
  36. padding: 120px 0 80px;
  37. border-bottom: 1px solid var(--black);
  38. }
  39. .hero-label {
  40. font-size: 11px;
  41. font-weight: 600;
  42. letter-spacing: 3px;
  43. text-transform: uppercase;
  44. color: var(--accent);
  45. margin-bottom: 32px;
  46. }
  47. .hero h1 {
  48. font-size: 88px;
  49. font-weight: 900;
  50. line-height: 0.95;
  51. letter-spacing: -3px;
  52. margin-bottom: 40px;
  53. max-width: 900px;
  54. }
  55. .hero-subtitle {
  56. font-size: 20px;
  57. font-weight: 400;
  58. color: var(--mid);
  59. line-height: 1.5;
  60. max-width: 640px;
  61. margin-bottom: 56px;
  62. }
  63. .hero-subtitle strong {
  64. color: var(--black);
  65. font-weight: 600;
  66. }
  67. .hero-quote {
  68. border-left: 3px solid var(--accent);
  69. padding: 20px 0 20px 24px;
  70. max-width: 600px;
  71. }
  72. .hero-quote p {
  73. font-size: 16px;
  74. font-weight: 400;
  75. font-style: italic;
  76. color: var(--dark);
  77. line-height: 1.7;
  78. }
  79. .hero-quote cite {
  80. display: block;
  81. margin-top: 12px;
  82. font-size: 12px;
  83. font-weight: 600;
  84. letter-spacing: 1px;
  85. text-transform: uppercase;
  86. font-style: normal;
  87. color: var(--light);
  88. }
  89. /* ═══════ SECTION HEADERS ═══════ */
  90. .section {
  91. padding: 80px 0;
  92. border-bottom: 1px solid var(--border);
  93. }
  94. .container:has(+ .container > .footer) > .section {
  95. border-bottom: none; /* Each .section is the only child of its own .container, so `.section:last-child` matched every section and erased all dividers. Only the section directly before the footer drops its border, because the footer's top rule already closes the page. */
  96. }
  97. .section-num {
  98. font-size: 12px;
  99. font-weight: 700;
  100. letter-spacing: 2px;
  101. color: var(--accent);
  102. margin-bottom: 16px;
  103. font-variant-numeric: tabular-nums;
  104. }
  105. .section-title {
  106. font-size: 48px;
  107. font-weight: 800;
  108. line-height: 1.05;
  109. letter-spacing: -1.5px;
  110. margin-bottom: 16px;
  111. }
  112. .section-lead {
  113. font-size: 17px;
  114. color: var(--mid);
  115. max-width: 560px;
  116. line-height: 1.6;
  117. margin-bottom: 48px;
  118. }
  119. /* ═══════ PRINCIPLES ═══════ */
  120. .principles-grid {
  121. display: grid;
  122. grid-template-columns: 1fr 1fr;
  123. gap: 0;
  124. }
  125. .principle {
  126. padding: 32px 32px 32px 0;
  127. border-top: 1px solid var(--border);
  128. }
  129. .principle:nth-child(even) {
  130. padding-left: 32px;
  131. border-left: 1px solid var(--border);
  132. }
  133. .principle:nth-child(1),
  134. .principle:nth-child(2) {
  135. border-top: 1px solid var(--black);
  136. }
  137. .principle-num {
  138. font-size: 36px;
  139. font-weight: 800;
  140. color: var(--accent);
  141. margin-bottom: 12px;
  142. line-height: 1;
  143. }
  144. .principle h3 {
  145. font-size: 18px;
  146. font-weight: 700;
  147. margin-bottom: 8px;
  148. letter-spacing: -0.3px;
  149. }
  150. .principle p {
  151. font-size: 14px;
  152. color: var(--mid);
  153. line-height: 1.6;
  154. }
  155. .principle--full {
  156. grid-column: 1 / -1;
  157. padding-left: 0;
  158. border-left: none;
  159. }
  160. /* ═══════ RUBRIC ═══════ */
  161. .rubric-header {
  162. display: flex;
  163. gap: 48px;
  164. margin-bottom: 48px;
  165. }
  166. .rubric-stat {
  167. display: flex;
  168. align-items: baseline;
  169. gap: 12px;
  170. }
  171. .rubric-stat-num {
  172. font-size: 64px;
  173. font-weight: 900;
  174. line-height: 1;
  175. letter-spacing: -2px;
  176. }
  177. .rubric-stat-num--accent {
  178. color: var(--accent);
  179. }
  180. .rubric-stat-label {
  181. font-size: 13px;
  182. font-weight: 600;
  183. text-transform: uppercase;
  184. letter-spacing: 1.5px;
  185. color: var(--mid);
  186. }
  187. .rubric-table {
  188. width: 100%;
  189. border-collapse: collapse;
  190. margin-bottom: 40px;
  191. }
  192. .rubric-table caption {
  193. text-align: left;
  194. font-size: 11px;
  195. font-weight: 700;
  196. letter-spacing: 2.5px;
  197. text-transform: uppercase;
  198. color: var(--light);
  199. padding-bottom: 16px;
  200. }
  201. .rubric-table th {
  202. text-align: left;
  203. font-size: 11px;
  204. font-weight: 600;
  205. letter-spacing: 1.5px;
  206. text-transform: uppercase;
  207. color: var(--light);
  208. padding: 12px 16px 12px 0;
  209. border-bottom: 2px solid var(--black);
  210. }
  211. .rubric-table td {
  212. padding: 14px 16px 14px 0;
  213. border-bottom: 1px solid var(--border);
  214. font-size: 14px;
  215. vertical-align: top;
  216. }
  217. .rubric-table tr:last-child td {
  218. border-bottom: none;
  219. }
  220. .rubric-table .dim-num {
  221. font-weight: 700;
  222. color: var(--accent);
  223. font-variant-numeric: tabular-nums;
  224. width: 36px;
  225. }
  226. .rubric-table .dim-name {
  227. font-weight: 600;
  228. white-space: nowrap;
  229. }
  230. .rubric-table .dim-weight {
  231. font-weight: 800;
  232. font-size: 20px;
  233. font-variant-numeric: tabular-nums;
  234. text-align: center;
  235. width: 60px;
  236. color: var(--dark);
  237. }
  238. .rubric-table .dim-desc {
  239. color: var(--mid);
  240. line-height: 1.5;
  241. }
  242. /* ═══════ PHASES ═══════ */
  243. .phases {
  244. display: flex;
  245. flex-direction: column;
  246. gap: 0;
  247. }
  248. .phase {
  249. display: grid;
  250. grid-template-columns: 160px 1fr;
  251. gap: 40px;
  252. padding: 40px 0;
  253. border-top: 1px solid var(--border);
  254. }
  255. .phase:first-child {
  256. border-top: 1px solid var(--black);
  257. }
  258. .phase-id {
  259. font-size: 48px;
  260. font-weight: 900;
  261. color: var(--accent);
  262. line-height: 1;
  263. letter-spacing: -1px;
  264. }
  265. .phase-id span {
  266. display: block;
  267. font-size: 11px;
  268. font-weight: 600;
  269. letter-spacing: 2px;
  270. text-transform: uppercase;
  271. color: var(--light);
  272. margin-top: 8px;
  273. }
  274. .phase-body h3 {
  275. font-size: 22px;
  276. font-weight: 700;
  277. margin-bottom: 12px;
  278. letter-spacing: -0.3px;
  279. }
  280. .phase-body p {
  281. font-size: 14px;
  282. color: var(--mid);
  283. line-height: 1.6;
  284. margin-bottom: 16px;
  285. max-width: 560px;
  286. }
  287. .phase-steps {
  288. list-style: none;
  289. counter-reset: step;
  290. }
  291. .phase-steps li {
  292. counter-increment: step;
  293. padding: 8px 0 8px 32px;
  294. position: relative;
  295. font-size: 14px;
  296. line-height: 1.5;
  297. color: var(--dark);
  298. }
  299. .phase-steps li::before {
  300. content: counter(step);
  301. position: absolute;
  302. left: 0;
  303. font-size: 11px;
  304. font-weight: 700;
  305. color: var(--accent);
  306. width: 20px;
  307. height: 20px;
  308. display: flex;
  309. align-items: center;
  310. justify-content: center;
  311. top: 9px;
  312. }
  313. /* ═══════ RATCHET ═══════ */
  314. .ratchet-viz {
  315. display: flex;
  316. align-items: flex-end;
  317. gap: 0;
  318. padding: 48px 0;
  319. position: relative;
  320. }
  321. .ratchet-viz::before {
  322. content: '';
  323. position: absolute;
  324. bottom: 48px;
  325. left: 0;
  326. right: 0;
  327. height: 1px;
  328. background: var(--border);
  329. }
  330. .ratchet-step {
  331. flex: 1;
  332. display: flex;
  333. flex-direction: column;
  334. align-items: center;
  335. position: relative;
  336. }
  337. .ratchet-bar {
  338. width: 80px;
  339. background: var(--black);
  340. position: relative;
  341. z-index: 1;
  342. }
  343. .ratchet-bar--revert {
  344. background: none;
  345. border: 2px solid var(--border);
  346. }
  347. .ratchet-score {
  348. font-size: 36px;
  349. font-weight: 900;
  350. margin-bottom: 8px;
  351. letter-spacing: -1px;
  352. line-height: 1;
  353. }
  354. .ratchet-score--revert {
  355. color: var(--light);
  356. text-decoration: line-through;
  357. text-decoration-color: var(--accent);
  358. text-decoration-thickness: 2px;
  359. }
  360. .ratchet-label {
  361. font-size: 11px;
  362. font-weight: 700;
  363. letter-spacing: 1.5px;
  364. text-transform: uppercase;
  365. margin-top: 12px;
  366. padding: 4px 10px;
  367. }
  368. .ratchet-label--keep {
  369. background: var(--black);
  370. color: var(--white);
  371. }
  372. .ratchet-label--revert {
  373. background: none;
  374. border: 1px solid var(--accent);
  375. color: var(--accent);
  376. }
  377. .ratchet-label--baseline {
  378. background: var(--accent);
  379. color: var(--white);
  380. }
  381. .ratchet-arrow {
  382. position: absolute;
  383. top: 50%;
  384. right: -12px;
  385. width: 24px;
  386. height: 2px;
  387. background: var(--border);
  388. z-index: 2;
  389. }
  390. .ratchet-arrow::after {
  391. content: '';
  392. position: absolute;
  393. right: -1px;
  394. top: -4px;
  395. border: solid var(--border);
  396. border-width: 0 2px 2px 0;
  397. padding: 3px;
  398. transform: rotate(-45deg);
  399. }
  400. .ratchet-round {
  401. font-size: 12px;
  402. color: var(--light);
  403. margin-top: 8px;
  404. font-weight: 500;
  405. }
  406. /* ═══════ COMPARISON ═══════ */
  407. .comparison {
  408. display: grid;
  409. grid-template-columns: 1fr 1fr;
  410. gap: 0;
  411. }
  412. .comparison-col {
  413. padding: 40px;
  414. border: 1px solid var(--border);
  415. }
  416. .comparison-col:first-child {
  417. border-right: none;
  418. }
  419. .comparison-col--highlight {
  420. background: var(--black);
  421. color: var(--white);
  422. border-color: var(--black);
  423. }
  424. .comparison-tag {
  425. font-size: 11px;
  426. font-weight: 700;
  427. letter-spacing: 2px;
  428. text-transform: uppercase;
  429. margin-bottom: 16px;
  430. }
  431. .comparison-col:first-child .comparison-tag {
  432. color: var(--light);
  433. }
  434. .comparison-col--highlight .comparison-tag {
  435. color: var(--accent);
  436. }
  437. .comparison-col h3 {
  438. font-size: 24px;
  439. font-weight: 800;
  440. margin-bottom: 20px;
  441. letter-spacing: -0.5px;
  442. }
  443. .comparison-list {
  444. list-style: none;
  445. }
  446. .comparison-list li {
  447. padding: 10px 0;
  448. font-size: 14px;
  449. line-height: 1.5;
  450. border-bottom: 1px solid;
  451. }
  452. .comparison-col:first-child .comparison-list li {
  453. border-color: var(--border);
  454. color: var(--mid);
  455. }
  456. .comparison-col--highlight .comparison-list li {
  457. border-color: #333;
  458. color: #ccc;
  459. }
  460. .comparison-list li:last-child {
  461. border-bottom: none;
  462. }
  463. .comparison-list li strong {
  464. color: var(--black);
  465. }
  466. .comparison-col--highlight .comparison-list li strong {
  467. color: var(--white);
  468. }
  469. .check-icon {
  470. display: inline-block;
  471. width: 16px;
  472. height: 16px;
  473. margin-right: 8px;
  474. vertical-align: middle;
  475. position: relative;
  476. top: -1px;
  477. }
  478. /* ═══════ MAPPING TABLE ═══════ */
  479. .mapping-table {
  480. width: 100%;
  481. border-collapse: collapse;
  482. }
  483. .mapping-table th {
  484. text-align: left;
  485. font-size: 11px;
  486. font-weight: 700;
  487. letter-spacing: 2px;
  488. text-transform: uppercase;
  489. padding: 16px 24px 16px 0;
  490. border-bottom: 2px solid var(--black);
  491. }
  492. .mapping-table th:first-child {
  493. color: var(--light);
  494. }
  495. .mapping-table th:nth-child(2) {
  496. color: var(--accent);
  497. }
  498. .mapping-table th:last-child {
  499. color: var(--light);
  500. }
  501. .mapping-table td {
  502. padding: 16px 24px 16px 0;
  503. border-bottom: 1px solid var(--border);
  504. font-size: 14px;
  505. vertical-align: top;
  506. }
  507. .mapping-table td:first-child {
  508. font-weight: 600;
  509. color: var(--dark);
  510. white-space: nowrap;
  511. }
  512. .mapping-table td:nth-child(2) {
  513. font-weight: 600;
  514. color: var(--black);
  515. }
  516. .mapping-table td:last-child {
  517. color: var(--mid);
  518. line-height: 1.5;
  519. }
  520. .mapping-arrow {
  521. display: inline-block;
  522. color: var(--accent);
  523. font-weight: 400;
  524. margin: 0 4px;
  525. }
  526. /* ═══════ FOOTER ═══════ */
  527. .footer {
  528. padding: 48px 0;
  529. border-top: 1px solid var(--black);
  530. display: flex;
  531. justify-content: space-between;
  532. align-items: center;
  533. }
  534. .footer-left {
  535. font-size: 12px;
  536. font-weight: 600;
  537. letter-spacing: 1px;
  538. text-transform: uppercase;
  539. color: var(--light);
  540. }
  541. .footer-right {
  542. font-size: 12px;
  543. color: var(--light);
  544. }
  545. /* ═══════ RESPONSIVE ═══════ */
  546. @media (max-width: 768px) {
  547. .container { padding: 0 24px; }
  548. .hero { padding: 64px 0 48px; }
  549. .hero h1 { font-size: 48px; letter-spacing: -1.5px; }
  550. .hero-subtitle { font-size: 17px; }
  551. .section { padding: 48px 0; }
  552. .section-title { font-size: 32px; }
  553. .principles-grid { grid-template-columns: 1fr; }
  554. .principle:nth-child(even) { padding-left: 0; border-left: none; }
  555. .principle:nth-child(2) { border-top: 1px solid var(--border); }
  556. .phase { grid-template-columns: 1fr; gap: 16px; }
  557. .comparison { grid-template-columns: 1fr; }
  558. .comparison-col:first-child { border-right: 1px solid var(--border); border-bottom: none; }
  559. .ratchet-viz { flex-wrap: wrap; gap: 24px; }
  560. .ratchet-step { flex: none; width: calc(33% - 16px); }
  561. .rubric-stat-num { font-size: 48px; }
  562. .mapping-table td:first-child { white-space: normal; }
  563. }
  564. </style>
  565. </head>
  566. <body>
  567. <!-- ═══════════════════════════ HERO ═══════════════════════════ -->
  568. <div class="container">
  569. <section class="hero"> <!-- Page hero: label, product name, pipeline summary and the motivating quote. -->
  570. <div class="hero-label">自主技能优化系统</div>
  571. <h1 lang="en">Auto Skill<br>Optimizer</h1>
  572. <p class="hero-subtitle">
  573. <strong>评估</strong> &rarr; <strong>改进</strong> &rarr; <strong>实测验证</strong> &rarr; <strong>人类确认</strong> &rarr; <strong>保留或回滚</strong>
  574. </p>
  575. <div class="hero-quote">
  576. <p>「autoresearch 的核心想法很简单:让系统自主运行实验,评估结果,只保留有效的改进。一个只能向前转的棘轮。」</p>
  577. <cite>Andrej Karpathy &mdash; 谈自主实验循环</cite>
  578. </div>
  579. </section>
  580. </div>
  581. <!-- ═══════════════════════════ 01 PRINCIPLES ═══════════════════════════ -->
  582. <div class="container">
  583. <section class="section">
  584. <div class="section-num">01</div>
  585. <h2 class="section-title">核心原则</h2>
  586. <p class="section-lead">五条规则,防止优化器偏移方向、自我刷分或引入退化。</p>
  587. <div class="principles-grid">
  588. <div class="principle">
  589. <div class="principle-num">01</div>
  590. <h3>单一可编辑资产</h3>
  591. <p>每轮优化只针对一个 SKILL.md 文件。一次修改,一次测量,一次决策。不做跨文件编辑,避免归因模糊。</p>
  592. </div>
  593. <div class="principle">
  594. <div class="principle-num">02</div>
  595. <h3>双重评估</h3>
  596. <p>静态结构分析捕捉格式和完整性问题。实测执行捕捉行为退化。两者缺一不可。</p>
  597. </div>
  598. <div class="principle">
  599. <div class="principle-num">03</div>
  600. <h3>棘轮机制</h3>
  601. <p>提升总分的改进被 commit。降低分数的修改自动 revert。分数只能上升或持平,永远不会下降。</p>
  602. </div>
  603. <div class="principle">
  604. <div class="principle-num">04</div>
  605. <h3>独立评分</h3>
  606. <p>编辑 Skill 的 Agent 永远不为自己打分。由独立的子 Agent 评估输出质量,防止自我表扬偏差。</p>
  607. </div>
  608. <div class="principle principle--full">
  609. <div class="principle-num">05</div>
  610. <h3>人在回路</h3>
  611. <p>每个 Skill 的优化循环完成后,系统暂停。向人类展示 diff 摘要、分数变化和测试输出对比。没有明确确认,任何改动都不会生效。</p>
  612. </div>
  613. </div>
  614. </section>
  615. </div>
  616. <!-- ═══════════════════════════ 02 RUBRIC ═══════════════════════════ -->
  617. <div class="container">
  618. <section class="section">
  619. <div class="section-num">02</div>
  620. <h2 class="section-title">8维度<br>评估体系</h2>
  621. <p class="section-lead">100分评估体系。结构维度捕捉你能看到的问题,效果维度捕捉只有运行时才能感知的问题。</p>
  622. <div class="rubric-header">
  623. <div class="rubric-stat">
  624. <div class="rubric-stat-num">60</div>
  625. <div class="rubric-stat-label">结构<br>分值</div>
  626. </div>
  627. <div class="rubric-stat">
  628. <div class="rubric-stat-num rubric-stat-num--accent">40</div>
  629. <div class="rubric-stat-label">效果<br>分值</div>
  630. </div>
  631. </div>
  632. <table class="rubric-table">
  633. <caption>结构维度 &mdash; 静态分析</caption>
  634. <thead> <!-- scope="col" ties each header cell to its column for assistive technology -->
  635. <tr>
  636. <th scope="col" style="width:36px">#</th>
  637. <th scope="col" style="width:180px">维度</th>
  638. <th scope="col" style="width:60px">权重</th>
  639. <th scope="col">评分标准</th>
  640. </tr>
  641. </thead>
  642. <tbody>
  643. <tr>
  644. <td class="dim-num">1</td>
  645. <td class="dim-name">Frontmatter质量</td>
  646. <td class="dim-weight">8</td>
  647. <td class="dim-desc">名称正确,描述包含功能/触发条件/使用场景,不超过1024字符</td>
  648. </tr>
  649. <tr>
  650. <td class="dim-num">2</td>
  651. <td class="dim-name">工作流清晰度</td>
  652. <td class="dim-weight">15</td>
  653. <td class="dim-desc">步骤有编号、可执行,每步都有明确的输入/输出</td>
  654. </tr>
  655. <tr>
  656. <td class="dim-num">3</td>
  657. <td class="dim-name">边界条件覆盖</td>
  658. <td class="dim-weight">10</td>
  659. <td class="dim-desc">错误处理、降级方案、常见故障恢复</td>
  660. </tr>
  661. <tr>
  662. <td class="dim-num">4</td>
  663. <td class="dim-name">检查点设计</td>
  664. <td class="dim-weight">7</td>
  665. <td class="dim-desc">关键决策前需用户确认,防止自主失控</td>
  666. </tr>
  667. <tr>
  668. <td class="dim-num">5</td>
  669. <td class="dim-name">指令具体性</td>
  670. <td class="dim-weight">15</td>
  671. <td class="dim-desc">无歧义,具体的参数/格式/示例,可直接执行</td>
  672. </tr>
  673. <tr>
  674. <td class="dim-num">6</td>
  675. <td class="dim-name">资源整合度</td>
  676. <td class="dim-weight">5</td>
  677. <td class="dim-desc">所有引用的脚本/资产路径存在且可访问</td>
  678. </tr>
  679. </tbody>
  680. </table>
  681. <table class="rubric-table">
  682. <caption>效果维度 &mdash; 需要实测</caption>
  683. <thead> <!-- scope="col" ties each header cell to its column for assistive technology -->
  684. <tr>
  685. <th scope="col" style="width:36px">#</th>
  686. <th scope="col" style="width:180px">维度</th>
  687. <th scope="col" style="width:60px">权重</th>
  688. <th scope="col">评分标准</th>
  689. </tr>
  690. </thead>
  691. <tbody>
  692. <tr>
  693. <td class="dim-num">7</td>
  694. <td class="dim-name">整体架构</td>
  695. <td class="dim-weight">15</td>
  696. <td class="dim-desc">层次清晰,无冗余或遗漏,符合生态系统约定</td>
  697. </tr>
  698. <tr>
  699. <td class="dim-num">8</td>
  700. <td class="dim-name">实测表现</td>
  701. <td class="dim-weight">25</td>
  702. <td class="dim-desc">运行2-3个测试提示词,对比启用 Skill 和 baseline 的输出质量</td>
  703. </tr>
  704. </tbody>
  705. </table>
  706. </section>
  707. </div>
  708. <!-- ═══════════════════════════ 03 PHASES ═══════════════════════════ -->
  709. <div class="container">
  710. <section class="section">
  711. <div class="section-num">03</div>
  712. <h2 class="section-title">优化循环</h2>
  713. <p class="section-lead">从初始化到最终报告的五个阶段。系统在每个阶段内自主运行,但在阶段之间暂停等待人类审查。</p>
  714. <div class="phases">
  715. <div class="phase">
  716. <div class="phase-id">
  717. 0
  718. <span>初始化</span>
  719. </div>
  720. <div class="phase-body">
  721. <h3>范围与分支设置</h3>
  722. <p>确定优化范围,创建版本控制基础设施,加载历史记录。</p>
  723. <ol class="phase-steps">
  724. <li>确认范围:全部 Skill 还是用户指定子集</li>
  725. <li>扫描 .claude/skills/*/SKILL.md 获取目标列表</li>
  726. <li>创建 git 分支:auto-optimize/YYYYMMDD-HHMM</li>
  727. <li>初始化或加载 results.tsv 用于历史追踪</li>
  728. </ol>
  729. </div>
  730. </div>
  731. <div class="phase">
  732. <div class="phase-id">
  733. 0.5
  734. <span>设计</span>
  735. </div>
  736. <div class="phase-body">
  737. <h3>测试提示词工程</h3>
  738. <p>在任何评分之前,先设计用于衡量效果的测试提示词。没有好的测试,优化器就是盲飞。</p>
  739. <ol class="phase-steps">
  740. <li>阅读每个 SKILL.md,理解其声明的能力</li>
  741. <li>为每个 Skill 设计2-3个提示词:一个正常路径,一个模糊场景</li>
  742. <li>保存到每个 Skill 目录下的 test-prompts.json</li>
  743. <li>在继续之前,将所有测试提示词提交人类审批</li>
  744. </ol>
  745. </div>
  746. </div>
  747. <div class="phase">
  748. <div class="phase-id">
  749. 1
  750. <span>基线</span>
  751. </div>
  752. <div class="phase-body">
  753. <h3>全维度评分</h3>
  754. <p>为每个 Skill 建立起始分数。结构评分由主 Agent 完成,效果评分由独立子 Agent 完成。</p>
  755. <ol class="phase-steps">
  756. <li>阅读 SKILL.md,为维度1-7评分并附理由</li>
  757. <li>启动子 Agent:分别在启用和未启用 Skill 的情况下运行测试提示词</li>
  758. <li>对比输出,为维度8评分(如子 Agent 不可用则标记 dry_run)</li>
  759. <li>计算加权总分,记录到 results.tsv</li>
  760. <li>展示评分卡,暂停等待人类确认</li>
  761. </ol>
  762. </div>
  763. </div>
  764. <div class="phase">
  765. <div class="phase-id">
  766. 2
  767. <span>优化</span>
  768. </div>
  769. <div class="phase-body">
  770. <h3>Hill-Climbing 循环</h3>
  771. <p>按分数从低到高处理 Skill。每轮:诊断最弱维度,提出一个针对性修复,执行,重新评分,做出决定。</p>
  772. <ol class="phase-steps">
  773. <li>找出该 Skill 得分最低的维度</li>
  774. <li>生成一项具体改进(改什么,为什么改,预期分数变化)</li>
  775. <li>编辑 SKILL.md,用结构化消息 git commit</li>
  776. <li>重新评分:结构由主 Agent,效果由独立子 Agent</li>
  777. <li>新分 > 旧分:保留。否则:git revert,进入下一个 Skill</li>
  778. <li>每个 Skill 完成后:展示 diff + 分数变化,等待人类确认</li>
  779. </ol>
  780. </div>
  781. </div>
  782. <div class="phase">
  783. <div class="phase-id">
  784. 3
  785. <span>报告</span>
  786. </div>
  787. <div class="phase-body">
  788. <h3>总结与指标</h3>
  789. <p>将所有结果汇总为最终优化报告,包含优化前后分数、实验次数和关键改进。</p>
  790. <ol class="phase-steps">
  791. <li>统计总实验次数、保留次数、回滚次数和测试模式</li>
  792. <li>生成每个 Skill 的优化前后分数对比表</li>
  793. <li>列出影响最大的改进及其对应维度</li>
  794. <li>归档 results.tsv 供未来 baseline 参考</li>
  795. </ol>
  796. </div>
  797. </div>
  798. </div>
  799. </section>
  800. </div>
  801. <!-- ═══════════════════════════ 04 RATCHET ═══════════════════════════ -->
  802. <div class="container">
  803. <section class="section">
  804. <div class="section-num">04</div>
  805. <h2 class="section-title">棘轮机制</h2>
  806. <p class="section-lead">分数只能上升。每轮要么改进 Skill,要么干净地回滚。不会随时间积累局部退化。</p>
  807. <div class="ratchet-viz"> <!-- Illustrative score history: one step per round, with the score, a bar drawn at 2px per point, a keep/revert label and the round number. -->
  808. <div class="ratchet-step">
  809. <div class="ratchet-score">72</div>
  810. <div style="height:144px" class="ratchet-bar"></div>
  811. <div class="ratchet-label ratchet-label--baseline">基线</div>
  812. <div class="ratchet-round">轮次 0</div>
  813. <div class="ratchet-arrow"></div>
  814. </div>
  815. <div class="ratchet-step">
  816. <div class="ratchet-score">78</div>
  817. <div style="height:156px" class="ratchet-bar"></div>
  818. <div class="ratchet-label ratchet-label--keep">保留</div>
  819. <div class="ratchet-round">轮次 1</div>
  820. <div class="ratchet-arrow"></div>
  821. </div>
  822. <div class="ratchet-step">
  823. <div class="ratchet-score ratchet-score--revert">75</div>
  824. <div style="height:150px" class="ratchet-bar ratchet-bar--revert"></div>
  825. <div class="ratchet-label ratchet-label--revert">回滚</div>
  826. <div class="ratchet-round">轮次 2</div>
  827. <div class="ratchet-arrow"></div>
  828. </div>
  829. <div class="ratchet-step">
  830. <div class="ratchet-score">84</div>
  831. <div style="height:168px" class="ratchet-bar"></div>
  832. <div class="ratchet-label ratchet-label--keep">保留</div>
  833. <div class="ratchet-round">轮次 3</div>
  834. <div class="ratchet-arrow"></div>
  835. </div>
  836. <div class="ratchet-step">
  837. <div class="ratchet-score">87</div>
  838. <div style="height:174px" class="ratchet-bar"></div>
  839. <div class="ratchet-label ratchet-label--keep">保留</div>
  840. <div class="ratchet-round">轮次 4</div>
  841. </div>
  842. </div>
  843. </section>
  844. </div>
  845. <!-- ═══════════════════════════ 05 COMPARISON ═══════════════════════════ -->
  846. <div class="container">
  847. <section class="section">
  848. <div class="section-num">05</div>
  849. <h2 class="section-title">为什么需要<br>双重评估</h2>
  850. <p class="section-lead">单看结构无法判断 Skill 是否真正好用。单看效果无法判断它为何失败。</p>
  851. <div class="comparison">
  852. <div class="comparison-col">
  853. <div class="comparison-tag">传统方法</div>
  854. <h3>纯结构审查</h3>
  855. <ul class="comparison-list">
  856. <li>检查 frontmatter 是否存在且格式正确</li>
  857. <li>验证步骤是否有编号和描述</li>
  858. <li>确认文件路径和引用是否有效</li>
  859. <li>无法检测 Skill 是否<strong>真正提升了</strong>输出质量</li>
  860. <li>无法检测<strong>看似正确</strong>实则产生差结果的误导性指令</li>
  861. <li>无法检测<strong>弊大于利</strong>的过度约束</li>
  862. </ul>
  863. </div>
  864. <div class="comparison-col comparison-col--highlight">
  865. <div class="comparison-tag">Auto Skill Optimizer</div>
  866. <h3>双重评估</h3>
  867. <ul class="comparison-list">
  868. <li><strong>结构评分</strong>捕捉格式、完整性和可读性问题</li>
  869. <li><strong>实测执行</strong>揭示真实场景下的行为影响</li>
  870. <li><strong>基线对比</strong>衡量 Skill 是增值还是减值</li>
  871. <li><strong>独立子 Agent</strong>防止自我表扬的评分偏差</li>
  872. <li><strong>测试提示词设计</strong>确保评估针对真实用户场景</li>
  873. <li><strong>Dry-run 降级</strong>在实测不可用时提供覆盖</li>
  874. </ul>
  875. </div>
  876. </div>
  877. </section>
  878. </div>
  879. <!-- ═══════════════════════════ 06 MAPPING ═══════════════════════════ -->
  880. <div class="container">
  881. <section class="section">
  882. <div class="section-num">06</div>
  883. <h2 class="section-title">概念映射</h2>
  884. <p class="section-lead">autoresearch 的核心抽象如何转化为 Skill 优化。同一台机器,不同的领域。</p>
  885. <table class="mapping-table">
  886. <thead> <!-- scope="col" ties each header cell to its column for assistive technology -->
  887. <tr>
  888. <th scope="col" style="width:220px">Autoresearch</th>
  889. <th scope="col" style="width:220px">Skill Optimizer</th>
  890. <th scope="col">实现细节</th>
  891. </tr>
  892. </thead>
  893. <tbody>
  894. <tr>
  895. <td>研究论文草稿</td>
  896. <td>SKILL.md 文件</td>
  897. <td>唯一的可编辑产物。所有改进都表现为对这一个文件的编辑。</td>
  898. </tr>
  899. <tr>
  900. <td>评估指标</td>
  901. <td>8维度评估体系</td>
  902. <td>跨结构(60分)和效果(40分)的加权评分,总计100分。</td>
  903. </tr>
  904. <tr>
  905. <td>实验循环</td>
  906. <td>阶段2 hill-climbing</td>
  907. <td>诊断最弱维度,提出修复,执行,重新评分,保留或回滚。每个 Skill 最多3轮。</td>
  908. </tr>
  909. <tr>
  910. <td>版本控制</td>
  911. <td>Git 分支 + revert</td>
  912. <td>每次编辑都是一次 commit。退化通过 revert(新 commit)回滚。完整审计记录。</td>
  913. </tr>
  914. <tr>
  915. <td>自动化评估</td>
  916. <td>子 Agent 测试执行</td>
  917. <td>独立 Agent 分别在启用和未启用 Skill 的情况下运行测试提示词,对比输出质量。</td>
  918. </tr>
  919. <tr>
  920. <td>人类审查关卡</td>
  921. <td>阶段转换暂停</td>
  922. <td>系统在基线评分后和每个 Skill 优化后暂停。展示 diff + 分数变化。</td>
  923. </tr>
  924. <tr>
  925. <td>探索 vs 利用</td>
  926. <td>阶段2.5探索性重写</td>
  927. <td>当 hill-climbing 停滞(连续2次在第1轮就中断),提出完整的结构重写。</td>
  928. </tr>
  929. <tr>
  930. <td>实验日志</td>
  931. <td>results.tsv</td>
  932. <td>带时间戳的记录:commit 哈希、Skill 名称、新旧分数、保留/回滚状态、评估模式。</td>
  933. </tr>
  934. </tbody>
  935. </table>
  936. </section>
  937. </div>
  938. <!-- ═══════════════════════════ FOOTER ═══════════════════════════ -->
  939. <div class="container">
  940. <footer class="footer">
  941. <div class="footer-left">Auto Skill Optimizer</div>
  942. <div class="footer-right">灵感源自 Karpathy autoresearch &mdash; 为 Claude Code Skill 生态而建</div>
  943. </footer>
  944. </div>
  945. </body>
  946. </html>