fix: remove hardcoded author emails from throughput script

Replace the hardcoded GARRY_EMAILS constant with --email CLI flags
(repeatable), a GSTACK_AUTHOR_EMAILS env var, and a git config user.email
fallback. Same behavior, no PII checked in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-04-18 15:36:50 +08:00
parent 0a803f9e81
commit 4d2c8d94d0

View File

@@ -1,17 +1,18 @@
#!/usr/bin/env bun #!/usr/bin/env bun
/** /**
* Garry's 2013 vs 2026 output throughput comparison. * 2013 vs 2026 output throughput comparison.
* *
* Rationale: the README hero used to brag "600,000+ lines of production code" as * Rationale: the README hero used to brag "600,000+ lines of production code" as
* a proxy for productivity. After Louise de Sadeleer's review * a proxy for productivity. After Louise de Sadeleer's review
* (https://x.com/LouiseDSadeleer/status/2045139351227478199) called out LOC as * (https://x.com/LouiseDSadeleer/status/2045139351227478199) called out LOC as
* a vanity metric when AI writes most of the code, we replaced it with a real * a vanity metric when AI writes most of the code, we replaced it with a real
* pro-rata multiple on logical code change: non-blank, non-comment lines added * pro-rata multiple on logical code change: non-blank, non-comment lines added
* across Garry-authored commits in public repos, computed for 2013 and 2026. * across authored commits in public repos, computed for 2013 and 2026.
* *
* Algorithm (per Codex Pass 2 review in PLAN_TUNING_V1): * Algorithm (per Codex Pass 2 review in PLAN_TUNING_V1):
* 1. For each year (2013, 2026), enumerate authored commits on public * 1. For each year (2013, 2026), enumerate authored commits. Author filter
* garrytan/* repos. Email filter: garry@ycombinator.com + known aliases. * comes from --email CLI flags (repeatable), the GSTACK_AUTHOR_EMAILS env
* var (comma-separated), or falls back to `git config user.email`.
* 2. For each commit, git diff <commit>^ <commit> produces a unified diff. * 2. For each commit, git diff <commit>^ <commit> produces a unified diff.
* 3. Extract ADDED lines from the diff. Classify as "logical" by filtering * 3. Extract ADDED lines from the diff. Classify as "logical" by filtering
* out blank lines + single-line comments (per-language regex; imperfect * out blank lines + single-line comments (per-language regex; imperfect
@@ -21,20 +22,45 @@
* private work exclusion. * private work exclusion.
* *
* Requires: scc (for classification when available; falls back to regex). * Requires: scc (for classification when available; falls back to regex).
* Run: bun run scripts/garry-output-comparison.ts [--repo-root <path>] * Run: bun run scripts/garry-output-comparison.ts [--repo-root <path>] [--email <addr>...]
* GSTACK_AUTHOR_EMAILS=a@x.com,b@y.com bun run scripts/garry-output-comparison.ts
* Output: docs/throughput-2013-vs-2026.json * Output: docs/throughput-2013-vs-2026.json
*/ */
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
import { execSync } from 'child_process'; import { execSync } from 'child_process';
/**
 * Resolve the author emails used to filter commits.
 *
 * Sources are tried in priority order; the first one that yields at least one
 * non-empty address wins:
 *   1. `--email <addr>` or `--email=<addr>` CLI flags (repeatable).
 *   2. The `GSTACK_AUTHOR_EMAILS` env var (comma-separated).
 *   3. `git config user.email`.
 *
 * If no source yields an address, a usage hint is written to stderr and the
 * process exits with status 1.
 *
 * @param argv CLI arguments to scan for `--email` flags.
 * @returns A non-empty list of trimmed author emails.
 */
function resolveAuthorEmails(argv: string[]): string[] {
  const inlinePrefix = '--email=';
  const fromArgs: string[] = [];
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i] ?? '';
    if (arg === '--email') {
      const value = argv[i + 1];
      // A dangling `--email`, or one directly followed by another flag, has no
      // value; do not swallow the next flag as if it were an address.
      if (value !== undefined && !value.startsWith('-')) {
        const email = value.trim();
        if (email) fromArgs.push(email);
        i++;
      }
    } else if (arg.startsWith(inlinePrefix)) {
      const email = arg.slice(inlinePrefix.length).trim();
      if (email) fromArgs.push(email);
    }
  }
  if (fromArgs.length > 0) return fromArgs;

  // Only accept the env var when it actually contains an address. A value such
  // as "," must not produce an empty list, because an empty list means no
  // author filter at all and every commit would be counted.
  const fromEnv = (process.env.GSTACK_AUTHOR_EMAILS ?? '')
    .split(',')
    .map(s => s.trim())
    .filter(Boolean);
  if (fromEnv.length > 0) return fromEnv;

  try {
    const gitEmail = execSync('git config user.email', {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    if (gitEmail) return [gitEmail];
  } catch {
    // git is missing or user.email is unset; fall through to the usage error.
  }

  process.stderr.write(
    'No author email configured. Pass --email <addr> (repeatable), ' +
    'set GSTACK_AUTHOR_EMAILS=a@x.com,b@y.com, or configure git user.email.\n'
  );
  process.exit(1);
}
const TARGET_YEARS = [2013, 2026]; const TARGET_YEARS = [2013, 2026];
@@ -139,10 +165,10 @@ function isLogicalLine(line: string): boolean {
return true; return true;
} }
function enumerateCommits(year: number, repoPath: string): string[] { function enumerateCommits(year: number, repoPath: string, authorEmails: string[]): string[] {
const since = `${year}-01-01`; const since = `${year}-01-01`;
const until = `${year}-12-31`; const until = `${year}-12-31`;
const authorFlags = GARRY_EMAILS.map(e => `--author=${e}`).join(' '); const authorFlags = authorEmails.map(e => `--author=${e}`).join(' ');
try { try {
const cmd = `git -C "${repoPath}" log --since=${since} --until=${until} ${authorFlags} --pretty=format:'%H' 2>/dev/null`; const cmd = `git -C "${repoPath}" log --since=${since} --until=${until} ${authorFlags} --pretty=format:'%H' 2>/dev/null`;
const out = execSync(cmd, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] }); const out = execSync(cmd, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] });
@@ -217,8 +243,8 @@ function daysElapsed(year: number, now: Date = new Date()): number {
return Math.max(1, Math.floor(diffMs / (24 * 60 * 60 * 1000)) + 1); return Math.max(1, Math.floor(diffMs / (24 * 60 * 60 * 1000)) + 1);
} }
function analyzeRepo(repoPath: string, year: number, sccAvailable: boolean, now: Date = new Date()): PerYearResult { function analyzeRepo(repoPath: string, year: number, authorEmails: string[], sccAvailable: boolean, now: Date = new Date()): PerYearResult {
const commits = enumerateCommits(year, repoPath); const commits = enumerateCommits(year, repoPath, authorEmails);
const perLang: Record<string, { commits: number; logical_added: number }> = {}; const perLang: Record<string, { commits: number; logical_added: number }> = {};
let rawTotal = 0; let rawTotal = 0;
let logicalTotal = 0; let logicalTotal = 0;
@@ -312,10 +338,12 @@ function main() {
process.stderr.write('Continuing with regex-based logical-line classification (an approximation).\n\n'); process.stderr.write('Continuing with regex-based logical-line classification (an approximation).\n\n');
} }
const authorEmails = resolveAuthorEmails(args);
// For V1, we analyze the single repo at repoRoot. Future work: enumerate // For V1, we analyze the single repo at repoRoot. Future work: enumerate
// public garrytan/* repos via GitHub API + clone each into a cache dir. // public repos via GitHub API + clone each into a cache dir.
const now = new Date(); const now = new Date();
const years = TARGET_YEARS.map(y => analyzeRepo(repoRoot, y, sccAvailable, now)); const years = TARGET_YEARS.map(y => analyzeRepo(repoRoot, y, authorEmails, sccAvailable, now));
const y2013 = years.find(y => y.year === 2013); const y2013 = years.find(y => y.year === 2013);
const y2026 = years.find(y => y.year === 2026); const y2026 = years.find(y => y.year === 2026);
@@ -371,8 +399,8 @@ function main() {
sccAvailable sccAvailable
? 'Logical-line classification uses scc-aware regex (approximate).' ? 'Logical-line classification uses scc-aware regex (approximate).'
: 'Logical-line classification uses a crude regex fallback (scc not installed). Exclude blank lines + single-line comments; does not catch block comments or docstrings. Approximate.', : 'Logical-line classification uses a crude regex fallback (scc not installed). Exclude blank lines + single-line comments; does not catch block comments or docstrings. Approximate.',
'This script analyzes a single repo at a time. Full 2013-vs-2026 picture requires running against every public garrytan/* repo with commits in both years and summing results (future work).', 'This script analyzes a single repo at a time. Full 2013-vs-2026 picture requires running against every public repo with commits in both years and summing results (future work).',
'Authorship attribution relies on commit email matching. Historical aliases are listed in GARRY_EMAILS at the top of this script.', 'Authorship attribution relies on commit email matching. Supply historical aliases via --email flags or GSTACK_AUTHOR_EMAILS.',
], ],
version: 1, version: 1,
}; };