mirror of
https://github.com/garrytan/gstack.git
synced 2026-05-16 09:12:13 +08:00
fix: remove hardcoded author emails from throughput script
Replace the hardcoded GARRY_EMAILS constant with --email CLI flags (repeatable), a GSTACK_AUTHOR_EMAILS env var, and a git config user.email fallback. Same behavior, no PII checked in. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,17 +1,18 @@
|
|||||||
#!/usr/bin/env bun
|
#!/usr/bin/env bun
|
||||||
/**
|
/**
|
||||||
* Garry's 2013 vs 2026 output throughput comparison.
|
* 2013 vs 2026 output throughput comparison.
|
||||||
*
|
*
|
||||||
* Rationale: the README hero used to brag "600,000+ lines of production code" as
|
* Rationale: the README hero used to brag "600,000+ lines of production code" as
|
||||||
* a proxy for productivity. After Louise de Sadeleer's review
|
* a proxy for productivity. After Louise de Sadeleer's review
|
||||||
* (https://x.com/LouiseDSadeleer/status/2045139351227478199) called out LOC as
|
* (https://x.com/LouiseDSadeleer/status/2045139351227478199) called out LOC as
|
||||||
* a vanity metric when AI writes most of the code, we replaced it with a real
|
* a vanity metric when AI writes most of the code, we replaced it with a real
|
||||||
* pro-rata multiple on logical code change: non-blank, non-comment lines added
|
* pro-rata multiple on logical code change: non-blank, non-comment lines added
|
||||||
* across Garry-authored commits in public repos, computed for 2013 and 2026.
|
* across authored commits in public repos, computed for 2013 and 2026.
|
||||||
*
|
*
|
||||||
* Algorithm (per Codex Pass 2 review in PLAN_TUNING_V1):
|
* Algorithm (per Codex Pass 2 review in PLAN_TUNING_V1):
|
||||||
* 1. For each year (2013, 2026), enumerate authored commits on public
|
* 1. For each year (2013, 2026), enumerate authored commits. Author filter
|
||||||
* garrytan/* repos. Email filter: garry@ycombinator.com + known aliases.
|
* comes from --email CLI flags (repeatable), the GSTACK_AUTHOR_EMAILS env
|
||||||
|
* var (comma-separated), or falls back to `git config user.email`.
|
||||||
* 2. For each commit, git diff <commit>^ <commit> produces a unified diff.
|
* 2. For each commit, git diff <commit>^ <commit> produces a unified diff.
|
||||||
* 3. Extract ADDED lines from the diff. Classify as "logical" by filtering
|
* 3. Extract ADDED lines from the diff. Classify as "logical" by filtering
|
||||||
* out blank lines + single-line comments (per-language regex; imperfect
|
* out blank lines + single-line comments (per-language regex; imperfect
|
||||||
@@ -21,20 +22,45 @@
|
|||||||
* private work exclusion.
|
* private work exclusion.
|
||||||
*
|
*
|
||||||
* Requires: scc (for classification when available; falls back to regex).
|
* Requires: scc (for classification when available; falls back to regex).
|
||||||
* Run: bun run scripts/garry-output-comparison.ts [--repo-root <path>]
|
* Run: bun run scripts/garry-output-comparison.ts [--repo-root <path>] [--email <addr>...]
|
||||||
|
* GSTACK_AUTHOR_EMAILS=a@x.com,b@y.com bun run scripts/garry-output-comparison.ts
|
||||||
* Output: docs/throughput-2013-vs-2026.json
|
* Output: docs/throughput-2013-vs-2026.json
|
||||||
*/
|
*/
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { execSync } from 'child_process';
|
import { execSync } from 'child_process';
|
||||||
|
|
||||||
// Known historical email aliases for Garry. Add more via PR if needed.
|
function resolveAuthorEmails(argv: string[]): string[] {
|
||||||
const GARRY_EMAILS = [
|
const fromArgs: string[] = [];
|
||||||
'garry@ycombinator.com',
|
for (let i = 0; i < argv.length; i++) {
|
||||||
'garry@posterous.com',
|
if (argv[i] === '--email' && argv[i + 1]) {
|
||||||
'garrytan@gmail.com',
|
fromArgs.push(argv[i + 1]);
|
||||||
'garry@garrytan.com',
|
i++;
|
||||||
];
|
}
|
||||||
|
}
|
||||||
|
if (fromArgs.length > 0) return fromArgs;
|
||||||
|
|
||||||
|
const envVar = process.env.GSTACK_AUTHOR_EMAILS;
|
||||||
|
if (envVar && envVar.trim()) {
|
||||||
|
return envVar.split(',').map(s => s.trim()).filter(Boolean);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const gitEmail = execSync('git config user.email', {
|
||||||
|
encoding: 'utf-8',
|
||||||
|
stdio: ['ignore', 'pipe', 'ignore'],
|
||||||
|
}).trim();
|
||||||
|
if (gitEmail) return [gitEmail];
|
||||||
|
} catch {
|
||||||
|
// fall through
|
||||||
|
}
|
||||||
|
|
||||||
|
process.stderr.write(
|
||||||
|
'No author email configured. Pass --email <addr> (repeatable), ' +
|
||||||
|
'set GSTACK_AUTHOR_EMAILS=a@x.com,b@y.com, or configure git user.email.\n'
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
const TARGET_YEARS = [2013, 2026];
|
const TARGET_YEARS = [2013, 2026];
|
||||||
|
|
||||||
@@ -139,10 +165,10 @@ function isLogicalLine(line: string): boolean {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function enumerateCommits(year: number, repoPath: string): string[] {
|
function enumerateCommits(year: number, repoPath: string, authorEmails: string[]): string[] {
|
||||||
const since = `${year}-01-01`;
|
const since = `${year}-01-01`;
|
||||||
const until = `${year}-12-31`;
|
const until = `${year}-12-31`;
|
||||||
const authorFlags = GARRY_EMAILS.map(e => `--author=${e}`).join(' ');
|
const authorFlags = authorEmails.map(e => `--author=${e}`).join(' ');
|
||||||
try {
|
try {
|
||||||
const cmd = `git -C "${repoPath}" log --since=${since} --until=${until} ${authorFlags} --pretty=format:'%H' 2>/dev/null`;
|
const cmd = `git -C "${repoPath}" log --since=${since} --until=${until} ${authorFlags} --pretty=format:'%H' 2>/dev/null`;
|
||||||
const out = execSync(cmd, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] });
|
const out = execSync(cmd, { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'] });
|
||||||
@@ -217,8 +243,8 @@ function daysElapsed(year: number, now: Date = new Date()): number {
|
|||||||
return Math.max(1, Math.floor(diffMs / (24 * 60 * 60 * 1000)) + 1);
|
return Math.max(1, Math.floor(diffMs / (24 * 60 * 60 * 1000)) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
function analyzeRepo(repoPath: string, year: number, sccAvailable: boolean, now: Date = new Date()): PerYearResult {
|
function analyzeRepo(repoPath: string, year: number, authorEmails: string[], sccAvailable: boolean, now: Date = new Date()): PerYearResult {
|
||||||
const commits = enumerateCommits(year, repoPath);
|
const commits = enumerateCommits(year, repoPath, authorEmails);
|
||||||
const perLang: Record<string, { commits: number; logical_added: number }> = {};
|
const perLang: Record<string, { commits: number; logical_added: number }> = {};
|
||||||
let rawTotal = 0;
|
let rawTotal = 0;
|
||||||
let logicalTotal = 0;
|
let logicalTotal = 0;
|
||||||
@@ -312,10 +338,12 @@ function main() {
|
|||||||
process.stderr.write('Continuing with regex-based logical-line classification (an approximation).\n\n');
|
process.stderr.write('Continuing with regex-based logical-line classification (an approximation).\n\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const authorEmails = resolveAuthorEmails(args);
|
||||||
|
|
||||||
// For V1, we analyze the single repo at repoRoot. Future work: enumerate
|
// For V1, we analyze the single repo at repoRoot. Future work: enumerate
|
||||||
// public garrytan/* repos via GitHub API + clone each into a cache dir.
|
// public repos via GitHub API + clone each into a cache dir.
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
const years = TARGET_YEARS.map(y => analyzeRepo(repoRoot, y, sccAvailable, now));
|
const years = TARGET_YEARS.map(y => analyzeRepo(repoRoot, y, authorEmails, sccAvailable, now));
|
||||||
|
|
||||||
const y2013 = years.find(y => y.year === 2013);
|
const y2013 = years.find(y => y.year === 2013);
|
||||||
const y2026 = years.find(y => y.year === 2026);
|
const y2026 = years.find(y => y.year === 2026);
|
||||||
@@ -371,8 +399,8 @@ function main() {
|
|||||||
sccAvailable
|
sccAvailable
|
||||||
? 'Logical-line classification uses scc-aware regex (approximate).'
|
? 'Logical-line classification uses scc-aware regex (approximate).'
|
||||||
: 'Logical-line classification uses a crude regex fallback (scc not installed). Exclude blank lines + single-line comments; does not catch block comments or docstrings. Approximate.',
|
: 'Logical-line classification uses a crude regex fallback (scc not installed). Exclude blank lines + single-line comments; does not catch block comments or docstrings. Approximate.',
|
||||||
'This script analyzes a single repo at a time. Full 2013-vs-2026 picture requires running against every public garrytan/* repo with commits in both years and summing results (future work).',
|
'This script analyzes a single repo at a time. Full 2013-vs-2026 picture requires running against every public repo with commits in both years and summing results (future work).',
|
||||||
'Authorship attribution relies on commit email matching. Historical aliases are listed in GARRY_EMAILS at the top of this script.',
|
'Authorship attribution relies on commit email matching. Supply historical aliases via --email flags or GSTACK_AUTHOR_EMAILS.',
|
||||||
],
|
],
|
||||||
version: 1,
|
version: 1,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user