feat: QA restructure, browser ref staleness, eval efficiency metrics (v0.4.0) (#83)

* feat: browser ref staleness detection via async count() validation

resolveRef() now checks element count to detect stale refs after page
mutations (e.g. SPA navigation). RefEntry stores role+name metadata
for better diagnostics. 3 new snapshot tests for staleness detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: qa-only skill, qa fix loop, plan-to-QA artifact flow

Add /qa-only (report-only, Edit tool blocked), restructure /qa with
find-fix-verify cycle, add {{QA_METHODOLOGY}} DRY placeholder for
shared methodology. /plan-eng-review now writes test-plan artifacts
to ~/.gstack/projects/<slug>/ for QA consumption.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat: eval efficiency metrics — turns, duration, commentary across all surfaces

Add generateCommentary() for natural-language delta interpretation,
per-test turns/duration in comparison and summary output, judgePassed
unit tests, 3 new E2E tests (qa-only, qa fix loop, plan artifact).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: bump version and changelog (v0.4.0)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* docs: update ARCHITECTURE, BROWSER, CONTRIBUTING, README for v0.4.0

- ARCHITECTURE: add ref staleness detection section, update RefEntry type
- BROWSER: add ref staleness paragraph to snapshot system docs
- CONTRIBUTING: update eval tool descriptions with commentary feature
- README: fix missing qa-only in project-local uninstall command

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* docs: add user-facing benefit descriptions to v0.4.0 changelog

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Garry Tan
2026-03-15 23:55:39 -05:00
committed by GitHub
parent bb46ca6b21
commit f3ee0ee28a
30 changed files with 2317 additions and 354 deletions

View File

@@ -18,6 +18,12 @@
import { chromium, type Browser, type BrowserContext, type Page, type Locator } from 'playwright';
import { addConsoleEntry, addNetworkEntry, addDialogEntry, networkBuffer, type DialogEntry } from './buffers';
/**
 * One entry in the snapshot ref map: the locator a ref points at, plus the
 * role/name recorded when the ref was created. The role and name are used
 * to describe the element in the "stale ref" error message.
 */
export interface RefEntry {
// Playwright locator for the referenced element.
locator: Locator;
// Role recorded at snapshot time (an ARIA role, or 'cursor-interactive' for @c refs).
role: string;
// Name/text recorded at snapshot time; an empty string when the node had no name.
name: string;
}
export class BrowserManager {
private browser: Browser | null = null;
private context: BrowserContext | null = null;
@@ -31,7 +37,7 @@ export class BrowserManager {
public serverPort: number = 0;
// ─── Ref Map (snapshot → @e1, @e2, @c1, @c2, ...) ────────
private refMap: Map<string, Locator> = new Map();
private refMap: Map<string, RefEntry> = new Map();
// ─── Snapshot Diffing ─────────────────────────────────────
// NOT cleared on navigation — it's a text baseline for diffing
@@ -169,7 +175,7 @@ export class BrowserManager {
}
// ─── Ref Map ──────────────────────────────────────────────
setRefMap(refs: Map<string, Locator>) {
setRefMap(refs: Map<string, RefEntry>) {
this.refMap = refs;
}
@@ -181,16 +187,23 @@ export class BrowserManager {
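* Resolve a selector that may be a @ref (e.g., "@e3", "@c1") or a CSS selector.
* Resolves to { locator } for refs or { selector } for CSS selectors.
* Throws if a ref is unknown, or if it is stale (its locator currently matches
* no elements) — in both cases run 'snapshot' to get fresh refs.
*/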
resolveRef(selector: string): { locator: Locator } | { selector: string } {
async resolveRef(selector: string): Promise<{ locator: Locator } | { selector: string }> {
if (selector.startsWith('@e') || selector.startsWith('@c')) {
const ref = selector.slice(1); // "e3" or "c1"
const locator = this.refMap.get(ref);
if (!locator) {
const entry = this.refMap.get(ref);
if (!entry) {
throw new Error(
`Ref ${selector} not found. Page may have changed — run 'snapshot' to get fresh refs.`
`Ref ${selector} not found. Run 'snapshot' to get fresh refs.`
);
}
return { locator };
const count = await entry.locator.count();
if (count === 0) {
throw new Error(
`Ref ${selector} (${entry.role} "${entry.name}") is stale — element no longer exists. ` +
`Run 'snapshot' for fresh refs.`
);
}
return { locator: entry.locator };
}
return { selector };
}

View File

@@ -150,7 +150,7 @@ export async function handleMetaCommand(
}
if (targetSelector) {
const resolved = bm.resolveRef(targetSelector);
const resolved = await bm.resolveRef(targetSelector);
const locator = 'locator' in resolved ? resolved.locator : page.locator(resolved.selector);
await locator.screenshot({ path: outputPath, timeout: 5000 });
return `Screenshot saved (element): ${outputPath}`;

View File

@@ -61,7 +61,7 @@ export async function handleReadCommand(
case 'html': {
const selector = args[0];
if (selector) {
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
return await resolved.locator.innerHTML({ timeout: 5000 });
}
@@ -135,7 +135,7 @@ export async function handleReadCommand(
case 'css': {
const [selector, property] = args;
if (!selector || !property) throw new Error('Usage: browse css <selector> <property>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
const value = await resolved.locator.evaluate(
(el, prop) => getComputedStyle(el).getPropertyValue(prop),
@@ -157,7 +157,7 @@ export async function handleReadCommand(
case 'attrs': {
const selector = args[0];
if (!selector) throw new Error('Usage: browse attrs <selector>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
const attrs = await resolved.locator.evaluate((el) => {
const result: Record<string, string> = {};
@@ -221,7 +221,7 @@ export async function handleReadCommand(
const selector = args[1];
if (!property || !selector) throw new Error('Usage: browse is <property> <selector>\nProperties: visible, hidden, enabled, disabled, checked, editable, focused');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
let locator;
if ('locator' in resolved) {
locator = resolved.locator;

View File

@@ -18,7 +18,7 @@
*/
import type { Page, Locator } from 'playwright';
import type { BrowserManager } from './browser-manager';
import type { BrowserManager, RefEntry } from './browser-manager';
import * as Diff from 'diff';
// Roles considered "interactive" for the -i flag
@@ -154,7 +154,7 @@ export async function handleSnapshot(
// Parse the ariaSnapshot output
const lines = ariaText.split('\n');
const refMap = new Map<string, Locator>();
const refMap = new Map<string, RefEntry>();
const output: string[] = [];
let refCounter = 1;
@@ -218,7 +218,7 @@ export async function handleSnapshot(
locator = locator.nth(seenIndex);
}
refMap.set(ref, locator);
refMap.set(ref, { locator, role: node.role, name: node.name || '' });
// Format output line
let outputLine = `${indent}@${ref} [${node.role}]`;
@@ -287,7 +287,7 @@ export async function handleSnapshot(
for (const elem of cursorElements) {
const ref = `c${cRefCounter++}`;
const locator = page.locator(elem.selector);
refMap.set(ref, locator);
refMap.set(ref, { locator, role: 'cursor-interactive', name: elem.text });
output.push(`@${ref} [${elem.reason}] "${elem.text}"`);
}
}
@@ -318,9 +318,9 @@ export async function handleSnapshot(
try {
// Inject overlay divs at each ref's bounding box
const boxes: Array<{ ref: string; box: { x: number; y: number; width: number; height: number } }> = [];
for (const [ref, locator] of refMap) {
for (const [ref, entry] of refMap) {
try {
const box = await locator.boundingBox({ timeout: 1000 });
const box = await entry.locator.boundingBox({ timeout: 1000 });
if (box) {
boxes.push({ ref: `@${ref}`, box });
}

View File

@@ -44,7 +44,7 @@ export async function handleWriteCommand(
case 'click': {
const selector = args[0];
if (!selector) throw new Error('Usage: browse click <selector>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.click({ timeout: 5000 });
} else {
@@ -59,7 +59,7 @@ export async function handleWriteCommand(
const [selector, ...valueParts] = args;
const value = valueParts.join(' ');
if (!selector || !value) throw new Error('Usage: browse fill <selector> <value>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.fill(value, { timeout: 5000 });
} else {
@@ -72,7 +72,7 @@ export async function handleWriteCommand(
const [selector, ...valueParts] = args;
const value = valueParts.join(' ');
if (!selector || !value) throw new Error('Usage: browse select <selector> <value>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.selectOption(value, { timeout: 5000 });
} else {
@@ -84,7 +84,7 @@ export async function handleWriteCommand(
case 'hover': {
const selector = args[0];
if (!selector) throw new Error('Usage: browse hover <selector>');
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.hover({ timeout: 5000 });
} else {
@@ -110,7 +110,7 @@ export async function handleWriteCommand(
case 'scroll': {
const selector = args[0];
if (selector) {
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.scrollIntoViewIfNeeded({ timeout: 5000 });
} else {
@@ -139,7 +139,7 @@ export async function handleWriteCommand(
return 'DOM content loaded';
}
const timeout = args[1] ? parseInt(args[1], 10) : 15000;
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.waitFor({ state: 'visible', timeout });
} else {
@@ -204,7 +204,7 @@ export async function handleWriteCommand(
if (!fs.existsSync(fp)) throw new Error(`File not found: ${fp}`);
}
const resolved = bm.resolveRef(selector);
const resolved = await bm.resolveRef(selector);
if ('locator' in resolved) {
await resolved.locator.setInputFiles(filePaths);
} else {