178 lines
5.4 KiB
TypeScript
178 lines
5.4 KiB
TypeScript
|
|
/**
 * Holdout scenario reporting helper.
 *
 * Each scenario creates a builder with `createReportBuilder()`, calls
 * `.metric()` during execution, then `.write()` at the end. On pass, the
 * report is informational. On fail, it captures exactly what went wrong with
 * full numbers for post-mortem analysis.
 *
 * Reports are written to `test-results/holdout-reports/` as JSON + markdown.
 */
|
||
|
|
import { writeFileSync, mkdirSync, readFileSync, existsSync } from 'fs';
|
||
|
|
import { join, dirname } from 'path';
|
||
|
|
|
||
|
|
/** A single named measurement captured while a scenario runs. */
export interface ScenarioMetric {
  /** Human-readable name of the metric. */
  label: string;
  /** Measured value; `bigint` is accepted so large integers keep full precision. */
  value: string | number | bigint;
  /** Optional unit displayed next to the value. */
  unit?: string;
}
|
||
|
|
|
||
|
|
/** The full record persisted for one scenario run (as JSON and as markdown). */
export interface ScenarioReport {
  /** Scenario identifier. */
  scenario: string;
  /** Domain label the scenario belongs to. */
  domain: string;
  /** Time the report was produced, as an ISO-8601 string. */
  timestamp: string;
  /** Elapsed time of the run in milliseconds. */
  durationMs: number;
  /** Whether the scenario passed. */
  passed: boolean;
  /** The invariant being tested (one sentence) */
  invariant: string;
  /** Metrics collected during the run */
  metrics: ScenarioMetric[];
  /** If failed: what went wrong */
  failureReason?: string;
  /** Possible causes from the scenario spec's "Why this might fail" */
  possibleCauses?: string[];
  /** Screenshot paths captured during the run */
  screenshots?: string[];
}
|
||
|
|
|
||
|
|
/** Directory that receives every report file; a relative path, so it resolves against the process working directory. */
const REPORT_DIR = 'test-results/holdout-reports';
|
||
|
|
|
||
|
|
/**
 * Create a report builder for a single scenario run.
 *
 * The clock for `durationMs` starts when this function is called. Use the
 * returned methods to collect metrics, screenshot paths and possible failure
 * causes while the scenario runs, then call `.write()` to persist the report.
 *
 * @param scenario - Scenario identifier; with path separators replaced by `-`
 *   it is also used as the base name of the report files.
 * @param domain - Domain label copied into the report.
 * @param invariant - One-sentence description of the invariant under test.
 * @returns An object exposing `metric`, `screenshot`, `setCauses` and `write`.
 */
export function createReportBuilder(scenario: string, domain: string, invariant: string) {
  const startTime = Date.now();
  const metrics: ScenarioMetric[] = [];
  const screenshots: string[] = [];
  let possibleCauses: string[] = [];

  return {
    /**
     * Record a metric during test execution and echo it to the console.
     *
     * `bigint` values are stored as decimal strings because `JSON.stringify`
     * cannot serialize `bigint`.
     */
    metric(label: string, value: string | number | bigint, unit?: string): void {
      const displayValue = typeof value === 'bigint' ? value.toString() : value;
      metrics.push({ label, value: displayValue, unit });
      console.log(`[METRIC] ${label}: ${displayValue}${unit ? ' ' + unit : ''}`);
    },

    /** Add a screenshot path */
    screenshot(path: string): void {
      screenshots.push(path);
    },

    /** Set possible failure causes (from scenario .md); replaces any previous list. */
    setCauses(causes: string[]): void {
      // Copy so that later mutation of the caller's array cannot alter the report.
      possibleCauses = [...causes];
    },

    /**
     * Write the report (call in finally block).
     *
     * Produces `<name>.json` and `<name>.md` in the report directory and adds a
     * row to the aggregate results file. Errors from the synchronous
     * filesystem calls propagate to the caller.
     *
     * @param passed - Whether the scenario passed.
     * @param failureReason - What went wrong, when the scenario failed.
     * @returns The report object that was written.
     */
    write(passed: boolean, failureReason?: string): ScenarioReport {
      const report: ScenarioReport = {
        scenario,
        domain,
        timestamp: new Date().toISOString(),
        durationMs: Date.now() - startTime,
        passed,
        invariant,
        // Defensive: metric() already stringifies bigint, but keep the report JSON-safe regardless.
        metrics: metrics.map(m => ({
          ...m,
          value: typeof m.value === 'bigint' ? m.value.toString() : m.value,
        })),
        failureReason,
        // Causes are only relevant for a failed run.
        possibleCauses: passed ? undefined : [...possibleCauses],
        // Snapshot, so the returned report does not change if the builder is used again.
        screenshots: [...screenshots],
      };

      mkdirSync(REPORT_DIR, { recursive: true });

      // Both `/` and `\` act as path separators (the latter on Windows); flatten
      // them so the report always lands directly inside REPORT_DIR.
      const safeName = scenario.replace(/[\\/]/g, '-');

      // JSON report
      const jsonPath = join(REPORT_DIR, `${safeName}.json`);
      writeFileSync(jsonPath, JSON.stringify(report, null, 2));

      // Markdown report (human-readable)
      const mdPath = join(REPORT_DIR, `${safeName}.md`);
      writeFileSync(mdPath, formatMarkdown(report));

      // Append to aggregate results
      appendToAggregate(report);

      console.log(`[REPORT] Written to ${jsonPath}`);
      return report;
    },
  };
}
|
||
|
|
|
||
|
|
/**
 * Render a scenario report as a human-readable markdown document.
 *
 * The document contains a title with the pass/fail status, the run details,
 * a metrics table, and, when present, a failure section (failed runs only)
 * and a list of screenshot paths.
 *
 * @param r - The report to render.
 * @returns The markdown text, terminated by a single newline.
 */
function formatMarkdown(r: ScenarioReport): string {
  const status = r.passed ? '✅ PASSED' : '❌ FAILED';

  // A raw `|` or a line break inside a table cell would split or end the row.
  // Escape pipes that are not already escaped and flatten line breaks.
  const cell = (text: string): string =>
    text.replace(/(?<!\\)\|/g, '\\|').replace(/\r\n|\r|\n/g, ' ');

  const lines = [
    `# ${r.scenario} — ${status}`,
    '',
    `**Domain:** ${r.domain}`,
    `**Invariant:** ${r.invariant}`,
    `**Duration:** ${(r.durationMs / 1000).toFixed(1)}s`,
    `**Timestamp:** ${r.timestamp}`,
    '',
    '## Metrics',
    '',
    '| Metric | Value | Unit |',
    '|--------|-------|------|',
  ];

  for (const m of r.metrics) {
    // String() handles string, number and bigint alike.
    lines.push(`| ${cell(m.label)} | ${cell(String(m.value))} | ${cell(m.unit ?? '')} |`);
  }

  if (!r.passed) {
    lines.push('', '## Failure', '', `**Reason:** ${r.failureReason ?? 'Unknown'}`);
    if (r.possibleCauses?.length) {
      lines.push('', '### Possible Causes (from scenario spec)', '');
      for (const c of r.possibleCauses) {
        lines.push(`- ${c}`);
      }
    }
  }

  if (r.screenshots?.length) {
    lines.push('', '## Screenshots', '');
    for (const s of r.screenshots) {
      lines.push(`- ${s}`);
    }
  }

  return lines.join('\n') + '\n';
}
|
||
|
|
|
||
|
|
/**
 * Add a one-row summary of a scenario report to the aggregate `RESULTS.md`
 * table inside the report directory.
 *
 * When the file does not exist it is created with a dated heading and the
 * table header; otherwise the existing content is re-written with the new row
 * placed after it. This function does not create the report directory.
 *
 * @param r - The report to summarize.
 */
function appendToAggregate(r: ScenarioReport): void {
  const aggPath = join(REPORT_DIR, 'RESULTS.md');
  const status = r.passed ? '✅' : '❌';
  const duration = (r.durationMs / 1000).toFixed(1);

  // A raw `|` or a line break inside a table cell would split or end the row.
  // Escape pipes that are not already escaped and flatten line breaks.
  const cell = (text: string): string =>
    text.replace(/(?<!\\)\|/g, '\\|').replace(/\r\n|\r|\n/g, ' ');

  // Find key metrics to show in summary: the first metric whose label contains
  // "profit", "loss" or "net" (case-insensitive substring match).
  const profitMetric = r.metrics.find(m => {
    const label = m.label.toLowerCase();
    return label.includes('profit') || label.includes('loss') || label.includes('net');
  });
  const summary = profitMetric
    ? ` — ${profitMetric.label}: ${profitMetric.value}${profitMetric.unit ? ' ' + profitMetric.unit : ''}`
    : '';

  const line = `| ${status} | ${cell(r.scenario)} | ${duration}s | ${cell(r.invariant + summary)} |`;

  if (!existsSync(aggPath)) {
    writeFileSync(aggPath, [
      `# Holdout Scenario Results — ${new Date().toISOString().slice(0, 10)}`,
      '',
      '| Status | Scenario | Time | Summary |',
      '|--------|----------|------|---------|',
      line,
      '',
    ].join('\n'));
  } else {
    const content = readFileSync(aggPath, 'utf-8');
    // Drop trailing whitespace first so the new row directly follows the last table row.
    writeFileSync(aggPath, content.trimEnd() + '\n' + line + '\n');
  }
}
|