/**
 * Holdout scenario reporting helper.
 *
 * Each scenario creates a builder with `createReportBuilder()`, calls
 * `metric()` during execution, then `write()` at the end. On pass, the report
 * is informational. On fail, it captures exactly what went wrong with full
 * numbers for post-mortem analysis.
 *
 * Reports are written to `test-results/holdout-reports/` as JSON + markdown,
 * and one summary row per scenario is appended to `RESULTS.md` in the same
 * directory.
 */

import { writeFileSync, mkdirSync, readFileSync, existsSync } from 'fs';
import { join, dirname } from 'path';

/** A single named measurement captured while a scenario runs. */
export interface ScenarioMetric {
  label: string;
  value: string | number | bigint;
  unit?: string;
}

/** The full, serialisable outcome of one scenario run. */
export interface ScenarioReport {
  scenario: string;
  domain: string;
  timestamp: string;
  durationMs: number;
  passed: boolean;
  /** The invariant being tested (one sentence) */
  invariant: string;
  /** Metrics collected during the run */
  metrics: ScenarioMetric[];
  /** If failed: what went wrong */
  failureReason?: string;
  /** Possible causes from the scenario spec's "Why this might fail" */
  possibleCauses?: string[];
  /** Screenshot paths captured during the run */
  screenshots?: string[];
}

/** The object returned by {@link createReportBuilder}. */
export interface ReportBuilder {
  /** Record a metric during test execution (also echoed to the console). */
  metric(label: string, value: string | number | bigint, unit?: string): void;
  /** Add a screenshot path */
  screenshot(path: string): void;
  /** Set possible failure causes (from scenario .md) */
  setCauses(causes: string[]): void;
  /** Write the report (call in finally block) and return it. */
  write(passed: boolean, failureReason?: string): ScenarioReport;
}

const REPORT_DIR = 'test-results/holdout-reports';

/** Metric-label substrings that mark a metric as worth showing in the aggregate summary. */
const SUMMARY_KEYWORDS = ['profit', 'loss', 'net'] as const;

/**
 * Make arbitrary text safe to place inside a markdown table cell: a literal
 * `|` would otherwise start a new column and a line break would end the row.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
}

/**
 * Turn a scenario name into a flat file name by replacing path separators
 * (both `/` and `\`) with `-`, so the report always lands directly inside
 * the report directory.
 */
function toSafeFileName(scenario: string): string {
  return scenario.replace(/[\\/]/g, '-');
}

/**
 * Create a report builder for a scenario. Call methods to add metrics,
 * then call `.write()` to persist.
 *
 * @param scenario  Scenario name; also used (with path separators replaced) as the report file name.
 * @param domain    Domain the scenario belongs to.
 * @param invariant The invariant being tested (one sentence).
 * @returns A builder whose duration clock starts at the moment of this call.
 */
export function createReportBuilder(
  scenario: string,
  domain: string,
  invariant: string,
): ReportBuilder {
  const startTime = Date.now();
  const metrics: ScenarioMetric[] = [];
  const screenshots: string[] = [];
  let possibleCauses: string[] = [];

  return {
    metric(label: string, value: string | number | bigint, unit?: string): void {
      // bigint is not JSON-serialisable, so it is stored as its decimal string.
      const displayValue = typeof value === 'bigint' ? value.toString() : value;
      metrics.push({ label, value: displayValue, unit });
      console.log(`[METRIC] ${label}: ${displayValue}${unit ? ' ' + unit : ''}`);
    },

    screenshot(path: string): void {
      screenshots.push(path);
    },

    setCauses(causes: string[]): void {
      // Copy so later mutation of the caller's array cannot change the report.
      possibleCauses = [...causes];
    },

    write(passed: boolean, failureReason?: string): ScenarioReport {
      const report: ScenarioReport = {
        scenario,
        domain,
        timestamp: new Date().toISOString(),
        durationMs: Date.now() - startTime,
        passed,
        invariant,
        // Snapshot the collected data so the returned report is not aliased
        // to the builder's internal, still-mutable arrays.
        metrics: metrics.map((m) => ({ ...m })),
        failureReason,
        possibleCauses: passed ? undefined : [...possibleCauses],
        screenshots: [...screenshots],
      };

      mkdirSync(REPORT_DIR, { recursive: true });

      // JSON report
      const safeName = toSafeFileName(scenario);
      const jsonPath = join(REPORT_DIR, `${safeName}.json`);
      writeFileSync(jsonPath, JSON.stringify(report, null, 2));

      // Markdown report (human-readable)
      const mdPath = join(REPORT_DIR, `${safeName}.md`);
      writeFileSync(mdPath, formatMarkdown(report));

      // Append to aggregate results
      appendToAggregate(report);

      console.log(`[REPORT] Written to ${jsonPath}`);
      return report;
    },
  };
}

/**
 * Render a report as a human-readable markdown document: a header, a metrics
 * table, a failure section (failed runs only) and a screenshot list (when any
 * were recorded). The result always ends with a newline.
 */
function formatMarkdown(r: ScenarioReport): string {
  const status = r.passed ? '✅ PASSED' : '❌ FAILED';
  const lines = [
    `# ${r.scenario} — ${status}`,
    '',
    `**Domain:** ${r.domain}`,
    `**Invariant:** ${r.invariant}`,
    `**Duration:** ${(r.durationMs / 1000).toFixed(1)}s`,
    `**Timestamp:** ${r.timestamp}`,
    '',
    '## Metrics',
    '',
    '| Metric | Value | Unit |',
    '|--------|-------|------|',
  ];

  for (const m of r.metrics) {
    // String() handles string, number and bigint alike.
    const label = escapeTableCell(m.label);
    const value = escapeTableCell(String(m.value));
    const unit = escapeTableCell(m.unit ?? '');
    lines.push(`| ${label} | ${value} | ${unit} |`);
  }

  if (!r.passed) {
    lines.push('', '## Failure', '', `**Reason:** ${r.failureReason ?? 'Unknown'}`);
    if (r.possibleCauses?.length) {
      lines.push('', '### Possible Causes (from scenario spec)', '');
      for (const c of r.possibleCauses) {
        lines.push(`- ${c}`);
      }
    }
  }

  if (r.screenshots?.length) {
    lines.push('', '## Screenshots', '');
    for (const s of r.screenshots) {
      lines.push(`- ${s}`);
    }
  }

  return lines.join('\n') + '\n';
}

/**
 * Build the single table row that summarises a report in `RESULTS.md`.
 * The summary cell is the invariant, followed by the first metric whose label
 * contains one of `SUMMARY_KEYWORDS` (case-insensitive), if there is one.
 */
function formatAggregateRow(r: ScenarioReport): string {
  const status = r.passed ? '✅' : '❌';
  const duration = (r.durationMs / 1000).toFixed(1);

  // Find key metrics to show in summary
  const keyMetric = r.metrics.find((m) => {
    const label = m.label.toLowerCase();
    return SUMMARY_KEYWORDS.some((keyword) => label.includes(keyword));
  });
  const summary = keyMetric
    ? ` — ${keyMetric.label}: ${keyMetric.value}${keyMetric.unit ? ' ' + keyMetric.unit : ''}`
    : '';

  return `| ${status} | ${escapeTableCell(r.scenario)} | ${duration}s | ${escapeTableCell(r.invariant + summary)} |`;
}

/**
 * Add one summary row for `r` to the aggregate `RESULTS.md`, creating the file
 * with a dated title and table header when it does not exist yet.
 */
function appendToAggregate(r: ScenarioReport): void {
  const aggPath = join(REPORT_DIR, 'RESULTS.md');
  const line = formatAggregateRow(r);

  if (!existsSync(aggPath)) {
    writeFileSync(
      aggPath,
      [
        `# Holdout Scenario Results — ${new Date().toISOString().slice(0, 10)}`,
        '',
        '| Status | Scenario | Time | Summary |',
        '|--------|----------|------|---------|',
        line,
        '',
      ].join('\n'),
    );
  } else {
    const content = readFileSync(aggPath, 'utf-8');
    // Drop trailing whitespace so the new row directly follows the last row
    // (a blank line in between would terminate the markdown table).
    writeFileSync(aggPath, content.trimEnd() + '\n' + line + '\n');
  }
}