diff --git a/AGENTS.md b/AGENTS.md
index 4db9488..1533637 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -38,12 +38,14 @@ See [docs/dev-environment.md](docs/dev-environment.md) for restart modes, ports,
 - **Harberger staking** supplies the sentiment oracle that drives Optimizer parameters, which in turn tune liquidity placement and supply expansion.
 
 ## Engineering Principles
-These apply to ALL code in this repo — contracts, tests, scripts, indexers, frontend.
+These apply to infrastructure (Docker, scripts, startup/teardown) and test/scenario execution — NOT to frontend polling of HTTP APIs where caching is the correct solution.
 
 1. **Never use fixed delays or `waitForTimeout`** — react to actual events instead. Use `eth_subscribe` (WebSocket) for on-chain push notifications, `eth_newFilter` + `eth_getFilterChanges` for on-chain polling, DOM mutation observers or Playwright's `waitForSelector`/`waitForURL` for UI changes, callback patterns for async flows. Even if event-driven code takes more effort, it is always the right answer.
 2. **Never use hardcoded expectations** — dynamic systems change. React to actual state, not assumed state. Don't assert a specific block number, token amount, or address unless it's a protocol constant.
 3. **Event subscription > polling with timeout > fixed delay** — prefer true push subscriptions (`eth_subscribe`, WebSocket, observers). When push is unavailable (e.g. HTTP-only RPC), polling with a timeout and clear error is acceptable. A fixed `sleep`/`wait`/`waitForTimeout` is never acceptable. Existing violations should be replaced when touched.
 
+**Note:** Frontend components polling HTTP APIs (e.g. LiveStats polling Ponder GraphQL) are fine — the scalability solution there is caching at the proxy layer, not subscriptions.
+
 ## Before Opening a PR
 1. `forge build && forge test` in `onchain/` — contracts must compile and pass.
 2. Run `npm run test:e2e` from repo root if you touched frontend or services.
diff --git a/scripts/harb-evaluator/helpers/report.ts b/scripts/harb-evaluator/helpers/report.ts
new file mode 100644
index 0000000..529203c
--- /dev/null
+++ b/scripts/harb-evaluator/helpers/report.ts
@@ -0,0 +1,177 @@
+/**
+ * Holdout scenario reporting helper.
+ *
+ * Each scenario calls `recordMetric()` during execution, then `writeReport()`
+ * at the end. On pass, the report is informational. On fail, it captures
+ * exactly what went wrong with full numbers for post-mortem analysis.
+ *
+ * Reports are written to `test-results/holdout-reports/` as JSON + markdown.
+ */
+import { writeFileSync, mkdirSync, readFileSync, existsSync } from 'fs';
+import { join, dirname } from 'path';
+
+export interface ScenarioMetric {
+  label: string;
+  value: string | number | bigint;
+  unit?: string;
+}
+
+export interface ScenarioReport {
+  scenario: string;
+  domain: string;
+  timestamp: string;
+  durationMs: number;
+  passed: boolean;
+  /** The invariant being tested (one sentence) */
+  invariant: string;
+  /** Metrics collected during the run */
+  metrics: ScenarioMetric[];
+  /** If failed: what went wrong */
+  failureReason?: string;
+  /** Possible causes from the scenario spec's "Why this might fail" */
+  possibleCauses?: string[];
+  /** Screenshot paths captured during the run */
+  screenshots?: string[];
+}
+
+const REPORT_DIR = 'test-results/holdout-reports';
+
+/**
+ * Create a report builder for a scenario. Call methods to add metrics,
+ * then call `.write()` to persist.
+ */
+export function createReportBuilder(scenario: string, domain: string, invariant: string) {
+  const startTime = Date.now();
+  const metrics: ScenarioMetric[] = [];
+  const screenshots: string[] = [];
+  let possibleCauses: string[] = [];
+
+  return {
+    /** Record a metric during test execution */
+    metric(label: string, value: string | number | bigint, unit?: string) {
+      const displayValue = typeof value === 'bigint' ? value.toString() : value;
+      metrics.push({ label, value: displayValue, unit });
+      console.log(`[METRIC] ${label}: ${displayValue}${unit ? ' ' + unit : ''}`);
+    },
+
+    /** Add a screenshot path */
+    screenshot(path: string) {
+      screenshots.push(path);
+    },
+
+    /** Set possible failure causes (from scenario .md) */
+    setCauses(causes: string[]) {
+      possibleCauses = causes;
+    },
+
+    /** Write the report (call in finally block) */
+    write(passed: boolean, failureReason?: string): ScenarioReport {
+      const report: ScenarioReport = {
+        scenario,
+        domain,
+        timestamp: new Date().toISOString(),
+        durationMs: Date.now() - startTime,
+        passed,
+        invariant,
+        metrics: metrics.map(m => ({
+          ...m,
+          value: typeof m.value === 'bigint' ? m.value.toString() : m.value,
+        })),
+        failureReason,
+        possibleCauses: passed ? undefined : possibleCauses,
+        screenshots,
+      };
+
+      mkdirSync(REPORT_DIR, { recursive: true });
+
+      // JSON report
+      const safeName = scenario.replace(/\//g, '-');
+      const jsonPath = join(REPORT_DIR, `${safeName}.json`);
+      writeFileSync(jsonPath, JSON.stringify(report, null, 2));
+
+      // Markdown report (human-readable)
+      const mdPath = join(REPORT_DIR, `${safeName}.md`);
+      writeFileSync(mdPath, formatMarkdown(report));
+
+      // Append to aggregate results
+      appendToAggregate(report);
+
+      console.log(`[REPORT] Written to ${jsonPath}`);
+      return report;
+    },
+  };
+}
+
+function formatMarkdown(r: ScenarioReport): string {
+  const status = r.passed ? '✅ PASSED' : '❌ FAILED';
+  const lines = [
+    `# ${r.scenario} — ${status}`,
+    '',
+    `**Domain:** ${r.domain}`,
+    `**Invariant:** ${r.invariant}`,
+    `**Duration:** ${(r.durationMs / 1000).toFixed(1)}s`,
+    `**Timestamp:** ${r.timestamp}`,
+    '',
+    '## Metrics',
+    '',
+    '| Metric | Value | Unit |',
+    '|--------|-------|------|',
+  ];
+
+  for (const m of r.metrics) {
+    const val = typeof m.value === 'bigint' ? m.value.toString() : String(m.value);
+    lines.push(`| ${m.label} | ${val} | ${m.unit ?? ''} |`);
+  }
+
+  if (!r.passed) {
+    lines.push('', '## Failure', '', `**Reason:** ${r.failureReason ?? 'Unknown'}`);
+    if (r.possibleCauses?.length) {
+      lines.push('', '### Possible Causes (from scenario spec)', '');
+      for (const c of r.possibleCauses) {
+        lines.push(`- ${c}`);
+      }
+    }
+  }
+
+  if (r.screenshots?.length) {
+    lines.push('', '## Screenshots', '');
+    for (const s of r.screenshots) {
+      lines.push(`- ${s}`);
+    }
+  }
+
+  return lines.join('\n') + '\n';
+}
+
+function appendToAggregate(r: ScenarioReport): void {
+  const aggPath = join(REPORT_DIR, 'RESULTS.md');
+  const status = r.passed ? '✅' : '❌';
+  const duration = (r.durationMs / 1000).toFixed(1);
+
+  // Find key metrics to show in summary
+  const profitMetric = r.metrics.find(m =>
+    m.label.toLowerCase().includes('profit') ||
+    m.label.toLowerCase().includes('loss') ||
+    m.label.toLowerCase().includes('net'),
+  );
+  const summary = profitMetric
+    ? ` — ${profitMetric.label}: ${profitMetric.value}${profitMetric.unit ? ' ' + profitMetric.unit : ''}`
+    : '';
+
+  const line = `| ${status} | ${r.scenario} | ${duration}s | ${r.invariant}${summary} |`;
+
+  if (!existsSync(aggPath)) {
+    writeFileSync(aggPath, [
+      `# Holdout Scenario Results — ${new Date().toISOString().slice(0, 10)}`,
+      '',
+      '| Status | Scenario | Time | Summary |',
+      '|--------|----------|------|---------|',
+      line,
+      '',
+    ].join('\n'));
+  } else {
+    const content = readFileSync(aggPath, 'utf-8');
+    // Insert before the trailing newline
+    writeFileSync(aggPath, content.trimEnd() + '\n' + line + '\n');
+  }
+}