diff --git a/AGENTS.md b/AGENTS.md index 4db9488..1533637 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,12 +38,14 @@ See [docs/dev-environment.md](docs/dev-environment.md) for restart modes, ports, - **Harberger staking** supplies the sentiment oracle that drives Optimizer parameters, which in turn tune liquidity placement and supply expansion. ## Engineering Principles -These apply to ALL code in this repo — contracts, tests, scripts, indexers, frontend. +These apply to infrastructure (Docker, scripts, startup/teardown) and test/scenario execution — NOT to frontend polling of HTTP APIs where caching is the correct solution. 1. **Never use fixed delays or `waitForTimeout`** — react to actual events instead. Use `eth_subscribe` (WebSocket) for on-chain push notifications, `eth_newFilter` + `eth_getFilterChanges` for on-chain polling, DOM mutation observers or Playwright's `waitForSelector`/`waitForURL` for UI changes, callback patterns for async flows. Even if event-driven code takes more effort, it is always the right answer. 2. **Never use hardcoded expectations** — dynamic systems change. React to actual state, not assumed state. Don't assert a specific block number, token amount, or address unless it's a protocol constant. 3. **Event subscription > polling with timeout > fixed delay** — prefer true push subscriptions (`eth_subscribe`, WebSocket, observers). When push is unavailable (e.g. HTTP-only RPC), polling with a timeout and clear error is acceptable. A fixed `sleep`/`wait`/`waitForTimeout` is never acceptable. Existing violations should be replaced when touched. +**Note:** Frontend components polling HTTP APIs (e.g. LiveStats polling Ponder GraphQL) are fine — the scalability solution there is caching at the proxy layer, not subscriptions. + ## Before Opening a PR 1. `forge build && forge test` in `onchain/` — contracts must compile and pass. 2. Run `npm run test:e2e` from repo root if you touched frontend or services. 
/**
 * Holdout scenario reporting helper.
 *
 * Location: `scripts/harb-evaluator/helpers/report.ts`.
 *
 * Each scenario creates a builder with `createReportBuilder()`, calls
 * `.metric()` during execution, then `.write()` at the end (typically in a
 * `finally` block). On pass, the report is informational. On fail, it captures
 * exactly what went wrong with full numbers for post-mortem analysis.
 *
 * Reports are written to `test-results/holdout-reports/` (relative to the
 * current working directory) as JSON + markdown, and one summary row per
 * `.write()` call is appended to `RESULTS.md` in the same directory.
 */
import { writeFileSync, mkdirSync, readFileSync, existsSync } from 'fs';
import { join, dirname } from 'path';

/** A single named measurement captured while a scenario runs. */
export interface ScenarioMetric {
  label: string;
  value: string | number | bigint;
  unit?: string;
}

/** The full record persisted for one scenario run. */
export interface ScenarioReport {
  scenario: string;
  domain: string;
  timestamp: string;
  durationMs: number;
  passed: boolean;
  /** The invariant being tested (one sentence) */
  invariant: string;
  /** Metrics collected during the run */
  metrics: ScenarioMetric[];
  /** If failed: what went wrong */
  failureReason?: string;
  /** Possible causes from the scenario spec's "Why this might fail" */
  possibleCauses?: string[];
  /** Screenshot paths captured during the run */
  screenshots?: string[];
}

/** The object returned by `createReportBuilder()`. */
export interface ReportBuilder {
  /** Record a metric during test execution (bigints are stored as decimal strings). */
  metric(label: string, value: string | number | bigint, unit?: string): void;
  /** Add a screenshot path. */
  screenshot(path: string): void;
  /** Set possible failure causes (from scenario .md). Replaces any previous list. */
  setCauses(causes: string[]): void;
  /** Persist the report (JSON, markdown, aggregate row) and return it. */
  write(passed: boolean, failureReason?: string): ScenarioReport;
}

const REPORT_DIR = 'test-results/holdout-reports';

/**
 * Make arbitrary text safe to place inside a markdown table cell.
 *
 * A raw `|` would start a new column and a raw newline would end the row,
 * so pipes are backslash-escaped and line breaks are collapsed to a space.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
}

/**
 * Create a report builder for a scenario. Call methods to add metrics,
 * then call `.write()` to persist.
 *
 * @param scenario  Scenario identifier; path separators are replaced with `-`
 *                  when deriving the report file names.
 * @param domain    Domain/group label stored in the report.
 * @param invariant One-sentence statement of the invariant under test.
 * @returns A builder whose `write()` creates `<scenario>.json`,
 *          `<scenario>.md` and appends a row to `RESULTS.md`.
 */
export function createReportBuilder(scenario: string, domain: string, invariant: string): ReportBuilder {
  // The duration reported by write() is measured from builder creation.
  const startTime = Date.now();
  const metrics: ScenarioMetric[] = [];
  const screenshots: string[] = [];
  let possibleCauses: string[] = [];

  return {
    metric(label: string, value: string | number | bigint, unit?: string): void {
      // JSON.stringify cannot serialize bigint, so normalize to a decimal string up front.
      const displayValue = typeof value === 'bigint' ? value.toString() : value;
      metrics.push({ label, value: displayValue, unit });
      console.log(`[METRIC] ${label}: ${displayValue}${unit ? ' ' + unit : ''}`);
    },

    screenshot(path: string): void {
      screenshots.push(path);
    },

    setCauses(causes: string[]): void {
      // Copy so later mutation of the caller's array cannot change the report.
      possibleCauses = [...causes];
    },

    write(passed: boolean, failureReason?: string): ScenarioReport {
      const report: ScenarioReport = {
        scenario,
        domain,
        timestamp: new Date().toISOString(),
        durationMs: Date.now() - startTime,
        passed,
        invariant,
        // Snapshot the collections: the returned report must not change if the
        // builder keeps being used after write().
        metrics: metrics.map(m => ({ ...m })),
        failureReason,
        // Causes are only meaningful for a failed run.
        possibleCauses: passed ? undefined : [...possibleCauses],
        screenshots: [...screenshots],
      };

      mkdirSync(REPORT_DIR, { recursive: true });

      // Scenario names may contain path separators; flatten them so every
      // report lands directly inside REPORT_DIR.
      const safeName = scenario.replace(/[\\/]/g, '-');

      // JSON report
      const jsonPath = join(REPORT_DIR, `${safeName}.json`);
      writeFileSync(jsonPath, JSON.stringify(report, null, 2));

      // Markdown report (human-readable)
      const mdPath = join(REPORT_DIR, `${safeName}.md`);
      writeFileSync(mdPath, formatMarkdown(report));

      // Append to aggregate results
      appendToAggregate(report);

      console.log(`[REPORT] Written to ${jsonPath}`);
      return report;
    },
  };
}

/**
 * Render a report as a standalone markdown document: header fields, a metrics
 * table, a failure section (failed runs only) and a screenshot list (if any).
 */
function formatMarkdown(r: ScenarioReport): string {
  const status = r.passed ? '✅ PASSED' : '❌ FAILED';
  const lines = [
    `# ${r.scenario} — ${status}`,
    '',
    `**Domain:** ${r.domain}`,
    `**Invariant:** ${r.invariant}`,
    `**Duration:** ${(r.durationMs / 1000).toFixed(1)}s`,
    `**Timestamp:** ${r.timestamp}`,
    '',
    '## Metrics',
    '',
    '| Metric | Value | Unit |',
    '|--------|-------|------|',
  ];

  for (const m of r.metrics) {
    // String() also covers bigint values, which the ScenarioMetric type permits.
    const label = escapeTableCell(m.label);
    const val = escapeTableCell(String(m.value));
    const unit = escapeTableCell(m.unit ?? '');
    lines.push(`| ${label} | ${val} | ${unit} |`);
  }

  if (!r.passed) {
    lines.push('', '## Failure', '', `**Reason:** ${r.failureReason ?? 'Unknown'}`);
    if (r.possibleCauses?.length) {
      lines.push('', '### Possible Causes (from scenario spec)', '');
      for (const c of r.possibleCauses) {
        lines.push(`- ${c}`);
      }
    }
  }

  if (r.screenshots?.length) {
    lines.push('', '## Screenshots', '');
    for (const s of r.screenshots) {
      lines.push(`- ${s}`);
    }
  }

  return lines.join('\n') + '\n';
}

/**
 * Append one summary row for this report to `RESULTS.md` in REPORT_DIR,
 * creating the file with a dated title and table header on first use.
 */
function appendToAggregate(r: ScenarioReport): void {
  const aggPath = join(REPORT_DIR, 'RESULTS.md');
  const status = r.passed ? '✅' : '❌';
  const duration = (r.durationMs / 1000).toFixed(1);

  // Pick the first metric whose label mentions profit, loss or net as the
  // headline figure shown next to the invariant.
  const profitMetric = r.metrics.find(m => {
    const label = m.label.toLowerCase();
    return label.includes('profit') || label.includes('loss') || label.includes('net');
  });
  const summary = profitMetric
    ? ` — ${profitMetric.label}: ${profitMetric.value}${profitMetric.unit ? ' ' + profitMetric.unit : ''}`
    : '';

  const scenarioCell = escapeTableCell(r.scenario);
  const summaryCell = escapeTableCell(`${r.invariant}${summary}`);
  const line = `| ${status} | ${scenarioCell} | ${duration}s | ${summaryCell} |`;

  if (!existsSync(aggPath)) {
    writeFileSync(aggPath, [
      `# Holdout Scenario Results — ${new Date().toISOString().slice(0, 10)}`,
      '',
      '| Status | Scenario | Time | Summary |',
      '|--------|----------|------|---------|',
      line,
      '',
    ].join('\n'));
  } else {
    const content = readFileSync(aggPath, 'utf-8');
    // Drop trailing blank lines so the new row stays attached to the table.
    writeFileSync(aggPath, content.trimEnd() + '\n' + line + '\n');
  }
}