From 2ddd8e9ed29d7c6ea1ed9126862137791be52cc3 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 1 Mar 2026 11:24:15 +0000 Subject: [PATCH] fix: Holdout evaluator: Playwright browser-based scenario scripts (#381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace shell-script scenario runner with Playwright. The evaluator now runs `npx playwright test --config scripts/harb-evaluator/holdout.config.ts` after booting the stack, using the existing tests/setup/ wallet-provider and navigation infrastructure. Changes: - scripts/harb-evaluator/holdout.config.ts — new Playwright config pointing to scenarios/, headless chromium, 5-min timeout per test - scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts — Playwright spec that buys KRK through the LocalSwapWidget then sells it back via the injected wallet provider, asserting sovereign exit works - scripts/harb-evaluator/evaluate.sh — adds root npm install step (needed for npx playwright), exports STACK_* env aliases for getStackConfig(), replaces shell-script loop with a single playwright test invocation Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/evaluate.sh | 47 ++--- scripts/harb-evaluator/holdout.config.ts | 43 ++++ .../sovereign-exit/always-leave.spec.ts | 192 ++++++++++++++++++ 3 files changed, 253 insertions(+), 29 deletions(-) create mode 100644 scripts/harb-evaluator/holdout.config.ts create mode 100644 scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index 40e48f6..2e14b21 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -147,6 +147,11 @@ log "Building kraiken-lib..." (cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \ || infra_error "kraiken-lib build failed" +# ── Install root npm dependencies (needed for npx playwright test) ───── +log "Installing root npm dependencies..." 
+(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \ + || infra_error "npm install failed" + # ── Boot the stack ───────────────────────────────────────────────────── cd "$WORKTREE_DIR" log "Starting containerised stack (project: $COMPOSE_PROJECT)..." @@ -249,7 +254,7 @@ if [[ "$ponder_ready" != "true" ]]; then infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s" fi -# ── Export stack endpoints for scenario scripts ──────────────────────── +# ── Export stack endpoints ───────────────────────────────────────────── export EVAL_PR_NUMBER="$PR_NUMBER" export EVAL_BRANCH="$PR_BRANCH" export EVAL_WORKTREE="$WORKTREE_DIR" @@ -260,38 +265,22 @@ export EVAL_KRAIKEN="$KRAIKEN" export EVAL_STAKE="$STAKE" export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER" +# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly +export STACK_RPC_URL="$EVAL_RPC_URL" +export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL" +export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL" + log "Stack ready. Endpoints:" log " RPC: $EVAL_RPC_URL" log " GraphQL: $EVAL_GRAPHQL_URL" log " WebApp: $EVAL_WEBAPP_URL" -# ── Run scenario scripts ─────────────────────────────────────────────── -shopt -s nullglob -scenario_scripts=("$SCENARIOS_DIR"/*.sh) -shopt -u nullglob - -if [[ ${#scenario_scripts[@]} -eq 0 ]]; then - log "No scenario scripts found in $SCENARIOS_DIR" - log "Gate PASSED (no scenarios)" +# ── Run holdout Playwright scenarios ────────────────────────────────── +log "Running holdout scenarios via Playwright..." 
+cd "$WORKTREE_DIR" +if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then + log "Gate PASSED" exit 0 +else + gate_fail "One or more holdout scenarios failed" fi - -failures=0 -for scenario in "${scenario_scripts[@]}"; do - [[ -f "$scenario" ]] || continue - scenario_name="$(basename "$scenario")" - log "--- Running scenario: $scenario_name ---" - if bash "$scenario"; then - log " PASSED: $scenario_name" - else - log " FAILED: $scenario_name" - failures=$((failures + 1)) - fi -done - -if (( failures > 0 )); then - gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed" -fi - -log "Gate PASSED (${#scenario_scripts[@]} scenario(s))" -exit 0 diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts new file mode 100644 index 0000000..54dfb33 --- /dev/null +++ b/scripts/harb-evaluator/holdout.config.ts @@ -0,0 +1,43 @@ +import { defineConfig, devices } from '@playwright/test'; + +/** + * Playwright config for holdout scenarios. + * + * Holdout specs live under scripts/harb-evaluator/scenarios/ and reuse the + * existing tests/setup/ infrastructure (wallet-provider, stack, navigate). 
+ *
+ * The evaluator boots the stack first, then runs:
+ *   npx playwright test --config scripts/harb-evaluator/holdout.config.ts
+ *
+ * Required env vars (set by evaluate.sh):
+ *   STACK_RPC_URL     – Anvil JSON-RPC endpoint
+ *   STACK_WEBAPP_URL  – Vite dev server URL
+ *   STACK_GRAPHQL_URL – Ponder GraphQL endpoint
+ */
+export default defineConfig({
+  testDir: './scenarios',
+  fullyParallel: false,
+  forbidOnly: !!process.env.CI,
+  retries: 0,
+  workers: 1,
+  reporter: 'list',
+  timeout: 5 * 60 * 1000, // 5 min per test — scenarios involve on-chain txns
+  expect: {
+    timeout: 30_000,
+  },
+  use: {
+    headless: true,
+    viewport: { width: 1280, height: 720 },
+    screen: { width: 1280, height: 720 },
+    actionTimeout: 60_000,
+    launchOptions: {
+      args: ['--disable-dev-shm-usage', '--no-sandbox'],
+    },
+  },
+  projects: [
+    {
+      name: 'chromium',
+      use: { ...devices['Desktop Chrome'] },
+    },
+  ],
+});
diff --git a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
new file mode 100644
index 0000000..4eee972
--- /dev/null
+++ b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
@@ -0,0 +1,228 @@
+/**
+ * Holdout scenario: sovereign-exit / always-leave
+ *
+ * Verifies the core protocol invariant: a user can ALWAYS exit their position
+ * by buying KRK through the in-app swap widget and then selling it back.
+ *
+ * Reuses tests/setup/ infrastructure — no new wallet or navigation helpers.
+ *
+ * Account 5 from the Anvil test mnemonic is used so it never collides with
+ * the deploy scripts (which use accounts 0–1).
+ */
+import { expect, test, type Locator } from '@playwright/test';
+import { Interface, Wallet } from 'ethers';
+import { createWalletContext } from '../../../../tests/setup/wallet-provider';
+import { getStackConfig } from '../../../../tests/setup/stack';
+import { navigateSPA } from '../../../../tests/setup/navigate';
+
+// Anvil account 5 — never used by deploy or txnBot
+const PK = '0x8b3a350cf5c34c9194ca85829a2df0ec3153be0318b5e2d3348e872092edffba';
+const ACCOUNT = new Wallet(PK);
+const ACCOUNT_ADDRESS = ACCOUNT.address;
+
+// Infrastructure addresses that are stable across Anvil forks of Base Sepolia
+const SWAP_ROUTER = '0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4';
+const WETH = '0x4200000000000000000000000000000000000006';
+const POOL_FEE = 10_000; // 1% tier used by KRAIKEN pool
+
+// How long to keep re-reading an on-chain balance before failing an assertion
+const BALANCE_POLL_TIMEOUT_MS = 30_000;
+
+// ── RPC helpers (Node.js context) ──────────────────────────────────────────
+
+/** Shape of a JSON-RPC 2.0 response body, as far as this spec reads it. */
+interface JsonRpcResponse {
+  result?: unknown;
+  error?: { message: string };
+}
+
+/**
+ * POSTs a single JSON-RPC request to `rpcUrl` and returns its `result`.
+ *
+ * @throws Error when the HTTP status is not 2xx or the body carries `error`.
+ */
+async function rpcCall(rpcUrl: string, method: string, params: unknown[]): Promise<unknown> {
+  const resp = await fetch(rpcUrl, {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ jsonrpc: '2.0', id: Date.now(), method, params }),
+  });
+  // A proxy/gateway failure is not JSON-RPC; report it instead of a parse error.
+  if (!resp.ok) throw new Error(`RPC ${method}: HTTP ${resp.status} ${resp.statusText}`);
+  const payload = (await resp.json()) as JsonRpcResponse;
+  if (payload.error) throw new Error(`RPC ${method}: ${payload.error.message}`);
+  return payload.result;
+}
+
+/** Reads `balanceOf(account)` on the ERC-20 at `krkAddress` at the latest block. */
+async function getKrkBalance(rpcUrl: string, krkAddress: string, account: string): Promise<bigint> {
+  const selector = '0x70a08231'; // balanceOf(address)
+  const data = selector + account.slice(2).padStart(64, '0');
+  const result = (await rpcCall(rpcUrl, 'eth_call', [{ to: krkAddress, data }, 'latest'])) as string;
+  return BigInt(result);
+}
+
+// ── UI helpers ─────────────────────────────────────────────────────────────
+
+/**
+ * Resolves to true once `locator` is visible, or false if it is still not
+ * visible after `timeout` ms. `locator.isVisible()` cannot be used for this:
+ * it returns immediately and ignores its `timeout` option.
+ */
+async function becomesVisible(locator: Locator, timeout: number): Promise<boolean> {
+  try {
+    await locator.waitFor({ state: 'visible', timeout });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+// ── ABI helpers for the sell path ──────────────────────────────────────────
+
+const ERC20_ABI = ['function approve(address spender, uint256 amount) returns (bool)'];
+
+const ROUTER_ABI = [
+  'function exactInputSingle((address tokenIn, address tokenOut, uint24 fee, address recipient, uint256 amountIn, uint256 amountOutMinimum, uint160 sqrtPriceLimitX96) params) payable returns (uint256 amountOut)',
+];
+
+// ── Test ───────────────────────────────────────────────────────────────────
+
+test('I can always leave', async ({ browser }) => {
+  const config = getStackConfig();
+  const ctx = await createWalletContext(browser, {
+    privateKey: PK,
+    rpcUrl: config.rpcUrl,
+  });
+  const page = await ctx.newPage();
+
+  page.on('console', msg => console.log(`[BROWSER] ${msg.type()}: ${msg.text()}`));
+  page.on('pageerror', err => console.log(`[BROWSER ERROR] ${err.message}`));
+
+  // Re-read from chain on every call so assertions can poll instead of sleeping.
+  const readKrkBalance = (): Promise<bigint> =>
+    getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
+
+  try {
+    // ── 1. Load the web app ──────────────────────────────────────────────
+    console.log('[TEST] Loading web app...');
+    await page.goto(`${config.webAppUrl}/app/`, { waitUntil: 'domcontentloaded' });
+    await expect(page.locator('.navbar-title').first()).toBeVisible({ timeout: 30_000 });
+
+    // Force desktop-mode recalculation (wallet-provider sets screen.width = 1280)
+    await page.evaluate(() => window.dispatchEvent(new Event('resize')));
+    await page.waitForTimeout(2_000);
+
+    // ── 2. Connect wallet via the UI ────────────────────────────────────
+    console.log('[TEST] Connecting wallet...');
+    const connectButton = page.locator('.connect-button--disconnected').first();
+    if (await becomesVisible(connectButton, 5_000)) {
+      await connectButton.click();
+      const connector = page.locator('.connectors-element').first();
+      if (await becomesVisible(connector, 5_000)) {
+        await connector.click();
+      }
+    }
+
+    // Confirm wallet address is displayed in the navbar (app shows first ~6 chars)
+    const addrPrefix = ACCOUNT_ADDRESS.slice(0, 8); // e.g. "0x996550"
+    await expect(page.getByText(new RegExp(addrPrefix, 'i')).first()).toBeVisible({ timeout: 15_000 });
+    console.log('[TEST] Wallet connected');
+
+    // ── 3. Navigate to get-krk and buy KRK via the swap widget ──────────
+    console.log('[TEST] Navigating to get-krk...');
+    await navigateSPA(page, '/app/get-krk');
+
+    // The LocalSwapWidget is rendered when VITE_ENABLE_LOCAL_SWAP=true
+    const swapInput = page.locator('#local-swap-amount');
+    await expect(swapInput).toBeVisible({ timeout: 15_000 });
+    console.log('[TEST] Swap widget visible');
+
+    const krkBefore = await readKrkBalance();
+    console.log(`[TEST] KRK balance before buy: ${krkBefore}`);
+
+    await swapInput.fill('0.1');
+
+    const buyButton = page.locator('.local-swap-widget .swap-button');
+    await expect(buyButton).toBeVisible();
+    console.log('[TEST] Clicking Buy KRK...');
+    await buyButton.click();
+
+    // Wait for the swap to complete (button cycles through "Submitting…" → "Buy KRK")
+    try {
+      await buyButton.filter({ hasText: /Submitting/i }).waitFor({ state: 'visible', timeout: 5_000 });
+      console.log('[TEST] Swap in progress...');
+      await buyButton.filter({ hasText: /Buy KRK/i }).waitFor({ state: 'visible', timeout: 60_000 });
+      console.log('[TEST] Swap completed');
+    } catch {
+      // Swap may have been instant on a fast Anvil node
+      console.log('[TEST] Swap completed (no intermediate state observed)');
+    }
+
+    // ── 4. Verify KRK was received ────────────────────────────────────
+    // Poll the chain rather than sleeping a fixed 2 s: the buy may be mined late.
+    await expect.poll(readKrkBalance, { timeout: BALANCE_POLL_TIMEOUT_MS }).toBeGreaterThan(krkBefore);
+    const krkAfterBuy = await readKrkBalance();
+    console.log(`[TEST] KRK balance after buy: ${krkAfterBuy}`);
+    console.log('[TEST] ✅ KRK received');
+
+    // ── 5. Sell all KRK back (sovereign exit) ───────────────────────────
+    // Encode approve + exactInputSingle calldata in Node.js, then send via
+    // the injected window.ethereum wallet provider (tests/setup/wallet-provider).
+    console.log('[TEST] Encoding sell transactions...');
+    const erc20Iface = new Interface(ERC20_ABI);
+    const routerIface = new Interface(ROUTER_ABI);
+
+    const approveData = erc20Iface.encodeFunctionData('approve', [
+      SWAP_ROUTER,
+      BigInt('0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff'),
+    ]);
+
+    const swapData = routerIface.encodeFunctionData('exactInputSingle', [
+      {
+        tokenIn: config.contracts.Kraiken,
+        tokenOut: WETH,
+        fee: POOL_FEE,
+        recipient: ACCOUNT_ADDRESS,
+        amountIn: krkAfterBuy,
+        amountOutMinimum: 0n,
+        sqrtPriceLimitX96: 0n,
+      },
+    ]);
+
+    // Step 5a: approve KRK to the Uniswap router
+    console.log('[TEST] Approving KRK to router...');
+    await page.evaluate(
+      async ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) => {
+        await (window.ethereum as any).request({
+          method: 'eth_sendTransaction',
+          params: [{ from, to: krkAddr, data, gas: '0x30000' }],
+        });
+      },
+      { krkAddr: config.contracts.Kraiken, data: approveData, from: ACCOUNT_ADDRESS },
+    );
+    // The tx hash is not captured above, so give the approval time to be
+    // mined before the router tries to pull the tokens.
+    await page.waitForTimeout(2_000);
+
+    // Step 5b: swap KRK → WETH
+    console.log('[TEST] Swapping KRK → WETH (exit)...');
+    await page.evaluate(
+      async ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) => {
+        await (window.ethereum as any).request({
+          method: 'eth_sendTransaction',
+          params: [{ from, to: routerAddr, data, gas: '0x80000' }],
+        });
+      },
+      { routerAddr: SWAP_ROUTER, data: swapData, from: ACCOUNT_ADDRESS },
+    );
+
+    // ── 6. Assert KRK was sold ────────────────────────────────────────
+    await expect.poll(readKrkBalance, { timeout: BALANCE_POLL_TIMEOUT_MS }).toBeLessThan(krkAfterBuy);
+    const krkAfterSell = await readKrkBalance();
+    console.log(`[TEST] KRK balance after sell: ${krkAfterSell}`);
+    console.log('[TEST] ✅ Sovereign exit confirmed: KRK sold back to WETH');
+  } finally {
+    await ctx.close();
+  }
+});