From cce3c92120c6e67651348c02b1f71e7c583fd066 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 1 Mar 2026 12:04:35 +0000 Subject: [PATCH] fix: address review feedback on holdout evaluator (#381) - evaluate.sh: add --ignore-scripts to npm install (prevents husky from writing to permanent repo .git/hooks from the ephemeral worktree) - evaluate.sh: change --silent to --quiet (errors still printed on failure) - evaluate.sh: add `npx playwright install chromium` step so browser binaries are present even when the cached revision doesn't match ^1.55.1 - evaluate.sh: set CI=true inline on the playwright invocation so forbidOnly activates and accidental test.only() causes a gate failure - holdout.config.ts: document that CI=true is supplied by evaluate.sh - always-leave.spec.ts: add waitForReceipt() helper; replace fixed waitForTimeout(2000) after eth_sendTransaction with proper receipt polling so tx confirmation is not a timing assumption - always-leave.spec.ts: log the caught error in the button-cycling try/catch so contract reverts surface in the output - always-leave.spec.ts: add console.log when connect button or connector panel is not found to make silent-skip cases diagnosable Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/evaluate.sh | 17 +++++- scripts/harb-evaluator/holdout.config.ts | 3 ++ .../sovereign-exit/always-leave.spec.ts | 54 +++++++++++++------ 3 files changed, 55 insertions(+), 19 deletions(-) diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index 2e14b21..52509ff 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -148,10 +148,21 @@ log "Building kraiken-lib..." || infra_error "kraiken-lib build failed" # ── Install root npm dependencies (needed for npx playwright test) ───── +# --ignore-scripts: prevents husky from touching the permanent repo's .git/hooks +# from inside this ephemeral worktree. +# --quiet: suppresses normal npm output while still printing errors. log "Installing root npm dependencies..." -(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \ +(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --ignore-scripts --quiet) \ || infra_error "npm install failed" +# ── Install Playwright browser binaries ──────────────────────────────── +# Browser binaries are version-pinned per Playwright revision. If the +# revision resolved by ^1.55.1 is not already cached on this host, +# playwright test aborts immediately with a cryptic "Executable doesn't exist" error. +log "Installing Playwright browser binaries..." +(cd "$WORKTREE_DIR" && npx playwright install chromium) \ + || infra_error "playwright install chromium failed" + # ── Boot the stack ───────────────────────────────────────────────────── cd "$WORKTREE_DIR" log "Starting containerised stack (project: $COMPOSE_PROJECT)..." @@ -276,9 +287,11 @@ log " GraphQL: $EVAL_GRAPHQL_URL" log " WebApp: $EVAL_WEBAPP_URL" # ── Run holdout Playwright scenarios ────────────────────────────────── +# CI=true triggers forbidOnly in holdout.config.ts so accidental test.only() +# in any scenario file causes an immediate failure rather than a silent partial run. log "Running holdout scenarios via Playwright..." cd "$WORKTREE_DIR" -if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then +if CI=true npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then log "Gate PASSED" exit 0 else diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts index 54dfb33..e393c52 100644 --- a/scripts/harb-evaluator/holdout.config.ts +++ b/scripts/harb-evaluator/holdout.config.ts @@ -17,6 +17,9 @@ import { defineConfig, devices } from '@playwright/test'; export default defineConfig({ testDir: './scenarios', fullyParallel: false, + // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always + // active in the evaluator context. Accidental test.only() in any scenario file + // causes an immediate failure rather than a silent partial run. forbidOnly: !!process.env.CI, retries: 0, workers: 1, diff --git a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts index 4eee972..b4c677a 100644 --- a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts +++ b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts @@ -45,6 +45,21 @@ async function getKrkBalance(rpcUrl: string, krkAddress: string, account: string return BigInt(result); } +/** + * Poll eth_getTransactionReceipt until the tx is mined or maxAttempts exceeded. + * Anvil with automine mines synchronously before returning the tx hash, so this + * resolves almost immediately. The explicit check guards against Anvil instances + * configured with block intervals or high RPC latency. + */ +async function waitForReceipt(rpcUrl: string, txHash: string, maxAttempts = 20): Promise { + for (let i = 0; i < maxAttempts; i++) { + const receipt = await rpcCall(rpcUrl, 'eth_getTransactionReceipt', [txHash]); + if (receipt !== null) return; + await new Promise(r => setTimeout(r, 500)); + } + throw new Error(`Transaction ${txHash} not mined after ${maxAttempts * 500}ms`); +} + // ── ABI helpers for the sell path ────────────────────────────────────────── const ERC20_ABI = ['function approve(address spender, uint256 amount) returns (bool)']; @@ -86,7 +101,11 @@ test('I can always leave', async ({ browser }) => { if (await connector.isVisible({ timeout: 5_000 })) { await connector.click(); await page.waitForTimeout(2_000); + } else { + console.log('[TEST] WARNING: wallet connector panel not found after clicking Connect'); } + } else { + console.log('[TEST] Connect button not found — wallet may already be connected or UI class changed'); } // Confirm wallet address is displayed in the navbar (app shows first ~6 chars) @@ -119,9 +138,10 @@ test('I can always leave', async ({ browser }) => { console.log('[TEST] Swap in progress...'); await buyButton.filter({ hasText: /Buy KRK/i }).waitFor({ state: 'visible', timeout: 60_000 }); console.log('[TEST] Swap completed'); - } catch { - // Swap may have been instant on a fast Anvil node - console.log('[TEST] Swap completed (no intermediate state observed)'); + } catch (err) { + // On a fast Anvil node the button may cycle too quickly to observe. + // Log the caught value so the root cause is visible if a real error occurred. + console.log(`[TEST] Button state not observed (may be instant): ${err}`); } await page.waitForTimeout(2_000); @@ -155,31 +175,31 @@ test('I can always leave', async ({ browser }) => { }, ]); - // Step 5a: approve KRK to the Uniswap router + // Step 5a: approve KRK to the Uniswap router; wait for on-chain confirmation console.log('[TEST] Approving KRK to router...'); - await page.evaluate( - async ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) => { - await (window.ethereum as any).request({ + const approveTxHash = await page.evaluate( + ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) => + (window.ethereum as any).request({ method: 'eth_sendTransaction', params: [{ from, to: krkAddr, data, gas: '0x30000' }], - }); - }, + }) as Promise, { krkAddr: config.contracts.Kraiken, data: approveData, from: ACCOUNT_ADDRESS }, ); - await page.waitForTimeout(2_000); + await waitForReceipt(config.rpcUrl, approveTxHash); + console.log('[TEST] Approve mined'); - // Step 5b: swap KRK → WETH + // Step 5b: swap KRK → WETH; wait for on-chain confirmation console.log('[TEST] Swapping KRK → WETH (exit)...'); - await page.evaluate( - async ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) => { - await (window.ethereum as any).request({ + const swapTxHash = await page.evaluate( + ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) => + (window.ethereum as any).request({ method: 'eth_sendTransaction', params: [{ from, to: routerAddr, data, gas: '0x80000' }], - }); - }, + }) as Promise, { routerAddr: SWAP_ROUTER, data: swapData, from: ACCOUNT_ADDRESS }, ); - await page.waitForTimeout(2_000); + await waitForReceipt(config.rpcUrl, swapTxHash); + console.log('[TEST] Swap mined'); // ── 6. Assert KRK was sold ──────────────────────────────────────── const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);