From 2ddd8e9ed29d7c6ea1ed9126862137791be52cc3 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sun, 1 Mar 2026 11:24:15 +0000
Subject: [PATCH] fix: Holdout evaluator: Playwright browser-based scenario
 scripts (#381)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace shell-script scenario runner with Playwright. The evaluator now
runs `npx playwright test --config scripts/harb-evaluator/holdout.config.ts`
after booting the stack, using the existing tests/setup/ wallet-provider
and navigation infrastructure.

Changes:
- scripts/harb-evaluator/holdout.config.ts — new Playwright config pointing
  to scenarios/, headless chromium, 5-min timeout per test
- scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts —
  Playwright spec that buys KRK through the LocalSwapWidget then sells it
  back via the injected wallet provider, asserting sovereign exit works
- scripts/harb-evaluator/evaluate.sh — adds root npm install step (needed
  for npx playwright), exports STACK_* env aliases for getStackConfig(),
  replaces shell-script loop with a single playwright test invocation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 scripts/harb-evaluator/evaluate.sh            |  47 ++---
 scripts/harb-evaluator/holdout.config.ts      |  43 ++++
 .../sovereign-exit/always-leave.spec.ts       | 192 ++++++++++++++++++
 3 files changed, 253 insertions(+), 29 deletions(-)
 create mode 100644 scripts/harb-evaluator/holdout.config.ts
 create mode 100644 scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts

diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh
index 40e48f6..2e14b21 100755
--- a/scripts/harb-evaluator/evaluate.sh
+++ b/scripts/harb-evaluator/evaluate.sh
@@ -147,6 +147,11 @@ log "Building kraiken-lib..."
 (cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \
   || infra_error "kraiken-lib build failed"
 
+# ── Install root npm dependencies (needed for npx playwright test) ─────
+log "Installing root npm dependencies..."
+(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \
+  || infra_error "npm install failed"
+
 # ── Boot the stack ─────────────────────────────────────────────────────
 cd "$WORKTREE_DIR"
 log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
@@ -249,7 +254,7 @@ if [[ "$ponder_ready" != "true" ]]; then
   infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
 fi
 
-# ── Export stack endpoints for scenario scripts ────────────────────────
+# ── Export stack endpoints ─────────────────────────────────────────────
 export EVAL_PR_NUMBER="$PR_NUMBER"
 export EVAL_BRANCH="$PR_BRANCH"
 export EVAL_WORKTREE="$WORKTREE_DIR"
@@ -260,38 +265,22 @@ export EVAL_KRAIKEN="$KRAIKEN"
 export EVAL_STAKE="$STAKE"
 export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"
 
+# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly
+export STACK_RPC_URL="$EVAL_RPC_URL"
+export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
+export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"
+
 log "Stack ready. Endpoints:"
 log "  RPC:     $EVAL_RPC_URL"
 log "  GraphQL: $EVAL_GRAPHQL_URL"
 log "  WebApp:  $EVAL_WEBAPP_URL"
 
-# ── Run scenario scripts ───────────────────────────────────────────────
-shopt -s nullglob
-scenario_scripts=("$SCENARIOS_DIR"/*.sh)
-shopt -u nullglob
-
-if [[ ${#scenario_scripts[@]} -eq 0 ]]; then
-  log "No scenario scripts found in $SCENARIOS_DIR"
-  log "Gate PASSED (no scenarios)"
+# ── Run holdout Playwright scenarios ──────────────────────────────────
+log "Running holdout scenarios via Playwright..."
+cd "$WORKTREE_DIR"
+if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
+  log "Gate PASSED"
   exit 0
+else
+  gate_fail "One or more holdout scenarios failed"
 fi
-
-failures=0
-for scenario in "${scenario_scripts[@]}"; do
-  [[ -f "$scenario" ]] || continue
-  scenario_name="$(basename "$scenario")"
-  log "--- Running scenario: $scenario_name ---"
-  if bash "$scenario"; then
-    log "  PASSED: $scenario_name"
-  else
-    log "  FAILED: $scenario_name"
-    failures=$((failures + 1))
-  fi
-done
-
-if (( failures > 0 )); then
-  gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed"
-fi
-
-log "Gate PASSED (${#scenario_scripts[@]} scenario(s))"
-exit 0
diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts
new file mode 100644
index 0000000..54dfb33
--- /dev/null
+++ b/scripts/harb-evaluator/holdout.config.ts
@@ -0,0 +1,43 @@
+import { defineConfig, devices } from '@playwright/test';
+
+/**
+ * Playwright config for holdout scenarios.
+ *
+ * Holdout specs live under scripts/harb-evaluator/scenarios/ and reuse the
+ * existing tests/setup/ infrastructure (wallet-provider, stack, navigate).
+ *
+ * The evaluator boots the stack first, then runs:
+ *   npx playwright test --config scripts/harb-evaluator/holdout.config.ts
+ *
+ * Required env vars (set by evaluate.sh):
+ *   STACK_RPC_URL     – Anvil JSON-RPC endpoint
+ *   STACK_WEBAPP_URL  – Vite dev server URL
+ *   STACK_GRAPHQL_URL – Ponder GraphQL endpoint
+ */
+export default defineConfig({
+  testDir: './scenarios', // every spec under scenarios/ (next to this config) is a holdout scenario
+  fullyParallel: false, // together with workers: 1 below, tests run strictly one at a time
+  forbidOnly: !!process.env.CI, // a stray test.only fails the run, but only when CI is set
+  retries: 0, // no retries: a failing scenario fails the run on its first attempt
+  workers: 1,
+  reporter: 'list',
+  timeout: 5 * 60 * 1000, // 5 min per test — scenarios involve on-chain txns
+  expect: {
+    timeout: 30_000, // default wait for expect() assertions, in ms
+  },
+  use: {
+    headless: true,
+    viewport: { width: 1280, height: 720 },
+    screen: { width: 1280, height: 720 },
+    actionTimeout: 60_000, // cap for a single action (click, fill, …), in ms
+    launchOptions: {
+      args: ['--disable-dev-shm-usage', '--no-sandbox'], // NOTE(review): presumably for containerised Chromium — confirm
+    },
+  },
+  projects: [
+    {
+      name: 'chromium',
+      use: { ...devices['Desktop Chrome'] },
+    },
+  ],
+});
diff --git a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
new file mode 100644
index 0000000..4eee972
--- /dev/null
+++ b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
@@ -0,0 +1,192 @@
+/**
+ * Holdout scenario: sovereign-exit / always-leave
+ *
+ * Verifies the core protocol invariant: a user can ALWAYS exit their position
+ * by buying KRK through the in-app swap widget and then selling it back.
+ *
+ * Reuses tests/setup/ infrastructure — no new wallet or navigation helpers.
+ *
+ * Account 5 from the Anvil test mnemonic is used so it never collides with
+ * the deploy scripts (which use accounts 0–1).
+ */
+import { expect, test } from '@playwright/test';
+import { Interface, Wallet } from 'ethers';
+import { createWalletContext } from '../../../../tests/setup/wallet-provider';
+import { getStackConfig } from '../../../../tests/setup/stack';
+import { navigateSPA } from '../../../../tests/setup/navigate';
+
+// Anvil account 5 — never used by deploy or txnBot
+const PK = '0x8b3a350cf5c34c9194ca85829a2df0ec3153be0318b5e2d3348e872092edffba';
+const ACCOUNT = new Wallet(PK); // no provider attached; used in this file only to derive the address
+const ACCOUNT_ADDRESS = ACCOUNT.address;
+
+// Infrastructure addresses that are stable across Anvil forks of Base Sepolia
+const SWAP_ROUTER = '0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4'; // spender of the approve and target of the exit swap
+const WETH = '0x4200000000000000000000000000000000000006'; // tokenOut of the exit swap
+const POOL_FEE = 10_000; // 1% tier used by KRAIKEN pool
+
+// ── RPC helpers (Node.js context) ──────────────────────────────────────────
+
+/** POSTs one JSON-RPC request to `rpcUrl`; resolves with its `result`, throws on an HTTP or RPC error. */
+async function rpcCall(rpcUrl: string, method: string, params: unknown[]): Promise<unknown> {
+  const body = JSON.stringify({ jsonrpc: '2.0', id: Date.now(), method, params });
+  const resp = await fetch(rpcUrl, { method: 'POST', headers: { 'content-type': 'application/json' }, body });
+  // A non-2xx reply may not be JSON at all; report the status instead of an opaque parse error.
+  if (!resp.ok) throw new Error(`RPC ${method}: HTTP ${resp.status}`);
+  const payload = (await resp.json()) as { result?: unknown; error?: { message?: string } };
+  if (payload.error) throw new Error(`RPC ${method}: ${payload.error.message}`);
+  return payload.result;
+}
+
+/** Reads `account`'s token balance with a raw `balanceOf(address)` eth_call against `krkAddress`. */
+async function getKrkBalance(rpcUrl: string, krkAddress: string, account: string): Promise<bigint> {
+  // Calldata: 4-byte balanceOf(address) selector, then the address left-padded to 64 hex digits.
+  const data = '0x70a08231' + account.slice(2).padStart(64, '0');
+  return BigInt((await rpcCall(rpcUrl, 'eth_call', [{ to: krkAddress, data }, 'latest'])) as string);
+}
+
+// ── ABI helpers for the sell path ──────────────────────────────────────────
+
+const ERC20_ABI = ['function approve(address spender, uint256 amount) returns (bool)']; // parsed by ethers `Interface` to encode the approve call
+
+const ROUTER_ABI = [ // tuple field names must match the params object passed to encodeFunctionData in the test
+  'function exactInputSingle((address tokenIn, address tokenOut, uint24 fee, address recipient, uint256 amountIn, uint256 amountOutMinimum, uint160 sqrtPriceLimitX96) params) payable returns (uint256 amountOut)',
+];
+
+// ── Test ───────────────────────────────────────────────────────────────────
+
+/**
+ * End-to-end exit check: connect the injected wallet, buy KRK through the swap widget, then
+ * approve and sell the whole balance via the router. On-chain KRK balances are the pass/fail oracle.
+ */
+test('I can always leave', async ({ browser }) => {
+  const config = getStackConfig();
+  const ctx = await createWalletContext(browser, { privateKey: PK, rpcUrl: config.rpcUrl });
+  const page = await ctx.newPage();
+
+  page.on('console', msg => console.log(`[BROWSER] ${msg.type()}: ${msg.text()}`));
+  page.on('pageerror', err => console.log(`[BROWSER ERROR] ${err.message}`));
+
+  try {
+    // ── 1. Load the web app ──────────────────────────────────────────────
+    console.log('[TEST] Loading web app...');
+    await page.goto(`${config.webAppUrl}/app/`, { waitUntil: 'domcontentloaded' });
+    await expect(page.locator('.navbar-title').first()).toBeVisible({ timeout: 30_000 });
+
+    // Force desktop-mode recalculation (wallet-provider sets screen.width = 1280)
+    await page.evaluate(() => window.dispatchEvent(new Event('resize')));
+    await page.waitForTimeout(2_000);
+
+    // ── 2. Connect wallet via the UI ────────────────────────────────────
+    console.log('[TEST] Connecting wallet...');
+    const connectButton = page.locator('.connect-button--disconnected').first();
+    // Locator.isVisible() returns immediately and ignores `timeout`; waitFor() really waits (up to 5 s here).
+    const appears = (target: typeof connectButton) => target.waitFor({ state: 'visible', timeout: 5_000 }).then(() => true, () => false);
+    if (await appears(connectButton)) {
+      await connectButton.click();
+      await page.waitForTimeout(1_000);
+      const connector = page.locator('.connectors-element').first();
+      if (await appears(connector)) {
+        await connector.click();
+        await page.waitForTimeout(2_000);
+      }
+    }
+
+    // Confirm the wallet address is displayed: match "0x" + its first 6 hex digits, case-insensitively
+    const addrPrefix = ACCOUNT_ADDRESS.slice(0, 8); // e.g. "0x996550"
+    await expect(page.getByText(new RegExp(addrPrefix, 'i')).first()).toBeVisible({ timeout: 15_000 });
+    console.log('[TEST] Wallet connected');
+
+    // ── 3. Navigate to get-krk and buy KRK via the swap widget ──────────
+    console.log('[TEST] Navigating to get-krk...');
+    await navigateSPA(page, '/app/get-krk');
+
+    // The LocalSwapWidget is rendered when VITE_ENABLE_LOCAL_SWAP=true
+    const swapInput = page.locator('#local-swap-amount');
+    await expect(swapInput).toBeVisible({ timeout: 15_000 });
+    console.log('[TEST] Swap widget visible');
+
+    const krkBefore = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
+    console.log(`[TEST] KRK balance before buy: ${krkBefore}`);
+
+    await swapInput.fill('0.1');
+
+    const buyButton = page.locator('.local-swap-widget .swap-button');
+    await expect(buyButton).toBeVisible();
+    console.log('[TEST] Clicking Buy KRK...');
+    await buyButton.click();
+
+    // Wait for the swap to complete (button cycles through "Submitting…" → "Buy KRK")
+    try {
+      await buyButton.filter({ hasText: /Submitting/i }).waitFor({ state: 'visible', timeout: 5_000 });
+      console.log('[TEST] Swap in progress...');
+      await buyButton.filter({ hasText: /Buy KRK/i }).waitFor({ state: 'visible', timeout: 60_000 });
+      console.log('[TEST] Swap completed');
+    } catch {
+      // Either wait timed out (e.g. the swap was instant); the balance assertion in step 4 is the real check
+      console.log('[TEST] Swap completed (no intermediate state observed)');
+    }
+    await page.waitForTimeout(2_000);
+
+    // ── 4. Verify KRK was received ────────────────────────────────────
+    const krkAfterBuy = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
+    console.log(`[TEST] KRK balance after buy: ${krkAfterBuy}`);
+    expect(krkAfterBuy).toBeGreaterThan(krkBefore);
+    console.log('[TEST] ✅ KRK received');
+
+    // ── 5. Sell all KRK back (sovereign exit) ───────────────────────────
+    // Encode approve + exactInputSingle calldata in Node.js, then send via
+    // the injected window.ethereum wallet provider (tests/setup/wallet-provider).
+    console.log('[TEST] Encoding sell transactions...');
+    const erc20Iface = new Interface(ERC20_ABI);
+    const routerIface = new Interface(ROUTER_ABI);
+
+    const approveData = erc20Iface.encodeFunctionData('approve', [SWAP_ROUTER, BigInt('0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff')]);
+
+    const swapData = routerIface.encodeFunctionData('exactInputSingle', [
+      {
+        tokenIn: config.contracts.Kraiken,
+        tokenOut: WETH,
+        fee: POOL_FEE,
+        recipient: ACCOUNT_ADDRESS,
+        amountIn: krkAfterBuy,
+        amountOutMinimum: 0n,
+        sqrtPriceLimitX96: 0n,
+      },
+    ]);
+
+    // Step 5a: approve KRK to the Uniswap router
+    console.log('[TEST] Approving KRK to router...');
+    await page.evaluate(
+      async ({ krkAddr, data, from }: { krkAddr: string; data: string; from: string }) => {
+        await (window.ethereum as any).request({
+          method: 'eth_sendTransaction',
+          params: [{ from, to: krkAddr, data, gas: '0x30000' }],
+        });
+      },
+      { krkAddr: config.contracts.Kraiken, data: approveData, from: ACCOUNT_ADDRESS },
+    );
+    await page.waitForTimeout(2_000);
+
+    // Step 5b: swap KRK → WETH
+    console.log('[TEST] Swapping KRK → WETH (exit)...');
+    await page.evaluate(
+      async ({ routerAddr, data, from }: { routerAddr: string; data: string; from: string }) => {
+        await (window.ethereum as any).request({
+          method: 'eth_sendTransaction',
+          params: [{ from, to: routerAddr, data, gas: '0x80000' }],
+        });
+      },
+      { routerAddr: SWAP_ROUTER, data: swapData, from: ACCOUNT_ADDRESS },
+    );
+    await page.waitForTimeout(2_000);
+
+    // ── 6. Assert KRK was sold ────────────────────────────────────────
+    const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
+    console.log(`[TEST] KRK balance after sell: ${krkAfterSell}`);
+    expect(krkAfterSell).toBeLessThan(krkAfterBuy);
+    console.log('[TEST] ✅ Sovereign exit confirmed: KRK sold back to WETH');
+  } finally {
+    await ctx.close();
+  }
+});