From 69f6a87e2007949ad0b15150f54517a5d56ec823 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 19:57:34 +0000 Subject: [PATCH 1/4] Move holdout scenarios to separate repo - Updated holdout.config.ts to use HOLDOUT_SCENARIOS_DIR env var - Modified evaluate.sh to clone harb-holdout-scenarios repo at runtime - Deleted scripts/harb-evaluator/scenarios/ directory - Added .holdout-scenarios/ to .gitignore - Holdout scenarios are now cloned into .holdout-scenarios/ during evaluation - This prevents dev-agent from seeing the holdout test set --- .gitignore | 3 + scripts/harb-evaluator/evaluate.sh | 12 +++ scripts/harb-evaluator/holdout.config.ts | 14 ++-- .../sovereign-exit/always-leave.spec.ts | 80 ------------------- 4 files changed, 23 insertions(+), 86 deletions(-) delete mode 100644 scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts diff --git a/.gitignore b/.gitignore index 0a28df3..5fece8b 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,6 @@ services/ponder/.ponder/ # Temporary files /tmp/ logs/ + +# Holdout scenarios (cloned at runtime by evaluate.sh) +.holdout-scenarios/ diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index 52509ff..e691637 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -163,6 +163,18 @@ log "Installing Playwright browser binaries..." (cd "$WORKTREE_DIR" && npx playwright install chromium) \ || infra_error "playwright install chromium failed" +# ── Clone holdout scenarios ──────────────────────────────────────────── +# The holdout scenarios live in a separate repo so the dev-agent cannot +# see them. Clone into .holdout-scenarios/ inside the worktree. +readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git" +readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios" +log "Cloning holdout scenarios from $HOLDOUT_REPO..." +git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \ + || infra_error "Failed to clone holdout scenarios repo" + +# Export the scenarios directory for holdout.config.ts +export HOLDOUT_SCENARIOS_DIR=".holdout-scenarios/scenarios" + # ── Boot the stack ───────────────────────────────────────────────────── cd "$WORKTREE_DIR" log "Starting containerised stack (project: $COMPOSE_PROJECT)..." diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts index e393c52..1409390 100644 --- a/scripts/harb-evaluator/holdout.config.ts +++ b/scripts/harb-evaluator/holdout.config.ts @@ -3,19 +3,21 @@ import { defineConfig, devices } from '@playwright/test'; /** * Playwright config for holdout scenarios. * - * Holdout specs live under scripts/harb-evaluator/scenarios/ and reuse the - * existing tests/setup/ infrastructure (wallet-provider, stack, navigate). + * Holdout specs are cloned from the separate harb-holdout-scenarios repo + * into .holdout-scenarios/ by evaluate.sh and reuse the existing tests/setup/ + * infrastructure (wallet-provider, stack, navigate). * * The evaluator boots the stack first, then runs: * npx playwright test --config scripts/harb-evaluator/holdout.config.ts * * Required env vars (set by evaluate.sh): - * STACK_RPC_URL – Anvil JSON-RPC endpoint - * STACK_WEBAPP_URL – Vite dev server URL - * STACK_GRAPHQL_URL – Ponder GraphQL endpoint + * STACK_RPC_URL – Anvil JSON-RPC endpoint + * STACK_WEBAPP_URL – Vite dev server URL + * STACK_GRAPHQL_URL – Ponder GraphQL endpoint + * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios (default: scripts/harb-evaluator/scenarios) */ export default defineConfig({ - testDir: './scenarios', + testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios', fullyParallel: false, // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always // active in the evaluator context. Accidental test.only() in any scenario file diff --git a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts deleted file mode 100644 index 0351b8c..0000000 --- a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Holdout scenario: sovereign-exit / always-leave - * - * Verifies the core protocol invariant: a user can ALWAYS exit their position - * by buying KRK through the in-app swap widget and then selling it back. - * - * Reuses tests/setup/ infrastructure and the shared helpers in - * scripts/harb-evaluator/helpers/ — no inline wallet, swap, or balance logic. - * - * Account 0 from the Anvil test mnemonic is used (same as e2e tests). - * Deploy scripts also use Account 0, but each test run gets a fresh Anvil stack, - * so no collision occurs. - */ -import { expect, test } from '@playwright/test'; -import { parseEther, Wallet } from 'ethers'; -import { createWalletContext } from '../../../../tests/setup/wallet-provider'; -import { getStackConfig } from '../../../../tests/setup/stack'; -import { connectWallet, getKrkBalance } from '../../helpers/wallet'; -import { buyKrk, sellAllKrk } from '../../helpers/swap'; - -// Anvil account 0 — same as e2e tests (deploy uses it but state is reset per stack) -const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80'; -const ACCOUNT_ADDRESS = new Wallet(PK).address; - -test('I can always leave', async ({ browser }) => { - const config = getStackConfig(); - const ctx = await createWalletContext(browser, { - privateKey: PK, - rpcUrl: config.rpcUrl, - }); - const page = await ctx.newPage(); - - page.on('console', msg => console.log(`[BROWSER] ${msg.type()}: ${msg.text()}`)); - page.on('pageerror', err => console.log(`[BROWSER ERROR] ${err.message}`)); - - try { - // ── 1. Load the web app ────────────────────────────────────────────── - console.log('[TEST] Loading web app...'); - await page.goto(`${config.webAppUrl}/app/`, { waitUntil: 'domcontentloaded' }); - await expect(page.locator('.navbar-title').first()).toBeVisible({ timeout: 30_000 }); - - // ── 2. Connect wallet via the UI ───────────────────────────────────── - console.log('[TEST] Connecting wallet...'); - await connectWallet(page); - - // ── 3. Buy KRK via the get-krk page swap widget ─────────────────────── - const krkBefore = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS); - console.log(`[TEST] KRK balance before buy: ${krkBefore}`); - - await buyKrk(page, '0.1'); - - const krkAfterBuy = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS); - console.log(`[TEST] KRK balance after buy: ${krkAfterBuy}`); - expect(krkAfterBuy).toBeGreaterThan(krkBefore); - console.log('[TEST] ✅ KRK received'); - - // ── 4. Sell all KRK back (sovereign exit) ──────────────────────────── - const wethReceived = await sellAllKrk(page, { - rpcUrl: config.rpcUrl, - krkAddress: config.contracts.Kraiken, - accountAddress: ACCOUNT_ADDRESS, - }); - - // ── 5. Assert KRK was sold ──────────────────────────────────────────── - const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS); - console.log(`[TEST] KRK balance after sell: ${krkAfterSell}`); - expect(krkAfterSell).toBeLessThan(krkAfterBuy); - console.log('[TEST] ✅ Sovereign exit confirmed: KRK sold back to WETH'); - - // ── 6. Assert reasonable slippage (at least 90% of ETH spent) ───────── - const ethSpent = parseEther('0.1'); - const minExpected = parseEther('0.09'); // 90% of 0.1 ETH - expect(wethReceived).toBeGreaterThanOrEqual(minExpected); - const slippagePercent = ((Number(wethReceived) / Number(ethSpent)) * 100).toFixed(2); - console.log(`[TEST] ✅ Reasonable slippage: received ${wethReceived} WETH for 0.1 ETH spent (${slippagePercent}%)`); - - } finally { - await ctx.close(); - } -}); From f6fe37dcc01eab0a151405d89848e501b2c91d40 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 20:59:32 +0000 Subject: [PATCH 2/4] fix: address PR #438 review findings - Fix HOLDOUT_SCENARIOS_DIR to use absolute path (resolves Playwright testDir issue) - Remove dead SCENARIOS_DIR variable - Replace fallback with explicit error in holdout.config.ts - Add SSH key requirement comment --- scripts/harb-evaluator/evaluate.sh | 5 +++-- scripts/harb-evaluator/holdout.config.ts | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index e691637..303c75e 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -24,7 +24,6 @@ readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}" readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}" readonly REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" readonly EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)" -readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios" readonly ANVIL_TIMEOUT=120 # seconds to wait for anvil healthy readonly BOOTSTRAP_TIMEOUT=180 # seconds to wait for bootstrap container exit @@ -166,6 +165,7 @@ log "Installing Playwright browser binaries..." # ── Clone holdout scenarios ──────────────────────────────────────────── # The holdout scenarios live in a separate repo so the dev-agent cannot # see them. Clone into .holdout-scenarios/ inside the worktree. +# NOTE: Requires SSH key with read access to johba/harb-holdout-scenarios on Codeberg. readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git" readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios" log "Cloning holdout scenarios from $HOLDOUT_REPO..." @@ -173,7 +173,8 @@ git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \ || infra_error "Failed to clone holdout scenarios repo" # Export the scenarios directory for holdout.config.ts -export HOLDOUT_SCENARIOS_DIR=".holdout-scenarios/scenarios" +# Must be absolute path — Playwright resolves testDir relative to config file's directory. +export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios" # ── Boot the stack ───────────────────────────────────────────────────── cd "$WORKTREE_DIR" diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts index 1409390..562fef1 100644 --- a/scripts/harb-evaluator/holdout.config.ts +++ b/scripts/harb-evaluator/holdout.config.ts @@ -14,10 +14,16 @@ import { defineConfig, devices } from '@playwright/test'; * STACK_RPC_URL – Anvil JSON-RPC endpoint * STACK_WEBAPP_URL – Vite dev server URL * STACK_GRAPHQL_URL – Ponder GraphQL endpoint - * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios (default: scripts/harb-evaluator/scenarios) + * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios */ + +const scenariosDir = process.env.HOLDOUT_SCENARIOS_DIR; +if (!scenariosDir) { + throw new Error('HOLDOUT_SCENARIOS_DIR env var required — run via evaluate.sh'); +} + export default defineConfig({ - testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios', + testDir: scenariosDir, fullyParallel: false, // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always // active in the evaluator context. Accidental test.only() in any scenario file From 106521af2ec15d39c3006a384a78a4a54db0671c Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 4 Mar 2026 06:16:16 +0000 Subject: [PATCH 3/4] ci: retrigger after Codeberg OAuth refresh From 7fc47d739a82fbaa1d28e95eb4e658688b86e3f6 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 4 Mar 2026 08:09:26 +0000 Subject: [PATCH 4/4] ci: retrigger