Move holdout scenarios to separate repo

- Updated holdout.config.ts to use HOLDOUT_SCENARIOS_DIR env var
- Modified evaluate.sh to clone harb-holdout-scenarios repo at runtime
- Deleted scripts/harb-evaluator/scenarios/ directory
- Added .holdout-scenarios/ to .gitignore
- Holdout scenarios are now cloned into .holdout-scenarios/ during evaluation
- This prevents dev-agent from seeing the holdout test set
2026-03-03 19:57:34 +00:00 · 2026-03-03 19:57:34 +00:00 · 69f6a87e20
commit 69f6a87e20
parent b2594a28b3
4 changed files with 23 additions and 86 deletions
--- a/scripts/harb-evaluator/evaluate.sh
+++ b/scripts/harb-evaluator/evaluate.sh
@ -163,6 +163,18 @@ log "Installing Playwright browser binaries..."
 (cd "$WORKTREE_DIR" && npx playwright install chromium) \
  || infra_error "playwright install chromium failed"

+# ── Clone holdout scenarios ────────────────────────────────────────────
+# Holdout scenarios live in a separate repo, hidden from the dev-agent; clone them into the worktree.
+readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git"
+readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios"
+log "Cloning holdout scenarios from $HOLDOUT_REPO..."
+git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \
+  || infra_error "Failed to clone holdout scenarios repo"
+
+# Export the scenarios directory for holdout.config.ts, anchored at HOLDOUT_DIR: Playwright resolves a
+# relative testDir against the config file's directory, not the cwd (assumes WORKTREE_DIR is absolute — confirm).
+export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios"
+
 # ── Boot the stack ─────────────────────────────────────────────────────
 cd "$WORKTREE_DIR"
 log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
--- a/scripts/harb-evaluator/holdout.config.ts
+++ b/scripts/harb-evaluator/holdout.config.ts
@ -3,19 +3,21 @@ import { defineConfig, devices } from '@playwright/test';
 /**
 * Playwright config for holdout scenarios.
 *
- * Holdout specs live under scripts/harb-evaluator/scenarios/ and reuse the
- * existing tests/setup/ infrastructure (wallet-provider, stack, navigate).
+ * Holdout specs are cloned from the separate harb-holdout-scenarios repo
+ * into .holdout-scenarios/ by evaluate.sh and reuse the existing tests/setup/
+ * infrastructure (wallet-provider, stack, navigate).
 *
 * The evaluator boots the stack first, then runs:
 *   npx playwright test --config scripts/harb-evaluator/holdout.config.ts
 *
 * Required env vars (set by evaluate.sh):
- *   STACK_RPC_URL     – Anvil JSON-RPC endpoint
- *   STACK_WEBAPP_URL  – Vite dev server URL
- *   STACK_GRAPHQL_URL – Ponder GraphQL endpoint
+ *   STACK_RPC_URL            – Anvil JSON-RPC endpoint
+ *   STACK_WEBAPP_URL         – Vite dev server URL
+ *   STACK_GRAPHQL_URL        – Ponder GraphQL endpoint
+ *   HOLDOUT_SCENARIOS_DIR    – Path to cloned scenarios; relative values resolve against this config's directory (default: ./scenarios)
 */
 export default defineConfig({
-  testDir: './scenarios',
+  testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios',
  fullyParallel: false,
  // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always
  // active in the evaluator context. Accidental test.only() in any scenario file
--- a/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
+++ b/scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts
@ -1,80 +0,0 @@
-/**
- * Holdout scenario: sovereign-exit / always-leave
- *
- * Verifies the core protocol invariant: a user can ALWAYS exit their position
- * by buying KRK through the in-app swap widget and then selling it back.
- *
- * Reuses tests/setup/ infrastructure and the shared helpers in
- * scripts/harb-evaluator/helpers/ — no inline wallet, swap, or balance logic.
- *
- * Account 0 from the Anvil test mnemonic is used (same as e2e tests).
- * Deploy scripts also use Account 0, but each test run gets a fresh Anvil stack,
- * so no collision occurs.
- */
-import { expect, test } from '@playwright/test';
-import { parseEther, Wallet } from 'ethers';
-import { createWalletContext } from '../../../../tests/setup/wallet-provider';
-import { getStackConfig } from '../../../../tests/setup/stack';
-import { connectWallet, getKrkBalance } from '../../helpers/wallet';
-import { buyKrk, sellAllKrk } from '../../helpers/swap';
-
-// Anvil account 0 — same as e2e tests (deploy uses it but state is reset per stack)
-const PK = '0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80';
-const ACCOUNT_ADDRESS = new Wallet(PK).address;
-
-test('I can always leave', async ({ browser }) => {
-  const config = getStackConfig();
-  const ctx = await createWalletContext(browser, {
-    privateKey: PK,
-    rpcUrl: config.rpcUrl,
-  });
-  const page = await ctx.newPage();
-
-  page.on('console', msg => console.log(`[BROWSER] ${msg.type()}: ${msg.text()}`));
-  page.on('pageerror', err => console.log(`[BROWSER ERROR] ${err.message}`));
-
-  try {
-    // ── 1. Load the web app ──────────────────────────────────────────────
-    console.log('[TEST] Loading web app...');
-    await page.goto(`${config.webAppUrl}/app/`, { waitUntil: 'domcontentloaded' });
-    await expect(page.locator('.navbar-title').first()).toBeVisible({ timeout: 30_000 });
-
-    // ── 2. Connect wallet via the UI ─────────────────────────────────────
-    console.log('[TEST] Connecting wallet...');
-    await connectWallet(page);
-
-    // ── 3. Buy KRK via the get-krk page swap widget ───────────────────────
-    const krkBefore = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
-    console.log(`[TEST] KRK balance before buy: ${krkBefore}`);
-
-    await buyKrk(page, '0.1');
-
-    const krkAfterBuy = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
-    console.log(`[TEST] KRK balance after buy: ${krkAfterBuy}`);
-    expect(krkAfterBuy).toBeGreaterThan(krkBefore);
-    console.log('[TEST] ✅ KRK received');
-
-    // ── 4. Sell all KRK back (sovereign exit) ────────────────────────────
-    const wethReceived = await sellAllKrk(page, {
-      rpcUrl: config.rpcUrl,
-      krkAddress: config.contracts.Kraiken,
-      accountAddress: ACCOUNT_ADDRESS,
-    });
-
-    // ── 5. Assert KRK was sold ────────────────────────────────────────────
-    const krkAfterSell = await getKrkBalance(config.rpcUrl, config.contracts.Kraiken, ACCOUNT_ADDRESS);
-    console.log(`[TEST] KRK balance after sell: ${krkAfterSell}`);
-    expect(krkAfterSell).toBeLessThan(krkAfterBuy);
-    console.log('[TEST] ✅ Sovereign exit confirmed: KRK sold back to WETH');
-
-    // ── 6. Assert reasonable slippage (at least 90% of ETH spent) ─────────
-    const ethSpent = parseEther('0.1');
-    const minExpected = parseEther('0.09'); // 90% of 0.1 ETH
-    expect(wethReceived).toBeGreaterThanOrEqual(minExpected);
-    const slippagePercent = ((Number(wethReceived) / Number(ethSpent)) * 100).toFixed(2);
-    console.log(`[TEST] ✅ Reasonable slippage: received ${wethReceived} WETH for 0.1 ETH spent (${slippagePercent}%)`);
-
-  } finally {
-    await ctx.close();
-  }
-});