fix: Holdout evaluator: Playwright browser-based scenario scripts (#381)

Replace shell-script scenario runner with Playwright. The evaluator now runs `npx playwright test --config scripts/harb-evaluator/holdout.config.ts` after booting the stack, using the existing tests/setup/ wallet-provider and navigation infrastructure.

Changes:
- scripts/harb-evaluator/holdout.config.ts — new Playwright config pointing to scenarios/, headless chromium, 5-min timeout per test
- scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts — Playwright spec that buys KRK through the LocalSwapWidget then sells it back via the injected wallet provider, asserting sovereign exit works
- scripts/harb-evaluator/evaluate.sh — adds root npm install step (needed for npx playwright), exports STACK_* env aliases for getStackConfig(), replaces shell-script loop with a single playwright test invocation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-01 11:24:15 +00:00 · 2026-03-01 11:24:15 +00:00 · 2ddd8e9ed2
commit 2ddd8e9ed2
parent 910a02a7cb
3 changed files with 253 additions and 29 deletions
--- a/scripts/harb-evaluator/evaluate.sh
+++ b/scripts/harb-evaluator/evaluate.sh
@ -147,6 +147,11 @@ log "Building kraiken-lib..."
 (cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \
  || infra_error "kraiken-lib build failed"

+# ── Install root npm dependencies (needed for npx playwright test) ─────
+log "Installing root npm dependencies..."
+(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \
+  || infra_error "npm install failed"
+
 # ── Boot the stack ─────────────────────────────────────────────────────
 cd "$WORKTREE_DIR"
 log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
@ -249,7 +254,7 @@ if [[ "$ponder_ready" != "true" ]]; then
  infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
 fi

-# ── Export stack endpoints for scenario scripts ────────────────────────
+# ── Export stack endpoints ─────────────────────────────────────────────
 export EVAL_PR_NUMBER="$PR_NUMBER"
 export EVAL_BRANCH="$PR_BRANCH"
 export EVAL_WORKTREE="$WORKTREE_DIR"
@ -260,38 +265,22 @@ export EVAL_KRAIKEN="$KRAIKEN"
 export EVAL_STAKE="$STAKE"
 export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"

+# Alias the EVAL_* endpoints as STACK_* so getStackConfig() in tests/setup/stack.ts resolves them
+export STACK_RPC_URL="$EVAL_RPC_URL"
+export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
+export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"
+
 log "Stack ready. Endpoints:"
 log "  RPC:     $EVAL_RPC_URL"
 log "  GraphQL: $EVAL_GRAPHQL_URL"
 log "  WebApp:  $EVAL_WEBAPP_URL"

-# ── Run scenario scripts ───────────────────────────────────────────────
-shopt -s nullglob
-scenario_scripts=("$SCENARIOS_DIR"/*.sh)
-shopt -u nullglob
-
-if [[ ${#scenario_scripts[@]} -eq 0 ]]; then
-  log "No scenario scripts found in $SCENARIOS_DIR"
-  log "Gate PASSED (no scenarios)"
+# ── Run holdout Playwright scenarios (exit status decides the gate) ───
+log "Running holdout scenarios via Playwright..."
+cd "$WORKTREE_DIR" || infra_error "Cannot cd to worktree: $WORKTREE_DIR"  # --config path below is worktree-relative
+if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
+  log "Gate PASSED"
  exit 0
+else
+  gate_fail "One or more holdout scenarios failed"
 fi
-
-failures=0
-for scenario in "${scenario_scripts[@]}"; do
-  [[ -f "$scenario" ]] || continue
-  scenario_name="$(basename "$scenario")"
-  log "--- Running scenario: $scenario_name ---"
-  if bash "$scenario"; then
-    log "  PASSED: $scenario_name"
-  else
-    log "  FAILED: $scenario_name"
-    failures=$((failures + 1))
-  fi
-done
-
-if (( failures > 0 )); then
-  gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed"
-fi
-
-log "Gate PASSED (${#scenario_scripts[@]} scenario(s))"
-exit 0