fix: Holdout evaluator: Playwright browser-based scenario scripts (#381)

Replace shell-script scenario runner with Playwright. The evaluator now
runs `npx playwright test --config scripts/harb-evaluator/holdout.config.ts`
after booting the stack, using the existing tests/setup/ wallet-provider
and navigation infrastructure.

Changes:
- scripts/harb-evaluator/holdout.config.ts — new Playwright config pointing
  to scenarios/, headless chromium, 5-min timeout per test
- scripts/harb-evaluator/scenarios/sovereign-exit/always-leave.spec.ts —
  Playwright spec that buys KRK through the LocalSwapWidget then sells it
  back via the injected wallet provider, asserting sovereign exit works
- scripts/harb-evaluator/evaluate.sh — adds root npm install step (needed
  for npx playwright), exports STACK_* env aliases for getStackConfig(),
  replaces shell-script loop with a single playwright test invocation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-01 11:24:15 +00:00
parent 910a02a7cb
commit 2ddd8e9ed2
3 changed files with 253 additions and 29 deletions

View file

@ -147,6 +147,11 @@ log "Building kraiken-lib..."
# Build kraiken-lib from the worktree. The subshell keeps the cd from
# changing this script's working directory; if either the cd or the build
# script fails, the failure is handed to infra_error.
(cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \
|| infra_error "kraiken-lib build failed"
# ── Install root npm dependencies (needed for npx playwright test) ─────
# Installed from the worktree root inside a subshell. The --no-audit,
# --no-fund and --silent flags only reduce npm's output/extra work; any
# failure of the cd or the install is handed to infra_error.
log "Installing root npm dependencies..."
(cd "$WORKTREE_DIR" && npm install --no-audit --no-fund --silent) \
|| infra_error "npm install failed"
# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"
log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
@ -249,7 +254,7 @@ if [[ "$ponder_ready" != "true" ]]; then
infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
fi
# ── Export stack endpoints for scenario scripts ────────────────────────
# ── Export stack endpoints ─────────────────────────────────────────────
# Publish the PR number, PR branch and worktree path to child processes
# through EVAL_* environment variables.
export EVAL_PR_NUMBER="$PR_NUMBER"
export EVAL_BRANCH="$PR_BRANCH"
export EVAL_WORKTREE="$WORKTREE_DIR"
@ -260,38 +265,22 @@ export EVAL_KRAIKEN="$KRAIKEN"
# Publish the STAKE and LIQUIDITY_MANAGER values to child processes under
# EVAL_* names.
export EVAL_STAKE="$STAKE"
export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"
# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly
# (each STACK_* variable is a copy of the matching EVAL_* endpoint URL).
export STACK_RPC_URL="$EVAL_RPC_URL"
export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"
# Record the endpoints in the log so a run can be traced back to the stack
# it talked to.
log "Stack ready. Endpoints:"
log " RPC: $EVAL_RPC_URL"
log " GraphQL: $EVAL_GRAPHQL_URL"
log " WebApp: $EVAL_WEBAPP_URL"
# ── Run scenario scripts ───────────────────────────────────────────────
shopt -s nullglob
scenario_scripts=("$SCENARIOS_DIR"/*.sh)
shopt -u nullglob
if [[ ${#scenario_scripts[@]} -eq 0 ]]; then
log "No scenario scripts found in $SCENARIOS_DIR"
log "Gate PASSED (no scenarios)"
# ── Run holdout Playwright scenarios ──────────────────────────────────
# The --config path below is relative, so Playwright must be started from
# the worktree root. A failed cd is an infrastructure problem, not a
# scenario failure, so it is reported through infra_error like the other
# setup steps instead of letting npx run from the wrong directory.
log "Running holdout scenarios via Playwright..."
cd "$WORKTREE_DIR" || infra_error "cannot cd to worktree: $WORKTREE_DIR"
# The exit status of the Playwright run decides the gate: zero passes the
# gate and exits 0; anything else is handed to gate_fail.
if npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
  log "Gate PASSED"
  exit 0
else
  gate_fail "One or more holdout scenarios failed"
fi
# Run every collected scenario script and count failures rather than
# stopping at the first one, so the final message can report how many of
# the scenarios failed.
failures=0
for scenario in "${scenario_scripts[@]}"; do
# Skip array entries that are not regular files.
[[ -f "$scenario" ]] || continue
scenario_name="$(basename "$scenario")"
log "--- Running scenario: $scenario_name ---"
# Each scenario runs in its own bash process; its exit status alone
# determines PASSED vs FAILED.
if bash "$scenario"; then
log " PASSED: $scenario_name"
else
log " FAILED: $scenario_name"
failures=$((failures + 1))
fi
done
# Any failure fails the gate, reporting failed/total counts.
if (( failures > 0 )); then
gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed"
fi
# Reached only when no scenario failed.
log "Gate PASSED (${#scenario_scripts[@]} scenario(s))"
exit 0