From f6fe37dcc01eab0a151405d89848e501b2c91d40 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 3 Mar 2026 20:59:32 +0000 Subject: [PATCH] fix: address PR #438 review findings - Fix HOLDOUT_SCENARIOS_DIR to use absolute path (resolves Playwright testDir issue) - Remove dead SCENARIOS_DIR variable - Replace fallback with explicit error in holdout.config.ts - Add SSH key requirement comment --- scripts/harb-evaluator/evaluate.sh | 5 +++-- scripts/harb-evaluator/holdout.config.ts | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index e691637..303c75e 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -24,7 +24,6 @@ readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}" readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}" readonly REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" readonly EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)" -readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios" readonly ANVIL_TIMEOUT=120 # seconds to wait for anvil healthy readonly BOOTSTRAP_TIMEOUT=180 # seconds to wait for bootstrap container exit @@ -166,6 +165,7 @@ log "Installing Playwright browser binaries..." # ── Clone holdout scenarios ──────────────────────────────────────────── # The holdout scenarios live in a separate repo so the dev-agent cannot # see them. Clone into .holdout-scenarios/ inside the worktree. +# NOTE: Requires SSH key with read access to johba/harb-holdout-scenarios on Codeberg. readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git" readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios" log "Cloning holdout scenarios from $HOLDOUT_REPO..." 
@@ -173,7 +173,8 @@ git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \ || infra_error "Failed to clone holdout scenarios repo" # Export the scenarios directory for holdout.config.ts -export HOLDOUT_SCENARIOS_DIR=".holdout-scenarios/scenarios" +# Must be absolute path — Playwright resolves testDir relative to config file's directory. +export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios" # ── Boot the stack ───────────────────────────────────────────────────── cd "$WORKTREE_DIR" diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts index 1409390..562fef1 100644 --- a/scripts/harb-evaluator/holdout.config.ts +++ b/scripts/harb-evaluator/holdout.config.ts @@ -14,10 +14,16 @@ import { defineConfig, devices } from '@playwright/test'; * STACK_RPC_URL – Anvil JSON-RPC endpoint * STACK_WEBAPP_URL – Vite dev server URL * STACK_GRAPHQL_URL – Ponder GraphQL endpoint - * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios (default: scripts/harb-evaluator/scenarios) + * HOLDOUT_SCENARIOS_DIR – Absolute path to cloned scenarios (required; exported by evaluate.sh) */ + +const scenariosDir = process.env.HOLDOUT_SCENARIOS_DIR; +if (!scenariosDir) { + throw new Error('HOLDOUT_SCENARIOS_DIR env var required — run via evaluate.sh'); +} + export default defineConfig({ - testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios', + testDir: scenariosDir, fullyParallel: false, // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always // active in the evaluator context. Accidental test.only() in any scenario file