diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh
index e691637..303c75e 100755
--- a/scripts/harb-evaluator/evaluate.sh
+++ b/scripts/harb-evaluator/evaluate.sh
@@ -24,7 +24,6 @@ readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}"
 readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}"
 readonly REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
 readonly EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)"
-readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios"
 
 readonly ANVIL_TIMEOUT=120 # seconds to wait for anvil healthy
 readonly BOOTSTRAP_TIMEOUT=180 # seconds to wait for bootstrap container exit
@@ -166,6 +165,7 @@ log "Installing Playwright browser binaries..."
 # ── Clone holdout scenarios ────────────────────────────────────────────
 # The holdout scenarios live in a separate repo so the dev-agent cannot
 # see them. Clone into .holdout-scenarios/ inside the worktree.
+# NOTE: Requires SSH key with read access to johba/harb-holdout-scenarios on Codeberg.
 readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git"
 readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios"
 log "Cloning holdout scenarios from $HOLDOUT_REPO..."
@@ -173,7 +173,8 @@ git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \
   || infra_error "Failed to clone holdout scenarios repo"
 
 # Export the scenarios directory for holdout.config.ts
-export HOLDOUT_SCENARIOS_DIR=".holdout-scenarios/scenarios"
+# Must be absolute path — Playwright resolves testDir relative to config file's directory.
+export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios"
 
 # ── Boot the stack ─────────────────────────────────────────────────────
 cd "$WORKTREE_DIR"
diff --git a/scripts/harb-evaluator/holdout.config.ts b/scripts/harb-evaluator/holdout.config.ts
index 1409390..562fef1 100644
--- a/scripts/harb-evaluator/holdout.config.ts
+++ b/scripts/harb-evaluator/holdout.config.ts
@@ -14,10 +14,16 @@ import { defineConfig, devices } from '@playwright/test';
  * STACK_RPC_URL – Anvil JSON-RPC endpoint
  * STACK_WEBAPP_URL – Vite dev server URL
  * STACK_GRAPHQL_URL – Ponder GraphQL endpoint
- * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios (default: scripts/harb-evaluator/scenarios)
+ * HOLDOUT_SCENARIOS_DIR – Path to cloned scenarios
  */
+
+const scenariosDir = process.env.HOLDOUT_SCENARIOS_DIR;
+if (!scenariosDir) {
+  throw new Error('HOLDOUT_SCENARIOS_DIR env var required — run via evaluate.sh');
+}
+
 export default defineConfig({
-  testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios',
+  testDir: scenariosDir,
   fullyParallel: false,
   // evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always
   // active in the evaluator context. Accidental test.only() in any scenario file