fix: address PR #438 review findings

- Fix HOLDOUT_SCENARIOS_DIR to use absolute path (resolves Playwright testDir issue)
- Remove dead SCENARIOS_DIR variable
- Replace fallback with explicit error in holdout.config.ts
- Add SSH key requirement comment
This commit is contained in:
openhands 2026-03-03 20:59:32 +00:00
parent 69f6a87e20
commit f6fe37dcc0
2 changed files with 11 additions and 4 deletions

View file

@ -24,7 +24,6 @@ readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}"
readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}"
readonly REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
readonly EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios"
readonly ANVIL_TIMEOUT=120 # seconds to wait for anvil healthy
readonly BOOTSTRAP_TIMEOUT=180 # seconds to wait for bootstrap container exit
@ -166,6 +165,7 @@ log "Installing Playwright browser binaries..."
# ── Clone holdout scenarios ────────────────────────────────────────────
# The holdout scenarios live in a separate repo so the dev-agent cannot
# see them. Clone into .holdout-scenarios/ inside the worktree.
# NOTE: Requires an SSH key with read access to johba/harb-holdout-scenarios on Codeberg.
readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git"
readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios"
log "Cloning holdout scenarios from $HOLDOUT_REPO..."
@ -173,7 +173,8 @@ git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \
|| infra_error "Failed to clone holdout scenarios repo"
# Export the scenarios directory for holdout.config.ts
export HOLDOUT_SCENARIOS_DIR=".holdout-scenarios/scenarios"
# Must be an absolute path — Playwright resolves a relative testDir against the config file's directory.
export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios"
# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"

View file

@ -14,10 +14,16 @@ import { defineConfig, devices } from '@playwright/test';
* STACK_RPC_URL Anvil JSON-RPC endpoint
* STACK_WEBAPP_URL Vite dev server URL
* STACK_GRAPHQL_URL Ponder GraphQL endpoint
* HOLDOUT_SCENARIOS_DIR Path to cloned scenarios (default: scripts/harb-evaluator/scenarios)
* HOLDOUT_SCENARIOS_DIR Path to cloned scenarios (required; absolute path exported by evaluate.sh)
*/
// Read the scenarios directory from the environment and fail fast when it is
// unset or empty: there is no default location, so the config refuses to load
// rather than pointing Playwright at a directory that may not exist.
const scenariosDir = process.env.HOLDOUT_SCENARIOS_DIR;
if (!scenariosDir) {
throw new Error('HOLDOUT_SCENARIOS_DIR env var required — run via evaluate.sh');
}
export default defineConfig({
testDir: process.env.HOLDOUT_SCENARIOS_DIR ?? './scenarios',
testDir: scenariosDir,
fullyParallel: false,
// evaluate.sh sets CI=true before invoking playwright, so forbidOnly is always
// active in the evaluator context. Accidental test.only() in any scenario file