#!/usr/bin/env bash
# evaluate.sh — Spin up a fresh containerised harb stack from a PR branch,
# run holdout scenario scripts against it, then tear it down.
#
# Usage: evaluate.sh <pr-number>
#
# Exit codes:
#   0  gate passed (all scenarios succeeded, or no scenarios found)
#   1  gate failed (one or more scenario scripts returned non-zero)
#   2  infra error (stack failed to start, prerequisite missing, etc.)
#
# Environment overrides:
#   HARB_REPO_REMOTE  git remote to fetch from (default: origin)
#   CODEBERG_REPO     Gitea/Codeberg repo path (default: johba/harb)
#
# NOTE: host port isolation — docker-compose.yml binds fixed host ports
# (8545, 42069, 5173, 8081, 5100). Concurrent evaluation runs on the same
# host will collide on those ports. This script is designed for sequential use.

set -euo pipefail

# ── Constants ──────────────────────────────────────────────────────────
readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}"
readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}"
# Declaration split from assignment so a failing command substitution is not
# masked by readonly's own always-zero exit status (ShellCheck SC2155).
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
readonly REPO_ROOT
EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly EVALUATOR_DIR
readonly ANVIL_TIMEOUT=120        # seconds to wait for anvil healthy
readonly BOOTSTRAP_TIMEOUT=180    # seconds to wait for bootstrap container exit
readonly PONDER_TIMEOUT=300       # seconds to wait for ponder /health
readonly PONDER_READY_TIMEOUT=360 # seconds to wait for ponder /ready (fully indexed)
readonly POLL_INTERVAL=5

# ── Logging helpers ────────────────────────────────────────────────────
log()         { echo "[eval] $*"; }
infra_error() { echo "[eval] INFRA ERROR: $*" >&2; exit 2; }
gate_fail()   { echo "[eval] GATE FAILED: $*" >&2; exit 1; }

# Print usage to stderr and exit 2 (infra error: caller invoked us wrongly).
usage() {
  cat >&2 <<EOF
Usage: evaluate.sh <pr-number>

Checks out the PR branch into an isolated git worktree, boots a fresh
docker compose stack, runs scenario scripts, then tears down.

Exit codes:
  0  gate passed
  1  gate failed
  2  infra error
EOF
  exit 2
}

# ── Argument parsing ───────────────────────────────────────────────────
[[ $# -lt 1 ]] && usage
PR_NUMBER="$1"
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]] || infra_error "Invalid PR number: '$PR_NUMBER'"

# ── Prerequisites ──────────────────────────────────────────────────────
# FIX: the original used '&>/dev/null 2>&1' — '&>' already redirects both
# stdout and stderr, so the trailing '2>&1' was a confusing no-op.
if docker compose version &>/dev/null; then
  COMPOSE_CMD="docker compose"
  # Compose v2 uses hyphens in container names: PROJECT-SERVICE-1
  _COMPOSE_SEP="-"
elif command -v docker-compose &>/dev/null; then
  COMPOSE_CMD="docker-compose"
  # Compose v1 uses underscores in container names: PROJECT_SERVICE_1
  _COMPOSE_SEP="_"
else
  infra_error "docker compose not found. Install Docker with the compose plugin."
fi
command -v git  &>/dev/null || infra_error "git not found"
command -v curl &>/dev/null || infra_error "curl not found"

# ── Fetch PR branch name ───────────────────────────────────────────────
# Try the Codeberg REST API first (requires ~/.netrc with credentials).
PR_BRANCH=""
_FETCHED_ALL=false
if [[ -f "$HOME/.netrc" ]]; then
  log "Resolving PR #$PR_NUMBER branch via Codeberg API..."
  api_json="$(curl --netrc --silent --max-time 10 \
    "https://codeberg.org/api/v1/repos/$CODEBERG_REPO/pulls/$PR_NUMBER" 2>/dev/null)" || true
  if [[ -n "$api_json" ]]; then
    if command -v jq &>/dev/null; then
      PR_BRANCH="$(echo "$api_json" | jq -r '.head.ref // empty' 2>/dev/null)" || true
    elif command -v python3 &>/dev/null; then
      # jq not available — use python3 for reliable nested key extraction.
      # grep+sed is intentionally omitted: the Gitea response has multiple "ref"
      # keys in nested objects and grep cannot safely target only head.ref.
      # Guarded on python3 existing so a host with neither tool falls through
      # cleanly to the branch-scan path below.
      PR_BRANCH="$(echo "$api_json" | \
        python3 -c "import json,sys; print(json.load(sys.stdin)['head']['ref'])" 2>/dev/null)" || true
    fi
  fi
fi

# Fall back: fetch all remote refs and match common harb branch patterns.
if [[ -z "$PR_BRANCH" ]]; then
  log "API lookup skipped or failed; scanning remote branches..."
  cd "$REPO_ROOT"
  git fetch "$REPO_REMOTE" --prune 2>/dev/null || infra_error "git fetch $REPO_REMOTE failed"
  _FETCHED_ALL=true
  # FIX: the previous `sed 's|.*/||'` stripped everything up to the LAST
  # slash, mangling a branch like "fix/login-123" into "login-123" and
  # breaking the later `git fetch "$REPO_REMOTE" "$PR_BRANCH"`. Strip only
  # the first path component (the remote name, e.g. "origin/").
  PR_BRANCH="$(git branch -r 2>/dev/null | \
    grep -E "(fix|feat|chore|refactor|hotfix)/.*[-/]${PR_NUMBER}[^0-9]?$|issue-${PR_NUMBER}$" | \
    head -n1 | sed 's|^[[:space:]]*[^/]*/||' | tr -d ' ')" || true
fi

[[ -n "$PR_BRANCH" ]] || infra_error "Could not determine branch for PR #$PR_NUMBER"
log "PR #$PR_NUMBER => branch: $PR_BRANCH"

# ── Create isolated worktree ───────────────────────────────────────────
# Use mktemp -u to generate a unique path without creating the directory;
# git worktree add creates it. This avoids failures on older git that
# rejects a pre-existing (even empty) target directory.
WORKTREE_DIR="$(mktemp -u /tmp/harb-eval-${PR_NUMBER}-XXXXXX)"
# Use a project name that is unique per PR and safe for Docker labels.
COMPOSE_PROJECT="harb-eval-${PR_NUMBER}"

# Tear down the compose stack and remove the worktree on ANY exit path,
# preserving the triggering exit code.
cleanup() {
  local rc=$?
  log "--- cleanup (exit $rc) ---"
  if [[ -d "$WORKTREE_DIR" ]]; then
    log "Tearing down stack (project: $COMPOSE_PROJECT)..."
    (cd "$WORKTREE_DIR" && $COMPOSE_CMD -p "$COMPOSE_PROJECT" down -v --remove-orphans 2>/dev/null) || true
  fi
  log "Removing worktree $WORKTREE_DIR..."
  # FIX: guard the cd — under `set -e` a failing cd inside the EXIT trap
  # would abort cleanup before the worktree is removed.
  cd "$REPO_ROOT" 2>/dev/null || true
  git worktree remove --force "$WORKTREE_DIR" 2>/dev/null || rm -rf "$WORKTREE_DIR" || true
  exit $rc
}
trap cleanup EXIT INT TERM

# Fetch the specific branch only if we haven't already fetched everything above.
cd "$REPO_ROOT"
if [[ "$_FETCHED_ALL" != "true" ]]; then
  git fetch "$REPO_REMOTE" "$PR_BRANCH" 2>/dev/null || \
    infra_error "Could not fetch branch '$PR_BRANCH' from $REPO_REMOTE"
fi

log "Creating worktree at $WORKTREE_DIR (branch: $PR_BRANCH)..."
git worktree add "$WORKTREE_DIR" "remotes/$REPO_REMOTE/$PR_BRANCH" \
  || infra_error "git worktree add failed for branch $PR_BRANCH"

# ── Build kraiken-lib in the worktree ──────────────────────────────────
log "Building kraiken-lib..."
# Run a command from inside the worktree in a subshell, leaving our own
# working directory untouched.
in_worktree() { (cd "$WORKTREE_DIR" && "$@"); }

in_worktree ./scripts/build-kraiken-lib.sh \
  || infra_error "kraiken-lib build failed"

# ── Install root npm dependencies (needed for npx playwright test) ─────
# --ignore-scripts keeps husky from rewriting the permanent repo's
# .git/hooks from inside this throwaway worktree; --quiet trims normal
# npm chatter while still surfacing errors.
log "Installing root npm dependencies..."
in_worktree npm install --no-audit --no-fund --ignore-scripts --quiet \
  || infra_error "npm install failed"

# ── Install Playwright browser binaries ────────────────────────────────
# Playwright pins browser builds per package revision; if the build for
# the resolved ^1.55.1 revision is not already cached on this host,
# `playwright test` dies up front with a cryptic "Executable doesn't exist".
log "Installing Playwright browser binaries..."
in_worktree npx playwright install chromium \
  || infra_error "playwright install chromium failed"

# ── Clone holdout scenarios ────────────────────────────────────────────
# Holdout scenarios are kept in a separate repo so the dev-agent never
# sees them; they land in .holdout-scenarios/ inside the worktree.
# NOTE: Requires SSH key with read access to johba/harb-holdout-scenarios on Codeberg.
readonly HOLDOUT_REPO="ssh://git@codeberg.org/johba/harb-holdout-scenarios.git"
readonly HOLDOUT_DIR="$WORKTREE_DIR/.holdout-scenarios"
log "Cloning holdout scenarios from $HOLDOUT_REPO..."
git clone --quiet "$HOLDOUT_REPO" "$HOLDOUT_DIR" \
  || infra_error "Failed to clone holdout scenarios repo"

# Expose the scenarios directory to holdout.config.ts. Must be an absolute
# path — Playwright resolves testDir relative to the config file's directory.
export HOLDOUT_SCENARIOS_DIR="$HOLDOUT_DIR/scenarios"

# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"
log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
$COMPOSE_CMD -p "$COMPOSE_PROJECT" up -d \
  || infra_error "docker compose up failed"

#######################################
# Resolve the container name for a service in this compose project.
# Compose v2 uses hyphens (PROJECT-SERVICE-1); v1 uses underscores
# (PROJECT_SERVICE_1) — the separator was detected at prerequisite time.
# Globals:   COMPOSE_PROJECT, _COMPOSE_SEP (read)
# Arguments: $1 - compose service name
# Outputs:   container name on stdout
#######################################
container_name() { echo "${COMPOSE_PROJECT}${_COMPOSE_SEP}$1${_COMPOSE_SEP}1"; }

#######################################
# Poll a service's Docker healthcheck until it reports "healthy".
# Globals:   POLL_INTERVAL (read)
# Arguments: $1 - service name; $2 - timeout in seconds
# Returns:   0 when healthy; otherwise dumps the last 20 log lines and
#            exits 2 via infra_error.
#######################################
wait_healthy() {
  local service="$1" timeout="$2"
  local container
  container="$(container_name "$service")"
  log "Waiting for $service to be healthy (${timeout}s)..."
  # SECONDS is bash's built-in wall-clock counter; deadline is absolute.
  local deadline=$((SECONDS + timeout))
  while (( SECONDS < deadline )); do
    local status
    # "missing" covers both a not-yet-created container and an inspect
    # error (e.g. no healthcheck defined) — either way, keep polling.
    status="$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "missing")"
    if [[ "$status" == "healthy" ]]; then
      log " $service healthy"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done
  # Best-effort log dump for diagnosis before the hard infra failure.
  docker logs "$container" 2>&1 | tail -20 || true
  infra_error "$service did not become healthy within ${timeout}s"
}

#######################################
# Wait for a one-shot service container to run to completion.
# Globals:   POLL_INTERVAL (read)
# Arguments: $1 - service name; $2 - timeout in seconds
# Returns:   0 when the container exited with code 0; exits 2 via
#            infra_error on non-zero exit code or timeout.
#######################################
wait_exited() {
  local service="$1" timeout="$2"
  local container
  container="$(container_name "$service")"
  log "Waiting for $service container to complete (${timeout}s)..."
  local deadline=$((SECONDS + timeout))
  while (( SECONDS < deadline )); do
    local status
    status="$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "missing")"
    # "dead" means the daemon failed to fully remove/stop it — treat like
    # exited and let the exit-code check decide pass/fail.
    if [[ "$status" == "exited" || "$status" == "dead" ]]; then
      local exit_code
      # If inspect itself fails here, assume failure ("1") rather than
      # silently treating an unreadable container as success.
      exit_code="$(docker inspect --format='{{.State.ExitCode}}' "$container" 2>/dev/null || echo "1")"
      if [[ "$exit_code" != "0" ]]; then
        docker logs "$container" 2>&1 | tail -30 || true
        infra_error "$service container exited with code $exit_code"
      fi
      log " $service completed successfully"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done
  docker logs "$container" 2>&1 | tail -20 || true
  infra_error "$service did not complete within ${timeout}s"
}

# Phase 1: base services
wait_healthy anvil "$ANVIL_TIMEOUT"

# Phase 2: bootstrap (deploys contracts, writes contracts.env)
wait_exited bootstrap "$BOOTSTRAP_TIMEOUT"

# ── Extract contract addresses ─────────────────────────────────────────
# The bootstrap container writes contracts.env into the worktree's tmp dir
# (bind-mounted); it must exist before scenarios can be configured.
CONTRACTS_ENV="$WORKTREE_DIR/tmp/containers/contracts.env"
[[ -f "$CONTRACTS_ENV" ]] \
  || infra_error "contracts.env not found at $CONTRACTS_ENV"
log "Reading contract addresses from contracts.env..."
# shellcheck source=/dev/null
source "$CONTRACTS_ENV"

# Validate expected variables after sourcing — guards against set -u crashes
# if a future bootstrap refactor renames any of these.
KRAIKEN="${KRAIKEN:-}"
STAKE="${STAKE:-}"
LIQUIDITY_MANAGER="${LIQUIDITY_MANAGER:-}"
[[ -n "$KRAIKEN" ]] || infra_error "KRAIKEN not set in contracts.env"
[[ -n "$STAKE" ]] || infra_error "STAKE not set in contracts.env"
[[ -n "$LIQUIDITY_MANAGER" ]] || infra_error "LIQUIDITY_MANAGER not set in contracts.env"
log " KRAIKEN=$KRAIKEN"
log " STAKE=$STAKE"
log " LIQUIDITY_MANAGER=$LIQUIDITY_MANAGER"

# Phase 3: ponder must be healthy and fully indexed before running scenarios
wait_healthy ponder "$PONDER_TIMEOUT"
log "Waiting for Ponder to finish historical indexing (${PONDER_READY_TIMEOUT}s)..."
# Poll Ponder's /ready endpoint (distinct from /health): 200 means
# historical indexing is complete and GraphQL answers are authoritative.
ponder_ready=false
ponder_deadline=$((SECONDS + PONDER_READY_TIMEOUT))
while (( SECONDS < ponder_deadline )); do
  # FIX: the previous `$(curl -sf ... -w '%{http_code}' ... || echo "000")`
  # could concatenate curl's --write-out output with the fallback: with -f,
  # curl still prints the status code before exiting non-zero on an HTTP
  # error, yielding strings like "404000". Drop -f, capture the code, and
  # substitute "000" only when curl itself (transport) fails.
  http_code="$(curl --silent --output /dev/null --write-out '%{http_code}' \
    --max-time 3 http://127.0.0.1:42069/ready 2>/dev/null)" || http_code="000"
  if [[ "$http_code" == "200" ]]; then
    log " Ponder fully indexed"
    ponder_ready=true
    break
  fi
  sleep "$POLL_INTERVAL"
done
if [[ "$ponder_ready" != "true" ]]; then
  infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
fi

# ── Export stack endpoints ─────────────────────────────────────────────
# EVAL_* is the canonical namespace for scenario scripts.
export EVAL_PR_NUMBER="$PR_NUMBER"
export EVAL_BRANCH="$PR_BRANCH"
export EVAL_WORKTREE="$WORKTREE_DIR"
export EVAL_RPC_URL="http://127.0.0.1:8545"
export EVAL_GRAPHQL_URL="http://127.0.0.1:42069/graphql"
export EVAL_WEBAPP_URL="http://127.0.0.1:5173"
export EVAL_KRAIKEN="$KRAIKEN"
export EVAL_STAKE="$STAKE"
export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"
# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly
export STACK_RPC_URL="$EVAL_RPC_URL"
export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"

log "Stack ready. Endpoints:"
log " RPC: $EVAL_RPC_URL"
log " GraphQL: $EVAL_GRAPHQL_URL"
log " WebApp: $EVAL_WEBAPP_URL"

# ── Run holdout Playwright scenarios ──────────────────────────────────
# CI=true triggers forbidOnly in holdout.config.ts so accidental test.only()
# in any scenario file causes an immediate failure rather than a silent partial run.
log "Running holdout scenarios via Playwright..."
cd "$WORKTREE_DIR"
if CI=true npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
  log "Gate PASSED"
  exit 0
else
  gate_fail "One or more holdout scenarios failed"
fi