From 43c8d79afd166c83d32ffcd1742c0b0815667880 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 1 Mar 2026 09:04:33 +0000 Subject: [PATCH 1/2] fix: Holdout evaluator: fresh containerised stack per run (#380) Adds scripts/harb-evaluator/evaluate.sh which: - Accepts a PR number, resolves the branch via Codeberg API or git remote scan - Checks out that branch into an isolated git worktree - Boots a fresh docker compose stack with a unique COMPOSE_PROJECT name - Waits for anvil healthy, bootstrap complete, ponder healthy + indexed - Sources contract addresses from tmp/containers/contracts.env (never hardcoded) - Exports EVAL_* env vars and runs any *.sh scripts under scenarios/ - Always tears down the stack and removes the worktree on exit (pass or fail) - Returns 0 (gate passed), 1 (gate failed), or 2 (infra error) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/evaluate.sh | 270 +++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100755 scripts/harb-evaluator/evaluate.sh diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh new file mode 100755 index 0000000..456fda8 --- /dev/null +++ b/scripts/harb-evaluator/evaluate.sh @@ -0,0 +1,270 @@ +#!/usr/bin/env bash +# evaluate.sh — Spin up a fresh containerised harb stack from a PR branch, +# run holdout scenario scripts against it, then tear it down. +# +# Usage: evaluate.sh <pr-number> +# +# Exit codes: +# 0 gate passed (all scenarios succeeded, or no scenarios found) +# 1 gate failed (one or more scenario scripts returned non-zero) +# 2 infra error (stack failed to start, prerequisite missing, etc.)
+# +# Environment overrides: +# HARB_REPO_REMOTE git remote to fetch from (default: origin) +# CODEBERG_REPO Gitea/Codeberg repo path (default: johba/harb) + +set -euo pipefail + +# ── Constants ────────────────────────────────────────────────────────── +readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}" +readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}" +readonly REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +readonly EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)" +readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios" + +readonly ANVIL_TIMEOUT=120 # seconds to wait for anvil healthy +readonly BOOTSTRAP_TIMEOUT=180 # seconds to wait for bootstrap container exit +readonly PONDER_TIMEOUT=300 # seconds to wait for ponder /health +readonly PONDER_READY_TIMEOUT=360 # seconds to wait for ponder /ready (fully indexed) +readonly POLL_INTERVAL=5 + +# ── Logging helpers ──────────────────────────────────────────────────── +log() { echo "[eval] $*"; } +infra_error() { echo "[eval] INFRA ERROR: $*" >&2; exit 2; } +gate_fail() { echo "[eval] GATE FAILED: $*" >&2; exit 1; } + +usage() { + cat >&2 < + +Checks out the PR branch into an isolated git worktree, boots a fresh +docker compose stack, runs scenario scripts, then tears down. + +Exit codes: + 0 gate passed + 1 gate failed + 2 infra error +EOF + exit 2 +} + +# ── Argument parsing ─────────────────────────────────────────────────── +[[ $# -lt 1 ]] && usage +PR_NUMBER="$1" +[[ "$PR_NUMBER" =~ ^[0-9]+$ ]] || infra_error "Invalid PR number: '$PR_NUMBER'" + +# ── Prerequisites ────────────────────────────────────────────────────── +if docker compose version &>/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose &>/dev/null; then + COMPOSE_CMD="docker-compose" +else + infra_error "docker compose not found. Install Docker with the compose plugin." 
+fi + +command -v git &>/dev/null || infra_error "git not found" +command -v curl &>/dev/null || infra_error "curl not found" + +# ── Fetch PR branch name ─────────────────────────────────────────────── +# Try the Codeberg REST API first (requires ~/.netrc with credentials). +PR_BRANCH="" +if [[ -f "$HOME/.netrc" ]]; then + log "Resolving PR #$PR_NUMBER branch via Codeberg API..." + api_json="$(curl --netrc --silent --max-time 10 \ + "https://codeberg.org/api/v1/repos/$CODEBERG_REPO/pulls/$PR_NUMBER" 2>/dev/null)" || true + + if [[ -n "$api_json" ]]; then + if command -v jq &>/dev/null; then + PR_BRANCH="$(echo "$api_json" | jq -r '.head.ref // empty' 2>/dev/null)" || true + else + # jq not available — extract with python3 or grep+sed + PR_BRANCH="$(echo "$api_json" | \ + python3 -c "import json,sys; print(json.load(sys.stdin)['head']['ref'])" 2>/dev/null)" || \ + PR_BRANCH="$(echo "$api_json" | grep -o '"ref":"[^"]*"' | head -n1 | sed 's/"ref":"//;s/"//')" || true + fi + fi +fi + +# Fall back: fetch all remote refs and match common harb branch patterns. +if [[ -z "$PR_BRANCH" ]]; then + log "API lookup skipped or failed; scanning remote branches..." + cd "$REPO_ROOT" + git fetch "$REPO_REMOTE" --prune 2>/dev/null || infra_error "git fetch $REPO_REMOTE failed" + PR_BRANCH="$(git branch -r 2>/dev/null | \ + grep -E "(fix|feat|chore|refactor|hotfix)/.*[-/]${PR_NUMBER}[^0-9]?$|issue-${PR_NUMBER}$" | \ + head -n1 | sed 's|.*/||' | tr -d ' ')" || true +fi + +[[ -n "$PR_BRANCH" ]] || infra_error "Could not determine branch for PR #$PR_NUMBER" +log "PR #$PR_NUMBER => branch: $PR_BRANCH" + +# ── Create isolated worktree ─────────────────────────────────────────── +WORKTREE_DIR="$(mktemp -d /tmp/harb-eval-${PR_NUMBER}-XXXXXX)" +# Use a project name that is unique per PR and safe for Docker labels. +COMPOSE_PROJECT="harb-eval-${PR_NUMBER}" + +cleanup() { + local rc=$? 
+ log "--- cleanup (exit $rc) ---" + if [[ -d "$WORKTREE_DIR" ]]; then + log "Tearing down stack (project: $COMPOSE_PROJECT)..." + (cd "$WORKTREE_DIR" && $COMPOSE_CMD -p "$COMPOSE_PROJECT" down -v --remove-orphans 2>/dev/null) || true + fi + log "Removing worktree $WORKTREE_DIR..." + cd "$REPO_ROOT" + git worktree remove --force "$WORKTREE_DIR" 2>/dev/null || rm -rf "$WORKTREE_DIR" || true + exit $rc +} +trap cleanup EXIT INT TERM + +# Ensure the branch is locally available before adding the worktree. +cd "$REPO_ROOT" +git fetch "$REPO_REMOTE" "$PR_BRANCH" 2>/dev/null || \ + infra_error "Could not fetch branch '$PR_BRANCH' from $REPO_REMOTE" + +log "Creating worktree at $WORKTREE_DIR (branch: $PR_BRANCH)..." +git worktree add "$WORKTREE_DIR" "remotes/$REPO_REMOTE/$PR_BRANCH" \ + || infra_error "git worktree add failed for branch $PR_BRANCH" + +# ── Build kraiken-lib in the worktree ───────────────────────────────── +log "Building kraiken-lib..." +(cd "$WORKTREE_DIR" && ./scripts/build-kraiken-lib.sh) \ + || infra_error "kraiken-lib build failed" + +# ── Boot the stack ───────────────────────────────────────────────────── +cd "$WORKTREE_DIR" +log "Starting containerised stack (project: $COMPOSE_PROJECT)..." +$COMPOSE_CMD -p "$COMPOSE_PROJECT" up -d \ + || infra_error "docker compose up failed" + +# Helper: get status/health of a named container in this project. +container_name() { echo "${COMPOSE_PROJECT}-$1-1"; } + +wait_healthy() { + local service="$1" timeout="$2" + local container + container="$(container_name "$service")" + log "Waiting for $service to be healthy (${timeout}s)..." 
+ local deadline=$((SECONDS + timeout)) + while (( SECONDS < deadline )); do + local status + status="$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "missing")" + if [[ "$status" == "healthy" ]]; then + log " $service healthy" + return 0 + fi + sleep "$POLL_INTERVAL" + done + docker logs "$container" 2>&1 | tail -20 || true + infra_error "$service did not become healthy within ${timeout}s" +} + +wait_exited() { + local service="$1" timeout="$2" + local container + container="$(container_name "$service")" + log "Waiting for $service container to complete (${timeout}s)..." + local deadline=$((SECONDS + timeout)) + while (( SECONDS < deadline )); do + local status + status="$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "missing")" + if [[ "$status" == "exited" ]]; then + local exit_code + exit_code="$(docker inspect --format='{{.State.ExitCode}}' "$container" 2>/dev/null || echo "1")" + if [[ "$exit_code" != "0" ]]; then + docker logs "$container" 2>&1 | tail -30 || true + infra_error "$service container exited with code $exit_code" + fi + log " $service completed successfully" + return 0 + fi + sleep "$POLL_INTERVAL" + done + docker logs "$container" 2>&1 | tail -20 || true + infra_error "$service did not complete within ${timeout}s" +} + +# Phase 1: base services +wait_healthy anvil "$ANVIL_TIMEOUT" + +# Phase 2: bootstrap (deploys contracts, writes contracts.env) +wait_exited bootstrap "$BOOTSTRAP_TIMEOUT" + +# ── Extract contract addresses ───────────────────────────────────────── +CONTRACTS_ENV="$WORKTREE_DIR/tmp/containers/contracts.env" +[[ -f "$CONTRACTS_ENV" ]] \ + || infra_error "contracts.env not found at $CONTRACTS_ENV" + +log "Reading contract addresses from contracts.env..." 
+# shellcheck source=/dev/null +source "$CONTRACTS_ENV" +log " KRAIKEN=$KRAIKEN" +log " STAKE=$STAKE" +log " LIQUIDITY_MANAGER=$LIQUIDITY_MANAGER" + +# Phase 3: ponder must be healthy and fully indexed before running scenarios +wait_healthy ponder "$PONDER_TIMEOUT" + +log "Waiting for Ponder to finish historical indexing (${PONDER_READY_TIMEOUT}s)..." +ponder_ready=false +ponder_deadline=$((SECONDS + PONDER_READY_TIMEOUT)) +while (( SECONDS < ponder_deadline )); do + http_code="$(curl -sf -o /dev/null -w '%{http_code}' --max-time 3 \ + http://127.0.0.1:42069/ready 2>/dev/null || echo "000")" + if [[ "$http_code" == "200" ]]; then + log " Ponder fully indexed" + ponder_ready=true + break + fi + sleep "$POLL_INTERVAL" +done +if [[ "$ponder_ready" != "true" ]]; then + log "WARNING: Ponder not fully indexed after ${PONDER_READY_TIMEOUT}s — continuing anyway" +fi + +# ── Export stack endpoints for scenario scripts ──────────────────────── +export EVAL_PR_NUMBER="$PR_NUMBER" +export EVAL_BRANCH="$PR_BRANCH" +export EVAL_WORKTREE="$WORKTREE_DIR" +export EVAL_RPC_URL="http://127.0.0.1:8545" +export EVAL_GRAPHQL_URL="http://127.0.0.1:42069/graphql" +export EVAL_WEBAPP_URL="http://127.0.0.1:5173" +export EVAL_KRAIKEN="$KRAIKEN" +export EVAL_STAKE="$STAKE" +export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER" + +log "Stack ready. 
Endpoints:" +log " RPC: $EVAL_RPC_URL" +log " GraphQL: $EVAL_GRAPHQL_URL" +log " WebApp: $EVAL_WEBAPP_URL" + +# ── Run scenario scripts ─────────────────────────────────────────────── +shopt -s nullglob +scenario_scripts=("$SCENARIOS_DIR"/*.sh) +shopt -u nullglob + +if [[ ${#scenario_scripts[@]} -eq 0 ]]; then + log "No scenario scripts found in $SCENARIOS_DIR" + log "Gate PASSED (no scenarios)" + exit 0 +fi + +failures=0 +for scenario in "${scenario_scripts[@]}"; do + [[ -f "$scenario" ]] || continue + scenario_name="$(basename "$scenario")" + log "--- Running scenario: $scenario_name ---" + if bash "$scenario"; then + log " PASSED: $scenario_name" + else + log " FAILED: $scenario_name" + failures=$((failures + 1)) + fi +done + +if (( failures > 0 )); then + gate_fail "$failures of ${#scenario_scripts[@]} scenario(s) failed" +fi + +log "Gate PASSED (${#scenario_scripts[@]} scenario(s))" +exit 0 From 1d6dd8c628bdf867fd5d1e745b20cbbdacc53dd4 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 1 Mar 2026 10:12:34 +0000 Subject: [PATCH 2/2] fix: address review feedback on evaluate.sh (#380) - container_name(): derive separator from compose version (_COMPOSE_SEP), so v1 (underscores) and v2 (hyphens) both resolve the right container name - Drop buggy grep fallback for JSON branch extraction; python3 path is sufficient and safe; grep cannot reliably target only head.ref in the nested Gitea response - Validate KRAIKEN/STAKE/LIQUIDITY_MANAGER after sourcing contracts.env to catch renamed variables with a clear infra_error instead of a cryptic 'unbound variable' abort - wait_exited: handle Docker 'dead' state alongside 'exited' to fail fast instead of polling the full timeout - Ponder /ready timeout is now infra_error (was a logged warning); scenarios must not run against a partially-indexed chain - Avoid double git fetch: only fetch the specific branch if the earlier --prune fetch (fallback path) has not already retrieved all remote refs - Use mktemp -u so git worktree 
add creates the directory itself, avoiding failures on older git versions that reject a pre-existing target path Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/evaluate.sh | 49 +++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/scripts/harb-evaluator/evaluate.sh b/scripts/harb-evaluator/evaluate.sh index 456fda8..40e48f6 100755 --- a/scripts/harb-evaluator/evaluate.sh +++ b/scripts/harb-evaluator/evaluate.sh @@ -12,6 +12,10 @@ # Environment overrides: # HARB_REPO_REMOTE git remote to fetch from (default: origin) # CODEBERG_REPO Gitea/Codeberg repo path (default: johba/harb) +# +# NOTE: host port isolation — docker-compose.yml binds fixed host ports +# (8545, 42069, 5173, 8081, 5100). Concurrent evaluation runs on the same +# host will collide on those ports. This script is designed for sequential use. set -euo pipefail @@ -56,8 +60,12 @@ PR_NUMBER="$1" # ── Prerequisites ────────────────────────────────────────────────────── if docker compose version &>/dev/null 2>&1; then COMPOSE_CMD="docker compose" + # Compose v2 uses hyphens in container names: PROJECT-SERVICE-1 + _COMPOSE_SEP="-" elif command -v docker-compose &>/dev/null; then COMPOSE_CMD="docker-compose" + # Compose v1 uses underscores in container names: PROJECT_SERVICE_1 + _COMPOSE_SEP="_" else infra_error "docker compose not found. Install Docker with the compose plugin." fi @@ -68,6 +76,7 @@ command -v curl &>/dev/null || infra_error "curl not found" # ── Fetch PR branch name ─────────────────────────────────────────────── # Try the Codeberg REST API first (requires ~/.netrc with credentials). PR_BRANCH="" +_FETCHED_ALL=false if [[ -f "$HOME/.netrc" ]]; then log "Resolving PR #$PR_NUMBER branch via Codeberg API..." 
api_json="$(curl --netrc --silent --max-time 10 \ @@ -77,10 +86,11 @@ if [[ -f "$HOME/.netrc" ]]; then if command -v jq &>/dev/null; then PR_BRANCH="$(echo "$api_json" | jq -r '.head.ref // empty' 2>/dev/null)" || true else - # jq not available — extract with python3 or grep+sed + # jq not available — use python3 for reliable nested key extraction. + # grep+sed is intentionally omitted: the Gitea response has multiple "ref" + # keys in nested objects and grep cannot safely target only head.ref. PR_BRANCH="$(echo "$api_json" | \ - python3 -c "import json,sys; print(json.load(sys.stdin)['head']['ref'])" 2>/dev/null)" || \ - PR_BRANCH="$(echo "$api_json" | grep -o '"ref":"[^"]*"' | head -n1 | sed 's/"ref":"//;s/"//')" || true + python3 -c "import json,sys; print(json.load(sys.stdin)['head']['ref'])" 2>/dev/null)" || true fi fi fi @@ -90,6 +100,7 @@ if [[ -z "$PR_BRANCH" ]]; then log "API lookup skipped or failed; scanning remote branches..." cd "$REPO_ROOT" git fetch "$REPO_REMOTE" --prune 2>/dev/null || infra_error "git fetch $REPO_REMOTE failed" + _FETCHED_ALL=true PR_BRANCH="$(git branch -r 2>/dev/null | \ grep -E "(fix|feat|chore|refactor|hotfix)/.*[-/]${PR_NUMBER}[^0-9]?$|issue-${PR_NUMBER}$" | \ head -n1 | sed 's|.*/||' | tr -d ' ')" || true @@ -99,7 +110,10 @@ fi log "PR #$PR_NUMBER => branch: $PR_BRANCH" # ── Create isolated worktree ─────────────────────────────────────────── -WORKTREE_DIR="$(mktemp -d /tmp/harb-eval-${PR_NUMBER}-XXXXXX)" +# Use mktemp -u to generate a unique path without creating the directory; +# git worktree add creates it. This avoids failures on older git that +# rejects a pre-existing (even empty) target directory. +WORKTREE_DIR="$(mktemp -u /tmp/harb-eval-${PR_NUMBER}-XXXXXX)" # Use a project name that is unique per PR and safe for Docker labels. COMPOSE_PROJECT="harb-eval-${PR_NUMBER}" @@ -117,10 +131,12 @@ cleanup() { } trap cleanup EXIT INT TERM -# Ensure the branch is locally available before adding the worktree. 
+# Fetch the specific branch only if we haven't already fetched everything above. cd "$REPO_ROOT" -git fetch "$REPO_REMOTE" "$PR_BRANCH" 2>/dev/null || \ - infra_error "Could not fetch branch '$PR_BRANCH' from $REPO_REMOTE" +if [[ "$_FETCHED_ALL" != "true" ]]; then + git fetch "$REPO_REMOTE" "$PR_BRANCH" 2>/dev/null || \ + infra_error "Could not fetch branch '$PR_BRANCH' from $REPO_REMOTE" +fi log "Creating worktree at $WORKTREE_DIR (branch: $PR_BRANCH)..." git worktree add "$WORKTREE_DIR" "remotes/$REPO_REMOTE/$PR_BRANCH" \ @@ -137,8 +153,9 @@ log "Starting containerised stack (project: $COMPOSE_PROJECT)..." $COMPOSE_CMD -p "$COMPOSE_PROJECT" up -d \ || infra_error "docker compose up failed" -# Helper: get status/health of a named container in this project. -container_name() { echo "${COMPOSE_PROJECT}-$1-1"; } +# Helper: resolve the container name for a service in this project. +# Compose v2 uses hyphens (PROJECT-SERVICE-1); v1 uses underscores (PROJECT_SERVICE_1). +container_name() { echo "${COMPOSE_PROJECT}${_COMPOSE_SEP}$1${_COMPOSE_SEP}1"; } wait_healthy() { local service="$1" timeout="$2" @@ -168,7 +185,7 @@ wait_exited() { while (( SECONDS < deadline )); do local status status="$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "missing")" - if [[ "$status" == "exited" ]]; then + if [[ "$status" == "exited" || "$status" == "dead" ]]; then local exit_code exit_code="$(docker inspect --format='{{.State.ExitCode}}' "$container" 2>/dev/null || echo "1")" if [[ "$exit_code" != "0" ]]; then @@ -198,6 +215,16 @@ CONTRACTS_ENV="$WORKTREE_DIR/tmp/containers/contracts.env" log "Reading contract addresses from contracts.env..." # shellcheck source=/dev/null source "$CONTRACTS_ENV" + +# Validate expected variables after sourcing — guards against set -u crashes +# if a future bootstrap refactor renames any of these. 
+KRAIKEN="${KRAIKEN:-}" +STAKE="${STAKE:-}" +LIQUIDITY_MANAGER="${LIQUIDITY_MANAGER:-}" +[[ -n "$KRAIKEN" ]] || infra_error "KRAIKEN not set in contracts.env" +[[ -n "$STAKE" ]] || infra_error "STAKE not set in contracts.env" +[[ -n "$LIQUIDITY_MANAGER" ]] || infra_error "LIQUIDITY_MANAGER not set in contracts.env" + log " KRAIKEN=$KRAIKEN" log " STAKE=$STAKE" log " LIQUIDITY_MANAGER=$LIQUIDITY_MANAGER" @@ -219,7 +246,7 @@ while (( SECONDS < ponder_deadline )); do sleep "$POLL_INTERVAL" done if [[ "$ponder_ready" != "true" ]]; then - log "WARNING: Ponder not fully indexed after ${PONDER_READY_TIMEOUT}s — continuing anyway" + infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s" fi # ── Export stack endpoints for scenario scripts ────────────────────────