2026-03-01 09:04:33 +00:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
# evaluate.sh — Spin up a fresh containerised harb stack from a PR branch,
|
|
|
|
|
# run holdout scenario scripts against it, then tear it down.
|
|
|
|
|
#
|
|
|
|
|
# Usage: evaluate.sh <pr-number>
|
|
|
|
|
#
|
|
|
|
|
# Exit codes:
|
|
|
|
|
# 0 gate passed (all scenarios succeeded, or no scenarios found)
|
|
|
|
|
# 1 gate failed (one or more scenario scripts returned non-zero)
|
|
|
|
|
# 2 infra error (stack failed to start, prerequisite missing, etc.)
|
|
|
|
|
#
|
|
|
|
|
# Environment overrides:
|
|
|
|
|
# HARB_REPO_REMOTE git remote to fetch from (default: origin)
|
|
|
|
|
# CODEBERG_REPO Gitea/Codeberg repo path (default: johba/harb)
|
2026-03-01 10:12:34 +00:00
|
|
|
#
|
|
|
|
|
# NOTE: host port isolation — docker-compose.yml binds fixed host ports
|
|
|
|
|
# (8545, 42069, 5173, 8081, 5100). Concurrent evaluation runs on the same
|
|
|
|
|
# host will collide on those ports. This script is designed for sequential use.
|
2026-03-01 09:04:33 +00:00
|
|
|
|
|
|
|
|
set -euo pipefail

# ── Constants ──────────────────────────────────────────────────────────
# Declarations are split from their command substitutions so a failing
# $(...) is not masked by `readonly`'s own (always-zero) exit status (SC2155).
readonly REPO_REMOTE="${HARB_REPO_REMOTE:-origin}"
readonly CODEBERG_REPO="${CODEBERG_REPO:-johba/harb}"

REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
readonly REPO_ROOT
EVALUATOR_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly EVALUATOR_DIR
readonly SCENARIOS_DIR="$EVALUATOR_DIR/scenarios"

readonly ANVIL_TIMEOUT=120          # seconds to wait for anvil healthy
readonly BOOTSTRAP_TIMEOUT=180      # seconds to wait for bootstrap container exit
readonly PONDER_TIMEOUT=300         # seconds to wait for ponder /health
readonly PONDER_READY_TIMEOUT=360   # seconds to wait for ponder /ready (fully indexed)
readonly POLL_INTERVAL=5            # seconds between health/status polls
|
|
|
|
|
|
|
|
|
|
# ── Logging helpers ────────────────────────────────────────────────────
# log: informational message on stdout, tagged for easy grepping.
log() { printf '[eval] %s\n' "$*"; }

# infra_error: environment/setup failure — report on stderr, exit code 2.
infra_error() {
  printf '[eval] INFRA ERROR: %s\n' "$*" >&2
  exit 2
}

# gate_fail: scenario failure — report on stderr, exit code 1.
gate_fail() {
  printf '[eval] GATE FAILED: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
|
|
|
|
|
|
# Print the usage text to stderr and exit with the infra-error code (2),
# matching the contract documented in the file header.
usage() {
  cat >&2 <<EOF
Usage: $0 <pr-number>

Checks out the PR branch into an isolated git worktree, boots a fresh
docker compose stack, runs scenario scripts, then tears down.

Exit codes:
0 gate passed
1 gate failed
2 infra error
EOF
  exit 2
}
|
|
|
|
|
|
|
|
|
|
# ── Argument parsing ───────────────────────────────────────────────────
# Exactly one positional argument is required: the numeric PR to evaluate.
(( $# >= 1 )) || usage
PR_NUMBER="$1"
# Reject anything that is not a pure decimal number before using it in
# API URLs, grep patterns and docker project names.
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]] || infra_error "Invalid PR number: '$PR_NUMBER'"
|
|
|
|
|
|
|
|
|
|
# ── Prerequisites ──────────────────────────────────────────────────────
# Detect which compose flavour is installed. The container-name separator
# differs between the two and is needed later by container_name().
# NOTE: the previous `&>/dev/null 2>&1` was a redundant double redirection
# (`&>` already covers both stdout and stderr).
if docker compose version &>/dev/null; then
  COMPOSE_CMD="docker compose"
  # Compose v2 uses hyphens in container names: PROJECT-SERVICE-1
  _COMPOSE_SEP="-"
elif command -v docker-compose &>/dev/null; then
  COMPOSE_CMD="docker-compose"
  # Compose v1 uses underscores in container names: PROJECT_SERVICE_1
  _COMPOSE_SEP="_"
else
  infra_error "docker compose not found. Install Docker with the compose plugin."
fi

command -v git &>/dev/null || infra_error "git not found"
command -v curl &>/dev/null || infra_error "curl not found"
|
|
|
|
|
|
|
|
|
|
# ── Fetch PR branch name ───────────────────────────────────────────────
# Try the Codeberg REST API first (requires ~/.netrc with credentials).
PR_BRANCH=""
_FETCHED_ALL=false
if [[ -f "$HOME/.netrc" ]]; then
  log "Resolving PR #$PR_NUMBER branch via Codeberg API..."
  api_json="$(curl --netrc --silent --max-time 10 \
    "https://codeberg.org/api/v1/repos/$CODEBERG_REPO/pulls/$PR_NUMBER" 2>/dev/null)" || true

  if [[ -n "$api_json" ]]; then
    if command -v jq &>/dev/null; then
      PR_BRANCH="$(jq -r '.head.ref // empty' <<<"$api_json" 2>/dev/null)" || true
    else
      # jq not available — use python3 for reliable nested key extraction.
      # grep+sed is intentionally omitted: the Gitea response has multiple "ref"
      # keys in nested objects and grep cannot safely target only head.ref.
      PR_BRANCH="$(python3 -c \
        "import json,sys; print(json.load(sys.stdin)['head']['ref'])" \
        <<<"$api_json" 2>/dev/null)" || true
    fi
  fi
fi
|
|
|
|
|
|
|
|
|
|
# Fall back: fetch all remote refs and match common harb branch patterns.
if [[ -z "$PR_BRANCH" ]]; then
  log "API lookup skipped or failed; scanning remote branches..."
  cd "$REPO_ROOT"
  git fetch "$REPO_REMOTE" --prune 2>/dev/null || infra_error "git fetch $REPO_REMOTE failed"
  _FETCHED_ALL=true
  # BUGFIX: strip only the leading "<remote>/" prefix from the matched ref.
  # The previous `sed 's|.*/||'` was greedy up to the LAST slash, so a
  # branch like "fix/liquidity-123" came out as "liquidity-123" and the
  # later fetch / worktree add failed on the mangled name.
  PR_BRANCH="$(git branch -r 2>/dev/null | \
    grep -E "(fix|feat|chore|refactor|hotfix)/.*[-/]${PR_NUMBER}[^0-9]?$|issue-${PR_NUMBER}$" | \
    head -n1 | sed -e 's/^[[:space:]]*//' -e "s|^${REPO_REMOTE}/||")" || true
fi

[[ -n "$PR_BRANCH" ]] || infra_error "Could not determine branch for PR #$PR_NUMBER"
log "PR #$PR_NUMBER => branch: $PR_BRANCH"
|
|
|
|
|
|
|
|
|
|
# ── Create isolated worktree ───────────────────────────────────────────
# mktemp -u yields a unique path WITHOUT creating the directory; git
# worktree add creates it itself. Older git rejects a pre-existing
# (even empty) target directory, so the directory must not exist yet.
WORKTREE_DIR="$(mktemp -u "/tmp/harb-eval-${PR_NUMBER}-XXXXXX")"

# Compose project name: unique per PR and safe for use in Docker labels.
COMPOSE_PROJECT="harb-eval-${PR_NUMBER}"
|
|
|
|
|
|
|
|
|
|
#######################################
# Tear down the compose stack and remove the worktree, then re-raise the
# exit status that triggered the trap so the caller sees the real result.
# Globals: WORKTREE_DIR, COMPOSE_PROJECT, COMPOSE_CMD, REPO_ROOT (read)
#######################################
cleanup() {
  local status=$?
  log "--- cleanup (exit $status) ---"
  if [[ -d "$WORKTREE_DIR" ]]; then
    log "Tearing down stack (project: $COMPOSE_PROJECT)..."
    (
      cd "$WORKTREE_DIR" &&
        $COMPOSE_CMD -p "$COMPOSE_PROJECT" down -v --remove-orphans 2>/dev/null
    ) || true
  fi
  log "Removing worktree $WORKTREE_DIR..."
  cd "$REPO_ROOT"
  git worktree remove --force "$WORKTREE_DIR" 2>/dev/null \
    || rm -rf "$WORKTREE_DIR" \
    || true
  exit "$status"
}
trap cleanup EXIT INT TERM
|
|
|
|
|
|
2026-03-01 10:12:34 +00:00
|
|
|
# Fetch the specific branch only if we haven't already fetched everything above.
cd "$REPO_ROOT"
if [[ "$_FETCHED_ALL" == "true" ]]; then
  : # all remote refs were fetched during branch discovery — nothing to do
else
  git fetch "$REPO_REMOTE" "$PR_BRANCH" 2>/dev/null \
    || infra_error "Could not fetch branch '$PR_BRANCH' from $REPO_REMOTE"
fi

log "Creating worktree at $WORKTREE_DIR (branch: $PR_BRANCH)..."
git worktree add "$WORKTREE_DIR" "remotes/$REPO_REMOTE/$PR_BRANCH" \
  || infra_error "git worktree add failed for branch $PR_BRANCH"
|
|
|
|
|
|
|
|
|
|
# run_in_worktree: execute one provisioning step inside the worktree.
# The subshell keeps the caller's cwd untouched between steps.
run_in_worktree() {
  (cd "$WORKTREE_DIR" && "$@")
}

# ── Build kraiken-lib in the worktree ─────────────────────────────────
log "Building kraiken-lib..."
run_in_worktree ./scripts/build-kraiken-lib.sh \
  || infra_error "kraiken-lib build failed"

# ── Install root npm dependencies (needed for npx playwright test) ─────
# --ignore-scripts: prevents husky from touching the permanent repo's .git/hooks
# from inside this ephemeral worktree.
# --quiet: suppresses normal npm output while still printing errors.
log "Installing root npm dependencies..."
run_in_worktree npm install --no-audit --no-fund --ignore-scripts --quiet \
  || infra_error "npm install failed"

# ── Install Playwright browser binaries ────────────────────────────────
# Browser binaries are version-pinned per Playwright revision. If the
# revision resolved by ^1.55.1 is not already cached on this host,
# playwright test aborts immediately with a cryptic "Executable doesn't exist" error.
log "Installing Playwright browser binaries..."
run_in_worktree npx playwright install chromium \
  || infra_error "playwright install chromium failed"

# ── Boot the stack ─────────────────────────────────────────────────────
cd "$WORKTREE_DIR"
log "Starting containerised stack (project: $COMPOSE_PROJECT)..."
$COMPOSE_CMD -p "$COMPOSE_PROJECT" up -d \
  || infra_error "docker compose up failed"
|
|
|
|
|
|
2026-03-01 10:12:34 +00:00
|
|
|
# Helper: resolve the container name for a service in this project.
# Compose v2 uses hyphens (PROJECT-SERVICE-1); v1 uses underscores (PROJECT_SERVICE_1).
# $1 - compose service name; prints the concrete container name on stdout.
container_name() {
  printf '%s\n' "${COMPOSE_PROJECT}${_COMPOSE_SEP}${1}${_COMPOSE_SEP}1"
}
|
2026-03-01 09:04:33 +00:00
|
|
|
|
|
|
|
|
#######################################
# Block until a compose service's docker healthcheck reports "healthy".
# Arguments: $1 - service name, $2 - timeout in seconds
# Globals:   POLL_INTERVAL (read)
# Exits via infra_error (code 2), dumping recent logs, on timeout.
#######################################
wait_healthy() {
  local svc="$1" limit="$2"
  local cname
  cname="$(container_name "$svc")"
  log "Waiting for $svc to be healthy (${limit}s)..."
  local stop_at=$((SECONDS + limit))
  local state
  while (( SECONDS < stop_at )); do
    # "missing" covers both a not-yet-created container and inspect errors.
    state="$(docker inspect --format='{{.State.Health.Status}}' "$cname" 2>/dev/null || echo "missing")"
    if [[ "$state" == "healthy" ]]; then
      log " $svc healthy"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done
  docker logs "$cname" 2>&1 | tail -20 || true
  infra_error "$svc did not become healthy within ${limit}s"
}
|
|
|
|
|
|
|
|
|
|
#######################################
# Block until a one-shot compose service's container has run to completion.
# Arguments: $1 - service name, $2 - timeout in seconds
# Globals:   POLL_INTERVAL (read)
# Exits via infra_error (code 2) if the container exits non-zero or the
# deadline passes; dumps recent container logs first in both cases.
#######################################
wait_exited() {
  local svc="$1" limit="$2"
  local cname
  cname="$(container_name "$svc")"
  log "Waiting for $svc container to complete (${limit}s)..."
  local stop_at=$((SECONDS + limit))
  local state code
  while (( SECONDS < stop_at )); do
    state="$(docker inspect --format='{{.State.Status}}' "$cname" 2>/dev/null || echo "missing")"
    case "$state" in
      exited|dead)
        # Default the code to "1" if inspect fails so a vanished container
        # is treated as a failure, not a success.
        code="$(docker inspect --format='{{.State.ExitCode}}' "$cname" 2>/dev/null || echo "1")"
        if [[ "$code" != "0" ]]; then
          docker logs "$cname" 2>&1 | tail -30 || true
          infra_error "$svc container exited with code $code"
        fi
        log " $svc completed successfully"
        return 0
        ;;
    esac
    sleep "$POLL_INTERVAL"
  done
  docker logs "$cname" 2>&1 | tail -20 || true
  infra_error "$svc did not complete within ${limit}s"
}
|
|
|
|
|
|
|
|
|
|
# Phase 1: base services
wait_healthy anvil "$ANVIL_TIMEOUT"

# Phase 2: bootstrap (deploys contracts, writes contracts.env)
wait_exited bootstrap "$BOOTSTRAP_TIMEOUT"

# ── Extract contract addresses ─────────────────────────────────────────
# bootstrap writes the deployed addresses here; its absence means the
# bootstrap container exited 0 without doing its job.
CONTRACTS_ENV="$WORKTREE_DIR/tmp/containers/contracts.env"
[[ -f "$CONTRACTS_ENV" ]] \
  || infra_error "contracts.env not found at $CONTRACTS_ENV"
|
|
|
|
|
|
|
|
|
|
log "Reading contract addresses from contracts.env..."
# shellcheck source=/dev/null
source "$CONTRACTS_ENV"

# Validate expected variables after sourcing — guards against set -u crashes
# if a future bootstrap refactor renames any of these. The :- defaults make
# the emptiness checks themselves safe under set -u.
KRAIKEN="${KRAIKEN:-}"
STAKE="${STAKE:-}"
LIQUIDITY_MANAGER="${LIQUIDITY_MANAGER:-}"
for _required in KRAIKEN STAKE LIQUIDITY_MANAGER; do
  [[ -n "${!_required}" ]] || infra_error "$_required not set in contracts.env"
done

log " KRAIKEN=$KRAIKEN"
log " STAKE=$STAKE"
log " LIQUIDITY_MANAGER=$LIQUIDITY_MANAGER"
|
|
|
|
|
|
|
|
|
|
# Phase 3: ponder must be healthy and fully indexed before running scenarios
wait_healthy ponder "$PONDER_TIMEOUT"

log "Waiting for Ponder to finish historical indexing (${PONDER_READY_TIMEOUT}s)..."
ponder_ready=false
ponder_deadline=$((SECONDS + PONDER_READY_TIMEOUT))
while (( SECONDS < ponder_deadline )); do
  # BUGFIX: no '-f' here. With '-f', curl exits non-zero on HTTP errors
  # AFTER printing the -w status code, so the old
  #   "$(curl -sf ... || echo "000")"
  # captured the code AND the fallback concatenated (e.g. "500000").
  # Capture the -w code directly and substitute "000" only when curl
  # itself failed to run.
  http_code="$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 \
    http://127.0.0.1:42069/ready 2>/dev/null)" || http_code="000"
  if [[ "$http_code" == "200" ]]; then
    log " Ponder fully indexed"
    ponder_ready=true
    break
  fi
  sleep "$POLL_INTERVAL"
done
if [[ "$ponder_ready" != "true" ]]; then
  infra_error "Ponder did not finish indexing within ${PONDER_READY_TIMEOUT}s"
fi
|
|
|
|
|
|
2026-03-01 11:24:15 +00:00
|
|
|
# ── Export stack endpoints ─────────────────────────────────────────────
# Evaluation metadata for scenario scripts.
export EVAL_PR_NUMBER="$PR_NUMBER"
export EVAL_BRANCH="$PR_BRANCH"
export EVAL_WORKTREE="$WORKTREE_DIR"

# Fixed host ports published by docker-compose.yml (see header NOTE on
# port isolation).
export EVAL_RPC_URL="http://127.0.0.1:8545"
export EVAL_GRAPHQL_URL="http://127.0.0.1:42069/graphql"
export EVAL_WEBAPP_URL="http://127.0.0.1:5173"

# Deployed contract addresses read from contracts.env.
export EVAL_KRAIKEN="$KRAIKEN"
export EVAL_STAKE="$STAKE"
export EVAL_LIQUIDITY_MANAGER="$LIQUIDITY_MANAGER"

# Alias as STACK_* so getStackConfig() in tests/setup/stack.ts resolves correctly
export STACK_RPC_URL="$EVAL_RPC_URL"
export STACK_GRAPHQL_URL="$EVAL_GRAPHQL_URL"
export STACK_WEBAPP_URL="$EVAL_WEBAPP_URL"
|
|
|
|
|
|
2026-03-01 09:04:33 +00:00
|
|
|
# Human-readable summary of where the freshly booted stack is listening.
log "Stack ready. Endpoints:"
log " RPC: $EVAL_RPC_URL"
log " GraphQL: $EVAL_GRAPHQL_URL"
log " WebApp: $EVAL_WEBAPP_URL"
|
|
|
|
|
|
2026-03-01 11:24:15 +00:00
|
|
|
# ── Run holdout Playwright scenarios ──────────────────────────────────
# CI=true triggers forbidOnly in holdout.config.ts so accidental test.only()
# in any scenario file causes an immediate failure rather than a silent partial run.
log "Running holdout scenarios via Playwright..."
cd "$WORKTREE_DIR"
if ! CI=true npx playwright test --config scripts/harb-evaluator/holdout.config.ts; then
  gate_fail "One or more holdout scenarios failed"
fi
log "Gate PASSED"
exit 0
|