#!/usr/bin/env bash
#
# Development stack helper: starts, stops, health-checks and restarts the
# podman-compose services (anvil, postgres, bootstrap, ponder, webapp,
# landing, txn-bot, caddy) and the background kraiken-lib watcher.
set -euo pipefail

# All relative paths below are resolved from the directory one level above
# the directory containing this script.
cd "$(dirname "$0")/.."

# Timeout constants (in seconds)
readonly ANVIL_TIMEOUT=30     # Anvil starts fast
readonly POSTGRES_TIMEOUT=20  # Database init is quick
readonly BOOTSTRAP_TIMEOUT=60 # Contract deployment + seeding
readonly PONDER_TIMEOUT=90    # Must index bootstrap events
readonly WEBAPP_TIMEOUT=90    # npm install + Vite startup
readonly CADDY_TIMEOUT=10     # Proxy starts instantly
readonly POLL_INTERVAL=2      # Check health every N seconds

# Holds the PID of the watcher process launched by start_stack.
PID_FILE=/tmp/kraiken-watcher.pid
# Compose project name used for label lookups; defaults to the name of the
# working directory when COMPOSE_PROJECT_NAME is not set.
PROJECT_NAME=${COMPOSE_PROJECT_NAME:-$(basename "$PWD")}

#######################################
# Stop leftover watchers and remove leftover harb_ containers.
# Every step is best-effort: "nothing to kill/remove" is not an error.
# Globals:   PID_FILE (read)
# Outputs:   progress line on stdout
#######################################
cleanup_existing() {
  # Kill any existing watch scripts
  pkill -f "watch-kraiken-lib.sh" 2>/dev/null || true
  pkill -f "inotifywait.*${PWD}/kraiken-lib" 2>/dev/null || true

  # Remove PID file
  rm -f "$PID_FILE"

  # Kill zombie podman processes
  pkill -9 -f "podman wait.*harb_" 2>/dev/null || true

  # Remove any existing containers (suppress errors if they don't exist).
  # grep -v exits non-zero when it filters everything, hence the "|| true".
  echo " Cleaning up existing containers..."
  podman ps -a --filter "name=harb_" --format "{{.Names}}" 2>/dev/null \
    | xargs -r podman rm -f 2>&1 \
    | grep -v "Error.*no container" || true
}

#######################################
# Print the name of the first container (running or not) that podman lists
# for a compose service of this project; prints nothing when none exists.
# Globals:   PROJECT_NAME (read)
# Arguments: $1 - compose service name
#######################################
find_service_container() {
  local service=$1
  podman ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=${service}" \
    --format '{{.Names}}' | head -n1
}

#######################################
# Wait for container to be healthy (via healthcheck).
# Globals:   POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   success line on stdout, error line on stderr
# Returns:   0 once the healthcheck passes, 1 when attempts run out
#######################################
wait_for_healthy() {
  local container=$1
  local timeout_sec=$2
  local max_attempts=$((timeout_sec / POLL_INTERVAL))
  local start_time elapsed attempt

  # A timeout shorter than one poll interval must still be tried once.
  ((max_attempts >= 1)) || max_attempts=1

  start_time=$(date +%s)
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if podman healthcheck run "$container" &>/dev/null; then
      elapsed=$(($(date +%s) - start_time))
      echo " ✓ $container ready (${elapsed}s)"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done

  echo "ERROR: $container failed to become healthy after ${timeout_sec}s" >&2
  return 1
}

#######################################
# Wait for container to exit (used for bootstrap).
# Globals:   POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   success line on stdout, error line on stderr
# Returns:   0 when the container exited with code 0; 1 when it exited with
#            another code or did not exit before attempts ran out
#######################################
wait_for_exited() {
  local container=$1
  local timeout_sec=$2
  local max_attempts=$((timeout_sec / POLL_INTERVAL))
  local start_time elapsed attempt status exit_code

  # A timeout shorter than one poll interval must still be tried once.
  ((max_attempts >= 1)) || max_attempts=1

  start_time=$(date +%s)
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # A failing inspect (e.g. container not created yet) counts as "unknown".
    status=$(podman inspect "$container" --format='{{.State.Status}}' 2>/dev/null || echo "unknown")
    if [[ "$status" == "exited" ]]; then
      # "exited" alone does not mean success: a crashed container is also
      # "exited", so the exit code has to be checked as well.
      exit_code=$(podman inspect "$container" --format='{{.State.ExitCode}}' 2>/dev/null || echo "unknown")
      if [[ "$exit_code" != "0" ]]; then
        echo "ERROR: $container exited with code ${exit_code}" >&2
        return 1
      fi
      elapsed=$(($(date +%s) - start_time))
      echo " ✓ $container completed (${elapsed}s)"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done

  echo "ERROR: $container failed to complete after ${timeout_sec}s" >&2
  return 1
}

#######################################
# Build kraiken-lib, bring the stack up phase by phase (waiting for each
# phase before starting the next), then start the kraiken-lib watcher unless
# SKIP_WATCH is set to a non-empty value.
# Globals:   *_TIMEOUT, PID_FILE, GIT_BRANCH, SKIP_WATCH (read)
# Outputs:   progress on stdout
# Exits:     with status 1 as soon as one phase fails to become ready
#######################################
start_stack() {
  local stack_start_time total_time
  stack_start_time=$(date +%s)

  # Clean up any existing processes first
  cleanup_existing

  # Show branch if set
  if [[ -n "${GIT_BRANCH:-}" ]]; then
    echo "Branch: $GIT_BRANCH"
  fi

  echo "Building kraiken-lib..."
  ./scripts/build-kraiken-lib.sh

  echo "Starting stack..."

  # Phase 1: Start base services (no dependencies)
  echo " Starting anvil & postgres..."
  # Build chatter is filtered out; "|| true" covers grep -v matching nothing.
  podman-compose up -d anvil postgres 2>&1 \
    | grep -v "STEP\|Copying\|Writing\|Getting\|fetch\|Installing\|Executing" || true
  wait_for_healthy harb_anvil_1 "$ANVIL_TIMEOUT" || exit 1
  wait_for_healthy harb_postgres_1 "$POSTGRES_TIMEOUT" || exit 1

  # Phase 2: Start bootstrap (depends on anvil & postgres healthy)
  echo " Starting bootstrap..."
  podman-compose up -d bootstrap >/dev/null 2>&1
  wait_for_exited harb_bootstrap_1 "$BOOTSTRAP_TIMEOUT" || exit 1

  # Phase 3: Start ponder (depends on bootstrap completed)
  echo " Starting ponder..."
  podman-compose up -d ponder >/dev/null 2>&1
  wait_for_healthy harb_ponder_1 "$PONDER_TIMEOUT" || exit 1

  # Phase 4: Start frontend services (depend on ponder healthy)
  echo " Starting webapp, landing, txn-bot..."
  podman-compose up -d webapp landing txn-bot >/dev/null 2>&1
  wait_for_healthy harb_webapp_1 "$WEBAPP_TIMEOUT" || exit 1

  # Phase 5: Start caddy (depends on frontend services)
  echo " Starting caddy..."
  podman-compose up -d caddy >/dev/null 2>&1
  wait_for_healthy harb_caddy_1 "$CADDY_TIMEOUT" || exit 1

  if [[ -z "${SKIP_WATCH:-}" ]]; then
    echo "Watching for kraiken-lib changes..."
    ./scripts/watch-kraiken-lib.sh &
    # Record the watcher PID; cleanup_existing removes this file again.
    echo "$!" > "$PID_FILE"
  fi

  total_time=$(($(date +%s) - stack_start_time))
  echo ""
  echo "[ok] Stack started in ${total_time}s"
  echo " Web App: http://localhost:8081/app/"
  echo " GraphQL: http://localhost:8081/graphql"
}

#######################################
# Stop watchers, remove leftover containers and take the compose stack down.
#######################################
stop_stack() {
  cleanup_existing
  podman-compose down
  echo "[ok] Stack stopped"
}

#######################################
# Run the healthcheck of every long-running service once and print one
# status line per service ([ok], [!!] or [??] when no container exists).
# Globals:   PROJECT_NAME (read, via find_service_container)
#######################################
check_health() {
  echo "Checking health..."
  local services=(anvil postgres ponder webapp landing txn-bot caddy)
  local service container

  for service in "${services[@]}"; do
    container=$(find_service_container "$service")

    if [[ -z "$container" ]]; then
      echo " [??] $service (not created)"
      continue
    fi

    if podman healthcheck run "$container" &>/dev/null; then
      echo " [ok] $service"
    else
      echo " [!!] $service"
    fi
  done
}

#######################################
# Restart only the webapp container (and txn-bot, when it exists), then poll
# the webapp URL until it answers.
# Globals:   PROJECT_NAME, POLL_INTERVAL (read)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 once the webapp responds
# Exits:     with status 1 when no webapp container exists or the webapp
#            does not respond within the attempt budget
#######################################
restart_light() {
  echo "Light restart: webapp + txn-bot only..."
  echo " Preserving Anvil state (contracts remain deployed)"

  local webapp_container txnbot_container
  webapp_container=$(find_service_container webapp)
  txnbot_container=$(find_service_container txn-bot)

  if [[ -z "$webapp_container" ]]; then
    echo "[!!] webapp container not found - run './scripts/dev.sh start' first" >&2
    exit 1
  fi

  local start_time duration
  start_time=$(date +%s)

  echo " Restarting containers..."
  podman restart "$webapp_container" >/dev/null
  if [[ -n "$txnbot_container" ]]; then
    podman restart "$txnbot_container" >/dev/null
  fi

  echo " Waiting for webapp to be ready..."
  local max_attempts=30
  local attempt
  # The counter lives in the for-header on purpose: a bare "((attempt++))"
  # statement returns status 1 when attempt is 0 and would abort the whole
  # script under "set -e" after the first failed poll.
  # NOTE(review): this polls port 5173 while the URL printed on success uses
  # port 8081 - confirm that both are reachable from the host.
  for ((attempt = 0; attempt < max_attempts; attempt++)); do
    if curl -s -f -o /dev/null http://localhost:5173/app/ 2>/dev/null; then
      duration=$(($(date +%s) - start_time))
      echo "[ok] Light restart complete (~${duration}s)"
      echo " Web App: http://localhost:8081/app/"
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done

  echo "[!!] Webapp failed to respond after ${max_attempts} attempts" >&2
  exit 1
}

#######################################
# Stop the whole stack and start it again from scratch.
#######################################
restart_full() {
  echo "Full restart: all containers + bootstrap..."
  stop_stack
  start_stack
  echo "[ok] Full restart complete"
}

# NOTE(review): the visible source is cut off inside usage(); the rest of that
# function and anything after it must be restored from the original script.
# The surviving fragment is kept verbatim below, commented out because an
# unterminated redirection would otherwise be a syntax error:
#   usage() { cat <