harb/scripts/dev.sh
johba 1c6f118f6b fix/node-modules-named-volumes (#94)
Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/94
2025-11-13 18:17:56 +01:00

363 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Work from the parent of the directory that contains this script, so the
# relative ./scripts/... paths used below resolve.
cd "$(dirname "$0")/.."

# --- Startup budgets (seconds) ----------------------------------------------
declare -r ANVIL_TIMEOUT=60       # Anvil starts fast (increased for first-time setup)
declare -r POSTGRES_TIMEOUT=30    # Database init is quick
declare -r BOOTSTRAP_TIMEOUT=120  # Contract deployment + seeding
declare -r PONDER_TIMEOUT=120     # Must index bootstrap events
declare -r WEBAPP_TIMEOUT=120     # npm install + Vite startup
declare -r CADDY_TIMEOUT=20       # Proxy starts instantly

# --- Polling / limits -------------------------------------------------------
declare -r POLL_INTERVAL=2        # Check health every N seconds
declare -r MAX_DOCKER_DISK_GB=20  # Maximum Docker disk usage in GB

# File that records the PID of the background kraiken-lib watcher.
PID_FILE=/tmp/kraiken-watcher.pid

# Compose project name: honour COMPOSE_PROJECT_NAME when set, otherwise use the
# name of the current working directory.
PROJECT_NAME=${COMPOSE_PROJECT_NAME:-$(basename "$PWD")}
# Detect container runtime.
#
# Sets:
#   COMPOSE_CMD - command line used to drive Compose ("docker compose" or
#                 "docker-compose"); expanded unquoted so it word-splits.
#   RUNTIME_CMD - command used for direct container operations.
#
# The Compose v2 plugin is probed by actually invoking it: `command -v docker
# compose` would look up "docker" and "compose" as two unrelated names and
# succeed as soon as either one exists, so it cannot tell whether the plugin is
# installed.
if docker compose version &> /dev/null; then
  COMPOSE_CMD="docker compose"
  RUNTIME_CMD="docker"
elif command -v docker-compose &> /dev/null; then
  COMPOSE_CMD="docker-compose"
  RUNTIME_CMD="docker"
else
  echo "Error: docker/docker-compose not found. Please install Docker."
  echo ""
  echo "Installation instructions:"
  echo " Linux: https://docs.docker.com/engine/install/"
  echo " Mac: brew install colima docker docker-compose && colima start"
  exit 1
fi
# Print the container name for a Compose service of this project.
#
# Globals:   RUNTIME_CMD (read), PROJECT_NAME (read)
# Arguments: $1 - Compose service name (e.g. "anvil")
# Outputs:   the container name on stdout
#
# The runtime is asked first, via the Compose project/service labels (the same
# lookup check_health and restart_light use), because the generated name
# differs between Compose versions (underscore vs. hyphen separators). If no
# matching container is found, the legacy "<project>_<service>_1" name is
# printed, as before.
container_name() {
  local service="$1"
  local resolved=""

  resolved=$(${RUNTIME_CMD} ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=${service}" \
    --format '{{.Names}}' 2>/dev/null) || resolved=""

  # Several replicas may match; keep the first line only.
  resolved=${resolved%%$'\n'*}

  if [[ -n "$resolved" ]]; then
    echo "$resolved"
  else
    echo "${PROJECT_NAME}_${service}_1"
  fi
}
# Check Docker disk usage and warn if approaching limits.
#
# Globals:   MAX_DOCKER_DISK_GB (read)
# Outputs:   a usage summary on stdout, plus a warning above 80% of the limit
# Returns:   0 when docker is unavailable or reports nothing (check skipped),
#            or when usage is within the limit
# Exits:     1 when usage exceeds MAX_DOCKER_DISK_GB
#
# Sizes are read with `docker system df --format '{{.Size}}'` (one value per
# resource type) instead of picking a column out of the table: the table's
# third column is a count, not the size, and "Local Volumes" / "Build Cache"
# shift the columns by one. Docker prints sizes with decimal unit suffixes
# (B, kB, MB, GB, TB), which are converted to GB here. awk does the arithmetic
# so the check does not silently degrade to "0" when bc is not installed.
check_docker_disk_usage() {
  if ! command -v docker &> /dev/null; then
    return 0 # Skip if Docker not available
  fi

  local size_lines
  size_lines=$(docker system df --format '{{.Size}}' 2>/dev/null) || size_lines=""
  if [[ -z "$size_lines" ]]; then
    return 0 # Docker not running
  fi

  # Sum every reported size in GB, rounded to one decimal place.
  local total_gb
  total_gb=$(LC_ALL=C awk '
    {
      value = $1 + 0            # numeric prefix, e.g. 1.093 from "1.093GB"
      unit = $1
      sub(/^[0-9.]+/, "", unit) # what remains is the unit suffix
      if (unit == "TB") factor = 1000
      else if (unit == "GB") factor = 1
      else if (unit == "MB") factor = 1 / 1000
      else if (unit == "kB" || unit == "KB") factor = 1 / 1000000
      else if (unit == "B") factor = 1 / 1000000000
      else factor = 0           # unknown suffix: ignore rather than guess
      total += value * factor
    }
    END { printf "%.1f", total }
  ' <<< "$size_lines")

  echo " Docker disk usage: ${total_gb}GB / ${MAX_DOCKER_DISK_GB}GB limit"

  # Warn if approaching 80% of the limit
  if LC_ALL=C awk -v used="$total_gb" -v limit="$MAX_DOCKER_DISK_GB" \
    'BEGIN { exit !((used + 0) > (limit * 0.8)) }'; then
    echo " [!!] WARNING: Docker disk usage is high!"
    echo " [!!] Run './scripts/cleanup-disk.sh' to free up space"
  fi

  # Hard stop if over limit
  if LC_ALL=C awk -v used="$total_gb" -v limit="$MAX_DOCKER_DISK_GB" \
    'BEGIN { exit !((used + 0) > (limit + 0)) }'; then
    echo ""
    echo "ERROR: Docker disk usage exceeds ${MAX_DOCKER_DISK_GB}GB limit!"
    echo "Run './scripts/cleanup-disk.sh' to free up space, then try again."
    exit 1
  fi
}
# Tear down leftovers from a previous run: watcher processes, the watcher PID
# file, lingering "<runtime> wait" processes, and this project's containers.
#
# Globals: PID_FILE (read), RUNTIME_CMD (read), PROJECT_NAME (read)
# Every step is best-effort; a missing process or container is not an error.
cleanup_existing() {
  local watcher_pattern
  local project_filter="label=com.docker.compose.project=${PROJECT_NAME}"

  # Stop earlier watch scripts and the inotifywait they run on kraiken-lib.
  for watcher_pattern in \
    "watch-kraiken-lib.sh" \
    "inotifywait.*$(pwd)/kraiken-lib"; do
    pkill -f "$watcher_pattern" 2>/dev/null || true
  done

  # The recorded watcher PID is stale now.
  rm -f "$PID_FILE"

  # Force-kill "<runtime> wait" processes that still reference this project.
  pkill -9 -f "${RUNTIME_CMD} wait.*${PROJECT_NAME}_" 2>/dev/null || true

  # Force-remove every container labelled with this Compose project; the
  # "no container" noise is filtered out of the output.
  echo " Cleaning up existing containers..."
  ${RUNTIME_CMD} ps -a --filter "$project_filter" --format "{{.Names}}" 2>/dev/null \
    | xargs -r ${RUNTIME_CMD} rm -f 2>&1 \
    | grep -v "Error.*no container" \
    || true
}
# Wait for container to be healthy (via healthcheck).
#
# Globals:   RUNTIME_CMD (read), POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   "<container> ready (<n>s)" on success, an ERROR line on timeout
# Returns:   0 once the health status is "healthy", 1 on timeout
#
# The status is polled every POLL_INTERVAL seconds. At least one check is
# always made, even when the timeout is shorter than POLL_INTERVAL, and there
# is no sleep after the final failed check.
wait_for_healthy() {
  local container=$1
  local timeout_sec=$2
  local max_attempts=$((timeout_sec / POLL_INTERVAL))
  if ((max_attempts < 1)); then
    max_attempts=1
  fi

  local start_time
  start_time=$(date +%s)

  local attempt health_status elapsed
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # Docker doesn't have a standalone healthcheck command, check via inspect.
    # An inspect failure (e.g. container not created yet) counts as "unknown".
    health_status=$(${RUNTIME_CMD} inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "unknown")
    if [[ "$health_status" == "healthy" ]]; then
      elapsed=$(($(date +%s) - start_time))
      echo "$container ready (${elapsed}s)"
      return 0
    fi
    if ((attempt < max_attempts)); then
      sleep "$POLL_INTERVAL"
    fi
  done

  echo "ERROR: $container failed to become healthy after ${timeout_sec}s"
  return 1
}
# Wait for container to exit (used for bootstrap).
#
# Globals:   RUNTIME_CMD (read), POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   "<container> completed (<n>s)" on success, an ERROR line otherwise
# Returns:   0 when the container exited with code 0,
#            1 when it exited with a non-zero code or did not exit in time
#
# A non-zero exit code is treated as failure so that a failed one-shot job is
# not reported as "completed". At least one check is always made, and there is
# no sleep after the final failed check.
wait_for_exited() {
  local container=$1
  local timeout_sec=$2
  local max_attempts=$((timeout_sec / POLL_INTERVAL))
  if ((max_attempts < 1)); then
    max_attempts=1
  fi

  local start_time
  start_time=$(date +%s)

  local attempt state status exit_code elapsed
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # One inspect call yields "<status> <exit code>"; failure counts as "unknown".
    state=$(${RUNTIME_CMD} inspect "$container" --format='{{.State.Status}} {{.State.ExitCode}}' 2>/dev/null || echo "unknown")
    status=${state%% *}
    exit_code=${state##* }
    if [[ "$status" == "exited" ]]; then
      elapsed=$(($(date +%s) - start_time))
      # Only a numeric, non-zero code is a failure; if the runtime reported no
      # usable code, fall back to treating "exited" as completion.
      if [[ "$exit_code" =~ ^[0-9]+$ ]] && ((exit_code != 0)); then
        echo "ERROR: $container exited with code ${exit_code} after ${elapsed}s"
        echo "       Inspect the output with: ${RUNTIME_CMD} logs $container"
        return 1
      fi
      echo "$container completed (${elapsed}s)"
      return 0
    fi
    if ((attempt < max_attempts)); then
      sleep "$POLL_INTERVAL"
    fi
  done

  echo "ERROR: $container failed to complete after ${timeout_sec}s"
  return 1
}
# Run `compose up -d` for the given services without printing anything on
# success; on failure, print the captured output so the cause is visible.
# Globals:   COMPOSE_CMD (read)
# Arguments: service names
# Returns:   0 on success, 1 if compose failed
_compose_up_quiet() {
  local output
  if ! output=$(${COMPOSE_CMD} up -d "$@" 2>&1); then
    echo "ERROR: '${COMPOSE_CMD} up -d $*' failed:" >&2
    printf '%s\n' "$output" >&2
    return 1
  fi
}

# Bring the whole stack up in dependency order, waiting for each phase to be
# ready before starting the next one.
#
# Globals: COMPOSE_CMD, PID_FILE, the *_TIMEOUT constants (read);
#          GIT_BRANCH and SKIP_WATCH (optional, read from the environment)
# Exits:   1 as soon as a phase fails to start or to become ready
#
# Compose output for phases 2-5 is hidden when the command succeeds. It is
# captured rather than discarded so that a failing `up` reports why, instead of
# the script stopping under `set -e` without any message.
start_stack() {
  local stack_start_time
  stack_start_time=$(date +%s)

  # Check Docker disk usage before starting
  check_docker_disk_usage

  # Clean up any existing processes first
  cleanup_existing

  # Show branch if set
  if [[ -n "${GIT_BRANCH:-}" ]]; then
    echo "Branch: $GIT_BRANCH"
  fi

  echo "Building kraiken-lib..."
  ./scripts/build-kraiken-lib.sh

  echo "Starting stack..."

  # Phase 1: Start base services (no dependencies).
  # Build/pull progress lines are filtered out; a failure here is caught by
  # the health waits that follow.
  echo " Starting anvil & postgres..."
  ${COMPOSE_CMD} up -d anvil postgres 2>&1 | grep -v "STEP\|Copying\|Writing\|Getting\|fetch\|Installing\|Executing" || true
  wait_for_healthy "$(container_name anvil)" "$ANVIL_TIMEOUT" || exit 1
  wait_for_healthy "$(container_name postgres)" "$POSTGRES_TIMEOUT" || exit 1

  # Phase 2: Start bootstrap (depends on anvil & postgres healthy)
  echo " Starting bootstrap..."
  _compose_up_quiet bootstrap || exit 1
  wait_for_exited "$(container_name bootstrap)" "$BOOTSTRAP_TIMEOUT" || exit 1

  # Phase 3: Start ponder (depends on bootstrap completed)
  echo " Starting ponder..."
  _compose_up_quiet ponder || exit 1
  wait_for_healthy "$(container_name ponder)" "$PONDER_TIMEOUT" || exit 1

  # Phase 4: Start frontend services (depend on ponder healthy).
  # Only webapp is waited on.
  echo " Starting webapp, landing, txn-bot..."
  _compose_up_quiet webapp landing txn-bot || exit 1
  wait_for_healthy "$(container_name webapp)" "$WEBAPP_TIMEOUT" || exit 1

  # Phase 5: Start caddy (depends on frontend services)
  echo " Starting caddy..."
  _compose_up_quiet caddy || exit 1
  wait_for_healthy "$(container_name caddy)" "$CADDY_TIMEOUT" || exit 1

  # Start the kraiken-lib watcher in the background unless SKIP_WATCH is set,
  # and record its PID so a later cleanup can find it.
  if [[ -z "${SKIP_WATCH:-}" ]]; then
    echo "Watching for kraiken-lib changes..."
    ./scripts/watch-kraiken-lib.sh &
    echo $! > "$PID_FILE"
  fi

  local total_time=$(($(date +%s) - stack_start_time))
  echo ""
  echo "[ok] Stack started in ${total_time}s"
  echo " Web App: http://localhost:8081/app/"
  echo " RPC Proxy: http://localhost:8081/api/rpc"
  echo " GraphQL: http://localhost:8081/api/graphql"
}
# Stop the stack and reclaim Docker disk space.
#
# Globals: COMPOSE_CMD (read), RUNTIME_CMD (read)
# Order: clean up leftovers, `compose down`, prune the build cache, then prune
# all unused data. Only the summary lines of each prune are shown.
#
# NOTE(review): neither prune command carries a project filter, so they appear
# to remove unused images/volumes/cache for the whole Docker host, not just
# this project - confirm that this is intended.
stop_stack() {
  cleanup_existing
  ${COMPOSE_CMD} down

  # Aggressive pruning to prevent disk bloat
  echo " Pruning Docker resources (images, containers, volumes, build cache)..."

  # Build cache first (this is usually the biggest culprit)
  ${RUNTIME_CMD} builder prune -af 2>&1 \
    | grep -E "Total|deleted" \
    || true

  # Then everything unused: containers, networks, images, volumes
  ${RUNTIME_CMD} system prune -af --volumes 2>&1 \
    | grep -E "Total reclaimed|deleted" \
    || true

  echo "[ok] Stack stopped and cleaned"
}
# Print one status line per long-running service of this project.
#
# Globals: RUNTIME_CMD (read), PROJECT_NAME (read)
# Outputs: "[ok] <service>" when the container's health status is "healthy",
#          "[??] <service> (not created)" when no container carries the
#          service's Compose labels, "[!!] <service>" in every other case
#          (including a failed inspect).
check_health() {
  echo "Checking health..."

  local -a targets=(anvil postgres ponder webapp landing txn-bot caddy)
  local svc name state

  for svc in "${targets[@]}"; do
    # Look the container up by its Compose labels; first match wins.
    name=$(${RUNTIME_CMD} ps --all \
      --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
      --filter "label=com.docker.compose.service=${svc}" \
      --format '{{.Names}}' | head -n1)

    if [[ -z "$name" ]]; then
      echo " [??] $svc (not created)"
      continue
    fi

    state=$(${RUNTIME_CMD} inspect --format='{{.State.Health.Status}}' "$name" 2>/dev/null || echo "unknown")

    case "$state" in
      healthy) echo " [ok] $svc" ;;
      *) echo " [!!] $svc" ;;
    esac
  done
}
# Restart only the webapp and txn-bot containers, leaving everything else
# (and therefore the chain state) untouched, then wait for the webapp to
# answer over HTTP.
#
# Globals: RUNTIME_CMD (read), PROJECT_NAME (read)
# Returns: 0 once the webapp responds
# Exits:   1 if the webapp container does not exist or never responds
#
# The retry counter lives in the for-loop header on purpose: a standalone
# `((attempt++))` evaluates to 0 on the first pass, which is a non-zero exit
# status and makes `set -e` abort the script after the first failed probe.
restart_light() {
  echo "Light restart: webapp + txn-bot only..."
  echo " Preserving Anvil state (contracts remain deployed)"

  local webapp_container txnbot_container
  webapp_container=$(${RUNTIME_CMD} ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=webapp" \
    --format '{{.Names}}' | head -n1)
  txnbot_container=$(${RUNTIME_CMD} ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=txn-bot" \
    --format '{{.Names}}' | head -n1)

  if [[ -z "$webapp_container" ]]; then
    echo "[!!] webapp container not found - run './scripts/dev.sh start' first"
    exit 1
  fi

  local start_time
  start_time=$(date +%s)

  echo " Restarting containers..."
  ${RUNTIME_CMD} restart "$webapp_container" >/dev/null
  # txn-bot is optional: restart it only when its container exists.
  if [[ -n "$txnbot_container" ]]; then
    ${RUNTIME_CMD} restart "$txnbot_container" >/dev/null
  fi

  echo " Waiting for webapp to be ready..."
  local max_attempts=30
  local attempt duration
  for ((attempt = 0; attempt < max_attempts; attempt++)); do
    # NOTE(review): this probes port 5173 directly while the printed URL uses
    # 8081 - presumably the webapp's own port; confirm it is published.
    if curl -s -f -o /dev/null http://localhost:5173/app/ 2>/dev/null; then
      duration=$(($(date +%s) - start_time))
      echo "[ok] Light restart complete (~${duration}s)"
      echo " Web App: http://localhost:8081/app/"
      return 0
    fi
    sleep 2
  done

  echo "[!!] Webapp failed to respond after ${max_attempts} attempts"
  exit 1
}
# Full restart: stop the whole stack, then start it again from scratch.
# Delegates to stop_stack and start_stack, in that order.
restart_full() {
  printf '%s\n' "Full restart: all containers + bootstrap..."

  stop_stack
  start_stack

  printf '%s\n' "[ok] Full restart complete"
}
# Print the command-line help on stdout and terminate the script.
# Exits: 1 (always), so it doubles as the handler for invalid invocations.
usage() {
  local prog="$0"

  cat <<EOF
Usage: ${prog} {start|stop|health|restart [--light|--full]}
Commands:
start Start all services (builds kraiken-lib, runs bootstrap)
stop Stop all services
health Check service health
restart Full restart (default: redeploys contracts)
restart --light Light restart (webapp + txnbot only, preserves state)
restart --full Full restart (same as 'restart')
Environment Variables:
GIT_BRANCH Branch to checkout in containers
Examples:
./scripts/dev.sh start
./scripts/dev.sh restart --light # Fast frontend iteration (~10-20s)
./scripts/dev.sh restart --full # Fresh contract deployment (~3-4min)
GIT_BRANCH=fix/something ./scripts/dev.sh start
./scripts/dev.sh health
EOF

  exit 1
}
# Entry point: dispatch on the first argument (defaulting to "help", which
# falls through to usage), and for "restart" on the optional second argument.
main() {
  local action="${1:-help}"
  local mode="${2:-}"

  case "$action" in
    start)
      start_stack
      ;;
    stop)
      stop_stack
      ;;
    health)
      check_health
      ;;
    restart)
      # No mode means a full restart.
      case "$mode" in
        --light)
          restart_light
          ;;
        --full | "")
          restart_full
          ;;
        *)
          echo "Unknown restart mode: $mode"
          usage
          ;;
      esac
      ;;
    *)
      usage
      ;;
  esac
}

main "$@"