372 lines
12 KiB
Bash
Executable file
372 lines
12 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Control script for the local container stack: start, stop, health check
# and restart. All paths below are relative to the parent of the directory
# that contains this script.

set -euo pipefail

cd "$(dirname "$0")/.."

# Timeout constants (in seconds)
declare -r ANVIL_TIMEOUT=60      # Anvil starts fast (increased for first-time setup)
declare -r POSTGRES_TIMEOUT=30   # Database init is quick
declare -r BOOTSTRAP_TIMEOUT=120 # Contract deployment + seeding
declare -r PONDER_TIMEOUT=120    # Must index bootstrap events
declare -r WEBAPP_TIMEOUT=120    # npm install + Vite startup
declare -r CADDY_TIMEOUT=20      # Proxy starts instantly
declare -r POLL_INTERVAL=2       # Check health every N seconds
declare -r MAX_DOCKER_DISK_GB=20 # Maximum Docker disk usage in GB

# File that records the PID of the background kraiken-lib watcher.
PID_FILE=/tmp/kraiken-watcher.pid

# Compose project name: explicit override, else the current directory name.
PROJECT_NAME=${COMPOSE_PROJECT_NAME:-$(basename "$PWD")}
|
|
|
|
# Detect container runtime: prefer the "docker compose" plugin, fall back to
# the standalone docker-compose binary, otherwise print install hints and abort.
if docker compose version > /dev/null 2>&1; then
  COMPOSE_CMD="docker compose"
elif command -v docker-compose > /dev/null 2>&1; then
  COMPOSE_CMD="docker-compose"
else
  printf '%s\n' \
    "Error: docker/docker-compose not found. Please install Docker." \
    "" \
    "Installation instructions:" \
    " Linux: https://docs.docker.com/engine/install/" \
    " Mac: brew install colima docker docker-compose && colima start"
  exit 1
fi

# Both compose flavours drive the same container CLI.
RUNTIME_CMD="docker"
|
|
|
|
# Print the container name for the first replica of a compose service.
# Globals:   PROJECT_NAME (read)
# Arguments: $1 - compose service name
# Outputs:   "<project>-<service>-1" on stdout
container_name() {
  local svc="$1"
  # Hyphen-separated form used by docker compose v2 (v1 used underscores).
  printf '%s\n' "${PROJECT_NAME}-${svc}-1"
}
|
|
|
|
# Sum human-readable Docker sizes (one per line on stdin, e.g. "1.2GB",
# "512MB", "12.5kB", "0B") and print the total in GB with one decimal place.
# Docker reports decimal units, so 1GB = 1000MB. Lines that do not look like
# "<number><unit>" with a known unit are ignored.
_sum_docker_sizes_gb() {
  LC_ALL=C awk '
    BEGIN {
      scale["B"] = 1e-9
      scale["kB"] = 1e-6
      scale["KB"] = 1e-6
      scale["MB"] = 1e-3
      scale["GB"] = 1
      scale["TB"] = 1e3
      scale["PB"] = 1e6
    }
    {
      value = $1
      unit = $1
      sub(/[A-Za-z]+$/, "", value)
      sub(/^[0-9.]+/, "", unit)
      if (value ~ /^[0-9]+(\.[0-9]+)?$/ && (unit in scale)) {
        total += value * scale[unit]
      }
    }
    END { printf "%.1f\n", total }
  '
}

# Check Docker disk usage; warn above 80% of the limit and abort above it.
# Globals:   RUNTIME_CMD (read, defaults to "docker"),
#            MAX_DOCKER_DISK_GB (read, defaults to 20)
# Outputs:   usage summary and any warning/error on stdout
# Returns:   0 when usage is within the limit, or when the runtime is missing
#            or not responding (the check is skipped); exits 1 when usage
#            exceeds the limit.
check_docker_disk_usage() {
  local runtime="${RUNTIME_CMD:-docker}"
  local limit_gb="${MAX_DOCKER_DISK_GB:-20}"

  if ! command -v "$runtime" &> /dev/null; then
    return 0 # Skip if Docker not available
  fi

  # Ask for the SIZE column directly instead of slicing the table by field
  # number: the TYPE column contains spaces ("Local Volumes", "Build Cache"),
  # so positional parsing picks up the count columns instead of sizes.
  local sizes
  if ! sizes=$("$runtime" system df --format '{{.Size}}' 2> /dev/null) ||
    [[ -z "$sizes" ]]; then
    return 0 # Docker not running
  fi

  local total_gb
  total_gb=$(printf '%s\n' "$sizes" | _sum_docker_sizes_gb)

  echo " Docker disk usage: ${total_gb}GB / ${limit_gb}GB limit"

  # Floating-point comparison is delegated to awk (bash arithmetic is
  # integer-only); the warning threshold is 80% of the limit.
  local verdict
  verdict=$(LC_ALL=C awk -v used="$total_gb" -v limit="$limit_gb" 'BEGIN {
    if (used > limit) print "over"
    else if (used > limit * 0.8) print "high"
    else print "ok"
  }')

  if [[ "$verdict" != "ok" ]]; then
    echo " [!!] WARNING: Docker disk usage is high!"
    echo " [!!] Run './scripts/cleanup-disk.sh' to free up space"
  fi

  # Hard stop if over limit
  if [[ "$verdict" == "over" ]]; then
    echo ""
    echo "ERROR: Docker disk usage exceeds ${limit_gb}GB limit!"
    echo "Run './scripts/cleanup-disk.sh' to free up space, then try again."
    exit 1
  fi
}
|
|
|
|
# Tear down leftovers from a previous run: watcher processes, the watcher PID
# file, lingering "<runtime> wait" processes and this project's containers.
# Every step is best-effort; nothing here fails the script.
# Globals: PID_FILE, RUNTIME_CMD, PROJECT_NAME (all read)
cleanup_existing() {
  # Kill any existing watch scripts
  pkill -f "watch-kraiken-lib.sh" 2> /dev/null || true
  pkill -f "inotifywait.*${PWD}/kraiken-lib" 2> /dev/null || true

  # Remove PID file
  rm -f "$PID_FILE"

  # Kill lingering "<runtime> wait" processes for this project. Container
  # names are "<project>-<service>-N" with compose v2 and used "_" with v1,
  # so accept either separator.
  pkill -9 -f "${RUNTIME_CMD} wait.*${PROJECT_NAME}[-_]" 2> /dev/null || true

  # Remove any existing containers (suppress errors if they don't exist)
  echo " Cleaning up existing containers..."
  local -a stale=()
  local name
  while IFS= read -r name; do
    if [[ -n "$name" ]]; then
      stale+=("$name")
    fi
  done < <(${RUNTIME_CMD} ps -a \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --format "{{.Names}}" 2> /dev/null)

  # Only call "rm" when there is something to remove; this replaces the
  # GNU-only "xargs -r".
  if ((${#stale[@]} > 0)); then
    ${RUNTIME_CMD} rm -f "${stale[@]}" 2>&1 |
      grep -v "Error.*no container" || true
  fi
}
|
|
|
|
# Wait for container to be healthy (via healthcheck).
# Globals:   RUNTIME_CMD, POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   success or error line on stdout
# Returns:   0 once the health status is "healthy", 1 on timeout
wait_for_healthy() {
  local container=$1
  local timeout_sec=$2
  # Number of sleeps between probes. One more probe than sleeps is made, so
  # the container is always checked at least once and once more after the
  # final sleep.
  local max_sleeps=$((timeout_sec / POLL_INTERVAL))
  local start_time
  start_time=$(date +%s)

  local slept=0
  local health_status elapsed
  while true; do
    # Docker doesn't have a standalone healthcheck command, check via inspect.
    # A failing inspect is reported as "unknown".
    health_status=$(${RUNTIME_CMD} inspect --format='{{.State.Health.Status}}' "$container" 2> /dev/null || echo "unknown")
    if [[ "$health_status" == "healthy" ]]; then
      elapsed=$(($(date +%s) - start_time))
      echo " ✓ $container ready (${elapsed}s)"
      return 0
    fi

    if ((slept >= max_sleeps)); then
      break
    fi
    sleep "$POLL_INTERVAL"
    slept=$((slept + 1))
  done

  echo "ERROR: $container failed to become healthy after ${timeout_sec}s"
  return 1
}
|
|
|
|
# Wait for container to exit successfully (used for bootstrap).
# Globals:   RUNTIME_CMD, POLL_INTERVAL (read)
# Arguments: $1 - container name
#            $2 - timeout in seconds
# Outputs:   success or error line on stdout
# Returns:   0 once the container has exited with code 0;
#            1 if it exited with a non-zero code or did not exit in time
wait_for_exited() {
  local container=$1
  local timeout_sec=$2
  # Number of sleeps between probes. One more probe than sleeps is made, so
  # the container is always checked at least once and once more after the
  # final sleep.
  local max_sleeps=$((timeout_sec / POLL_INTERVAL))
  local start_time
  start_time=$(date +%s)

  local slept=0
  local state status exit_code elapsed
  while true; do
    # One inspect yields "<status> <exit code>"; a failing inspect is
    # reported as "unknown".
    state=$(${RUNTIME_CMD} inspect "$container" --format='{{.State.Status}} {{.State.ExitCode}}' 2> /dev/null || echo "unknown")
    status=${state%% *}
    exit_code=${state##* }

    if [[ "$status" == "exited" ]]; then
      # A container that exited with an error did not complete its job.
      if [[ "$exit_code" != "0" ]]; then
        echo "ERROR: $container exited with code ${exit_code}"
        return 1
      fi
      elapsed=$(($(date +%s) - start_time))
      echo " ✓ $container completed (${elapsed}s)"
      return 0
    fi

    if ((slept >= max_sleeps)); then
      break
    fi
    sleep "$POLL_INTERVAL"
    slept=$((slept + 1))
  done

  echo "ERROR: $container failed to complete after ${timeout_sec}s"
  return 1
}
|
|
|
|
# Run "<compose> up -d <services...>" with its output hidden. On failure the
# captured output is replayed on stderr so the cause is visible.
# Arguments: $@ - compose service names
# Returns:   0 on success, 1 if compose failed
_compose_up_quiet() {
  local output
  if ! output=$(${COMPOSE_CMD} up -d "$@" 2>&1); then
    echo "ERROR: '${COMPOSE_CMD} up -d $*' failed:" >&2
    printf '%s\n' "$output" >&2
    return 1
  fi
}

# Bring the whole stack up in dependency order, waiting for each phase.
# Globals: COMPOSE_CMD, PID_FILE, *_TIMEOUT (read); GIT_BRANCH and SKIP_WATCH
#          (optional, read from the environment)
# Exits 1 as soon as any phase fails to start or become ready.
start_stack() {
  local stack_start_time
  stack_start_time=$(date +%s)

  # Check Docker disk usage before starting
  check_docker_disk_usage

  # Clean up any existing processes first
  cleanup_existing

  # Show branch if set
  if [[ -n "${GIT_BRANCH:-}" ]]; then
    echo "Branch: $GIT_BRANCH"
  fi

  echo "Building kraiken-lib..."
  ./scripts/build-kraiken-lib.sh

  echo "Starting stack..."

  # Phase 1: Start base services (no dependencies).
  # Output is shown minus noisy progress lines; a failure here surfaces
  # through the health waits below.
  echo " Starting anvil & postgres..."
  ${COMPOSE_CMD} up -d anvil postgres 2>&1 |
    grep -v "STEP\|Copying\|Writing\|Getting\|fetch\|Installing\|Executing" || true

  wait_for_healthy "$(container_name anvil)" "$ANVIL_TIMEOUT" || exit 1
  wait_for_healthy "$(container_name postgres)" "$POSTGRES_TIMEOUT" || exit 1

  # Phase 2: Start bootstrap (depends on anvil & postgres healthy)
  echo " Starting bootstrap..."
  _compose_up_quiet bootstrap || exit 1

  wait_for_exited "$(container_name bootstrap)" "$BOOTSTRAP_TIMEOUT" || exit 1

  # Phase 3: Start ponder (depends on bootstrap completed)
  echo " Starting ponder..."
  _compose_up_quiet ponder || exit 1

  wait_for_healthy "$(container_name ponder)" "$PONDER_TIMEOUT" || exit 1

  # Phase 4: Start frontend services (depend on ponder healthy)
  echo " Starting webapp, landing, txn-bot, otterscan..."
  _compose_up_quiet webapp landing txn-bot otterscan || exit 1

  wait_for_healthy "$(container_name webapp)" "$WEBAPP_TIMEOUT" || exit 1

  # Phase 5: Start caddy (depends on frontend services)
  echo " Starting caddy..."
  _compose_up_quiet caddy || exit 1

  wait_for_healthy "$(container_name caddy)" "$CADDY_TIMEOUT" || exit 1

  # Smoke test: verify end-to-end connectivity through Caddy
  echo " Running smoke test..."
  if ! ./scripts/wait-for-service.sh http://localhost:8081/app/ 30 "caddy-proxy"; then
    echo " [!!] Smoke test failed — Caddy proxy not serving /app/"
    exit 1
  fi

  # Start the background watcher unless SKIP_WATCH is set to a non-empty
  # value; its PID is recorded in PID_FILE.
  if [[ -z "${SKIP_WATCH:-}" ]]; then
    echo "Watching for kraiken-lib changes..."
    ./scripts/watch-kraiken-lib.sh &
    echo $! > "$PID_FILE"
  fi

  local total_time=$(($(date +%s) - stack_start_time))
  echo ""
  echo "[ok] Stack started in ${total_time}s"
  echo " Web App: http://localhost:8081/app/"
  echo " Explorer: http://localhost:5100"
  echo " RPC Proxy: http://localhost:8081/api/rpc"
  echo " GraphQL: http://localhost:8081/api/graphql"
}
|
|
|
|
# Stop the stack, then prune Docker data to keep disk usage down.
# Globals: COMPOSE_CMD, RUNTIME_CMD (read)
# NOTE(review): both prune commands act on ALL unused Docker data on this
# host, not only this project's — confirm that is intended.
stop_stack() {
  cleanup_existing
  ${COMPOSE_CMD} down

  # Aggressive pruning to prevent disk bloat
  printf '%s\n' " Pruning Docker resources (images, containers, volumes, build cache)..."

  # Build cache goes first (this is usually the biggest culprit); only the
  # summary lines are shown and failures are tolerated.
  {
    ${RUNTIME_CMD} builder prune -af 2>&1 |
      grep -E "Total|deleted"
  } || true

  # Then every other unused object (containers, networks, images, volumes).
  {
    ${RUNTIME_CMD} system prune -af --volumes 2>&1 |
      grep -E "Total reclaimed|deleted"
  } || true

  printf '%s\n' "[ok] Stack stopped and cleaned"
}
|
|
|
|
# Print a one-line health verdict for every long-running service.
# Globals: RUNTIME_CMD, PROJECT_NAME (read)
# Outputs: "[ok]" when the container's health status is "healthy",
#          "[??]" when no container exists for the service,
#          "[!!]" for any other status (including a failed inspect).
check_health() {
  echo "Checking health..."

  local svc found state
  for svc in anvil postgres ponder webapp landing txn-bot otterscan caddy; do
    # Look the container up by compose labels rather than by name.
    found=$(${RUNTIME_CMD} ps --all \
      --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
      --filter "label=com.docker.compose.service=${svc}" \
      --format '{{.Names}}' | head -n1)

    if [[ -z "$found" ]]; then
      echo " [??] $svc (not created)"
      continue
    fi

    state=$(${RUNTIME_CMD} inspect --format='{{.State.Health.Status}}' "$found" 2> /dev/null || echo "unknown")
    case "$state" in
      healthy) echo " [ok] $svc" ;;
      *) echo " [!!] $svc" ;;
    esac
  done
}
|
|
|
|
# Restart only the webapp and txn-bot containers, leaving the rest of the
# stack untouched, then wait for the webapp to answer over HTTP.
# Globals: RUNTIME_CMD, PROJECT_NAME (read)
# Exits 1 if no webapp container exists or the webapp never responds;
# returns 0 once it does.
restart_light() {
  echo "Light restart: webapp + txn-bot only..."
  echo " Preserving Anvil state (contracts remain deployed)"

  local webapp_container txnbot_container
  webapp_container=$(${RUNTIME_CMD} ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=webapp" \
    --format '{{.Names}}' | head -n1)

  txnbot_container=$(${RUNTIME_CMD} ps --all \
    --filter "label=com.docker.compose.project=${PROJECT_NAME}" \
    --filter "label=com.docker.compose.service=txn-bot" \
    --format '{{.Names}}' | head -n1)

  if [[ -z "$webapp_container" ]]; then
    echo "[!!] webapp container not found - run './scripts/dev.sh start' first"
    exit 1
  fi

  local start_time
  start_time=$(date +%s)

  echo " Restarting containers..."
  ${RUNTIME_CMD} restart "$webapp_container" > /dev/null
  # txn-bot is optional: restart it only when a container was found.
  if [[ -n "$txnbot_container" ]]; then
    ${RUNTIME_CMD} restart "$txnbot_container" > /dev/null
  fi

  echo " Waiting for webapp to be ready..."
  local max_attempts=30
  local attempt=0
  local duration
  while ((attempt < max_attempts)); do
    if curl -s -f -o /dev/null http://localhost:5173/app/ 2> /dev/null; then
      duration=$(($(date +%s) - start_time))
      echo "[ok] Light restart complete (~${duration}s)"
      echo " Web App: http://localhost:8081/app/"
      return 0
    fi
    sleep 2
    # Plain assignment instead of ((attempt++)): the post-increment evaluates
    # to 0 on the first pass, which is exit status 1 and aborts under set -e.
    attempt=$((attempt + 1))
  done

  echo "[!!] Webapp failed to respond after ${max_attempts} attempts"
  exit 1
}
|
|
|
|
# Full restart: tear the stack down via stop_stack, then bring it back up
# via start_stack.
restart_full() {
  printf '%s\n' "Full restart: all containers + bootstrap..."
  stop_stack
  start_stack
  printf '%s\n' "[ok] Full restart complete"
}
|
|
|
|
# Print the command-line help on stdout and exit with status 1.
# The help lists every environment variable the script reads and notes that
# "stop" also prunes Docker data.
usage() {
  cat <<EOF
Usage: $0 {start|stop|health|restart [--light|--full]}

Commands:
  start              Start all services (builds kraiken-lib, runs bootstrap)
  stop               Stop all services and prune unused Docker resources
  health             Check service health
  restart            Full restart (default: redeploys contracts)
  restart --light    Light restart (webapp + txnbot only, preserves state)
  restart --full     Full restart (same as 'restart')

Environment Variables:
  GIT_BRANCH             Branch to checkout in containers
  SKIP_WATCH             Set to any non-empty value to skip the kraiken-lib watcher
  COMPOSE_PROJECT_NAME   Compose project name (default: project directory name)

Examples:
  ./scripts/dev.sh start
  ./scripts/dev.sh restart --light   # Fast frontend iteration (~10-20s)
  ./scripts/dev.sh restart --full    # Fresh contract deployment (~3-4min)
  GIT_BRANCH=fix/something ./scripts/dev.sh start
  ./scripts/dev.sh health
EOF
  exit 1
}
|
|
|
|
# Entry point: map the sub-command (and optional restart mode) to its handler.
# Anything unrecognised, including no arguments at all, shows the usage text.
main() {
  local command="${1:-help}"
  local mode="${2:-}"

  case "$command" in
    start)
      start_stack
      ;;
    stop)
      stop_stack
      ;;
    health)
      check_health
      ;;
    restart)
      case "$mode" in
        --light)
          restart_light
          ;;
        --full | "")
          restart_full
          ;;
        *)
          echo "Unknown restart mode: $mode"
          usage
          ;;
      esac
      ;;
    *)
      usage
      ;;
  esac
}

main "$@"
|