harb/tools/push3-evolution/evolution-daemon.sh

344 lines
13 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# =============================================================================
# evolution-daemon.sh — perpetual Push3 evolution loop
#
# Wraps the full per-run cycle so that a single command starts continuous
# evolution on a DigitalOcean (or similar) box with no manual intervention.
#
# Usage:
# cd <repo-root>
# BASE_RPC_URL=https://mainnet.base.org \
# ./tools/push3-evolution/evolution-daemon.sh
#
# Per-run cycle:
# 1. git pull origin master — sync latest code
# 2. git apply evolution.patch — unbounded AW, gas limit override
# 3. Clean stale /tmp/tmp.* dirs — prevent interference from killed runs
# 4. Run evolve.sh — full evolution pipeline
# 5. Results already in evolved/run_NNN/ (evolve.sh auto-increments)
# 6. Admission already done by evolve.sh (step 5 of its pipeline)
# 7. Write summary report — best fitness, improvement, duration
# 8. Notify via openclaw — SSH to main VPS
# 9. git apply --reverse — revert evolution patches
# 10. Loop
#
# Configuration:
# Loaded from tools/push3-evolution/evolution.conf (co-located with this script).
# BASE_RPC_URL must be set in the environment or in evolution.conf.
#
# Signals:
# SIGINT / SIGTERM — finish the current run cleanly, then exit.
# =============================================================================
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory holding this script (derived from $0).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Repository root, two levels above the script directory.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Files that live next to this script.
CONF_FILE="${SCRIPT_DIR}/evolution.conf"
PATCH_FILE="${SCRIPT_DIR}/evolution.patch"
EVOLVE_SH="${SCRIPT_DIR}/evolve.sh"
# =============================================================================
# Load config
# =============================================================================
# The daemon cannot run without its co-located config file.
if [[ ! -f "$CONF_FILE" ]]; then
  echo "[daemon] ERROR: config file not found: $CONF_FILE" >&2
  exit 2
fi

# Import every setting the config defines into this shell.
# shellcheck source=evolution.conf
source "$CONF_FILE"

# Fall back to defaults for anything neither the environment nor the config set.
BASE_RPC_URL="${BASE_RPC_URL:-}"
EVAL_MODE="${EVAL_MODE:-revm}"
POPULATION="${POPULATION:-20}"
GENERATIONS="${GENERATIONS:-30}"
MUTATION_RATE="${MUTATION_RATE:-1}"
ELITES="${ELITES:-2}"
DIVERSE_SEEDS="${DIVERSE_SEEDS:-true}"
OPENCLAW_SSH_TARGET="${OPENCLAW_SSH_TARGET:-}"
SEED="${SEED:-tools/push3-evolution/seeds/optimizer_v3.push3}"

# BASE_RPC_URL is only mandatory when evaluating in revm mode.
if [[ "$EVAL_MODE" == "revm" && -z "$BASE_RPC_URL" ]]; then
  echo "[daemon] ERROR: BASE_RPC_URL is not set. Set it in the environment or in $CONF_FILE" >&2
  exit 2
fi
export BASE_RPC_URL

# A relative seed path is interpreted relative to the repository root.
case "$SEED" in
  /*) ;;
  *) SEED="$REPO_ROOT/$SEED" ;;
esac

# Run output lives under the repo root; the original notes this is so that
# evolve.sh's auto-increment finds prior runs.
OUTPUT_DIR="$REPO_ROOT/evolved"
# =============================================================================
# Patch state tracking
# =============================================================================
# True while evolution.patch is applied to the working tree by this daemon.
PATCH_APPLIED=false

#######################################
# Revert evolution.patch if this daemon currently has it applied.
# Called at the end of every run and from the EXIT trap, so it is strictly
# best-effort: a failed revert is reported but never aborts the caller.
# Globals:   PATCH_APPLIED (read, reset to false), REPO_ROOT, PATCH_FILE (read)
# Outputs:   progress and warnings on stderr (git's own error is passed through)
# Returns:   0 always
#######################################
cleanup_patch() {
  if [ "$PATCH_APPLIED" != "true" ]; then
    return 0
  fi
  echo "[daemon] Reverting evolution patches…" >&2
  # Do not hide a failed revert: a tree left patched makes the next
  # `git pull` / `git apply` fail, and the operator needs to know why.
  if ! (cd "$REPO_ROOT" && git apply --reverse "$PATCH_FILE"); then
    echo "[daemon] WARNING: failed to revert $PATCH_FILE — working tree may still contain evolution patches" >&2
  fi
  # Reset regardless so the revert is attempted only once per application.
  PATCH_APPLIED=false
  return 0
}
# =============================================================================
# Signal handling — finish current run, then exit cleanly
# =============================================================================
# Set to "true" once SIGINT/SIGTERM has been received; the main loop reads it.
STOP_REQUESTED=false

#######################################
# Signal handler: announce the stop request and raise the stop flag.
# It does not exit by itself.
# Globals:   STOP_REQUESTED (written)
# Outputs:   a blank line followed by a notice, both on stderr
#######################################
handle_signal() {
  printf '\n%s\n' "[daemon] Stop requested — will exit after current run completes." >&2
  STOP_REQUESTED=true
}

# INT/TERM only raise the flag; the EXIT trap reverts any applied patch.
trap handle_signal INT TERM
trap cleanup_patch EXIT
# =============================================================================
# Helpers
# =============================================================================
#######################################
# Write one "[daemon] "-prefixed line to stderr.
# Arguments: words of the message (joined with single spaces)
# Outputs:   the prefixed message on stderr; nothing on stdout
#######################################
log() {
  printf '%s\n' "[daemon] $*" >&2
}
#######################################
# Print the current UTC time as YYYY-MM-DDTHH:MM:SSZ on stdout.
#######################################
ts() {
  local -r iso_utc='%Y-%m-%dT%H:%M:%SZ'
  date -u "+${iso_utc}"
}
#######################################
# Send a one-line event to openclaw on the remote host over SSH.
# Best-effort: any SSH or remote failure is ignored so a broken notification
# channel can never stop the evolution loop.
# Globals:   OPENCLAW_SSH_TARGET (read) — empty/unset disables notification
#            OPENCLAW_SSH_TIMEOUT (read, optional) — connect timeout in
#            seconds, default 10
# Arguments: words of the message (joined with single spaces)
# Returns:   0 always
#######################################
notify() {
  local msg="$*"
  if [ -z "${OPENCLAW_SSH_TARGET:-}" ]; then
    return 0
  fi
  # The message travels on stdin to avoid shell-quoting issues with special
  # characters on the remote side; `IFS=` keeps its surrounding whitespace.
  # BatchMode + ConnectTimeout: the daemon runs unattended, so ssh must fail
  # fast instead of waiting on a password/host-key prompt or a dead host.
  printf '%s\n' "$msg" \
    | ssh -o BatchMode=yes -o ConnectTimeout="${OPENCLAW_SSH_TIMEOUT:-10}" \
        "$OPENCLAW_SSH_TARGET" \
        'IFS= read -r _msg; openclaw system event "$_msg"' 2>/dev/null \
    || true
}
# =============================================================================
# Pre-flight checks
# =============================================================================
# evolve.sh must exist; make it executable if it is not already.
if [[ ! -f "$EVOLVE_SH" ]]; then
  log "ERROR: evolve.sh not found at $EVOLVE_SH"
  exit 2
fi
if [[ ! -x "$EVOLVE_SH" ]]; then
  chmod +x "$EVOLVE_SH"
fi

# The seed program must exist as well.
if [[ ! -f "$SEED" ]]; then
  log "ERROR: seed file not found: $SEED"
  exit 2
fi

# A missing or empty patch file is tolerated: runs proceed without overrides.
HAS_PATCH=false
if [[ -f "$PATCH_FILE" && -s "$PATCH_FILE" ]]; then
  HAS_PATCH=true
else
  log "WARNING: patch file is empty or missing — no evolution-specific overrides will be applied"
fi

# Startup banner listing the effective configuration.
banner_lines=(
  "========================================================"
  "evolution-daemon.sh — $(ts)"
  " Repo: $REPO_ROOT"
  " Seed: $SEED"
  " Config: $CONF_FILE"
  " Patch: $PATCH_FILE (has_patch=$HAS_PATCH)"
  " Eval mode: $EVAL_MODE"
  " Population: $POPULATION"
  " Generations: $GENERATIONS"
  " Mutation: $MUTATION_RATE"
  " Elites: $ELITES"
  " Diverse: $DIVERSE_SEEDS"
  " Output dir: $OUTPUT_DIR"
  " Notify via: ${OPENCLAW_SSH_TARGET:-<disabled>}"
  "========================================================"
)
for banner_line in "${banner_lines[@]}"; do
  log "$banner_line"
done
unset banner_lines banner_line
RUN_NUM=0

# Seconds to wait before retrying after evolution.patch fails to apply.
# May be overridden from the environment or from evolution.conf.
PATCH_RETRY_DELAY="${PATCH_RETRY_DELAY:-60}"

#######################################
# Print the highest-numbered run_NNN directory below a base directory.
# Arguments: $1 - directory that holds run_NNN subdirectories
# Outputs:   the directory path on stdout, or an empty line if there is none
# Returns:   0 always (best-effort; Python errors are swallowed)
#######################################
latest_run_dir() {
  python3 - "$1" <<'PYEOF' 2>/dev/null || true
import os
import re
import sys

base = sys.argv[1]
max_n = -1
best_dir = ''
if os.path.isdir(base):
    for name in os.listdir(base):
        m = re.fullmatch(r'run_(\d+)', name)
        if m and os.path.isdir(os.path.join(base, name)):
            n = int(m.group(1))
            if n > max_n:
                max_n = n
                best_dir = os.path.join(base, name)
print(best_dir)
PYEOF
}

#######################################
# Print the highest integer "fitness" found in a run's generation_*.jsonl
# files. Lines that are not JSON objects, or whose fitness is not convertible
# to an integer, are skipped.
# Arguments: $1 - run directory
# Outputs:   the best fitness on stdout (0 when nothing larger is found or the
#            scan fails)
# Returns:   0 always
#######################################
best_fitness_in() {
  python3 - "$1" <<'PYEOF' 2>/dev/null || echo 0
import json
import os
import sys

run_dir = sys.argv[1]
best = 0
for fname in sorted(os.listdir(run_dir)):
    if not (fname.startswith('generation_') and fname.endswith('.jsonl')):
        continue
    with open(os.path.join(run_dir, fname)) as f:
        for line in f:
            try:
                d = json.loads(line)
                fitness = int(d.get('fitness', 0))
            except (ValueError, TypeError, AttributeError, OverflowError):
                # Malformed line, non-object JSON, or non-integer fitness.
                continue
            if fitness > best:
                best = fitness
print(best)
PYEOF
}

#######################################
# Exit the daemon with status 0 if a stop signal has been received.
# Globals:   STOP_REQUESTED, RUN_NUM (read)
#######################################
exit_if_stop_requested() {
  if [ "$STOP_REQUESTED" = "true" ]; then
    log ""
    log "Stop requested — daemon exiting after run #${RUN_NUM}."
    exit 0
  fi
}

# =============================================================================
# Main loop — one iteration per evolution run; exits only on a stop signal
# (or on an unhandled error, via `set -e`).
# =============================================================================
while true; do
  RUN_NUM=$((RUN_NUM + 1))
  RUN_START="$(date +%s)"
  log ""
  log "════════════════════════════════════════════════════"
  log "Run #${RUN_NUM} — $(ts)"
  log "════════════════════════════════════════════════════"

  # ── Step 1: Sync master ────────────────────────────────────────────────────
  log "[1/7] Syncing master…"
  if (cd "$REPO_ROOT" && git pull origin master --ff-only 2>&1); then
    log " git pull OK"
  else
    log " WARNING: git pull failed — continuing with current tree"
  fi

  # ── Step 2: Apply evolution patches ────────────────────────────────────────
  PATCH_APPLIED=false
  if [ "$HAS_PATCH" = "true" ]; then
    log "[2/7] Applying evolution patches…"
    if (cd "$REPO_ROOT" && git apply "$PATCH_FILE"); then
      PATCH_APPLIED=true
      log " Patches applied OK"
    else
      log " ERROR: patch failed to apply — skipping run (evaluation semantics would differ from intended)"
      log " Hint: evolution.patch may need regeneration if onchain/ files changed upstream."
      # Skipping must still honour a pending stop request and must not spin:
      # without a delay this path retries `git pull` in a tight loop until the
      # patch is regenerated upstream.
      exit_if_stop_requested
      log " Retrying in ${PATCH_RETRY_DELAY}s…"
      # `|| true`: a signal-interrupted sleep must not trip `set -e`.
      sleep "$PATCH_RETRY_DELAY" || true
      exit_if_stop_requested
      continue
    fi
  else
    log "[2/7] No patch file — skipping"
  fi

  # ── Step 3: Clean stale tmpdirs ────────────────────────────────────────────
  log "[3/7] Cleaning stale /tmp/tmp.* directories…"
  STALE_COUNT=0
  # Only remove directories older than 1 hour to avoid disturbing very recent runs.
  while IFS= read -r -d '' STALE_DIR; do
    # A directory we cannot delete (e.g. owned by another user) must not
    # abort the daemon under `set -e`; report it and move on.
    if rm -rf -- "$STALE_DIR"; then
      STALE_COUNT=$((STALE_COUNT + 1))
    else
      log " WARNING: could not remove $STALE_DIR"
    fi
  done < <(find /tmp -maxdepth 1 -name 'tmp.*' -type d -mmin +60 -print0 2>/dev/null)
  log " Removed $STALE_COUNT stale tmpdir(s)"

  # ── Step 4: Run evolve.sh ──────────────────────────────────────────────────
  log "[4/7] Starting evolve.sh…"
  # Remember the newest run directory before this run, so a run that fails
  # before creating its own directory is not confused with the previous one.
  PREV_RUN_DIR="$(latest_run_dir "$OUTPUT_DIR")"

  # Build argument array — avoids unquoted variable word-splitting.
  EVOLVE_ARGS=(
    --seed "$SEED"
    --population "$POPULATION"
    --generations "$GENERATIONS"
    --mutation-rate "$MUTATION_RATE"
    --elites "$ELITES"
    --output "$OUTPUT_DIR"
  )
  if [ "$DIVERSE_SEEDS" = "true" ]; then
    EVOLVE_ARGS+=(--diverse-seeds)
  fi

  # Stream evolve.sh output directly — do NOT buffer via $(...).
  # A full run can take tens of minutes; buffering would make the daemon
  # appear hung with no generation-level progress visible.
  EVOLVE_EC=0
  EVAL_MODE="$EVAL_MODE" \
    BASE_RPC_URL="$BASE_RPC_URL" \
    bash "$EVOLVE_SH" "${EVOLVE_ARGS[@]}" || EVOLVE_EC=$?
  if [ "$EVOLVE_EC" -ne 0 ]; then
    log " WARNING: evolve.sh exited $EVOLVE_EC — results may be incomplete"
  else
    log " evolve.sh completed OK"
  fi

  # ── Step 5: Locate the run directory just created ──────────────────────────
  # evolve.sh already saves to evolved/run_NNN/ and admits to the seed pool
  # (per the header of this script); find its directory to extract summary data.
  LATEST_RUN_DIR="$(latest_run_dir "$OUTPUT_DIR")"
  if [ "$LATEST_RUN_DIR" = "$PREV_RUN_DIR" ]; then
    # No new run_NNN appeared: do not report (or overwrite the summary of)
    # an earlier run. Relies on evolve.sh auto-incrementing run_NNN.
    LATEST_RUN_DIR=""
  fi

  BEST_FITNESS=0
  if [ -n "$LATEST_RUN_DIR" ] && [ -d "$LATEST_RUN_DIR" ]; then
    # Extract best fitness from the run's generation JSONL files.
    BEST_FITNESS="$(best_fitness_in "$LATEST_RUN_DIR")"
    log "[5/7] Results: dir=$LATEST_RUN_DIR best_fitness=$BEST_FITNESS"
  else
    log "[5/7] WARNING: could not locate run output directory"
  fi

  # ── Step 6: Write summary report ───────────────────────────────────────────
  # (Seed-pool admission needs no step here; evolve.sh handles it.)
  RUN_END="$(date +%s)"
  DURATION=$((RUN_END - RUN_START))
  DURATION_FMT="$(printf '%02d:%02d:%02d' $((DURATION / 3600)) $(((DURATION % 3600) / 60)) $((DURATION % 60)))"
  if [ -n "$LATEST_RUN_DIR" ] && [ -d "$LATEST_RUN_DIR" ]; then
    SUMMARY_FILE="$LATEST_RUN_DIR/daemon-summary.txt"
    # Written inside an `if` so an unwritable run dir is reported rather than
    # terminating the daemon under `set -e`.
    if {
      echo "=== Evolution Daemon Run Summary ==="
      echo "Timestamp: $(ts)"
      echo "Run dir: $LATEST_RUN_DIR"
      echo "Daemon run #: $RUN_NUM"
      echo "Duration: $DURATION_FMT"
      echo "Best fitness: $BEST_FITNESS"
      echo "Eval mode: $EVAL_MODE"
      echo "Population: $POPULATION"
      echo "Generations: $GENERATIONS"
      echo "Diverse seeds: $DIVERSE_SEEDS"
      echo "Patch applied: $PATCH_APPLIED"
      echo "evolve.sh exit:$EVOLVE_EC"
    } > "$SUMMARY_FILE"; then
      log "[6/7] Summary written to $SUMMARY_FILE"
    else
      log "[6/7] WARNING: could not write $SUMMARY_FILE"
    fi
  fi

  # ── Step 7: Notify ─────────────────────────────────────────────────────────
  NOTIFY_MSG="evolution run #${RUN_NUM} complete — best_fitness=${BEST_FITNESS} duration=${DURATION_FMT} dir=$(basename "${LATEST_RUN_DIR:-unknown}")"
  log "[7/7] Notifying: $NOTIFY_MSG"
  notify "$NOTIFY_MSG"

  # ── Revert patches ─────────────────────────────────────────────────────────
  cleanup_patch

  # ── Check stop flag ────────────────────────────────────────────────────────
  exit_if_stop_requested

  log "Run #${RUN_NUM} complete (${DURATION_FMT}). Starting next run…"
  log ""
done