harb/tools/push3-evolution/evolve.sh

#!/usr/bin/env bash
# =============================================================================
# evolve.sh — Push3 evolution orchestrator
#
# Outer evolutionary loop: generate candidates → score → select → repeat.
#
# Usage:
#   ./tools/push3-evolution/evolve.sh \
#     --seed optimizer_v3.push3 \
#     --population 10 \
#     --generations 5 \
#     --mutation-rate 2 \
#     --output evolved/
#
# Algorithm:
#   1. Initialize population: N copies of seed, each with M random mutations.
#   2. For each generation:
#      a. Score all candidates via fitness.sh
#      b. Log generation stats (min/max/mean fitness, best candidate)
#      c. Select k survivors via tournament selection (k = population/2)
#      d. Generate next population: mutate survivors + crossover pairs
#   3. Output best candidate as Push3 file.
#   4. Show diff: original vs evolved (which constants changed, by how much).
#
# Output:
#   <output>/
#     generation_0.jsonl   {candidate_id, fitness, mutations_applied}
#     generation_1.jsonl
#     ...
#     best.push3           highest-fitness program
#     diff.txt             parameter changes vs original
#     evolution.log        full run log
#
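# Environment:
#   EVAL_MODE       Fitness backend: "anvil" (default) or "revm".
#   BASE_RPC_URL    Required when EVAL_MODE=revm.
#   ANVIL_FORK_URL  Passed through to fitness.sh when Anvil is not running.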
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================

# Strict mode: abort on errors, on unset variables, and on failures inside pipelines.
set -e -u -o pipefail

# Put the Foundry tool directory (forge, cast, anvil) at the front of PATH.
PATH="$HOME/.foundry/bin:$PATH"
export PATH

# Absolute directory containing this script; sibling tools are resolved against it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
MUTATE_CLI="${SCRIPT_DIR}/mutate-cli.ts"
BATCH_EVAL_SH="${SCRIPT_DIR}/revm-evaluator/batch-eval.sh"
FITNESS_SH="${SCRIPT_DIR}/fitness.sh"

# EVAL_MODE selects the fitness backend:
#   anvil  (default) — per-candidate Anvil+forge-script pipeline (fitness.sh)
#   revm            — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
#                     Requires BASE_RPC_URL env var.  10-100× faster at scale.
# An unset or empty value falls back to "anvil".
: "${EVAL_MODE:=anvil}"

# =============================================================================
# Argument parsing
# =============================================================================

# Defaults; each may be overridden by the matching command-line option.
SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
OUTPUT_DIR=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --seed | --population | --generations | --mutation-rate | --output)
      # Every option takes exactly one value.  Check before touching "$2":
      # under `set -u` a missing value would otherwise abort the script with
      # a bare "unbound variable" message instead of a usage error.
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires a value" >&2
        exit 2
      fi
      case "$1" in
        --seed)          SEED="$2" ;;
        --population)    POPULATION="$2" ;;
        --generations)   GENERATIONS="$2" ;;
        --mutation-rate) MUTATION_RATE="$2" ;;
        --output)        OUTPUT_DIR="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 2
      ;;
  esac
done

if [ -z "$SEED" ];       then echo "Error: --seed required" >&2;   exit 2; fi
if [ -z "$OUTPUT_DIR" ]; then echo "Error: --output required" >&2; exit 2; fi
if [ ! -f "$SEED" ];     then echo "Error: seed file not found: $SEED" >&2; exit 2; fi

# Validate numeric args.  The whole value must be decimal digits and non-zero.
# Leading zeros are stripped so later $(( )) arithmetic does not read the value
# as octal ("010" would mean 8, "08" would be a syntax error).  Values longer
# than 9 digits are rejected so they cannot overflow shell integer arithmetic.
for _name in population generations mutation-rate; do
  case "$_name" in
    population)    _val="$POPULATION" ;;
    generations)   _val="$GENERATIONS" ;;
    mutation-rate) _val="$MUTATION_RATE" ;;
  esac

  # ${_val%%[!0]*} is the run of leading zeros; removing it leaves the
  # significant digits (empty when the value is all zeros).
  _digits="${_val#"${_val%%[!0]*}"}"

  if ! [[ "$_val" =~ ^[0-9]+$ ]] || [[ -z "$_digits" ]]; then
    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
    exit 2
  fi
  if [[ ${#_digits} -gt 9 ]]; then
    echo "Error: --${_name} is too large (got: $_val)" >&2
    exit 2
  fi

  case "$_name" in
    population)    POPULATION="$_digits" ;;
    generations)   GENERATIONS="$_digits" ;;
    mutation-rate) MUTATION_RATE="$_digits" ;;
  esac
done

# Canonicalize paths: make the seed path absolute, create the output directory
# if needed, and resolve it to an absolute path as well.
SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"
mkdir -p "$OUTPUT_DIR"
OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"
LOG="$OUTPUT_DIR/evolution.log"

# =============================================================================
# Helpers
# =============================================================================

# Emit one "[evolve] ..." line built from all arguments: once on stderr and
# once appended to the run log at $LOG.
log() {
  local line
  line="[evolve] $*"
  printf '%s\n' "$line" >&2
  printf '%s\n' "$line" >>"$LOG"
}

# Log the arguments as an "ERROR: ..." line via log, then terminate the script
# with exit status 2.
fail() {
  local reason="$*"
  log "ERROR: ${reason}"
  exit 2
}

# Print the command used to run TypeScript files (needed for mutate-cli.ts).
# Candidates are tried in order and the first hit wins:
#   1. `tsx` found on PATH                         → prints "tsx"
#   2. executable $SCRIPT_DIR/node_modules/.bin/tsx → prints that path
#   3. `npx` found on PATH                         → prints "npx tsx"
# Returns 1 without printing anything when none of them is available.
find_tsx_cmd() {
  local bundled="$SCRIPT_DIR/node_modules/.bin/tsx"

  if command -v tsx >/dev/null 2>&1; then
    printf '%s\n' "tsx"
    return
  fi

  if [ -x "$bundled" ]; then
    printf '%s\n' "$bundled"
    return
  fi

  if command -v npx >/dev/null 2>&1; then
    printf '%s\n' "npx tsx"
    return
  fi

  return 1
}

# Run mutate-cli.ts through the TypeScript runner named by $TSX_CMD, forwarding
# all arguments unchanged.
#
# Globals:   SCRIPT_DIR, TSX_CMD, MUTATE_CLI (read)
# Arguments: passed straight through to mutate-cli.ts
# Outputs:   whatever mutate-cli.ts writes to stdout/stderr
# Returns:   exit status of the runner (or of `cd` if SCRIPT_DIR is unusable)
#
# The command runs in a subshell whose working directory is SCRIPT_DIR so
# relative TS imports resolve; the caller's working directory is untouched.
run_mutate_cli() {
  local -a runner=()

  # TSX_CMD is either a single command/path ("tsx", ".../node_modules/.bin/tsx")
  # or a multi-word command line ("npx tsx").  If the whole string resolves as
  # a command, keep it as ONE word so a path containing spaces is not torn
  # apart; only otherwise split it on whitespace.
  if command -v -- "$TSX_CMD" >/dev/null 2>&1; then
    runner=("$TSX_CMD")
  else
    read -r -a runner <<<"$TSX_CMD"
  fi

  (cd "$SCRIPT_DIR" && "${runner[@]}" "$MUTATE_CLI" "$@")
}

# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
#
# Input:   whitespace-separated integers on stdin; if arguments are given they
#          are used as the integers instead and stdin is not read.
# Outputs: one line "MIN MAX MEAN" on stdout, or "0 0 0" when there is no input.
# Returns: python3's exit status (non-zero if a token is not an integer).
py_stats() {
  # The program is passed with -c rather than as a heredoc on purpose: a
  # heredoc would itself become python's stdin, so sys.stdin.read() would see
  # no data and the result would always be "0 0 0".
  # The mean is rounded from an exact Fraction so that values too large for a
  # float (wei amounts) do not lose precision.
  python3 -c '
import sys
from fractions import Fraction

tokens = sys.argv[1:] or sys.stdin.read().split()
nums = [int(tok) for tok in tokens]
if not nums:
    print("0 0 0")
    sys.exit(0)
print(min(nums), max(nums), round(Fraction(sum(nums), len(nums))))
' "$@"
}

# Tournament selection of size 2.
#
# Arguments: $1 - number of tournaments to run (one winner printed per round)
#            $2 - scores file, one tab-separated "idx<TAB>score<TAB>filepath"
#                 record per line; lines with fewer than three fields are skipped
# Outputs:   the winning filepath of each round, one per line.  Each round draws
#            two records at random (the same record may be drawn twice); the
#            first draw wins unless the second has a strictly higher score.
# Returns:   1 if the scores file holds no usable record.
py_tournament() {
  local rounds="$1"
  local table="$2"
  python3 - "$rounds" "$table" <<'PYEOF'
import random
import sys

rounds = int(sys.argv[1])
table_path = sys.argv[2]

pool = []
with open(table_path) as handle:
    for raw in handle:
        fields = raw.rstrip('\n').split('\t')
        if len(fields) < 3:
            continue
        pool.append((int(fields[0]), int(fields[1]), fields[2]))

if not pool:
    sys.exit(1)

for _ in range(rounds):
    first = random.choice(pool)
    second = random.choice(pool)
    print(first[2] if first[1] >= second[1] else second[2])
PYEOF
}

# =============================================================================
# Tool checks
# =============================================================================

# python3 and node must both be resolvable on PATH.
for _tool in python3 node; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    fail "$_tool not found in PATH"
  fi
done

# The scoring script and the mutation CLI must exist; fitness.sh is made
# executable if it is not already.
if [ ! -f "$FITNESS_SH" ]; then
  fail "fitness.sh not found at $FITNESS_SH"
fi
if [ ! -f "$MUTATE_CLI" ]; then
  fail "mutate-cli.ts not found at $MUTATE_CLI"
fi
if [ ! -x "$FITNESS_SH" ]; then
  chmod +x "$FITNESS_SH"
fi

# Backend-specific requirements.  Only "anvil" and "revm" are accepted.
case "$EVAL_MODE" in
  revm)
    if [ ! -f "$BATCH_EVAL_SH" ]; then
      fail "batch-eval.sh not found at $BATCH_EVAL_SH"
    fi
    if [ ! -x "$BATCH_EVAL_SH" ]; then
      chmod +x "$BATCH_EVAL_SH"
    fi
    if [ -z "${BASE_RPC_URL:-}" ]; then
      fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
    fi
    if ! command -v forge >/dev/null 2>&1; then
      fail "forge not found in PATH (required for EVAL_MODE=revm)"
    fi
    ;;
  anvil)
    ;;
  *)
    fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
    ;;
esac

# Resolve the TypeScript runner once; run_mutate_cli reads it from TSX_CMD.
if ! TSX_CMD="$(find_tsx_cmd)"; then
  fail "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
fi

# =============================================================================
# Work directory — holds all candidate .push3 files across generations
# =============================================================================

# Remove the scratch directory and everything in it.
cleanup() {
  rm -rf "$WORK_DIR"
}

# Fresh temporary directory for this run; the EXIT trap deletes it whenever the
# script terminates.
WORK_DIR="$(mktemp -d)"
trap cleanup EXIT

# =============================================================================
# Log run header
# =============================================================================

# Record the effective configuration of this run (stderr + evolution.log),
# framed by two rule lines.  The timestamp is UTC.
_header_rule="========================================================"
_header_lines=(
  "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  "  Seed:          $SEED"
  "  Population:    $POPULATION"
  "  Generations:   $GENERATIONS"
  "  Mutation rate: $MUTATION_RATE"
  "  Output:        $OUTPUT_DIR"
  "  TSX:           $TSX_CMD"
  "  Eval mode:     $EVAL_MODE"
)

log "$_header_rule"
for _header_line in "${_header_lines[@]}"; do
  log "$_header_line"
done
log "$_header_rule"

# =============================================================================
# Step 1 — Initialize generation 0
#
# N copies of the seed, each independently mutated MUTATION_RATE times.
# =============================================================================

log ""
log "=== Initializing population ==="

# Generation 0 lives in its own directory under the scratch area.
GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"

# Create candidate_000 … candidate_<POPULATION-1>.  Each one is the seed run
# through `mutate` with MUTATION_RATE; the ".ops" sidecar next to it stores the
# number of mutations applied.  A failed mutation here aborts the whole run.
for ((i = 0; i < POPULATION; i++)); do
  printf -v CAND_FILE '%s/candidate_%03d.push3' "$GEN_DIR" "$i"
  if ! MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE"); then
    fail "Failed to mutate seed for initial candidate $i"
  fi
  printf '%s\n' "$MUTATED" > "$CAND_FILE"
  printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
done

log "Initialized ${POPULATION} candidates in gen_0"

# =============================================================================
# Step 2 — Evolution loop
# =============================================================================

# Best candidate seen so far across all generations.  -1 means "none yet";
# real scores are validated as non-negative, so any score beats it.
GLOBAL_BEST_FITNESS=-1
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_CAND=""

CURRENT_GEN_DIR="$GEN_DIR"

# Main loop: score → stats → select → breed, once per generation.
# Arithmetic for-loops are used instead of `seq` throughout: `seq 1 0` counts
# DOWN on BSD/macOS, which would create extra candidates when POPULATION is 1.
for ((gen = 0; gen < GENERATIONS; gen++)); do

  log ""
  log "=== Generation $((gen + 1)) / $GENERATIONS ==="

  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

  # Both files are appended to below, so start them empty.  OUTPUT_DIR may
  # still contain generation_N.jsonl files from an earlier run.
  : > "$JSONL_FILE"
  : > "$SCORES_FILE"

  # --- a. Score all candidates ---

  SCORE_VALUES=""
  CAND_COUNT=0

  # In revm mode, batch-score all candidates in one batch-eval.sh invocation
  # before the per-candidate loop.  Its stdout is captured as a JSONL file that
  # the loop then searches for each candidate.
  BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"

  if [ "$EVAL_MODE" = "revm" ]; then
    _BATCH_FILES=()
    for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
      if [ -f "$_CF" ]; then
        _BATCH_FILES+=("$_CF")
      fi
    done

    if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
      BATCH_EC=0
      # stderr is discarded; only stdout and the exit status are used.
      bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null \
        || BATCH_EC=$?

      if [ "$BATCH_EC" -eq 2 ]; then
        fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
      log "  revm batch scoring complete (exit $BATCH_EC)"
    fi
  fi

  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    # An unmatched glob stays as the literal pattern; skip it.
    [ -f "$CAND_FILE" ] || continue

    # candidate_007.push3 → CAND_IDX=007 → CID=gen<gen>_c007
    CAND_IDX="${CAND_FILE##*candidate_}"
    CAND_IDX="${CAND_IDX%.push3}"
    CID="gen${gen}_c${CAND_IDX}"

    # Read mutations_applied from sidecar; default 0 if missing or not a
    # plain non-negative integer (printf %d below would otherwise fail).
    OPS_FILE="${CAND_FILE%.push3}.ops"
    MUTATIONS_APPLIED=0
    if [ -f "$OPS_FILE" ]; then
      MUTATIONS_APPLIED=$(<"$OPS_FILE")
      [[ "$MUTATIONS_APPLIED" =~ ^[0-9]+$ ]] || MUTATIONS_APPLIED=0
    fi

    SCORE=0
    FITNESS_EC=0

    if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
      # Look up pre-computed score from batch-eval.sh output; prints 0 when no
      # record matches.
      # NOTE(review): this assumes batch-eval.sh emits "candidate_id" values of
      # the form gen<G>_c<NNN> — confirm against batch-eval.sh.
      SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys
cid = sys.argv[1]
with open(sys.argv[2]) as f:
    for line in f:
        try:
            d = json.loads(line)
            if d.get("candidate_id") == cid:
                print(d["fitness"])
                sys.exit(0)
        except (json.JSONDecodeError, KeyError):
            pass
print(0)
PYEOF
) || SCORE=0
    else
      # Anvil mode (or revm fallback): score candidate individually.
      # stderr is discarded; stdout is the score.
      SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?

      # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
      if [ "$FITNESS_EC" -eq 2 ]; then
        fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
    fi

    # Validate that score is a non-negative integer; treat any other output as invalid.
    if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
      log "  $CID: invalid/missing score, using 0"
      SCORE=0
    else
      log "  $CID: fitness=$SCORE"
    fi

    # Append to JSONL — use the actual operations recorded for this candidate.
    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"

    # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
    printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"

    SCORE_VALUES="$SCORE_VALUES $SCORE"
    CAND_COUNT=$((CAND_COUNT + 1))
  done

  if [ "$CAND_COUNT" -eq 0 ]; then
    fail "No candidates found in $CURRENT_GEN_DIR"
  fi

  # --- b. Log generation stats ---

  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats) \
    || fail "Could not compute fitness statistics for generation $gen"
  log "  Stats: min=$MIN  max=$MAX  mean=$MEAN  candidates=$CAND_COUNT"

  # Find the best candidate of this generation and decide whether it beats the
  # global best.  Both the score and the comparison are taken from the scores
  # file inside python, so the chosen file always matches the reported fitness
  # and the comparison is exact for integers too large for shell `[ -gt ]`.
  # Output is one tab-separated record: "<improved 0|1>\t<score>\t<filepath>".
  BEST_RECORD=$(python3 - "$SCORES_FILE" "$GLOBAL_BEST_FITNESS" <<'PYEOF'
import sys
scores_path, best_so_far = sys.argv[1], int(sys.argv[2])
entries = []
with open(scores_path) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[1]), parts[2]))
if not entries:
    sys.exit(1)
score, path = max(entries, key=lambda x: x[0])
print(f"{int(score > best_so_far)}\t{score}\t{path}")
PYEOF
) || fail "Could not determine best candidate from $SCORES_FILE"

  IFS=$'\t' read -r IMPROVED BEST_SCORE_THIS_GEN BEST_FILE_THIS_GEN <<<"$BEST_RECORD"

  # Strictly-greater comparison: on a tie the earlier generation keeps the title.
  if [ "$IMPROVED" = "1" ]; then
    GLOBAL_BEST_FITNESS="$BEST_SCORE_THIS_GEN"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
    log "  New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
  fi

  # Skip next-generation creation after the final generation
  if ((gen == GENERATIONS - 1)); then
    break
  fi

  # --- c. Tournament selection (k = population / 2, at least 1) ---

  K=$((POPULATION / 2))
  if ((K < 1)); then
    K=1
  fi

  SURVIVOR_FILES=()
  while IFS= read -r WIN_FILE; do
    SURVIVOR_FILES+=("$WIN_FILE")
  done < <(py_tournament "$K" "$SCORES_FILE")

  # A failure inside the process substitution is not reported to this shell;
  # an empty list is the only visible symptom, and indexing with
  # RANDOM % 0 below would be a division by zero.
  if ((${#SURVIVOR_FILES[@]} == 0)); then
    fail "Tournament selection returned no survivors for generation $gen"
  fi

  log "  Selected ${#SURVIVOR_FILES[@]} survivors via tournament"

  # --- d. Generate next population ---

  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
  mkdir -p "$NEXT_GEN_DIR"

  NEXT_IDX=0
  HALF=$((POPULATION / 2))

  # First half: mutate random survivors
  for ((_i = 0; _i < HALF; _i++)); do
    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    printf -v DEST '%s/candidate_%03d.push3' "$NEXT_GEN_DIR" "$NEXT_IDX"
    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
    else
      # Fallback: copy the survivor as-is to keep population size stable
      cp "$SUR" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  # Second half: crossover random survivor pairs (both parents are drawn
  # independently, so a survivor may be paired with itself).
  REMAINING=$((POPULATION - HALF))
  for ((_i = 0; _i < REMAINING; _i++)); do
    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    printf -v DEST '%s/candidate_%03d.push3' "$NEXT_GEN_DIR" "$NEXT_IDX"
    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
      printf '%s\n' "$CROSSED" > "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    else
      # Fallback: mutate one survivor
      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
        printf '%s\n' "$MUTATED" > "$DEST"
        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
      else
        # Last resort: carry the first parent over unchanged.
        cp "$SUR_A" "$DEST"
        printf '0\n' > "${DEST%.push3}.ops"
      fi
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log "  Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
  CURRENT_GEN_DIR="$NEXT_GEN_DIR"

done

# =============================================================================
# Step 3 — Output best candidate
# =============================================================================

# The loop must have recorded a best candidate whose file still exists.
if [[ -z "$GLOBAL_BEST_CAND" || ! -f "$GLOBAL_BEST_CAND" ]]; then
  fail "No valid best candidate recorded — evolution produced no scorable output"
fi

# Copy the winner out of the scratch directory (which is deleted on exit) into
# the output directory, then report where it went and how it scored.
BEST_OUTPUT="${OUTPUT_DIR}/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"

log ""
log "Best candidate → $BEST_OUTPUT"
log "  Fitness: $GLOBAL_BEST_FITNESS  (generation $GLOBAL_BEST_GEN)"

# =============================================================================
# Step 4 — Diff: original vs evolved constants
# =============================================================================

DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"

# Compare the integer literals of the seed and of the best candidate.
# Literals are matched purely by position (first with first, second with
# second, …); the report is written to diff.txt.
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import re
import sys

COMMENT = re.compile(r';;[^\n]*')
LITERAL = re.compile(r'\b(\d{6,})\b')


def extract_ints(path):
    """Return, in file order, every integer literal of 6+ digits outside ';;' comments."""
    with open(path) as handle:
        source = COMMENT.sub('', handle.read())
    return [int(token) for token in LITERAL.findall(source)]


seed_path, best_path = sys.argv[1], sys.argv[2]
before = extract_ints(seed_path)
after = extract_ints(best_path)

print("=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()

# Position-by-position comparison over the common prefix of both lists.
changed = 0
for idx, (old, new) in enumerate(zip(before, after)):
    if old == new:
        continue
    delta = new - old
    pct = delta / old * 100 if old != 0 else float('inf')
    print(f"  const[{idx:3d}]: {old:>25d}  →  {new:>25d}  (Δ={delta:+d}, {pct:+.2f}%)")
    changed += 1

# Length mismatch: list the extra literals, or count the missing ones.
extra = len(after) - len(before)
if extra > 0:
    for idx, val in enumerate(after[len(before):], start=len(before)):
        print(f"  const[{idx:3d}]: {'(new)':>25s}  →  {val:>25d}")
elif extra < 0:
    print(f"  ({-extra} constant(s) removed from end)")

print()
if changed == 0 and extra == 0:
    print("No constant changes — evolution applied structural mutations only.")
else:
    compared = min(len(before), len(after))
    print(f"Summary: {changed} of {compared} constant(s) changed.")
PYEOF

# Announce the file, then echo its contents to stderr for the operator.
log "Diff written to $DIFF_OUTPUT"
log ""
cat "$DIFF_OUTPUT" >&2

# Closing summary of the run (stderr + evolution.log), framed by rule lines.
_summary_rule="========================================================"
_summary_lines=(
  "Evolution complete."
  "  Generations run:  $GENERATIONS"
  "  Best fitness:     $GLOBAL_BEST_FITNESS"
  "  Best from gen:    $GLOBAL_BEST_GEN"
  "  Output directory: $OUTPUT_DIR"
)

log "$_summary_rule"
for _summary_line in "${_summary_lines[@]}"; do
  log "$_summary_line"
done
log "$_summary_rule"