#!/usr/bin/env bash
# =============================================================================
# evolve.sh — Push3 evolution orchestrator
#
# Outer evolutionary loop: generate candidates → score → select → repeat.
#
# Usage:
#   ./tools/push3-evolution/evolve.sh \
#     --seed optimizer_v3.push3 \
#     --population 10 \
#     --generations 5 \
#     --mutation-rate 2 \
#     --output evolved/
#
# Algorithm:
#   1. Initialize population: N copies of seed, each with M random mutations.
#   2. For each generation:
#      a. Score all candidates via fitness.sh
#      b. Log generation stats (min/max/mean fitness, best candidate)
#      c. Select k survivors via tournament selection (k = population/2)
#      d. Generate next population: mutate survivors + crossover pairs
#   3. Output best candidate as Push3 file.
#   4. Show diff: original vs evolved (which constants changed, by how much).
#
# Output:
#   <output>/
#     generation_0.jsonl   {candidate_id, fitness, mutations_applied}
#     generation_1.jsonl
#     ...
#     best.push3           highest-fitness program
#     diff.txt             parameter changes vs original
#     evolution.log        full run log
#
# Environment:
#   ANVIL_FORK_URL   Passed through to fitness.sh when Anvil is not running.
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"

# =============================================================================
# Argument parsing
# =============================================================================

SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
OUTPUT_DIR=""

# Abort with a usage error (exit 2) when an option is the last word on the
# command line. Without this, "$2" trips `set -u` and the user only sees an
# "unbound variable" message.
#   $1 - option name, $2 - number of words left (including the option itself)
_require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Error: $1 requires a value" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --seed)          _require_value "$1" "$#"; SEED="$2";          shift 2 ;;
    --population)    _require_value "$1" "$#"; POPULATION="$2";    shift 2 ;;
    --generations)   _require_value "$1" "$#"; GENERATIONS="$2";   shift 2 ;;
    --mutation-rate) _require_value "$1" "$#"; MUTATION_RATE="$2"; shift 2 ;;
    --output)        _require_value "$1" "$#"; OUTPUT_DIR="$2";    shift 2 ;;
    *) echo "Unknown option: $1" >&2; exit 2 ;;
  esac
done

if [ -z "$SEED" ]; then echo "Error: --seed required" >&2; exit 2; fi
if [ -z "$OUTPUT_DIR" ]; then echo "Error: --output required" >&2; exit 2; fi
if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi

# Validate a numeric option and print it in canonical decimal form.
# Leading zeros are stripped because bash arithmetic and printf '%d' would
# otherwise read the value as octal ("010" -> 8, "08" -> error).
# Values longer than 18 digits are rejected so later 64-bit arithmetic is safe.
#   $1 - option name (without leading dashes), $2 - raw value
# Prints the normalized value and returns 0, or prints an error and returns 1.
_positive_int() {
  local name="$1" val="$2"
  if [[ "$val" =~ ^[0-9]+$ ]]; then
    while [[ "$val" == 0?* ]]; do
      val="${val#0}"
    done
    if [[ "$val" != 0 ]] && (( ${#val} <= 18 )); then
      printf '%s\n' "$val"
      return 0
    fi
  fi
  echo "Error: --${name} must be a positive integer (got: $2)" >&2
  return 1
}

# Validate numeric args
POPULATION="$(_positive_int population "$POPULATION")" || exit 2
GENERATIONS="$(_positive_int generations "$GENERATIONS")" || exit 2
MUTATION_RATE="$(_positive_int mutation-rate "$MUTATION_RATE")" || exit 2

# Canonicalize paths
SEED="$(cd "$(dirname -- "$SEED")" && pwd)/$(basename -- "$SEED")"
mkdir -p "$OUTPUT_DIR"
OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"

LOG="$OUTPUT_DIR/evolution.log"

# =============================================================================
# Helpers
# =============================================================================

# Write a "[evolve] ..." line to stderr and append the same line to $LOG.
log() {
  local msg="[evolve] $*"
  echo "$msg" >&2
  echo "$msg" >> "$LOG"
}

# Log an error message and terminate the script with exit status 2.
fail() {
  log "ERROR: $*"
  exit 2
}

# Locate a tsx runner (TypeScript executor for mutate-cli.ts).
# Tries: tsx in PATH → local node_modules → npx tsx.
find_tsx_cmd() {
  # Prints the runner command on stdout: "tsx", the path of the local
  # node_modules binary, or the two-word string "npx tsx".
  # Returns 1 without output when none of them is available.
  if command -v tsx &>/dev/null; then
    echo "tsx"
  elif [ -x "$SCRIPT_DIR/node_modules/.bin/tsx" ]; then
    echo "$SCRIPT_DIR/node_modules/.bin/tsx"
  elif command -v npx &>/dev/null; then
    echo "npx tsx"
  else
    return 1
  fi
}

# Run the mutate-cli.ts with the given arguments.
# All mutation operations run from SCRIPT_DIR so relative TS imports resolve.
# The runner is expanded from an array so a SCRIPT_DIR containing spaces does
# not get word-split; only the literal "npx tsx" form is two words.
run_mutate_cli() {
  local -a runner
  if [[ "$TSX_CMD" == "npx tsx" ]]; then
    runner=(npx tsx)
  else
    runner=("$TSX_CMD")
  fi
  (cd "$SCRIPT_DIR" && "${runner[@]}" "$MUTATE_CLI" "$@")
}

# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
# Input:  whitespace-separated integers on stdin.
# Output: "MIN MAX MEAN" on one line; "0 0 0" when stdin holds no numbers.
# The program is passed with -c rather than as a heredoc: a heredoc would
# replace stdin, so the piped numbers would never reach the script.
# The mean is rounded half-to-even using integer arithmetic only, so it stays
# exact for values that do not fit in a float.
py_stats() {
  python3 -c '
import sys

nums = [int(tok) for tok in sys.stdin.read().split()]
if not nums:
    print("0 0 0")
    sys.exit(0)

count = len(nums)
mean, rem = divmod(sum(nums), count)
if 2 * rem > count or (2 * rem == count and mean % 2 == 1):
    mean += 1
print(min(nums), max(nums), mean)
' "$@"
}

# Tournament selection: given a scores file (one "idx score filepath" per line),
# run k tournaments of size 2 and return winner filepaths (one per line).
# Contestants are drawn with replacement; ties go to the first draw.
# Exits non-zero when the scores file holds no usable entries.
#   $1 - number of tournaments (k), $2 - path to the scores file
py_tournament() {
  local k="$1"
  local scores_file="$2"
  python3 - "$k" "$scores_file" <<'PYEOF'
import sys, random

k = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        # Split into at most three fields so a filepath containing spaces
        # survives intact as the third field.
        parts = line.rstrip("\n").split(None, 2)
        if len(parts) >= 3:
            entries.append((int(parts[0]), int(parts[1]), parts[2]))

if not entries:
    sys.exit(1)

for _ in range(k):
    a = random.choice(entries)
    b = random.choice(entries)
    winner = a if a[1] >= b[1] else b
    print(winner[2])
PYEOF
}

# =============================================================================
# Tool checks
# =============================================================================

for _tool in python3 node; do
  command -v "$_tool" &>/dev/null || fail "$_tool not found in PATH"
done

[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"

TSX_CMD="$(find_tsx_cmd)" || fail \
  "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
# =============================================================================
# Work directory — holds all candidate .push3 files across generations
# =============================================================================

WORK_DIR="$(mktemp -d)"

# Delete the scratch directory; registered on EXIT so it runs however the
# script terminates.
cleanup() {
  rm -rf "$WORK_DIR"
}
trap cleanup EXIT

# =============================================================================
# Log run header
# =============================================================================

_banner="========================================================"

log "$_banner"
log "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
# One indented line per run setting, in a fixed order.
for _setting in \
  "Seed: $SEED" \
  "Population: $POPULATION" \
  "Generations: $GENERATIONS" \
  "Mutation rate: $MUTATION_RATE" \
  "Output: $OUTPUT_DIR" \
  "TSX: $TSX_CMD"; do
  log " $_setting"
done
log "$_banner"

# =============================================================================
# Step 1 — Initialize generation 0
#
# N copies of the seed, each independently mutated MUTATION_RATE times.
# =============================================================================

log ""
log "=== Initializing population ==="

GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"

for ((i = 0; i < POPULATION; i++)); do
  CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
  MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
    || fail "Failed to mutate seed for initial candidate $i"
  printf '%s\n' "$MUTATED" > "$CAND_FILE"
  # Sidecar file records how many mutation operations produced this candidate.
  printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
done

log "Initialized ${POPULATION} candidates in gen_0"

# =============================================================================
# Step 2 — Evolution loop
# =============================================================================

# Succeeds when decimal string $1 is strictly greater than $2.
# Both arguments must be non-negative integers without leading zeros. They are
# compared as strings (length first, then digit order) so fitness values wider
# than bash's 64-bit integers are handled correctly.
# Only call this from a conditional context.
int_gt() {
  local LC_ALL=C
  local lhs="$1" rhs="$2"
  if [ "${#lhs}" -ne "${#rhs}" ]; then
    if [ "${#lhs}" -gt "${#rhs}" ]; then return 0; else return 1; fi
  fi
  if [[ "$lhs" > "$rhs" ]]; then return 0; fi
  return 1
}

GLOBAL_BEST_FITNESS=-1
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_CAND=""
CURRENT_GEN_DIR="$GEN_DIR"

for ((gen = 0; gen < GENERATIONS; gen++)); do
  log ""
  log "=== Generation $((gen + 1)) / $GENERATIONS ==="

  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

  # Start from an empty JSONL file: records are appended below, so results
  # from an earlier run into the same output directory would otherwise be
  # mixed with this run's.
  : > "$JSONL_FILE"

  # --- a. Score all candidates ---
  SCORE_VALUES=""
  CAND_COUNT=0

  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    [ -f "$CAND_FILE" ] || continue   # unmatched glob stays literal

    CAND_IDX="${CAND_FILE##*candidate_}"
    CAND_IDX="${CAND_IDX%.push3}"
    CID="gen${gen}_c${CAND_IDX}"

    # Read mutations_applied from sidecar; default 0 if missing.
    OPS_FILE="${CAND_FILE%.push3}.ops"
    MUTATIONS_APPLIED=0
    if [ -f "$OPS_FILE" ]; then
      MUTATIONS_APPLIED=$(cat "$OPS_FILE")
    fi

    SCORE=0
    FITNESS_EC=0
    # `|| FITNESS_EC=$?` captures the status without tripping `set -e`.
    SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?

    # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
    if [ "$FITNESS_EC" -eq 2 ]; then
      fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
    fi

    # Validate that score is a non-negative integer; treat any other output as invalid.
    if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
      log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
      SCORE=0
    else
      # Strip leading zeros: "007" is not a valid JSON number and would break
      # the string-based comparison in int_gt.
      while [[ "$SCORE" == 0?* ]]; do
        SCORE="${SCORE#0}"
      done
      log " $CID: fitness=$SCORE"
    fi

    # Append to JSONL — use the actual operations recorded for this candidate.
    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"

    # Record index, score, and filepath for selection (filepath avoids index→file mapping issues).
    printf '%d %s %s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"

    SCORE_VALUES="$SCORE_VALUES $SCORE"
    CAND_COUNT=$((CAND_COUNT + 1))
  done

  if [ "$CAND_COUNT" -eq 0 ]; then
    fail "No candidates found in $CURRENT_GEN_DIR"
  fi

  # --- b. Log generation stats ---
  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats) \
    || fail "Could not compute generation stats"
  log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"

  # Find this generation's best candidate. Python prints "SCORE FILEPATH" so
  # the global-best decision uses the score read from the scores file itself
  # and never passes a wei-sized value through bash integer comparison.
  BEST_LINE=$(python3 - "$SCORES_FILE" <<'PYEOF'
import sys

best = None
with open(sys.argv[1]) as f:
    for line in f:
        # At most three fields so a filepath containing spaces stays intact.
        parts = line.rstrip("\n").split(None, 2)
        if len(parts) < 3:
            continue
        score = int(parts[1])
        # Strict '>' keeps the first entry among equal scores.
        if best is None or score > best[0]:
            best = (score, parts[2])

if best is None:
    sys.exit(1)
print(best[0], best[1])
PYEOF
  ) || fail "Could not determine best candidate from $SCORES_FILE"

  BEST_SCORE_THIS_GEN="${BEST_LINE%% *}"
  BEST_FILE_THIS_GEN="${BEST_LINE#* }"
  # Candidate number taken from the filename (candidate_NNN.push3), matching
  # the suffix used in candidate ids.
  BEST_IDX_THIS_GEN="${BEST_FILE_THIS_GEN##*candidate_}"
  BEST_IDX_THIS_GEN="${BEST_IDX_THIS_GEN%.push3}"

  if [ -z "$GLOBAL_BEST_CAND" ] \
    || int_gt "$BEST_SCORE_THIS_GEN" "$GLOBAL_BEST_FITNESS"; then
    GLOBAL_BEST_FITNESS="$BEST_SCORE_THIS_GEN"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
    log " New global best: gen=$gen idx=$BEST_IDX_THIS_GEN fitness=$GLOBAL_BEST_FITNESS"
  fi

  # Skip next-generation creation after the final generation
  if [ "$gen" -eq "$((GENERATIONS - 1))" ]; then
    break
  fi

  # --- c. Tournament selection (k = population / 2) ---
  K=$((POPULATION / 2))
  if [ "$K" -lt 1 ]; then
    K=1
  fi

  SURVIVOR_FILES=()
  while IFS= read -r WIN_FILE; do
    [ -n "$WIN_FILE" ] || continue
    SURVIVOR_FILES+=("$WIN_FILE")
  done < <(py_tournament "$K" "$SCORES_FILE")

  # A failure inside the process substitution is not visible to `set -e`;
  # without this check the survivor picks below would divide by zero.
  if [ "${#SURVIVOR_FILES[@]}" -eq 0 ]; then
    fail "Tournament selection returned no survivors for generation $gen"
  fi

  log " Selected ${#SURVIVOR_FILES[@]} survivors via tournament"

  # --- d. Generate next population ---
  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
  mkdir -p "$NEXT_GEN_DIR"

  NEXT_IDX=0
  HALF=$((POPULATION / 2))

  # First half: mutate random survivors
  for ((_i = 1; _i <= HALF; _i++)); do
    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
    else
      # Fallback: copy the survivor as-is to keep population size stable
      cp "$SUR" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  # Second half: crossover random survivor pairs
  REMAINING=$((POPULATION - HALF))
  for ((_i = 1; _i <= REMAINING; _i++)); do
    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
      printf '%s\n' "$CROSSED" > "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    else
      # Fallback: mutate one survivor
      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
        printf '%s\n' "$MUTATED" > "$DEST"
        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
      else
        cp "$SUR_A" "$DEST"
        printf '0\n' > "${DEST%.push3}.ops"
      fi
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log " Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
  CURRENT_GEN_DIR="$NEXT_GEN_DIR"
done

# =============================================================================
# Step 3 — Output best candidate
# =============================================================================

if [ -z "$GLOBAL_BEST_CAND" ] || [ ! -f "$GLOBAL_BEST_CAND" ]; then
  fail "No valid best candidate recorded — evolution produced no scorable output"
fi

BEST_OUTPUT="$OUTPUT_DIR/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"

log ""
log "Best candidate → $BEST_OUTPUT"
log " Fitness: $GLOBAL_BEST_FITNESS (generation $GLOBAL_BEST_GEN)"

# =============================================================================
# Step 4 — Diff: original vs evolved constants
# =============================================================================

DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"

# Constants are paired by position in the file: const[i] of the seed is
# compared with const[i] of the best candidate.
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import sys, re

def extract_ints(path):
    """Extract all large integer literals (≥6 digits) from a Push3 file."""
    text = open(path).read()
    text = re.sub(r';;[^\n]*', '', text)  # strip comments
    return [int(m) for m in re.findall(r'\b(\d{6,})\b', text)]

seed_path, best_path = sys.argv[1], sys.argv[2]
orig = extract_ints(seed_path)
best = extract_ints(best_path)

print(f"=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()

changed = 0
for i, (o, b) in enumerate(zip(orig, best)):
    if o != b:
        pct = (b - o) / o * 100 if o != 0 else float('inf')
        print(f" const[{i:3d}]: {o:>25d} → {b:>25d} (Δ={b - o:+d}, {pct:+.2f}%)")
        changed += 1

if len(orig) != len(best):
    added = len(best) - len(orig)
    if added > 0:
        for i, val in enumerate(best[len(orig):]):
            print(f" const[{len(orig) + i:3d}]: {'(new)':>25s} → {val:>25d}")
    else:
        print(f" ({-added} constant(s) removed from end)")

print()
if changed == 0 and len(orig) == len(best):
    print("No constant changes — evolution applied structural mutations only.")
else:
    total = min(len(orig), len(best))
    print(f"Summary: {changed} of {total} constant(s) changed.")
PYEOF

log "Diff written to $DIFF_OUTPUT"
log ""
cat "$DIFF_OUTPUT" >&2

log "========================================================"
log "Evolution complete."
# Closing summary: one log line per headline figure, then the closing rule.
_summary_lines=(
  " Generations run: $GENERATIONS"
  " Best fitness: $GLOBAL_BEST_FITNESS"
  " Best from gen: $GLOBAL_BEST_GEN"
  " Output directory: $OUTPUT_DIR"
  "========================================================"
)
for _summary_line in "${_summary_lines[@]}"; do
  log "$_summary_line"
done