fix: Push3 evolution: selection loop orchestrator (#546)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b460e36bbc
commit
547e8beae8
3 changed files with 491 additions and 0 deletions
433
tools/push3-evolution/evolve.sh
Executable file
433
tools/push3-evolution/evolve.sh
Executable file
|
|
@ -0,0 +1,433 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# evolve.sh — Push3 evolution orchestrator
|
||||
#
|
||||
# Outer evolutionary loop: generate candidates → score → select → repeat.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/push3-evolution/evolve.sh \
|
||||
# --seed optimizer_v3.push3 \
|
||||
# --population 10 \
|
||||
# --generations 5 \
|
||||
# --mutation-rate 2 \
|
||||
# --output evolved/
|
||||
#
|
||||
# Algorithm:
|
||||
# 1. Initialize population: N copies of seed, each with M random mutations.
|
||||
# 2. For each generation:
|
||||
# a. Score all candidates via fitness.sh
|
||||
# b. Log generation stats (min/max/mean fitness, best candidate)
|
||||
# c. Select k survivors via tournament selection (k = population/2)
|
||||
# d. Generate next population: mutate survivors + crossover pairs
|
||||
# 3. Output best candidate as Push3 file.
|
||||
# 4. Show diff: original vs evolved (which constants changed, by how much).
|
||||
#
|
||||
# Output:
|
||||
# <output>/
|
||||
# generation_0.jsonl {candidate_id, fitness, mutations_applied}
|
||||
# generation_1.jsonl
|
||||
# ...
|
||||
# best.push3 highest-fitness program
|
||||
# diff.txt parameter changes vs original
|
||||
# evolution.log full run log
|
||||
#
|
||||
# Environment:
|
||||
# ANVIL_FORK_URL Passed through to fitness.sh when Anvil is not running.
|
||||
# =============================================================================
|
||||
|
||||
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Absolute directory that contains this script; the sibling tools below are
# resolved relative to it.
# CDPATH is cleared for the lookup: with CDPATH set, `cd <relative dir>` may
# resolve through a CDPATH entry and print the directory it chose, which would
# end up inside the captured value.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
|
||||
|
||||
# =============================================================================
|
||||
# Argument parsing
|
||||
# =============================================================================
|
||||
|
||||
# Option defaults; --seed and --output have none and are checked after parsing.
SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
OUTPUT_DIR=""

# Parse command-line options into the globals above.
# Arguments: the script's argument vector ("$@").
# Exits with status 2 on an unknown option or on an option given without a
# value (without the explicit check, `set -u` would abort with an opaque
# "unbound variable" message when "$2" is missing).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --seed | --population | --generations | --mutation-rate | --output)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 2
        fi
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 2
        ;;
    esac

    case "$1" in
      --seed) SEED="$2" ;;
      --population) POPULATION="$2" ;;
      --generations) GENERATIONS="$2" ;;
      --mutation-rate) MUTATION_RATE="$2" ;;
      --output) OUTPUT_DIR="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"
|
||||
|
||||
# Required options, and the seed must be an existing regular file.
if [[ -z "$SEED" ]]; then
  echo "Error: --seed required" >&2
  exit 2
fi
if [[ -z "$OUTPUT_DIR" ]]; then
  echo "Error: --output required" >&2
  exit 2
fi
if [[ ! -f "$SEED" ]]; then
  echo "Error: seed file not found: $SEED" >&2
  exit 2
fi

# Validate numeric args
# The pattern requires a non-zero digit (so "0" and "000" are rejected) and
# allows at most 18 significant digits so the value always fits in bash's
# 64-bit arithmetic.
for _name_val in "population:$POPULATION" "generations:$GENERATIONS" "mutation-rate:$MUTATION_RATE"; do
  _name="${_name_val%%:*}"
  _val="${_name_val##*:}"
  if ! [[ "$_val" =~ ^0*[1-9][0-9]{0,17}$ ]]; then
    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
    exit 2
  fi
done

# Normalize to plain decimal: a leading zero would otherwise make later
# $(( )) expressions read the value as octal ("010" -> 8, "08" -> error).
POPULATION=$((10#$POPULATION))
GENERATIONS=$((10#$GENERATIONS))
MUTATION_RATE=$((10#$MUTATION_RATE))
|
||||
|
||||
# Canonicalize paths
# Both paths become absolute because later steps change directory before
# using them. CDPATH is cleared for each lookup so that a relative path is
# resolved against the current directory only, and so that cd prints nothing
# into the captured value.
SEED="$(CDPATH='' cd -- "$(dirname -- "$SEED")" && pwd)/$(basename -- "$SEED")"
mkdir -p -- "$OUTPUT_DIR"
OUTPUT_DIR="$(CDPATH='' cd -- "$OUTPUT_DIR" && pwd)"
LOG="$OUTPUT_DIR/evolution.log"
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
# Emit one "[evolve] ..." line to stderr and append the same line to $LOG.
# Arguments: message words, joined with single spaces.
log() {
  local line
  line="[evolve] $*"
  echo "$line" >&2
  echo "$line" >>"$LOG"
}
|
||||
|
||||
# Log the given message with an "ERROR: " prefix, then terminate the script
# with exit status 2.
fail() {
  log "ERROR: $*"
  exit 2
}
|
||||
|
||||
# Print the command used to run TypeScript files (needed for mutate-cli.ts).
# Lookup order: `tsx` on PATH, then $SCRIPT_DIR/node_modules/.bin/tsx, then
# `npx tsx` when npx is on PATH.
# Outputs: the command string on stdout.
# Returns: 0 when a runner was found, 1 otherwise (nothing is printed).
find_tsx_cmd() {
  if command -v tsx >/dev/null 2>&1; then
    echo "tsx"
    return 0
  fi

  if [ -x "$SCRIPT_DIR/node_modules/.bin/tsx" ]; then
    echo "$SCRIPT_DIR/node_modules/.bin/tsx"
    return 0
  fi

  if command -v npx >/dev/null 2>&1; then
    echo "npx tsx"
    return 0
  fi

  return 1
}
|
||||
|
||||
# Run mutate-cli.ts with the given arguments.
# The command runs in a subshell whose working directory is SCRIPT_DIR so that
# relative TypeScript imports resolve; the caller's directory is unaffected.
# Globals:   SCRIPT_DIR, MUTATE_CLI, TSX_CMD (read)
# Arguments: passed through to mutate-cli.ts unchanged.
# Outputs:   whatever the CLI writes to stdout/stderr.
# Returns:   the CLI's exit status (or cd's, if SCRIPT_DIR is unusable).
run_mutate_cli() {
  local -a runner
  # TSX_CMD is either a single executable (a name on PATH or a full path,
  # which may contain spaces) or a multi-word command such as "npx tsx".
  # Only split it into words when it is not runnable as a single command.
  if command -v "$TSX_CMD" >/dev/null 2>&1; then
    runner=("$TSX_CMD")
  else
    read -r -a runner <<<"$TSX_CMD"
  fi
  (cd "$SCRIPT_DIR" && "${runner[@]}" "$MUTATE_CLI" "$@")
}
|
||||
|
||||
# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
# Input:   whitespace-separated integers on stdin.
# Outputs: "<min> <max> <mean>" on stdout; "0 0 0" when stdin has no numbers.
#
# The program is passed with -c rather than as a heredoc on `python3 -`:
# a heredoc replaces stdin, so the piped numbers would never reach the script.
# The mean is computed with exact rational arithmetic so that values beyond
# 2**53 are not distorted by float division; rounding follows Python's round().
py_stats() {
  python3 -c '
import sys
from fractions import Fraction

nums = [int(tok) for tok in sys.stdin.read().split()]
if not nums:
    print("0 0 0")
    sys.exit(0)
print(min(nums), max(nums), round(Fraction(sum(nums), len(nums))))
' "$@"
}
|
||||
|
||||
# Tournament selection over a scores file.
# Arguments: $1 - k, the number of tournaments to run
#            $2 - path to a scores file with one "<idx> <score>" pair per line
# Outputs:   k winner indices on stdout, one per line. Each tournament draws
#            two entries at random (with replacement) and keeps the one with
#            the higher score; on a tie the first draw wins.
# Returns:   non-zero when the file holds no entries (a message goes to stderr)
#            or when it cannot be parsed.
py_tournament() {
  local k="$1"
  local scores_file="$2"
  python3 - "$k" "$scores_file" <<'PYEOF'
import random
import sys

k = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines instead of failing the unpack below
        idx, score = line.split()
        entries.append((int(idx), int(score)))

if not entries:
    print("py_tournament: no scores found in " + sys.argv[2], file=sys.stderr)
    sys.exit(1)

for _ in range(k):
    a = random.choice(entries)
    b = random.choice(entries)
    winner = a if a[1] >= b[1] else b
    print(winner[0])
PYEOF
}
|
||||
|
||||
# =============================================================================
|
||||
# Tool checks
|
||||
# =============================================================================
|
||||
|
||||
# Required interpreters must be on PATH.
for _tool in python3 node; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    fail "$_tool not found in PATH"
  fi
done

# Both helper programs must exist next to this script.
if [ ! -f "$FITNESS_SH" ]; then
  fail "fitness.sh not found at $FITNESS_SH"
fi
if [ ! -f "$MUTATE_CLI" ]; then
  fail "mutate-cli.ts not found at $MUTATE_CLI"
fi

# Make fitness.sh executable if it is not already.
if [ ! -x "$FITNESS_SH" ]; then
  chmod +x "$FITNESS_SH"
fi

# Resolve the TypeScript runner once; run_mutate_cli uses TSX_CMD afterwards.
if ! TSX_CMD="$(find_tsx_cmd)"; then
  fail "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
fi
|
||||
|
||||
# =============================================================================
|
||||
# Work directory — holds all candidate .push3 files across generations
|
||||
# =============================================================================
|
||||
|
||||
# Scratch directory for this run; removed again when the script exits.
WORK_DIR="$(mktemp -d)"

# Remove the scratch directory.
# ${WORK_DIR:?} aborts instead of expanding to nothing if the variable is ever
# empty or unset, and -- keeps the path from being parsed as an option.
cleanup() { rm -rf -- "${WORK_DIR:?}"; }
trap cleanup EXIT
|
||||
|
||||
# =============================================================================
|
||||
# Log run header
|
||||
# =============================================================================
|
||||
|
||||
# Record the run configuration at the top of the log, framed by two rules.
_rule="========================================================"
_header_lines=(
  "$_rule"
  "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  " Seed: $SEED"
  " Population: $POPULATION"
  " Generations: $GENERATIONS"
  " Mutation rate: $MUTATION_RATE"
  " Output: $OUTPUT_DIR"
  " TSX: $TSX_CMD"
  "$_rule"
)
for _line in "${_header_lines[@]}"; do
  log "$_line"
done
|
||||
|
||||
# =============================================================================
|
||||
# Step 1 — Initialize generation 0
|
||||
#
|
||||
# N copies of the seed, each independently mutated MUTATION_RATE times.
|
||||
# =============================================================================
|
||||
|
||||
log ""
log "=== Initializing population ==="

# Generation 0 lives in its own directory under the scratch area.
GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"

# One candidate per population slot: the seed run through the mutate command
# with MUTATION_RATE as its argument. Any mutation failure aborts the run.
for ((i = 0; i < POPULATION; i++)); do
  printf -v CAND_FILE '%s/candidate_%03d.push3' "$GEN_DIR" "$i"
  MUTATED=""
  if ! MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE"); then
    fail "Failed to mutate seed for initial candidate $i"
  fi
  printf '%s\n' "$MUTATED" > "$CAND_FILE"
done

log "Initialized ${POPULATION} candidates in gen_0"
|
||||
|
||||
# =============================================================================
|
||||
# Step 2 — Evolution loop
|
||||
# =============================================================================
|
||||
|
||||
# Best-so-far bookkeeping. GLOBAL_BEST_GEN stays -1 until the first generation
# has been scored, which is how "no best recorded yet" is detected below.
GLOBAL_BEST_FITNESS=-1
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_CAND=""

CURRENT_GEN_DIR="$GEN_DIR"

for ((gen = 0; gen < GENERATIONS; gen++)); do

  log ""
  log "=== Generation $((gen + 1)) / $GENERATIONS ==="

  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

  # Start both files empty so that re-running into an existing output
  # directory does not append to results from a previous run.
  : > "$JSONL_FILE"
  : > "$SCORES_FILE"

  # --- a. Score all candidates ---

  # CAND_FILES[n] is the file whose score is stored under index n in
  # SCORES_FILE; selection below maps indices back through this array.
  CAND_FILES=()
  SCORE_VALUES=""
  CAND_COUNT=0

  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    # Skips the literal pattern when the glob matched nothing.
    [ -f "$CAND_FILE" ] || continue

    CAND_IDX="${CAND_FILE##*candidate_}"
    CAND_IDX="${CAND_IDX%.push3}"
    CID="gen${gen}_c${CAND_IDX}"

    SCORE=0
    FITNESS_EC=0
    SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?

    if [ "$FITNESS_EC" -ne 0 ]; then
      log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
      SCORE=0
    elif ! [[ "$SCORE" =~ ^-?[0-9]+$ ]]; then
      # The score is written into JSON and parsed as an integer later on, so
      # anything else is treated like a failed evaluation.
      log " $CID: invalid candidate (non-integer fitness output), score=0"
      SCORE=0
    else
      log " $CID: fitness=$SCORE"
    fi

    # Append to JSONL
    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
      "$CID" "$SCORE" "$MUTATION_RATE" >> "$JSONL_FILE"

    # Record index→score for selection
    echo "$CAND_COUNT $SCORE" >> "$SCORES_FILE"

    CAND_FILES+=("$CAND_FILE")
    SCORE_VALUES="$SCORE_VALUES $SCORE"
    CAND_COUNT=$((CAND_COUNT + 1))
  done

  if [ "$CAND_COUNT" -eq 0 ]; then
    fail "No candidates found in $CURRENT_GEN_DIR"
  fi

  # --- b. Log generation stats ---

  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
  log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"

  # Find the best candidate of this generation and decide whether it beats the
  # global best. The comparison is done in Python because scores can exceed
  # the range of bash integer tests. The scores path travels as an argument
  # rather than being pasted into the Python source.
  BEST_INFO=$(python3 - "$SCORES_FILE" "$GLOBAL_BEST_FITNESS" <<'PYEOF'
import sys

scores_path, prev_best = sys.argv[1], int(sys.argv[2])
entries = []
with open(scores_path) as f:
    for line in f:
        if not line.strip():
            continue
        idx, score = line.split()
        entries.append((int(idx), int(score)))
best = max(entries, key=lambda e: e[1])
print(best[0], best[1], 1 if best[1] > prev_best else 0)
PYEOF
  )
  read -r BEST_IDX_THIS_GEN BEST_SCORE_THIS_GEN BEATS_GLOBAL <<<"$BEST_INFO"

  BEST_FILE_THIS_GEN="${CAND_FILES[$BEST_IDX_THIS_GEN]}"

  if [ "$GLOBAL_BEST_GEN" -eq -1 ] || [ "$BEATS_GLOBAL" -eq 1 ]; then
    GLOBAL_BEST_FITNESS="$BEST_SCORE_THIS_GEN"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
    log " New global best: gen=$gen idx=$BEST_IDX_THIS_GEN fitness=$GLOBAL_BEST_FITNESS"
  fi

  # Skip next-generation creation after the final generation
  if [ "$gen" -eq "$((GENERATIONS - 1))" ]; then
    break
  fi

  # --- c. Tournament selection (k = population / 2) ---

  K=$((POPULATION / 2))
  if [ "$K" -lt 1 ]; then
    K=1
  fi

  # Captured first so that a failing selection is detected here instead of
  # silently yielding an empty survivor list.
  WINNERS=$(py_tournament "$K" "$SCORES_FILE") \
    || fail "Tournament selection failed for generation $gen"

  SURVIVOR_FILES=()
  while IFS= read -r WIN_IDX; do
    [ -n "$WIN_IDX" ] || continue
    SURVIVOR_FILES+=("${CAND_FILES[$WIN_IDX]}")
  done <<<"$WINNERS"

  if [ "${#SURVIVOR_FILES[@]}" -eq 0 ]; then
    fail "Tournament selection returned no survivors for generation $gen"
  fi

  log " Selected ${#SURVIVOR_FILES[@]} survivors via tournament"

  # --- d. Generate next population ---

  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
  mkdir -p "$NEXT_GEN_DIR"

  NEXT_IDX=0
  HALF=$((POPULATION / 2))

  # First half: mutate random survivors
  for ((_slot = 0; _slot < HALF; _slot++)); do
    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    MUTATED=""
    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
    else
      # Fallback: copy the survivor as-is to keep population size stable
      cp -- "$SUR" "$DEST"
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  # Second half: crossover random survivor pairs
  REMAINING=$((POPULATION - HALF))
  for ((_slot = 0; _slot < REMAINING; _slot++)); do
    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    CROSSED=""
    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
      printf '%s\n' "$CROSSED" > "$DEST"
    else
      # Fallback: mutate one survivor
      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
        printf '%s\n' "$MUTATED" > "$DEST"
      else
        cp -- "$SUR_A" "$DEST"
      fi
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log " Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
  CURRENT_GEN_DIR="$NEXT_GEN_DIR"

done
|
||||
|
||||
# =============================================================================
|
||||
# Step 3 — Output best candidate
|
||||
# =============================================================================
|
||||
|
||||
# A best candidate must have been recorded and its file must still exist.
if [[ -z "$GLOBAL_BEST_CAND" || ! -f "$GLOBAL_BEST_CAND" ]]; then
  fail "No valid best candidate recorded — evolution produced no scorable output"
fi

# Copy the winner out of the scratch area into the output directory.
BEST_OUTPUT="$OUTPUT_DIR/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"

log ""
log "Best candidate → $BEST_OUTPUT"
log " Fitness: $GLOBAL_BEST_FITNESS (generation $GLOBAL_BEST_GEN)"
|
||||
|
||||
# =============================================================================
|
||||
# Step 4 — Diff: original vs evolved constants
|
||||
# =============================================================================
|
||||
|
||||
DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"

# Compare the integer literals of the seed and the best program position by
# position and write a human-readable report to DIFF_OUTPUT.
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import re
import sys

COMMENT = re.compile(r';;[^\n]*')
INT_LITERAL = re.compile(r'\b(\d{6,})\b')


def extract_ints(path):
    """Return, in file order, every integer literal of 6+ digits outside ';;' comments."""
    source = COMMENT.sub('', open(path).read())
    return [int(tok) for tok in INT_LITERAL.findall(source)]


seed_path, best_path = sys.argv[1], sys.argv[2]
orig = extract_ints(seed_path)
best = extract_ints(best_path)

print("=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()

# Positions present in both files: report each value that differs.
changed = 0
for pos, (before, after) in enumerate(zip(orig, best)):
    if before == after:
        continue
    delta = after - before
    pct = float('inf') if before == 0 else delta / before * 100
    print(f" const[{pos:3d}]: {before:>25d} → {after:>25d} (Δ={delta:+d}, {pct:+.2f}%)")
    changed += 1

# Length mismatch: list trailing additions, or count trailing removals.
extra = len(best) - len(orig)
if extra > 0:
    for offset, val in enumerate(best[len(orig):]):
        print(f" const[{len(orig) + offset:3d}]: {'(new)':>25s} → {val:>25d}")
elif extra < 0:
    print(f" ({-extra} constant(s) removed from end)")

print()
if changed == 0 and extra == 0:
    print("No constant changes — evolution applied structural mutations only.")
else:
    print(f"Summary: {changed} of {min(len(orig), len(best))} constant(s) changed.")
PYEOF

log "Diff written to $DIFF_OUTPUT"
log ""
# Echo the report to stderr as well.
cat "$DIFF_OUTPUT" >&2
|
||||
|
||||
# Closing summary, framed by two rules.
_summary_rule="========================================================"
_summary_lines=(
  "$_summary_rule"
  "Evolution complete."
  " Generations run: $GENERATIONS"
  " Best fitness: $GLOBAL_BEST_FITNESS"
  " Best from gen: $GLOBAL_BEST_GEN"
  " Output directory: $OUTPUT_DIR"
  "$_summary_rule"
)
for _summary_line in "${_summary_lines[@]}"; do
  log "$_summary_line"
done
|
||||
Loading…
Add table
Add a link
Reference in a new issue