harb/tools/push3-evolution/evolve.sh
openhands 6694b2daa8 fix: CID format change silently drops historical generation JSONL on re-admission (#757)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 04:27:38 +00:00

945 lines
34 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# =============================================================================
# evolve.sh — Push3 evolution orchestrator
#
# Outer evolutionary loop: generate candidates → score → select → repeat.
#
# Usage:
# ./tools/push3-evolution/evolve.sh \
# --seed optimizer_v3.push3 \
# --population 10 \
# --generations 5 \
# --mutation-rate 2 \
# --elites 2 \
# [--output evolved/] \
# [--diverse-seeds] \
# [--run-id <N>]
#
# --diverse-seeds Initialise gen_0 with diverse candidates. When the
# persistent seeds pool (tools/push3-evolution/seeds/) is
# non-empty, a random sample from the pool is used (crossover
# between hand-written and evolved programs). When the pool is
# empty, falls back to the parametric seed-gen-cli variants.
# Any shortfall (pool or variants < --population) is filled by
# mutating the main seed.
#
# --run-id <N> Integer identifier for this run, used to name candidates
# admitted to the seeds pool (e.g. run003_gen2_c005.push3).
# Auto-incremented from the highest existing run in the pool
# manifest when omitted.
#
# Algorithm:
# 1. Initialize population: N copies of seed, each with M random mutations.
# 2. For each generation:
# a. Score all candidates via the configured fitness backend
# (batch-eval.sh for revm [default], fitness.sh for anvil)
# b. Log generation stats (min/max/mean fitness, best candidate)
# c. Select k survivors via tournament selection (k = population/2)
# d. Elitism: copy the top --elites candidates unchanged into the next generation
# e. Generate next population: mutate survivors + crossover pairs
# 3. Output best candidate as Push3 file.
# 4. Show diff: original vs evolved (which constants changed, by how much).
#
# Output:
# <output>/run_NNN/ NNN auto-increments from the highest existing run dir
# generation_0.jsonl {candidate_id, fitness, mutations_applied}
# generation_1.jsonl
# ...
# best.push3 highest-fitness program
# diff.txt parameter changes vs original
# evolution.log full run log
#
# Environment:
# BASE_RPC_URL Required when EVAL_MODE=revm (the default). Base network RPC endpoint.
# ANVIL_FORK_URL Passed through to fitness.sh when EVAL_MODE=anvil.
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================
# Strict mode: abort on errors, on unset variables, and on failures anywhere in a pipeline.
set -e -u -o pipefail

# Put the Foundry toolchain directory (forge, cast, anvil) at the front of PATH.
PATH="${HOME}/.foundry/bin:${PATH}"
export PATH

# Absolute directory containing this script; every helper below is addressed relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="${SCRIPT_DIR}/fitness.sh"
BATCH_EVAL_SH="${SCRIPT_DIR}/revm-evaluator/batch-eval.sh"
MUTATE_CLI="${SCRIPT_DIR}/mutate-cli.ts"
SEED_GEN_CLI="${SCRIPT_DIR}/seed-gen-cli.ts"

# EVAL_MODE selects the fitness backend:
#   revm (default) — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh).
#                    Requires the BASE_RPC_URL env var. 10-100× faster at scale.
#   anvil          — per-candidate Anvil+forge-script pipeline (fitness.sh).
# An unset or empty value falls back to revm.
: "${EVAL_MODE:=revm}"
# =============================================================================
# Argument parsing
# =============================================================================
# Option defaults; each may be overridden on the command line.
SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
ELITES=2
OUTPUT_DIR="evolved"
DIVERSE_SEEDS=false
RUN_ID=""

# Abort with a usage error (exit 2) when a value-taking option is the last word
# on the command line. Without this guard "$2" is unset and `set -u` kills the
# script with an unhelpful "unbound variable" message.
#   $1 - option name as typed
#   $2 - number of remaining arguments, including the option itself
_require_value() {
  if [ "$2" -lt 2 ]; then
    echo "Error: $1 requires a value" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case $1 in
    --seed)          _require_value "$1" "$#"; SEED="$2"; shift 2 ;;
    --population)    _require_value "$1" "$#"; POPULATION="$2"; shift 2 ;;
    --generations)   _require_value "$1" "$#"; GENERATIONS="$2"; shift 2 ;;
    --mutation-rate) _require_value "$1" "$#"; MUTATION_RATE="$2"; shift 2 ;;
    --elites)        _require_value "$1" "$#"; ELITES="$2"; shift 2 ;;
    --output)        _require_value "$1" "$#"; OUTPUT_DIR="$2"; shift 2 ;;
    --diverse-seeds) DIVERSE_SEEDS=true; shift ;;
    --run-id)        _require_value "$1" "$#"; RUN_ID="$2"; shift 2 ;;
    *) echo "Unknown option: $1" >&2; exit 2 ;;
  esac
done

if [ -z "$SEED" ]; then echo "Error: --seed required" >&2; exit 2; fi
if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi

# Validate numeric args: population, generations and mutation-rate must be >= 1.
for _name_val in "population:$POPULATION" "generations:$GENERATIONS" "mutation-rate:$MUTATION_RATE"; do
  _name="${_name_val%%:*}"
  _val="${_name_val##*:}"
  if ! [[ "$_val" =~ ^[0-9]+$ ]] || [ "$_val" -lt 1 ]; then
    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
    exit 2
  fi
done
# --elites may be 0 (elitism disabled).
if ! [[ "$ELITES" =~ ^[0-9]+$ ]]; then
  echo "Error: --elites must be a non-negative integer (got: $ELITES)" >&2
  exit 2
fi

# Normalise to plain base-10. Zero-padded input such as "08" passes the regex
# above but would be read as (invalid) octal by $(( )) and printf '%d' later on.
POPULATION=$((10#$POPULATION))
GENERATIONS=$((10#$GENERATIONS))
MUTATION_RATE=$((10#$MUTATION_RATE))
ELITES=$((10#$ELITES))
# Canonicalize paths: make the seed path absolute so later `cd`s cannot break it.
SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"

# Resolve base output dir and create run_NNN subdirectory
mkdir -p "$OUTPUT_DIR"
BASE_DIR="$(cd "$OUTPUT_DIR" && pwd)"

# Auto-increment: find the highest run_NNN directory under BASE_DIR and add 1.
# The Python body must sit at column 0: the heredoc is passed to python3 verbatim.
RUN_NUM=$(python3 - "$BASE_DIR" <<'PYEOF'
import sys, os, re

base = sys.argv[1]
max_n = 0
if os.path.isdir(base):
    for name in os.listdir(base):
        # Only directories named exactly run_<digits> take part in the numbering.
        m = re.fullmatch(r'run_(\d+)', name)
        if m and os.path.isdir(os.path.join(base, name)):
            max_n = max(max_n, int(m.group(1)))
print(f"{max_n + 1:03d}")
PYEOF
)

# From here on OUTPUT_DIR is the per-run directory; BASE_DIR keeps the parent.
OUTPUT_DIR="$BASE_DIR/run_${RUN_NUM}"
mkdir -p "$OUTPUT_DIR"
LOG="$OUTPUT_DIR/evolution.log"
# Seeds pool — persistent candidate pool across all runs
# manifest.jsonl schema: tools/push3-evolution/seeds/manifest.schema.json
SEEDS_DIR="$SCRIPT_DIR/seeds"
POOL_MANIFEST="$SEEDS_DIR/manifest.jsonl"
ADMISSION_THRESHOLD=6000000000000000000000 # 6e21 wei

# Print $1 as a zero-padded, three-digit decimal number.
# The 10# prefix forces base 10: a bare printf '%03d' would read "010" as
# octal (printing 008) and fail outright on "08".
_format_run_id() {
  printf '%03d' "$((10#$1))"
}

# Validate/auto-compute RUN_ID
if [ -n "${RUN_ID:-}" ]; then
  if ! [[ "$RUN_ID" =~ ^[0-9]+$ ]] || [ "$RUN_ID" -lt 1 ]; then
    echo "Error: --run-id must be a positive integer (got: $RUN_ID)" >&2
    exit 2
  fi
  RUN_ID=$(_format_run_id "$RUN_ID")
else
  # Auto-increment: find the highest run ID in the manifest and add 1
  if [ -f "$POOL_MANIFEST" ]; then
    RUN_ID=$(python3 - "$POOL_MANIFEST" <<'PYEOF'
import json, sys

max_run = 0
with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
            r = d.get("run")
            if r is not None:
                max_run = max(max_run, int(r))
        except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
            # Unparseable lines, non-object lines and non-numeric run values
            # are ignored for numbering purposes.
            pass
print(f"{max_run + 1:03d}")
PYEOF
    ) || RUN_ID="001"
  else
    RUN_ID="001"
  fi
fi
# =============================================================================
# Helpers
# =============================================================================
# Emit one "[evolve] "-prefixed line to stderr and append the same line to $LOG.
# Arguments: the message words (joined with single spaces).
log() {
  local line
  line="[evolve] $*"
  printf '%s\n' "$line" >&2
  printf '%s\n' "$line" >> "$LOG"
}
# Log the given message with an "ERROR: " prefix, then terminate with status 2.
# Arguments: the message words (joined with single spaces).
fail() {
  local reason="$*"
  log "ERROR: ${reason}"
  exit 2
}
# Print the command used to execute the TypeScript CLIs.
# Candidates are probed in order and the first hit wins:
#   1. tsx on PATH                      -> "tsx"
#   2. $SCRIPT_DIR/node_modules/.bin/tsx -> that path
#   3. npx on PATH                      -> "npx tsx"
# Returns non-zero (printing nothing) when none is available.
find_tsx_cmd() {
  local bundled="$SCRIPT_DIR/node_modules/.bin/tsx"

  if command -v tsx &>/dev/null; then
    echo "tsx"
    return
  fi
  if [ -x "$bundled" ]; then
    echo "$bundled"
    return
  fi
  if command -v npx &>/dev/null; then
    echo "npx tsx"
    return
  fi
  return 1
}
# Invoke mutate-cli.ts through the resolved TypeScript runner, forwarding all
# arguments. The call happens in a subshell whose working directory is
# SCRIPT_DIR so relative TS imports resolve; the caller's cwd is untouched.
run_mutate_cli() {
  (
    cd "$SCRIPT_DIR" || exit
    # TSX_CMD is left unquoted on purpose: it may hold two words ("npx tsx").
    $TSX_CMD "$MUTATE_CLI" "$@"
  )
}
# Invoke seed-gen-cli.ts through the resolved TypeScript runner, forwarding all
# arguments. Executed in a subshell rooted at SCRIPT_DIR; the caller's cwd is
# untouched.
run_seed_gen_cli() {
  (
    cd "$SCRIPT_DIR" || exit
    # TSX_CMD is left unquoted on purpose: it may hold two words ("npx tsx").
    $TSX_CMD "$SEED_GEN_CLI" "$@"
  )
}
# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
# Stdin:  whitespace-separated integers.
# Stdout: "<min> <max> <mean>" on one line; "0 0 0" when stdin has no numbers.
# The mean is computed in exact integer arithmetic (round-half-to-even, the
# same rule Python's round() applies) — float division would silently drop
# the low digits of wei-scale values.
py_stats() {
  python3 -c '
import sys

nums = [int(tok) for tok in sys.stdin.read().split()]
if not nums:
    print(0, 0, 0)
else:
    count = len(nums)
    mean, rem = divmod(sum(nums), count)
    # Round to nearest; exact ties go to the even neighbour.
    if 2 * rem > count or (2 * rem == count and mean % 2 == 1):
        mean += 1
    print(min(nums), max(nums), mean)
'
}
# Top-N selection: return filepaths of the N highest-scoring candidates (descending).
# Arguments:
#   $1 - N, the maximum number of paths to print
#   $2 - scores file; one tab-delimited "idx<TAB>score<TAB>filepath" row per line
# Stdout: up to N filepaths, best first. Rows with fewer than three fields are
# skipped; equal scores keep their file order (the sort is stable).
py_top_n() {
  local n="$1"
  local scores_file="$2"
  # The Python body sits at column 0: the heredoc is passed to python3 verbatim.
  python3 - "$n" "$scores_file" <<'PYEOF'
import sys

n = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[1]), parts[2]))
entries.sort(key=lambda x: x[0], reverse=True)
for _, path in entries[:n]:
    print(path)
PYEOF
}
# Tournament selection: run k tournaments of size 2 and print the winners.
# Arguments:
#   $1 - k, the number of tournaments (= number of paths printed)
#   $2 - scores file; one tab-delimited "idx<TAB>score<TAB>filepath" row per line
# Stdout: k winner filepaths, one per line. Contestants are drawn with
# replacement, so a candidate may face itself and may win more than once.
# Returns: non-zero when the scores file contains no usable rows.
py_tournament() {
  local k="$1"
  local scores_file="$2"
  # The Python body sits at column 0: the heredoc is passed to python3 verbatim.
  python3 - "$k" "$scores_file" <<'PYEOF'
import sys, random

k = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[0]), int(parts[1]), parts[2]))
if not entries:
    sys.exit(1)
for _ in range(k):
    a = random.choice(entries)
    b = random.choice(entries)
    # Ties go to the first-drawn contestant.
    winner = a if a[1] >= b[1] else b
    print(winner[2])
PYEOF
}
# =============================================================================
# Tool checks
# =============================================================================
# Interpreters that must be on PATH regardless of evaluation mode.
for _tool in python3 node; do
  if ! command -v "$_tool" &>/dev/null; then
    fail "$_tool not found in PATH"
  fi
done

# Helper scripts that must exist in every mode; fitness.sh is made executable if needed.
if [ ! -f "$FITNESS_SH" ]; then
  fail "fitness.sh not found at $FITNESS_SH"
fi
if [ ! -f "$MUTATE_CLI" ]; then
  fail "mutate-cli.ts not found at $MUTATE_CLI"
fi
if [ ! -x "$FITNESS_SH" ]; then
  chmod +x "$FITNESS_SH"
fi

# Mode-specific requirements; anything other than revm/anvil is rejected.
case "$EVAL_MODE" in
  revm)
    if [ ! -f "$BATCH_EVAL_SH" ]; then
      fail "batch-eval.sh not found at $BATCH_EVAL_SH"
    fi
    if [ ! -x "$BATCH_EVAL_SH" ]; then
      chmod +x "$BATCH_EVAL_SH"
    fi
    if [ -z "${BASE_RPC_URL:-}" ]; then
      fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
    fi
    if ! command -v forge &>/dev/null; then
      fail "forge not found in PATH (required for EVAL_MODE=revm)"
    fi
    ;;
  anvil)
    ;;
  *)
    fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
    ;;
esac

# Resolve the TypeScript runner once; the run_*_cli helpers read TSX_CMD.
if ! TSX_CMD="$(find_tsx_cmd)"; then
  fail "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
fi
# =============================================================================
# Work directory — holds all candidate .push3 files across generations
#
# Fixed path under BASE_DIR so stale /tmp/tmp.* dirs from killed runs can
# never interfere with a new run. Cleaned at startup; run outputs are
# already preserved in BASE_DIR/run_NNN/ before this directory is wiped.
# =============================================================================
# Scratch directory shared by every run under the same BASE_DIR.
WORK_DIR="$BASE_DIR/.work"
# Destructive: wipes whatever a previous run left behind before recreating it.
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"
# =============================================================================
# Log run header
# =============================================================================
# Record the effective configuration once; log() writes each line to stderr
# and appends it to $LOG.
log "========================================================"
log "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
log " Seed: $SEED"
log " Population: $POPULATION"
log " Generations: $GENERATIONS"
log " Mutation rate: $MUTATION_RATE"
log " Elites: $ELITES"
log " Diverse seeds: $DIVERSE_SEEDS"
log " Run ID: $RUN_ID"
log " Base dir: $BASE_DIR"
log " Output: $OUTPUT_DIR"
log " TSX: $TSX_CMD"
log " Eval mode: $EVAL_MODE"
log "========================================================"
# =============================================================================
# Step 1 — Initialize generation 0
#
# N copies of the seed, each independently mutated MUTATION_RATE times.
# =============================================================================
log ""
log "=== Initializing population ==="
# gen_0 holds candidate_NNN.push3 files plus a candidate_NNN.ops sidecar that
# records how many mutation operations produced each candidate.
GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"

if [ "$DIVERSE_SEEDS" = "true" ]; then
  # --- Diverse-seeds mode: prefer persistent pool; fall back to seed-gen-cli ---
  VARIANT_IDX=0

  # Build a random sample list from the pool in one pass (also determines if
  # the pool has any usable entries, avoiding a second manifest parse).
  #
  # NOTE: seeds with fitness: null (e.g. llm-origin entries not yet evaluated)
  # are included in the sample with equal probability to any other seed — the
  # pool sampler does a flat random shuffle and does not read or weight by
  # fitness at all. Run evaluate-seeds.sh to score them and write real fitness
  # values back to manifest.jsonl.
  POOL_SAMPLE_LIST="$WORK_DIR/pool_sample.txt"
  POOL_COUNT=0
  if [ -f "$POOL_MANIFEST" ]; then
    python3 - "$POOL_MANIFEST" "$SEEDS_DIR" "$POPULATION" > "$POOL_SAMPLE_LIST" <<'PYEOF'
import json, sys, os, random

manifest_path, seeds_dir, n = sys.argv[1], sys.argv[2], int(sys.argv[3])
entries = []
with open(manifest_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Skip lines that are not JSON objects or that name no file: joining an
        # empty name onto seeds_dir yields the directory itself, which "exists"
        # but cannot be copied as a candidate.
        name = d.get('file') if isinstance(d, dict) else None
        if not isinstance(name, str) or not name:
            continue
        fpath = os.path.join(seeds_dir, name)
        if os.path.isfile(fpath):
            entries.append(fpath)
random.shuffle(entries)
for path in entries[:n]:
    print(path)
PYEOF
    POOL_COUNT=$(wc -l < "$POOL_SAMPLE_LIST" 2>/dev/null || echo 0)
  fi

  if [ "$POOL_COUNT" -gt 0 ]; then
    # --- Pool mode: random sample from the seeds pool ---
    log " diverse-seeds: sampling up to $POPULATION candidates from pool ($POOL_COUNT available)"
    while IFS= read -r POOL_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
      CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
      cp "$POOL_FILE" "$CAND_FILE"
      # Pool members enter unmodified, hence 0 mutations applied.
      printf '0\n' > "${CAND_FILE%.push3}.ops"
      VARIANT_IDX=$((VARIANT_IDX + 1))
    done < "$POOL_SAMPLE_LIST"
    log " diverse-seeds: seeded $VARIANT_IDX candidate(s) from pool"
  else
    # --- Fallback: parametric variants from seed-gen-cli (pool is empty) ---
    log " diverse-seeds: pool empty, falling back to seed-gen-cli parametric variants"
    [ -f "$SEED_GEN_CLI" ] || fail "seed-gen-cli.ts not found at $SEED_GEN_CLI"
    SEED_VARIANTS_DIR="$WORK_DIR/seed_variants"
    SEED_VARIANTS_LIST="$WORK_DIR/seed_variants_list.txt"
    # Run seed-gen-cli as a direct command (not inside <(...)) so its exit code is
    # checked by the parent shell and fail() aborts the entire script on error.
    # Stderr goes to the log file for diagnostics rather than being discarded.
    run_seed_gen_cli --count "$POPULATION" --output-dir "$SEED_VARIANTS_DIR" \
      > "$SEED_VARIANTS_LIST" 2>>"$LOG" \
      || fail "seed-gen-cli.ts failed to generate variants"
    while IFS= read -r VARIANT_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
      CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
      cp "$VARIANT_FILE" "$CAND_FILE"
      printf '0\n' > "${CAND_FILE%.push3}.ops"
      VARIANT_IDX=$((VARIANT_IDX + 1))
    done < "$SEED_VARIANTS_LIST"
  fi

  # Fill any remaining slots with mutations of the seed
  while [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
      || fail "Failed to mutate seed for fallback candidate $VARIANT_IDX"
    printf '%s\n' "$MUTATED" > "$CAND_FILE"
    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
    VARIANT_IDX=$((VARIANT_IDX + 1))
  done
  log "Initialized ${POPULATION} candidates in gen_0 (diverse-seeds, pool=$POOL_COUNT)"
else
  # --- Default mode: N copies of the seed, each independently mutated ---
  for ((i = 0; i < POPULATION; i++)); do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
      || fail "Failed to mutate seed for initial candidate $i"
    printf '%s\n' "$MUTATED" > "$CAND_FILE"
    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
  done
  log "Initialized ${POPULATION} candidates in gen_0"
fi
# =============================================================================
# Step 2 — Evolution loop
# =============================================================================
# Best candidate seen across all generations (-1 = nothing recorded yet).
GLOBAL_BEST_FITNESS=-1
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_CAND=""
CURRENT_GEN_DIR="$GEN_DIR"

# Counting loops below use bash arithmetic rather than `seq`: BSD/macOS
# `seq 1 0` counts DOWN (printing 1 and 0) instead of printing nothing, which
# would over-fill the population whenever a slot count is zero.
for ((gen = 0; gen < GENERATIONS; gen++)); do
  log ""
  log "=== Generation $((gen + 1)) / $GENERATIONS ==="
  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

  # --- a. Score all candidates ---
  SCORE_VALUES=""
  CAND_COUNT=0

  # In revm mode, batch-score all candidates in one forge test invocation before
  # the per-candidate loop. Scores are written to a temp JSONL file that the
  # loop reads with a fast Python lookup.
  BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"
  if [ "$EVAL_MODE" = "revm" ]; then
    declare -a _BATCH_FILES=()
    for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
      if [ -f "$_CF" ]; then
        _BATCH_FILES+=("$_CF")
      fi
    done
    if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
      BATCH_EC=0
      BATCH_EVAL_LOG="$WORK_DIR/batch-eval-gen-${gen}.log"
      bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>"$BATCH_EVAL_LOG" \
        || BATCH_EC=$?
      if [ "$BATCH_EC" -ne 0 ]; then
        echo "--- last 20 lines of $BATCH_EVAL_LOG ---" >&2
        tail -n 20 "$BATCH_EVAL_LOG" >&2
        echo "--- end batch-eval.log ---" >&2
      fi
      # Exit 2 is treated as an infrastructure error and aborts the run; other
      # non-zero codes fall through and missing scores become 0 below.
      if [ "$BATCH_EC" -eq 2 ]; then
        fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
      log " revm batch scoring complete (exit $BATCH_EC)"
    fi
  fi

  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    [ -f "$CAND_FILE" ] || continue
    CAND_IDX="${CAND_FILE##*candidate_}"
    CAND_IDX="${CAND_IDX%.push3}"
    # Canonical candidate_id format: "candidate_XXX" (matches source filename and batch-eval IDs).
    CID="candidate_${CAND_IDX}"

    # Read mutations_applied from sidecar; default 0 if missing.
    OPS_FILE="${CAND_FILE%.push3}.ops"
    MUTATIONS_APPLIED=0
    if [ -f "$OPS_FILE" ]; then
      MUTATIONS_APPLIED=$(cat "$OPS_FILE")
    fi

    SCORE=0
    FITNESS_EC=0
    if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
      # Look up pre-computed score from batch-eval.sh output.
      # batch-eval uses filename as candidate_id (e.g. "candidate_000")
      BATCH_CID="$(basename "$CAND_FILE" .push3)"
      SCORE=$(python3 - "$BATCH_CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys

cid = sys.argv[1]
with open(sys.argv[2]) as f:
    for line in f:
        try:
            d = json.loads(line)
            if d.get("candidate_id") == cid:
                print(d["fitness"])
                sys.exit(0)
        except (json.JSONDecodeError, KeyError, AttributeError):
            # Skip unparseable, incomplete or non-object lines.
            pass
# No entry for this candidate: score it 0.
print(0)
PYEOF
      ) || SCORE=0
    else
      # Anvil mode (or revm fallback): score candidate individually.
      SCORE=$(bash "$FITNESS_SH" "$CAND_FILE") || FITNESS_EC=$?
      # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
      if [ "$FITNESS_EC" -eq 2 ]; then
        fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
    fi

    # Validate that score is a non-negative integer; treat any other output as invalid.
    if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
      log " $CID: invalid/missing score, using 0"
      SCORE=0
    else
      log " $CID: fitness=$SCORE"
    fi

    # Append to JSONL — use the actual operations recorded for this candidate.
    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"
    # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
    printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"
    SCORE_VALUES="$SCORE_VALUES $SCORE"
    CAND_COUNT=$((CAND_COUNT + 1))
  done

  if [ "$CAND_COUNT" -eq 0 ]; then
    fail "No candidates found in $CURRENT_GEN_DIR"
  fi

  # --- b. Log generation stats ---
  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
  log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"

  # Find best candidate for this generation (filepath returned directly).
  BEST_FILE_THIS_GEN=$(python3 - "$SCORES_FILE" <<'PYEOF'
import sys

entries = []
with open(sys.argv[1]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[1]), parts[2]))
if not entries:
    sys.exit(1)
print(max(entries, key=lambda x: x[0])[1])
PYEOF
  ) || fail "Could not determine best candidate from $SCORES_FILE"

  # The comparison runs in python3 because wei-scale scores overflow bash
  # integers. Only a strictly higher score replaces the global best, so ties
  # keep the earlier generation's candidate.
  if python3 -c "import sys; sys.exit(0 if int(sys.argv[1]) > int(sys.argv[2]) else 1)" "$MAX" "$GLOBAL_BEST_FITNESS" || [ "$GLOBAL_BEST_FITNESS" = "-1" ]; then
    GLOBAL_BEST_FITNESS="$MAX"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
    log " New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
  fi

  # Skip next-generation creation after the final generation
  if [ "$gen" -eq "$((GENERATIONS - 1))" ]; then
    break
  fi

  # --- c. Tournament selection (k = population / 2) ---
  K=$((POPULATION / 2))
  if [ "$K" -lt 1 ]; then
    K=1
  fi
  SURVIVOR_FILES=()
  while IFS= read -r WIN_FILE; do
    SURVIVOR_FILES+=("$WIN_FILE")
  done < <(py_tournament "$K" "$SCORES_FILE")
  # A failure inside the process substitution is invisible to set -e; without
  # this guard an empty list would surface later as "division by 0" in RANDOM % 0.
  if [ "${#SURVIVOR_FILES[@]}" -eq 0 ]; then
    fail "Tournament selection returned no survivors for generation $gen"
  fi
  log " Selected ${#SURVIVOR_FILES[@]} survivors via tournament"

  # --- d/e. Generate next population (elitism + offspring) ---
  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
  mkdir -p "$NEXT_GEN_DIR"
  NEXT_IDX=0

  # --- d. Elitism: copy top ELITES candidates unchanged ---
  if [ "$ELITES" -gt 0 ]; then
    ELITE_FILES=()
    while IFS= read -r ELITE_FILE; do
      if [ -f "$ELITE_FILE" ]; then
        ELITE_FILES+=("$ELITE_FILE")
      fi
    done < <(py_top_n "$ELITES" "$SCORES_FILE")
    for ELITE_FILE in "${ELITE_FILES[@]}"; do
      DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
      cp "$ELITE_FILE" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
      NEXT_IDX=$((NEXT_IDX + 1))
    done
    log " Elitism: carried over ${#ELITE_FILES[@]} top candidate(s) unchanged"
  fi

  # --- e. Fill remaining slots with mutation and crossover offspring ---
  NON_ELITE=$((POPULATION - NEXT_IDX))
  HALF=$((NON_ELITE / 2))

  # First half of remaining: mutate random survivors
  for ((_i = 1; _i <= HALF; _i++)); do
    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
    else
      # Fallback: copy the survivor as-is to keep population size stable
      cp "$SUR" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  # Second half of remaining: crossover random survivor pairs
  REMAINING=$((POPULATION - NEXT_IDX))
  for ((_i = 1; _i <= REMAINING; _i++)); do
    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
      printf '%s\n' "$CROSSED" > "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    else
      # Fallback: mutate one survivor
      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
        printf '%s\n' "$MUTATED" > "$DEST"
        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
      else
        cp "$SUR_A" "$DEST"
        printf '0\n' > "${DEST%.push3}.ops"
      fi
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log " Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
  CURRENT_GEN_DIR="$NEXT_GEN_DIR"
done
# =============================================================================
# Step 3 — Output best candidate
# =============================================================================
# Refuse to continue without a best candidate file to publish.
if [ -z "$GLOBAL_BEST_CAND" ] || [ ! -f "$GLOBAL_BEST_CAND" ]; then
  fail "No valid best candidate recorded — evolution produced no scorable output"
fi
BEST_OUTPUT="$OUTPUT_DIR/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"
log ""
log "Best candidate → $BEST_OUTPUT"
log " Fitness: $GLOBAL_BEST_FITNESS (generation $GLOBAL_BEST_GEN)"
# =============================================================================
# Step 4 — Diff: original vs evolved constants
# =============================================================================
# Constants are paired by position: the i-th large literal of the seed is
# compared with the i-th large literal of the best candidate.
DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import sys, re


def extract_ints(path):
    """Extract all large integer literals (≥6 digits) from a Push3 file."""
    with open(path) as fh:  # close the handle deterministically
        text = fh.read()
    text = re.sub(r';;[^\n]*', '', text)  # strip comments
    return [int(m) for m in re.findall(r'\b(\d{6,})\b', text)]


seed_path, best_path = sys.argv[1], sys.argv[2]
orig = extract_ints(seed_path)
best = extract_ints(best_path)
print(f"=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()
changed = 0
for i, (o, b) in enumerate(zip(orig, best)):
    if o != b:
        # A zero original has no meaningful percentage; report it as infinite.
        pct = (b - o) / o * 100 if o != 0 else float('inf')
        print(f" const[{i:3d}]: {o:>25d} → {b:>25d} (Δ={b - o:+d}, {pct:+.2f}%)")
        changed += 1
if len(orig) != len(best):
    added = len(best) - len(orig)
    if added > 0:
        for i, val in enumerate(best[len(orig):]):
            print(f" const[{len(orig) + i:3d}]: {'(new)':>25s} → {val:>25d}")
    else:
        print(f" ({-added} constant(s) removed from end)")
print()
if changed == 0 and len(orig) == len(best):
    print("No constant changes — evolution applied structural mutations only.")
else:
    total = min(len(orig), len(best))
    print(f"Summary: {changed} of {total} constant(s) changed.")
PYEOF
log "Diff written to $DIFF_OUTPUT"
log ""
# The diff body goes to stderr only; it is not appended to the run log.
cat "$DIFF_OUTPUT" >&2
log "========================================================"
log "Evolution complete."
log " Generations run: $GENERATIONS"
log " Best fitness: $GLOBAL_BEST_FITNESS"
log " Best from gen: $GLOBAL_BEST_GEN"
log " Output directory: $OUTPUT_DIR"
log "========================================================"
# =============================================================================
# Step 5 — Seed pool admission
#
# Scan all generation JSONL files for candidates that scored above the
# admission threshold (6e21). Deduplicate by Push3 content hash against the
# existing pool. Admit qualifying candidates into seeds/ and rewrite
# manifest.jsonl, keeping at most the top-100 by fitness.
# =============================================================================
log ""
log "=== Seed pool admission (run=$RUN_ID, threshold=$ADMISSION_THRESHOLD) ==="
mkdir -p "$SEEDS_DIR"
# Admission is best-effort: the Python step's combined stdout/stderr is captured,
# replayed through log(), and a non-zero exit only produces a warning.
_ADMISSION_OUT="$WORK_DIR/admission_output.txt"
_ADMISSION_RC=0
python3 - "$OUTPUT_DIR" "$WORK_DIR" "$SEEDS_DIR" \
  "$ADMISSION_THRESHOLD" "$RUN_ID" "$(date -u '+%Y-%m-%d')" \
  > "$_ADMISSION_OUT" 2>&1 <<'PYEOF' || _ADMISSION_RC=$?
import json, sys, os, hashlib, shutil, tempfile

output_dir, work_dir, seeds_dir = sys.argv[1], sys.argv[2], sys.argv[3]
threshold = int(sys.argv[4])
run_id = sys.argv[5]
today = sys.argv[6]
MAX_EVOLVED = 100  # cap applies to evolved entries only; hand-written are always pinned
manifest_path = os.path.join(seeds_dir, 'manifest.jsonl')


def file_hash(path):
    """SHA-256 hex digest of a file's bytes (used for content deduplication)."""
    with open(path, 'rb') as fh:
        return hashlib.sha256(fh.read()).hexdigest()


def pool_file(entry):
    """Path of the entry's seed file, or None if it names no regular file.

    An empty/missing 'file' would otherwise join to seeds_dir itself, which
    exists but can be neither hashed nor removed as a file.
    """
    name = entry.get('file')
    if not isinstance(name, str) or not name:
        return None
    fpath = os.path.join(seeds_dir, name)
    return fpath if os.path.isfile(fpath) else None


# ── 1. Read existing manifest ─────────────────────────────────────────────────
existing = []
if os.path.exists(manifest_path):
    with open(manifest_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(rec, dict):
                existing.append(rec)
            else:
                print(f'WARNING: ignoring non-object manifest line: {line[:60]!r}')

# ── 2. Hash existing pool files for deduplication ────────────────────────────
existing_hashes = set()
for entry in existing:
    fpath = pool_file(entry)
    if fpath is not None:
        existing_hashes.add(file_hash(fpath))

# ── 3. Collect qualifying candidates from generation JSONL files ──────────────
qualifying = []  # (fitness, push3_path, gen_idx, cand_str)
for fname in sorted(os.listdir(output_dir)):
    if not (fname.startswith('generation_') and fname.endswith('.jsonl')):
        continue
    try:
        gen_idx = int(fname[len('generation_'):-len('.jsonl')])  # validate integer suffix
    except ValueError:
        continue
    with open(os.path.join(output_dir, fname)) as f:
        for line in f:
            try:
                d = json.loads(line)
                cid = d.get('candidate_id', '')
                fitness = int(d.get('fitness', 0))
                if fitness < threshold:
                    continue
                # Canonical CID format is "candidate_XXX" (zero-padded numeric suffix,
                # e.g. "candidate_001"); gen_idx is derived from the enclosing filename.
                # Older runs used "gen{N}_c{MMM}" — see manifest.jsonl schema (#720).
                if not cid.startswith('candidate_'):
                    print(f'WARNING: skipping unrecognised CID format {cid!r} in {fname}')
                    continue
                cand_str = cid[len('candidate_'):]  # numeric suffix, e.g. "001"
                push3_path = os.path.join(
                    work_dir, f'gen_{gen_idx}',
                    f'candidate_{int(cand_str):03d}.push3'
                )
                if os.path.exists(push3_path):
                    qualifying.append((fitness, push3_path, gen_idx, cand_str))
            except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
                pass
qualifying.sort(key=lambda x: x[0], reverse=True)

# ── 4. Deduplicate and assign filenames (resolve --run-id reuse collisions) ───
new_items = []  # (fitness, push3_path, manifest_entry)
seen = set(existing_hashes)
for fitness, push3_path, gen_idx, cand_str in qualifying:
    h = file_hash(push3_path)
    if h in seen:
        continue
    seen.add(h)
    # Canonical name: run{run_id}_gen{gen_idx:03d}_c{cand_str}.push3
    # If a different file already occupies that name (same run-id reused), add
    # a counter suffix (_r2, _r3, …) until we find an unused or same-content slot.
    base = f'run{run_id}_gen{gen_idx:03d}_c{cand_str}'
    filename = f'{base}.push3'
    dest = os.path.join(seeds_dir, filename)
    if os.path.exists(dest) and file_hash(dest) != h:
        counter = 2
        while True:
            filename = f'{base}_r{counter}.push3'
            dest = os.path.join(seeds_dir, filename)
            if not os.path.exists(dest) or file_hash(dest) == h:
                break
            counter += 1
    entry = {
        'file': filename,
        'fitness': fitness,
        'origin': 'evolved',
        'run': run_id,
        'generation': gen_idx,
        'date': today,
    }
    new_items.append((fitness, push3_path, entry))

if not new_items:
    print(f'No new qualifying candidates from run {run_id} '
          f'(threshold={threshold}, scanned {len(qualifying)} above-threshold hits)')
    sys.exit(0)

# ── 5. Separate pinned (hand-written) from evolved; top-100 cap on evolved only
#
# NOTE: raw fitness values are only comparable within the same evaluation run.
# Entries whose fitness_flags contain any flag in ZERO_RATED_FLAGS are ranked
# as fitness=0 so that inflated scores do not bias pool admission or eviction.
#
# ZERO_RATED_FLAGS: canonical set of flag strings that force effective_fitness=0.
# Add new inflation/distortion flags here; no other code change is required.
ZERO_RATED_FLAGS = {
    'token_value_inflation',
}


def effective_fitness(entry):
    """Ranking fitness of a manifest entry: 0 if zero-rated, else its 'fitness' (null -> 0)."""
    flags = entry.get('fitness_flags') or ''
    if any(flag in flags for flag in ZERO_RATED_FLAGS):
        return 0
    return int(entry.get('fitness') or 0)


# Triples are (ranking fitness, manifest entry, source path); the source path
# is None for entries already in the pool and set for this run's newcomers.
pinned = [(effective_fitness(e), e, None) for e in existing
          if e.get('origin') != 'evolved']
evolved = [(effective_fitness(e), e, None) for e in existing
           if e.get('origin') == 'evolved']
for fitness, push3_path, entry in new_items:
    evolved.append((fitness, entry, push3_path))
evolved.sort(key=lambda x: x[0], reverse=True)
admitted_evolved = evolved[:MAX_EVOLVED]
evicted_evolved = evolved[MAX_EVOLVED:]

# ── 6. Copy admitted new files; remove evicted evolved files ─────────────────
admitted_count = 0
for _, entry, src_path in admitted_evolved:
    if src_path is not None:  # new candidate
        dest = os.path.join(seeds_dir, entry['file'])
        shutil.copy2(src_path, dest)
        print(f' admitted: {entry["file"]} fitness={entry["fitness"]}')
        admitted_count += 1
for _, entry, src_path in evicted_evolved:
    if src_path is not None:  # rejected before being copied
        print(f' rejected (below pool floor): {entry["file"]} fitness={entry["fitness"]}')
    else:  # existing evolved entry pushed out
        fpath = pool_file(entry)
        if fpath is not None:
            os.remove(fpath)
        print(f' evicted from pool: {entry.get("file")} fitness={entry.get("fitness")}')

# Warn if any pinned (hand-written) entry ranks below the current pool floor
if evicted_evolved and pinned:
    pool_floor = evicted_evolved[0][0]
    for fit, entry, _ in pinned:
        if fit <= pool_floor:
            print(f' WARNING: pinned seed "{entry.get("file")}" (fitness={fit}) '
                  f'ranks below evolved pool floor ({pool_floor}) — kept in manifest regardless')

# ── 7. Rewrite manifest.jsonl atomically via temp-file + rename ──────────────
admitted = admitted_evolved + pinned
admitted.sort(key=lambda x: x[0], reverse=True)
manifest_dir = os.path.dirname(manifest_path)
with tempfile.NamedTemporaryFile('w', dir=manifest_dir, delete=False, suffix='.tmp') as tmp:
    tmp_path = tmp.name
    for _, entry, _ in admitted:
        tmp.write(json.dumps(entry) + '\n')
os.replace(tmp_path, manifest_path)
print(f'Pool updated: {len(admitted)} entries total '
      f'({len(admitted_evolved)} evolved + {len(pinned)} pinned), '
      f'+{admitted_count} from run {run_id}')
PYEOF
while IFS= read -r _line; do log " $_line"; done < "$_ADMISSION_OUT"
if [ "$_ADMISSION_RC" -ne 0 ]; then
  log " WARNING: seed pool admission failed (exit $_ADMISSION_RC) — pool unchanged"
fi