diff --git a/tools/push3-evolution/evolve.sh b/tools/push3-evolution/evolve.sh
new file mode 100755
index 0000000..4851178
--- /dev/null
+++ b/tools/push3-evolution/evolve.sh
@@ -0,0 +1,448 @@
+#!/usr/bin/env bash
+# =============================================================================
+# evolve.sh — Push3 evolution orchestrator
+#
+# Outer evolutionary loop: generate candidates → score → select → repeat.
+#
+# Usage:
+#   ./tools/push3-evolution/evolve.sh \
+#     --seed optimizer_v3.push3 \
+#     --population 10 \
+#     --generations 5 \
+#     --mutation-rate 2 \
+#     --output evolved/
+#
+# Algorithm:
+#   1. Initialize population: N copies of seed, each with M random mutations.
+#   2. For each generation:
+#      a. Score all candidates via fitness.sh
+#      b. Log generation stats (min/max/mean fitness, best candidate)
+#      c. Select k survivors via tournament selection (k = population/2)
+#      d. Generate next population: mutate survivors + crossover pairs
+#   3. Output best candidate as Push3 file.
+#   4. Show diff: original vs evolved (which constants changed, by how much).
+#
+# Output:
+#   <output>/
+#     generation_0.jsonl   {candidate_id, fitness, mutations_applied}
+#     generation_1.jsonl
+#     ...
+#     best.push3           highest-fitness program
+#     diff.txt             parameter changes vs original
+#     evolution.log        full run log
+#
+# Environment:
+#   ANVIL_FORK_URL  Passed through to fitness.sh when Anvil is not running.
+#
+# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
+# =============================================================================
+
+# Abort on command failure, on use of an unset variable, and on failure of any
+# stage of a pipeline.
+set -euo pipefail
+
+# Directory containing this script; the companion tools are looked up beside it.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+FITNESS_SH="$SCRIPT_DIR/fitness.sh"
+MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
+
+# =============================================================================
+# Argument parsing
+# =============================================================================
+
+SEED=""
+POPULATION=10
+GENERATIONS=5
+MUTATION_RATE=2
+OUTPUT_DIR=""
+
+# Every option takes exactly one value. The value's presence is checked before
+# "$2" is read: under `set -u` a trailing `--seed` would otherwise abort with an
+# "unbound variable" error instead of the usage exit code 2.
+while [[ $# -gt 0 ]]; do
+  _opt="$1"
+  case "$_opt" in
+    --seed|--population|--generations|--mutation-rate|--output)
+      if [[ $# -lt 2 ]]; then
+        echo "Error: $_opt requires a value" >&2
+        exit 2
+      fi
+      ;;
+    *) echo "Unknown option: $_opt" >&2; exit 2 ;;
+  esac
+  case "$_opt" in
+    --seed)          SEED="$2" ;;
+    --population)    POPULATION="$2" ;;
+    --generations)   GENERATIONS="$2" ;;
+    --mutation-rate) MUTATION_RATE="$2" ;;
+    --output)        OUTPUT_DIR="$2" ;;
+  esac
+  shift 2
+done
+
+if [ -z "$SEED" ]; then echo "Error: --seed required" >&2; exit 2; fi
+if [ -z "$OUTPUT_DIR" ]; then echo "Error: --output required" >&2; exit 2; fi
+if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi
+
+# Validate numeric args: each must be a positive decimal integer.
+# Leading zeros are stripped and the result is stored back into the variable,
+# because bash arithmetic such as $((POPULATION - 1)) treats a leading 0 as an
+# octal prefix and rejects values like "08".
+for _name_var in "population:POPULATION" "generations:GENERATIONS" "mutation-rate:MUTATION_RATE"; do
+  _name="${_name_var%%:*}"
+  _var="${_name_var##*:}"
+  _val="${!_var}"
+  _digits=""
+  if [[ "$_val" =~ ^[0-9]+$ ]]; then
+    # Remove the run of leading zeros; the result is empty when the value is 0.
+    _digits="${_val#"${_val%%[!0]*}"}"
+  fi
+  if [ -z "$_digits" ]; then
+    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
+    exit 2
+  fi
+  printf -v "$_var" '%s' "$_digits"
+done
+
+# Canonicalize paths (the mutation CLI is later run from SCRIPT_DIR, so relative
+# paths given on the command line would no longer resolve).
+SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"
+mkdir -p "$OUTPUT_DIR"
+OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"
+LOG="$OUTPUT_DIR/evolution.log"
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+# Write a message, prefixed with "[evolve]", to stderr and append it to $LOG.
+log() {
+  local msg="[evolve] $*"
+  echo "$msg" >&2
+  echo "$msg" >> "$LOG"
+}
+
+# Log an error message and terminate the script with exit code 2.
+fail() {
+  log "ERROR: $*"
+  exit 2
+}
+
+# Locate a tsx runner (TypeScript executor for mutate-cli.ts).
+# Tries: tsx in PATH → local node_modules → npx tsx.
+find_tsx_cmd() {
+  if command -v tsx &>/dev/null; then
+    echo "tsx"
+  elif [ -x "$SCRIPT_DIR/node_modules/.bin/tsx" ]; then
+    echo "$SCRIPT_DIR/node_modules/.bin/tsx"
+  elif command -v npx &>/dev/null; then
+    echo "npx tsx"
+  else
+    return 1
+  fi
+}
+
+# Run the mutate-cli.ts with the given arguments.
+# All mutation operations run from SCRIPT_DIR so relative TS imports resolve.
+#
+# TSX_CMD is either the two-word command "npx tsx" or a single executable path.
+# Only the former is split into words; a path is passed as one word so that a
+# SCRIPT_DIR containing spaces does not break the local node_modules runner.
+run_mutate_cli() {
+  local -a runner
+  if [ "$TSX_CMD" = "npx tsx" ]; then
+    runner=(npx tsx)
+  else
+    runner=("$TSX_CMD")
+  fi
+  (cd "$SCRIPT_DIR" && "${runner[@]}" "$MUTATE_CLI" "$@")
+}
+
+# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
+#
+# Input:  whitespace-separated integers on stdin.
+# Output: "<min> <max> <mean>" on stdout; "0 0 0" when no integers are given.
+#
+# The program is passed with -c, not as a here-document on `python3 -`: a
+# here-document replaces the function's stdin, so Python would consume its own
+# source from stdin and sys.stdin.read() would always return an empty string.
+py_stats() {
+  python3 -c '
+import sys
+nums = [int(x) for x in sys.stdin.read().split()]
+if not nums:
+    print("0 0 0")
+    sys.exit(0)
+total, count = sum(nums), len(nums)
+# Mean rounded half-up using integers only; float division loses precision
+# for values above 2**53.
+print(min(nums), max(nums), (2 * total + count) // (2 * count))
+'
+}
+
+# Tournament selection: given a scores file (one "idx score filepath" per line,
+# tab-separated), run k tournaments of size 2 and return winner filepaths
+# (one per line). Both contestants are drawn with replacement; on a tie the
+# first draw wins. Exits 1 when the scores file holds no usable entries.
+py_tournament() {
+  local k="$1"
+  local scores_file="$2"
+  python3 - "$k" "$scores_file" <<'PYEOF'
+import sys, random
+k = int(sys.argv[1])
+entries = []
+with open(sys.argv[2]) as f:
+    for line in f:
+        parts = line.rstrip('\n').split('\t')
+        if len(parts) >= 3:
+            entries.append((int(parts[0]), int(parts[1]), parts[2]))
+if not entries:
+    sys.exit(1)
+for _ in range(k):
+    a = random.choice(entries)
+    b = random.choice(entries)
+    winner = a if a[1] >= b[1] else b
+    print(winner[2])
+PYEOF
+}
+
+# =============================================================================
+# Tool checks
+# =============================================================================
+
+for _tool in python3 node; do
+  command -v "$_tool" &>/dev/null || fail "$_tool not found in PATH"
+done
+
+[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
+[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
+[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"
+
+TSX_CMD="$(find_tsx_cmd)" || fail \
+  "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
+
+# =============================================================================
+# Work directory — holds all candidate .push3 files across generations
+# =============================================================================
+
+# The directory is removed on every exit path, so anything worth keeping must be
+# copied into OUTPUT_DIR before the script ends.
+WORK_DIR="$(mktemp -d)"
+cleanup() { rm -rf "$WORK_DIR"; }
+trap cleanup EXIT
+
+# =============================================================================
+# Log run header
+# =============================================================================
+
+log "========================================================"
+log "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
+log " Seed: $SEED"
+log " Population: $POPULATION"
+log " Generations: $GENERATIONS"
+log " Mutation rate: $MUTATION_RATE"
+log " Output: $OUTPUT_DIR"
+log " TSX: $TSX_CMD"
+log "========================================================"
+
+# =============================================================================
+# Step 1 — Initialize generation 0
+#
+# N copies of the seed, each independently mutated MUTATION_RATE times.
+# =============================================================================
+
+log ""
+log "=== Initializing population ==="
+
+GEN_DIR="$WORK_DIR/gen_0"
+mkdir -p "$GEN_DIR"
+
+# Counting loops use the C-style form instead of $(seq ...): BSD seq counts
+# DOWN when first > last (`seq 1 0` prints "1" and "0"), which would execute a
+# loop body that has to be skipped.
+for ((i = 0; i < POPULATION; i++)); do
+  CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
+  MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
+    || fail "Failed to mutate seed for initial candidate $i"
+  printf '%s\n' "$MUTATED" > "$CAND_FILE"
+  # Sidecar file: number of mutation operations applied to this candidate.
+  printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
+done
+
+log "Initialized ${POPULATION} candidates in gen_0"
+
+# =============================================================================
+# Step 2 — Evolution loop
+# =============================================================================
+
+# -1 marks "no candidate scored yet"; real scores are non-negative integers.
+GLOBAL_BEST_FITNESS=-1
+GLOBAL_BEST_GEN=-1
+GLOBAL_BEST_CAND=""
+
+CURRENT_GEN_DIR="$GEN_DIR"
+
+for ((gen = 0; gen < GENERATIONS; gen++)); do
+
+  log ""
+  log "=== Generation $((gen + 1)) / $GENERATIONS ==="
+
+  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
+  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"
+
+  # Start both files empty: records are appended below, and OUTPUT_DIR may still
+  # hold generation_N.jsonl files from an earlier run into the same directory.
+  : > "$JSONL_FILE"
+  : > "$SCORES_FILE"
+
+  # --- a. Score all candidates ---
+
+  SCORE_VALUES=""
+  CAND_COUNT=0
+
+  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
+    # An unmatched glob stays literal; skip it.
+    [ -f "$CAND_FILE" ] || continue
+
+    CAND_IDX="${CAND_FILE##*candidate_}"
+    CAND_IDX="${CAND_IDX%.push3}"
+    CID="gen${gen}_c${CAND_IDX}"
+
+    # Read mutations_applied from sidecar; default 0 if missing.
+    OPS_FILE="${CAND_FILE%.push3}.ops"
+    MUTATIONS_APPLIED=0
+    [ -f "$OPS_FILE" ] && MUTATIONS_APPLIED=$(cat "$OPS_FILE")
+
+    SCORE=0
+    FITNESS_EC=0
+    SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
+
+    # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
+    if [ "$FITNESS_EC" -eq 2 ]; then
+      fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
+    fi
+
+    # Validate that score is a non-negative integer; treat any other output as invalid.
+    if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
+      log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
+      SCORE=0
+    else
+      log " $CID: fitness=$SCORE"
+    fi
+
+    # Append to JSONL — use the actual operations recorded for this candidate.
+    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
+      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"
+
+    # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
+    printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"
+
+    SCORE_VALUES="$SCORE_VALUES $SCORE"
+    CAND_COUNT=$((CAND_COUNT + 1))
+  done
+
+  if [ "$CAND_COUNT" -eq 0 ]; then
+    fail "No candidates found in $CURRENT_GEN_DIR"
+  fi
+
+  # --- b. Log generation stats ---
+
+  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats) \
+    || fail "Could not compute stats for generation $gen"
+  log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"
+
+  # Find the best candidate of this generation and decide whether it beats the
+  # global best. Both steps run in Python on the recorded scores: fitness values
+  # can exceed the 64-bit range of bash's `[ -gt ]`, and the decision must not
+  # depend on the logged stats above.
+  # Output: "<improved 0|1>\t<best score>\t<best filepath>".
+  BEST_RECORD=$(python3 - "$SCORES_FILE" "$GLOBAL_BEST_FITNESS" <<'PYEOF'
+import sys
+entries = []
+with open(sys.argv[1]) as f:
+    for line in f:
+        parts = line.rstrip('\n').split('\t')
+        if len(parts) >= 3:
+            entries.append((int(parts[1]), parts[2]))
+if not entries:
+    sys.exit(1)
+score, path = max(entries, key=lambda x: x[0])
+improved = 1 if score > int(sys.argv[2]) else 0
+print(f"{improved}\t{score}\t{path}")
+PYEOF
+) || fail "Could not determine best candidate from $SCORES_FILE"
+  # The filepath is the last field, so it receives the remainder of the line.
+  IFS=$'\t' read -r IMPROVED BEST_SCORE_THIS_GEN BEST_FILE_THIS_GEN <<< "$BEST_RECORD"
+
+  if [ "$IMPROVED" = "1" ]; then
+    GLOBAL_BEST_FITNESS="$BEST_SCORE_THIS_GEN"
+    GLOBAL_BEST_GEN="$gen"
+    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
+    log " New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
+  fi
+
+  # Skip next-generation creation after the final generation
+  [ "$gen" -eq "$((GENERATIONS - 1))" ] && break
+
+  # --- c. Tournament selection (k = population / 2) ---
+
+  K=$((POPULATION / 2))
+  [ "$K" -lt 1 ] && K=1
+
+  SURVIVOR_FILES=()
+  while IFS= read -r WIN_FILE; do
+    SURVIVOR_FILES+=("$WIN_FILE")
+  done < <(py_tournament "$K" "$SCORES_FILE")
+
+  # A failure inside the process substitution is not reported to this shell, so
+  # check the result explicitly; an empty list would also make the
+  # `RANDOM % count` index below divide by zero.
+  NUM_SURVIVORS=${#SURVIVOR_FILES[@]}
+  [ "$NUM_SURVIVORS" -gt 0 ] \
+    || fail "Tournament selection returned no survivors for generation $gen"
+
+  log " Selected ${NUM_SURVIVORS} survivors via tournament"
+
+  # --- d. Generate next population ---
+
+  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
+  mkdir -p "$NEXT_GEN_DIR"
+
+  NEXT_IDX=0
+  HALF=$((POPULATION / 2))
+
+  # First half: mutate random survivors
+  for ((_i = 0; _i < HALF; _i++)); do
+    SUR="${SURVIVOR_FILES[$((RANDOM % NUM_SURVIVORS))]}"
+    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
+    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
+      printf '%s\n' "$MUTATED" > "$DEST"
+      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
+    else
+      # Fallback: copy the survivor as-is to keep population size stable
+      cp "$SUR" "$DEST"
+      printf '0\n' > "${DEST%.push3}.ops"
+    fi
+    NEXT_IDX=$((NEXT_IDX + 1))
+  done
+
+  # Second half: crossover random survivor pairs
+  # (POPULATION - HALF, so an odd population keeps its full size).
+  REMAINING=$((POPULATION - HALF))
+  for ((_i = 0; _i < REMAINING; _i++)); do
+    SUR_A="${SURVIVOR_FILES[$((RANDOM % NUM_SURVIVORS))]}"
+    SUR_B="${SURVIVOR_FILES[$((RANDOM % NUM_SURVIVORS))]}"
+    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
+    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
+      printf '%s\n' "$CROSSED" > "$DEST"
+      printf '0\n' > "${DEST%.push3}.ops"
+    else
+      # Fallback: mutate one survivor
+      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
+        printf '%s\n' "$MUTATED" > "$DEST"
+        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
+      else
+        cp "$SUR_A" "$DEST"
+        printf '0\n' > "${DEST%.push3}.ops"
+      fi
+    fi
+    NEXT_IDX=$((NEXT_IDX + 1))
+  done
+
+  log " Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
+  CURRENT_GEN_DIR="$NEXT_GEN_DIR"
+
+done
+
+# =============================================================================
+# Step 3 — Output best candidate
+# =============================================================================
+
+if [ -z "$GLOBAL_BEST_CAND" ] || [ ! -f "$GLOBAL_BEST_CAND" ]; then
+  fail "No valid best candidate recorded — evolution produced no scorable output"
+fi
+
+# Copy the winner out of the temporary work directory, which is deleted on exit.
+BEST_OUTPUT="$OUTPUT_DIR/best.push3"
+cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"
+log ""
+log "Best candidate → $BEST_OUTPUT"
+log " Fitness: $GLOBAL_BEST_FITNESS (generation $GLOBAL_BEST_GEN)"
+
+# =============================================================================
+# Step 4 — Diff: original vs evolved constants
+# =============================================================================
+
+DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"
+
+# Constants are compared by position: the i-th large integer literal of the seed
+# against the i-th of the best candidate.
+python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
+import sys, re
+
+def extract_ints(path):
+    """Extract all large integer literals (≥6 digits) from a Push3 file."""
+    # Explicit encoding and a context manager: the file is closed promptly and
+    # reading does not depend on the locale's default encoding.
+    with open(path, encoding='utf-8') as f:
+        text = f.read()
+    text = re.sub(r';;[^\n]*', '', text)  # strip comments
+    return [int(m) for m in re.findall(r'\b(\d{6,})\b', text)]
+
+seed_path, best_path = sys.argv[1], sys.argv[2]
+orig = extract_ints(seed_path)
+best = extract_ints(best_path)
+
+print("=== Push3 Evolution Diff ===")
+print(f"Seed: {seed_path}")
+print(f"Best: {best_path}")
+print()
+
+changed = 0
+for i, (o, b) in enumerate(zip(orig, best)):
+    if o != b:
+        # A change away from 0 has no finite relative size.
+        pct = (b - o) / o * 100 if o != 0 else float('inf')
+        print(f" const[{i:3d}]: {o:>25d} → {b:>25d} (Δ={b - o:+d}, {pct:+.2f}%)")
+        changed += 1
+
+if len(orig) != len(best):
+    added = len(best) - len(orig)
+    if added > 0:
+        for i, val in enumerate(best[len(orig):]):
+            print(f" const[{len(orig) + i:3d}]: {'(new)':>25s} → {val:>25d}")
+    else:
+        print(f" ({-added} constant(s) removed from end)")
+
+print()
+if changed == 0 and len(orig) == len(best):
+    print("No constant changes — evolution applied structural mutations only.")
+else:
+    total = min(len(orig), len(best))
+    print(f"Summary: {changed} of {total} constant(s) changed.")
+PYEOF
+
+log "Diff written to $DIFF_OUTPUT"
+log ""
+cat "$DIFF_OUTPUT" >&2
+
+log "========================================================"
+log "Evolution complete."
+log " Generations run: $GENERATIONS"
+log " Best fitness: $GLOBAL_BEST_FITNESS"
+log " Best from gen: $GLOBAL_BEST_GEN"
+log " Output directory: $OUTPUT_DIR"
+log "========================================================"
diff --git a/tools/push3-evolution/mutate-cli.ts b/tools/push3-evolution/mutate-cli.ts
new file mode 100644
index 0000000..d743d7c
--- /dev/null
+++ b/tools/push3-evolution/mutate-cli.ts
@@ -0,0 +1,57 @@
+/**
+ * mutate-cli.ts — CLI wrapper for Push3 mutation operators.
+ *
+ * Commands:
+ *   mutate <file> <rate>        Apply `rate` random mutations.
+ *   crossover <file1> <file2>   Single-point crossover of two programs.
+ *
+ * Outputs the resulting Push3 program text to stdout.
+ * Exits 1 on invalid input or mutation failure.
+ */
+
+import { readFileSync } from 'fs';
+import { parse } from '../push3-transpiler/src/parser';
+import { mutate, crossover, serialize } from './mutate';
+
+/** Read the file at `filePath` as UTF-8 text and parse it with the Push3 parser. */
+function loadProgram(filePath: string) {
+  const src = readFileSync(filePath, 'utf8');
+  return parse(src);
+}
+
+// Skip the first two argv entries (runtime executable and script path).
+const [, , cmd, ...args] = process.argv;
+
+switch (cmd) {
+  case 'mutate': {
+    const [file, rateStr] = args;
+    if (!file || !rateStr) {
+      process.stderr.write('Usage: mutate-cli mutate <file> <rate>\n');
+      process.exit(1);
+    }
+    // Digits only: parseInt() would silently accept trailing junk ("2abc" → 2).
+    const rate = /^\d+$/.test(rateStr) ? Number(rateStr) : NaN;
+    if (!Number.isSafeInteger(rate)) {
+      process.stderr.write(`Invalid rate: ${rateStr}\n`);
+      process.exit(1);
+    }
+    const mutated = mutate(loadProgram(file), rate);
+    process.stdout.write(serialize(mutated) + '\n');
+    break;
+  }
+
+  case 'crossover': {
+    const [file1, file2] = args;
+    if (!file1 || !file2) {
+      process.stderr.write('Usage: mutate-cli crossover <file1> <file2>\n');
+      process.exit(1);
+    }
+    const crossed = crossover(loadProgram(file1), loadProgram(file2));
+    process.stdout.write(serialize(crossed) + '\n');
+    break;
+  }
+
+  default:
+    process.stderr.write(`Unknown command: ${cmd}\nCommands: mutate, crossover\n`);
+    process.exit(1);
+}
diff --git a/tools/push3-evolution/package-lock.json b/tools/push3-evolution/package-lock.json
index 3ab9f8d..d49b3f1 100644
--- a/tools/push3-evolution/package-lock.json
+++ b/tools/push3-evolution/package-lock.json
@@ -1434,6 +1434,7 @@
       "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "esbuild": "^0.27.0",
         "fdir": "^6.5.0",