harb/tools/push3-evolution/evolve.sh

#!/usr/bin/env bash
# =============================================================================
# evolve.sh — Push3 evolution orchestrator
#
# Outer evolutionary loop: generate candidates → score → select → repeat.
#
# Usage:
#   ./tools/push3-evolution/evolve.sh \
#     --seed optimizer_v3.push3 \
#     --population 10 \
#     --generations 5 \
#     --mutation-rate 2 \
#     --elites 2 \
#     [--output evolved/] \
#     [--diverse-seeds] \
#     [--run-id <N>]
#
# --diverse-seeds  Initialise gen_0 with diverse candidates.  When the
#                  persistent seeds pool (tools/push3-evolution/seeds/) is
#                  non-empty, a random sample from the pool is used (crossover
#                  between hand-written and evolved programs).  When the pool is
#                  empty, falls back to the parametric seed-gen-cli variants.
#                  Any shortfall (pool or variants < --population) is filled by
#                  mutating the main seed.
#
# --run-id <N>     Integer identifier for this run, used to name candidates
#                  admitted to the seeds pool (e.g. run003_gen2_c005.push3).
#                  Auto-incremented from the highest existing run in the pool
#                  manifest when omitted.
#
# Algorithm:
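#   1. Initialize population: N copies of seed (N = --population), each with
#      M random mutations (M = --mutation-rate).  With --diverse-seeds the
#      population is initialised as described above instead.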
#   2. For each generation:
#      a. Score all candidates via fitness.sh
#      b. Log generation stats (min/max/mean fitness, best candidate)
#      c. Select k survivors via tournament selection (k = population/2)
#      d. Elitism: copy the top E candidates (E = --elites) unchanged into next generation
#      e. Generate next population: mutate survivors + crossover pairs
#   3. Output best candidate as Push3 file.
#   4. Show diff: original vs evolved (which constants changed, by how much).
#
# Output:
#   <output>/run_NNN/        NNN auto-increments from the highest existing run dir
#     generation_0.jsonl   {candidate_id, fitness, mutations_applied}
#     generation_1.jsonl
#     ...
#     best.push3           highest-fitness program
#     diff.txt             parameter changes vs original
#     evolution.log        full run log
#
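# Environment:
#   EVAL_MODE       Fitness backend: "revm" (default) or "anvil".
#   BASE_RPC_URL    Required when EVAL_MODE=revm.
#   ANVIL_FORK_URL  Passed through to fitness.sh when Anvil is not running.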
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================

# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -e
set -u
set -o pipefail

# Put the Foundry tools (forge, cast, anvil) at the front of the search path.
PATH="${HOME}/.foundry/bin:${PATH}"
export PATH

# Absolute directory of this script; the helper tools below are resolved
# relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SEED_GEN_CLI="${SCRIPT_DIR}/seed-gen-cli.ts"
MUTATE_CLI="${SCRIPT_DIR}/mutate-cli.ts"
BATCH_EVAL_SH="${SCRIPT_DIR}/revm-evaluator/batch-eval.sh"
FITNESS_SH="${SCRIPT_DIR}/fitness.sh"

# EVAL_MODE selects the fitness backend:
#   revm   (default) — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
#                      Requires BASE_RPC_URL env var.  10-100× faster at scale.
#   anvil            — per-candidate Anvil+forge-script pipeline (fitness.sh)
# An unset or empty value falls back to "revm".
: "${EVAL_MODE:=revm}"

# =============================================================================
# Argument parsing
# =============================================================================

# Defaults; each may be overridden by the matching command-line option.
SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
ELITES=2
OUTPUT_DIR="evolved"
DIVERSE_SEEDS=false
RUN_ID=""

# Parse command-line options into the globals above.
# Arguments: the script's argument vector ("$@").
# Exits 2 (usage error) on an unknown option or when a value-taking option is
# the last argument.  Without the explicit arity check, the missing value
# would surface as an "unbound variable" abort under `set -u` rather than a
# usage message.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that consume a value must be followed by one.
    case "$1" in
      --seed | --population | --generations | --mutation-rate | --elites | --output | --run-id)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 2
        fi
        ;;
    esac

    case "$1" in
      --seed)          SEED="$2";          shift 2 ;;
      --population)    POPULATION="$2";    shift 2 ;;
      --generations)   GENERATIONS="$2";   shift 2 ;;
      --mutation-rate) MUTATION_RATE="$2"; shift 2 ;;
      --elites)        ELITES="$2";        shift 2 ;;
      --output)        OUTPUT_DIR="$2";    shift 2 ;;
      --diverse-seeds) DIVERSE_SEEDS=true; shift   ;;
      --run-id)        RUN_ID="$2";        shift 2 ;;
      *) echo "Unknown option: $1" >&2; exit 2 ;;
    esac
  done
}

parse_args "$@"

# The seed program is mandatory and must exist.
if [ -z "$SEED" ];   then echo "Error: --seed required" >&2;              exit 2; fi
if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi

# Validate numeric args: population, generations and mutation-rate must be
# positive integers.  The value is taken as everything after the FIRST colon so
# that an input such as "a:5" is checked in full (and rejected) rather than
# only its trailing "5".  Positivity is checked by the regex itself, which
# avoids a numeric `[ -lt ]` comparison that errors on out-of-range values.
for _name_val in "population:$POPULATION" "generations:$GENERATIONS" "mutation-rate:$MUTATION_RATE"; do
  _name="${_name_val%%:*}"
  _val="${_name_val#*:}"
  if ! [[ "$_val" =~ ^0*[1-9][0-9]*$ ]]; then
    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
    exit 2
  fi
done

# Elites may be zero (elitism disabled).
if ! [[ "$ELITES" =~ ^[0-9]+$ ]]; then
  echo "Error: --elites must be a non-negative integer (got: $ELITES)" >&2
  exit 2
fi

# Normalise to plain base-10.  A leading zero would otherwise make later
# $(( )) arithmetic read the value as octal ("010" -> 8) or fail outright
# ("08" is not a valid octal number).
POPULATION=$((10#$POPULATION))
GENERATIONS=$((10#$GENERATIONS))
MUTATION_RATE=$((10#$MUTATION_RATE))
ELITES=$((10#$ELITES))

# Make the seed path absolute so it stays valid after later directory changes.
SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"

# The user-supplied output dir becomes the base; each run gets its own
# run_NNN subdirectory beneath it.
mkdir -p "$OUTPUT_DIR"
BASE_DIR="$(cd "$OUTPUT_DIR" && pwd)"

# Next run number = 1 + the largest NNN among existing run_NNN directories
# directly under BASE_DIR (1 when there are none), zero-padded to 3 digits.
RUN_NUM=$(python3 - "$BASE_DIR" <<'PYEOF'
import os, re, sys

root = sys.argv[1]
run_dir = re.compile(r'run_(\d+)')
seen = [0]
if os.path.isdir(root):
    for entry in os.listdir(root):
        hit = run_dir.fullmatch(entry)
        if hit and os.path.isdir(os.path.join(root, entry)):
            seen.append(int(hit.group(1)))
print("%03d" % (max(seen) + 1))
PYEOF
)

OUTPUT_DIR="${BASE_DIR}/run_${RUN_NUM}"
mkdir -p "$OUTPUT_DIR"
LOG="${OUTPUT_DIR}/evolution.log"

# Seeds pool — persistent candidate pool across all runs
SEEDS_DIR="$SCRIPT_DIR/seeds"
POOL_MANIFEST="$SEEDS_DIR/manifest.jsonl"
ADMISSION_THRESHOLD=6000000000000000000000  # 6e21 wei

# Zero-pad a decimal run id to at least three digits.
# Arguments: $1 - string of decimal digits (leading zeros allowed).
# Outputs:   the padded id on stdout, without a trailing newline.
# The 10# prefix forces base-10: printf '%03d' on its own treats a leading
# zero as octal ("010" -> 008) and rejects "08"/"09".
_format_run_id() {
  printf '%03d' "$((10#$1))"
}

# Validate/auto-compute RUN_ID
if [ -n "$RUN_ID" ]; then
  # Positive integer, leading zeros tolerated.
  if ! [[ "$RUN_ID" =~ ^0*[1-9][0-9]*$ ]]; then
    echo "Error: --run-id must be a positive integer (got: $RUN_ID)" >&2
    exit 2
  fi
  RUN_ID=$(_format_run_id "$RUN_ID")
else
  # Auto-increment: find the highest run ID in the manifest and add 1
  if [ -f "$POOL_MANIFEST" ]; then
    # If the Python helper itself fails, fall back to "001".
    RUN_ID=$(python3 - "$POOL_MANIFEST" <<'PYEOF'
import json, sys
max_run = 0
with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
            r = d.get("run")
            if r is not None:
                max_run = max(max_run, int(r))
        except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
            # Malformed line, non-object JSON (no .get), or non-numeric run:
            # skip the line instead of aborting and resetting the id to 001.
            pass
print(f"{max_run + 1:03d}")
PYEOF
) || RUN_ID="001"
  else
    RUN_ID="001"
  fi
fi

# =============================================================================
# Helpers
# =============================================================================

# Emit one "[evolve] ..." line to stderr and append the same line to $LOG.
# Arguments: the message words (joined with spaces).
log() {
  local line
  line="[evolve] $*"
  printf '%s\n' "$line" >&2
  printf '%s\n' "$line" >>"$LOG"
}

# Log an "ERROR: ..." line via log() and terminate the script with status 2.
# Arguments: the error message words.
fail() {
  local reason="$*"
  log "ERROR: ${reason}"
  exit 2
}

# Print the command used to run TypeScript files (e.g. mutate-cli.ts).
# Candidates are tried in order and the first hit wins:
#   1. tsx on PATH                        -> "tsx"
#   2. $SCRIPT_DIR/node_modules/.bin/tsx  -> that path
#   3. npx on PATH                        -> "npx tsx"
# Returns 1 and prints nothing when none is available.
find_tsx_cmd() {
  local bundled="$SCRIPT_DIR/node_modules/.bin/tsx"

  if command -v tsx &>/dev/null; then
    echo "tsx"
    return
  fi
  if [ -x "$bundled" ]; then
    echo "$bundled"
    return
  fi
  if command -v npx &>/dev/null; then
    echo "npx tsx"
    return
  fi
  return 1
}

# Invoke mutate-cli.ts through the TypeScript runner, forwarding all arguments.
# The call happens in a subshell whose working directory is SCRIPT_DIR so that
# relative TS imports resolve; the caller's working directory is untouched.
# $TSX_CMD is deliberately unquoted: it may hold two words ("npx tsx").
# Returns the runner's exit status, or 1 if SCRIPT_DIR cannot be entered.
run_mutate_cli() {
  (
    if cd "$SCRIPT_DIR"; then
      $TSX_CMD "$MUTATE_CLI" "$@"
    else
      exit 1
    fi
  )
}

# Invoke seed-gen-cli.ts through the TypeScript runner, forwarding all
# arguments.  Runs in a subshell with SCRIPT_DIR as the working directory.
# $TSX_CMD is deliberately unquoted: it may hold two words ("npx tsx").
# Returns the runner's exit status, or 1 if SCRIPT_DIR cannot be entered.
run_seed_gen_cli() {
  (
    if cd "$SCRIPT_DIR"; then
      $TSX_CMD "$SEED_GEN_CLI" "$@"
    else
      exit 1
    fi
  )
}

# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
# Input:  whitespace-separated integers on stdin.
# Output: "MIN MAX MEAN" on one line; "0 0 0" when stdin holds no numbers.
# The mean is computed with exact integer arithmetic and rounded half-to-even
# (the same rule as Python's round()).  Float division would silently lose
# precision once values exceed 2**53, which wei-scale scores do.
py_stats() {
  python3 -c '
import sys

nums = [int(tok) for tok in sys.stdin.read().split()]
if not nums:
    print(0, 0, 0)
else:
    total, count = sum(nums), len(nums)
    mean, rem = divmod(total, count)
    # Round half to even without leaving integer arithmetic.
    if 2 * rem > count or (2 * rem == count and mean % 2 == 1):
        mean += 1
    print(min(nums), max(nums), mean)
'
}

# Top-N selection.
# Arguments: $1 - how many candidates to return
#            $2 - scores file, one tab-delimited "idx<TAB>score<TAB>filepath"
#                 record per line (lines with fewer than 3 fields are ignored)
# Outputs:   filepaths of the highest-scoring candidates, best first, one per
#            line.  Equal scores keep their order of appearance in the file.
py_top_n() {
  local limit="$1"
  local table="$2"
  python3 - "$limit" "$table" <<'PYEOF'
import sys

limit = int(sys.argv[1])
scored = []
with open(sys.argv[2]) as handle:
    for raw in handle:
        fields = raw.rstrip('\n').split('\t')
        if len(fields) < 3:
            continue
        scored.append((int(fields[1]), fields[2]))
# sorted() is stable, so ties stay in file order.
ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
sys.stdout.write(''.join(path + '\n' for _score, path in ranked[:limit]))
PYEOF
}

# Tournament selection.
# Arguments: $1 - number of tournaments to run
#            $2 - scores file, one tab-delimited "idx<TAB>score<TAB>filepath"
#                 record per line (lines with fewer than 3 fields are ignored)
# Outputs:   one winner filepath per tournament, one per line.
# Each tournament draws two entries at random (with replacement, so the same
# entry may be drawn twice) and keeps the higher score; a tie goes to the first
# draw.  Exits 1 when the file contains no usable records.
py_tournament() {
  local rounds="$1"
  local table="$2"
  python3 - "$rounds" "$table" <<'PYEOF'
import random
import sys

rounds = int(sys.argv[1])
pool = []
with open(sys.argv[2]) as handle:
    for raw in handle:
        fields = raw.rstrip('\n').split('\t')
        if len(fields) < 3:
            continue
        pool.append((int(fields[0]), int(fields[1]), fields[2]))
if not pool:
    sys.exit(1)
for _round in range(rounds):
    first = random.choice(pool)
    second = random.choice(pool)
    if first[1] >= second[1]:
        print(first[2])
    else:
        print(second[2])
PYEOF
}

# =============================================================================
# Tool checks
# =============================================================================

# Interpreters needed regardless of evaluation mode.
for _tool in python3 node; do
  if ! command -v "$_tool" &>/dev/null; then
    fail "$_tool not found in PATH"
  fi
done

# Helper scripts that must exist; fitness.sh is made executable if it is not.
if [ ! -f "$FITNESS_SH" ]; then
  fail "fitness.sh not found at $FITNESS_SH"
fi
if [ ! -f "$MUTATE_CLI" ]; then
  fail "mutate-cli.ts not found at $MUTATE_CLI"
fi
if [ ! -x "$FITNESS_SH" ]; then
  chmod +x "$FITNESS_SH"
fi

# Backend-specific requirements; anything other than revm/anvil is rejected.
case "$EVAL_MODE" in
  revm)
    if [ ! -f "$BATCH_EVAL_SH" ]; then
      fail "batch-eval.sh not found at $BATCH_EVAL_SH"
    fi
    if [ ! -x "$BATCH_EVAL_SH" ]; then
      chmod +x "$BATCH_EVAL_SH"
    fi
    if [ -z "${BASE_RPC_URL:-}" ]; then
      fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
    fi
    if ! command -v forge &>/dev/null; then
      fail "forge not found in PATH (required for EVAL_MODE=revm)"
    fi
    ;;
  anvil)
    ;;
  *)
    fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
    ;;
esac

# Resolve the TypeScript runner once; later helpers expand $TSX_CMD.
if ! TSX_CMD="$(find_tsx_cmd)"; then
  fail "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
fi

# =============================================================================
# Work directory — holds all candidate .push3 files across generations
# =============================================================================

# Remove the scratch directory; installed below as the EXIT handler so it runs
# on every exit path of the script.
cleanup() {
  rm -rf "$WORK_DIR"
}

# Fresh temporary directory for this run's candidate files.
WORK_DIR=$(mktemp -d)
trap 'cleanup' EXIT

# =============================================================================
# Log run header
# =============================================================================

# Banner recording the effective configuration of this run; each element is
# emitted as one log line, in order.
_HEADER_LINES=(
  "========================================================"
  "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  "  Seed:           $SEED"
  "  Population:     $POPULATION"
  "  Generations:    $GENERATIONS"
  "  Mutation rate:  $MUTATION_RATE"
  "  Elites:         $ELITES"
  "  Diverse seeds:  $DIVERSE_SEEDS"
  "  Run ID:         $RUN_ID"
  "  Base dir:       $BASE_DIR"
  "  Output:         $OUTPUT_DIR"
  "  TSX:            $TSX_CMD"
  "  Eval mode:      $EVAL_MODE"
  "========================================================"
)
for _line in "${_HEADER_LINES[@]}"; do
  log "$_line"
done

# =============================================================================
# Step 1 — Initialize generation 0
#
# N copies of the seed, each independently mutated MUTATION_RATE times.
# =============================================================================

log ""
log "=== Initializing population ==="

# Generation 0 lives in $WORK_DIR/gen_0 as candidate_NNN.push3 files.  Each
# candidate has a sidecar candidate_NNN.ops holding the number of mutations
# applied to produce it (0 for files copied verbatim).
GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"

if [ "$DIVERSE_SEEDS" = "true" ]; then
  # --- Diverse-seeds mode: prefer persistent pool; fall back to seed-gen-cli ---
  VARIANT_IDX=0

  # Build a random sample list from the pool in one pass (also determines if
  # the pool has any usable entries, avoiding a second manifest parse).
  POOL_SAMPLE_LIST="$WORK_DIR/pool_sample.txt"
  POOL_COUNT=0
  if [ -f "$POOL_MANIFEST" ]; then
    python3 - "$POOL_MANIFEST" "$SEEDS_DIR" "$POPULATION" > "$POOL_SAMPLE_LIST" <<'PYEOF'
import json, sys, os, random
manifest_path, seeds_dir, n = sys.argv[1], sys.argv[2], int(sys.argv[3])
entries = []
with open(manifest_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Only JSON objects with a non-empty string "file" are usable.  An
        # empty name would join to the seeds directory itself, which exists
        # but cannot be copied as a candidate.
        name = d.get('file') if isinstance(d, dict) else None
        if not isinstance(name, str) or not name:
            continue
        fpath = os.path.join(seeds_dir, name)
        if os.path.isfile(fpath):
            entries.append(fpath)
random.shuffle(entries)
for path in entries[:n]:
    print(path)
PYEOF
    POOL_COUNT=$(wc -l < "$POOL_SAMPLE_LIST" 2>/dev/null || echo 0)
    # Some wc implementations pad the count with spaces; normalise it.
    POOL_COUNT=$((POOL_COUNT))
  fi

  if [ "$POOL_COUNT" -gt 0 ]; then
    # --- Pool mode: random sample from the seeds pool ---
    log "  diverse-seeds: sampling up to $POPULATION candidates from pool ($POOL_COUNT available)"

    while IFS= read -r POOL_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
      CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
      cp "$POOL_FILE" "$CAND_FILE"
      printf '0\n' > "${CAND_FILE%.push3}.ops"
      VARIANT_IDX=$((VARIANT_IDX + 1))
    done < "$POOL_SAMPLE_LIST"

    log "  diverse-seeds: seeded $VARIANT_IDX candidate(s) from pool"
  else
    # --- Fallback: parametric variants from seed-gen-cli (pool is empty) ---
    log "  diverse-seeds: pool empty, falling back to seed-gen-cli parametric variants"
    [ -f "$SEED_GEN_CLI" ] || fail "seed-gen-cli.ts not found at $SEED_GEN_CLI"
    SEED_VARIANTS_DIR="$WORK_DIR/seed_variants"
    SEED_VARIANTS_LIST="$WORK_DIR/seed_variants_list.txt"

    # Run seed-gen-cli as a direct command (not inside <(...)) so its exit code is
    # checked by the parent shell and fail() aborts the entire script on error.
    # Stderr goes to the log file for diagnostics rather than being discarded.
    run_seed_gen_cli --count "$POPULATION" --output-dir "$SEED_VARIANTS_DIR" \
      > "$SEED_VARIANTS_LIST" 2>>"$LOG" \
      || fail "seed-gen-cli.ts failed to generate variants"

    # Each line of the CLI's stdout is read as the path of one variant file.
    while IFS= read -r VARIANT_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
      CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
      cp "$VARIANT_FILE" "$CAND_FILE"
      printf '0\n' > "${CAND_FILE%.push3}.ops"
      VARIANT_IDX=$((VARIANT_IDX + 1))
    done < "$SEED_VARIANTS_LIST"
  fi

  # Fill any remaining slots with mutations of the seed
  while [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
      || fail "Failed to mutate seed for fallback candidate $VARIANT_IDX"
    printf '%s\n' "$MUTATED" > "$CAND_FILE"
    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
    VARIANT_IDX=$((VARIANT_IDX + 1))
  done

  log "Initialized ${POPULATION} candidates in gen_0 (diverse-seeds, pool=$POOL_COUNT)"
else
  # --- Default mode: N copies of the seed, each independently mutated ---
  for ((i = 0; i < POPULATION; i++)); do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
      || fail "Failed to mutate seed for initial candidate $i"
    printf '%s\n' "$MUTATED" > "$CAND_FILE"
    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
  done

  log "Initialized ${POPULATION} candidates in gen_0"
fi

# =============================================================================
# Step 2 — Evolution loop
# =============================================================================

# Evolution loop.  For every generation: score each candidate file in
# CURRENT_GEN_DIR, write a generation_<gen>.jsonl record and a tab-delimited
# scores file, log min/max/mean, and update the global best.  For all but the
# last generation it then builds the next generation directory from elites,
# mutated tournament survivors and crossover offspring.

# Best-so-far tracking; -1 / "" mean that no generation has been scored yet.
GLOBAL_BEST_FITNESS=-1
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_CAND=""

CURRENT_GEN_DIR="$GEN_DIR"

for gen in $(seq 0 $((GENERATIONS - 1))); do

  log ""
  log "=== Generation $((gen + 1)) / $GENERATIONS ==="

  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

  # --- a. Score all candidates ---

  # SCORE_VALUES accumulates the space-separated scores that are fed to py_stats.
  SCORE_VALUES=""
  CAND_COUNT=0

  # In revm mode, batch-score all candidates in one forge test invocation before
  # the per-candidate loop.  Scores are written to a temp JSONL file that the
  # loop reads with a fast Python lookup.
  BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"

  if [ "$EVAL_MODE" = "revm" ]; then
    declare -a _BATCH_FILES=()
    for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
      # The -f test also skips the literal pattern left behind by an unmatched glob.
      [ -f "$_CF" ] && _BATCH_FILES+=("$_CF")
    done

    if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
      # "|| BATCH_EC=$?" records the exit status without tripping set -e.
      # batch-eval.sh's stderr is discarded here.
      BATCH_EC=0
      bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null \
        || BATCH_EC=$?

      # Only exit status 2 aborts the run; any other status is logged and the
      # per-candidate lookup below proceeds with whatever was written.
      if [ "$BATCH_EC" -eq 2 ]; then
        fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
      log "  revm batch scoring complete (exit $BATCH_EC)"
    fi
  fi

  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    [ -f "$CAND_FILE" ] || continue

    # Extract the numeric part of "candidate_NNN.push3".
    CAND_IDX="${CAND_FILE##*candidate_}"
    CAND_IDX="${CAND_IDX%.push3}"
    # Canonical candidate_id format: "candidate_XXX" (matches source filename and batch-eval IDs).
    CID="candidate_${CAND_IDX}"

    # Read mutations_applied from sidecar; default 0 if missing.
    OPS_FILE="${CAND_FILE%.push3}.ops"
    MUTATIONS_APPLIED=0
    [ -f "$OPS_FILE" ] && MUTATIONS_APPLIED=$(cat "$OPS_FILE")

    SCORE=0
    FITNESS_EC=0

    if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
      # Look up pre-computed score from batch-eval.sh output.
      # batch-eval uses filename as candidate_id (e.g. "candidate_000")
      BATCH_CID="$(basename "$CAND_FILE" .push3)"
      SCORE=$(python3 - "$BATCH_CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys
cid = sys.argv[1]
with open(sys.argv[2]) as f:
    for line in f:
        try:
            d = json.loads(line)
            if d.get("candidate_id") == cid:
                print(d["fitness"])
                sys.exit(0)
        except (json.JSONDecodeError, KeyError):
            pass
print(0)
PYEOF
) || SCORE=0
    else
      # Anvil mode (or revm fallback): score candidate individually.
      SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?

      # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
      if [ "$FITNESS_EC" -eq 2 ]; then
        fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
      fi
    fi

    # Validate that score is a non-negative integer; treat any other output as invalid.
    if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
      log "  $CID: invalid/missing score, using 0"
      SCORE=0
    else
      log "  $CID: fitness=$SCORE"
    fi

    # Append to JSONL — use the actual operations recorded for this candidate.
    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"

    # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
    printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"

    SCORE_VALUES="$SCORE_VALUES $SCORE"
    CAND_COUNT=$((CAND_COUNT + 1))
  done

  if [ "$CAND_COUNT" -eq 0 ]; then
    fail "No candidates found in $CURRENT_GEN_DIR"
  fi

  # --- b. Log generation stats ---

  # py_stats prints "min max mean" on one line; read splits it into three variables.
  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
  log "  Stats: min=$MIN  max=$MAX  mean=$MEAN  candidates=$CAND_COUNT"

  # Find best candidate for this generation (filepath returned directly).
  BEST_FILE_THIS_GEN=$(python3 - "$SCORES_FILE" <<'PYEOF'
import sys
entries = []
with open(sys.argv[1]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[1]), parts[2]))
if not entries:
    sys.exit(1)
print(max(entries, key=lambda x: x[0])[1])
PYEOF
) || fail "Could not determine best candidate from $SCORES_FILE"

  # The comparison is delegated to python3 rather than done with shell
  # arithmetic; a strictly greater MAX (or the -1 sentinel) replaces the global best.
  if python3 -c "import sys; sys.exit(0 if int(sys.argv[1]) > int(sys.argv[2]) else 1)" "$MAX" "$GLOBAL_BEST_FITNESS" || [ "$GLOBAL_BEST_FITNESS" = "-1" ]; then
    GLOBAL_BEST_FITNESS="$MAX"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
    log "  New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
  fi

  # Skip next-generation creation after the final generation
  [ "$gen" -eq "$((GENERATIONS - 1))" ] && break

  # --- c. Tournament selection (k = population / 2) ---

  # Integer division; clamped so that at least one tournament is run.
  K=$((POPULATION / 2))
  [ "$K" -lt 1 ] && K=1

  # Process substitution keeps the while loop in the current shell, so the
  # array filled inside it is still populated after the loop.
  SURVIVOR_FILES=()
  while IFS= read -r WIN_FILE; do
    SURVIVOR_FILES+=("$WIN_FILE")
  done < <(py_tournament "$K" "$SCORES_FILE")

  log "  Selected ${#SURVIVOR_FILES[@]} survivors via tournament"

  # --- d/e. Generate next population (elitism + offspring) ---

  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
  mkdir -p "$NEXT_GEN_DIR"

  # NEXT_IDX is the next free candidate slot in NEXT_GEN_DIR.
  NEXT_IDX=0

  # --- d. Elitism: copy top ELITES candidates unchanged ---

  if [ "$ELITES" -gt 0 ]; then
    ELITE_FILES=()
    while IFS= read -r ELITE_FILE; do
      [ -f "$ELITE_FILE" ] && ELITE_FILES+=("$ELITE_FILE")
    done < <(py_top_n "$ELITES" "$SCORES_FILE")

    for ELITE_FILE in "${ELITE_FILES[@]}"; do
      DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
      cp "$ELITE_FILE" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
      NEXT_IDX=$((NEXT_IDX + 1))
    done

    log "  Elitism: carried over ${#ELITE_FILES[@]} top candidate(s) unchanged"
  fi

  # --- e. Fill remaining slots with mutation and crossover offspring ---

  # HALF uses integer division; when NON_ELITE is odd the extra slot is filled
  # by the crossover phase, whose count (REMAINING) is recomputed below.
  NON_ELITE=$((POPULATION - NEXT_IDX))
  HALF=$((NON_ELITE / 2))

  # First half of remaining: mutate random survivors
  for _i in $(seq 1 $HALF); do
    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
    else
      # Fallback: copy the survivor as-is to keep population size stable
      cp "$SUR" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  # Second half of remaining: crossover random survivor pairs
  # (both parents are drawn independently, so they may be the same file).
  REMAINING=$((POPULATION - NEXT_IDX))
  for _i in $(seq 1 $REMAINING); do
    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
      printf '%s\n' "$CROSSED" > "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    else
      # Fallback: mutate one survivor
      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
        printf '%s\n' "$MUTATED" > "$DEST"
        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
      else
        cp "$SUR_A" "$DEST"
        printf '0\n' > "${DEST%.push3}.ops"
      fi
    fi
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log "  Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
  CURRENT_GEN_DIR="$NEXT_GEN_DIR"

done

# =============================================================================
# Step 3 — Output best candidate
# =============================================================================

# A best candidate must have been recorded and its file must still exist.
[[ -n "$GLOBAL_BEST_CAND" && -f "$GLOBAL_BEST_CAND" ]] \
  || fail "No valid best candidate recorded — evolution produced no scorable output"

# Copy the winner into the run directory as best.push3 and report it.
BEST_OUTPUT="${OUTPUT_DIR}/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"
for _line in \
  "" \
  "Best candidate → $BEST_OUTPUT" \
  "  Fitness: $GLOBAL_BEST_FITNESS  (generation $GLOBAL_BEST_GEN)"; do
  log "$_line"
done

# =============================================================================
# Step 4 — Diff: original vs evolved constants
# =============================================================================

DIFF_OUTPUT="${OUTPUT_DIR}/diff.txt"

# Compare the integer literals of six or more digits found in the seed and in
# the best program, position by position, and write a report to diff.txt.
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import sys, re

COMMENT = re.compile(r';;[^\n]*')
BIG_INT = re.compile(r'\b(\d{6,})\b')

def extract_ints(path):
    """Return every integer literal of 6+ digits in a Push3 file, comments stripped."""
    source = COMMENT.sub('', open(path).read())
    return [int(tok) for tok in BIG_INT.findall(source)]

seed_path, best_path = sys.argv[1], sys.argv[2]
orig = extract_ints(seed_path)
best = extract_ints(best_path)
shared = min(len(orig), len(best))
extra = len(best) - len(orig)

print(f"=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()

# Constants present in both files, matched by position.
changed = 0
for idx in range(shared):
    before, after = orig[idx], best[idx]
    if before == after:
        continue
    delta = after - before
    if before != 0:
        pct = delta / before * 100
    else:
        pct = float('inf')
    print(f"  const[{idx:3d}]: {before:>25d}  →  {after:>25d}  (Δ={delta:+d}, {pct:+.2f}%)")
    changed += 1

# Constants that exist on one side only.
if extra > 0:
    for idx in range(len(orig), len(best)):
        print(f"  const[{idx:3d}]: {'(new)':>25s}  →  {best[idx]:>25d}")
elif extra < 0:
    print(f"  ({-extra} constant(s) removed from end)")

print()
if changed == 0 and extra == 0:
    print("No constant changes — evolution applied structural mutations only.")
else:
    print(f"Summary: {changed} of {shared} constant(s) changed.")
PYEOF

# Record where the report went, then echo the report itself to stderr.
log "Diff written to $DIFF_OUTPUT"
log ""
cat "$DIFF_OUTPUT" >&2

# Closing summary of the run.
for _line in \
  "========================================================" \
  "Evolution complete." \
  "  Generations run:  $GENERATIONS" \
  "  Best fitness:     $GLOBAL_BEST_FITNESS" \
  "  Best from gen:    $GLOBAL_BEST_GEN" \
  "  Output directory: $OUTPUT_DIR" \
  "========================================================"; do
  log "$_line"
done

# =============================================================================
# Step 5 — Seed pool admission
#
# Scan all generation JSONL files for candidates that scored above the
# admission threshold (6e21).  Deduplicate by Push3 content hash against the
# existing pool.  Admit qualifying candidates into seeds/ and rewrite
# manifest.jsonl, keeping at most the top-100 by fitness.
# =============================================================================

# Capture file and exit-status holder for the admission step that follows.
_ADMISSION_RC=0
_ADMISSION_OUT="${WORK_DIR}/admission_output.txt"

log ""
log "=== Seed pool admission (run=$RUN_ID, threshold=$ADMISSION_THRESHOLD) ==="

# Make sure the pool directory exists before anything is admitted into it.
mkdir -p "$SEEDS_DIR"

python3 - "$OUTPUT_DIR" "$WORK_DIR" "$SEEDS_DIR" \
  "$ADMISSION_THRESHOLD" "$RUN_ID" "$(date -u '+%Y-%m-%d')" \
  > "$_ADMISSION_OUT" 2>&1 <<'PYEOF' || _ADMISSION_RC=$?
import json, sys, os, hashlib, shutil, tempfile

output_dir, work_dir, seeds_dir = sys.argv[1], sys.argv[2], sys.argv[3]
threshold  = int(sys.argv[4])
run_id     = sys.argv[5]
today      = sys.argv[6]
MAX_EVOLVED = 100  # cap applies to evolved entries only; hand-written are always pinned

manifest_path = os.path.join(seeds_dir, 'manifest.jsonl')

# ── 1. Read existing manifest ─────────────────────────────────────────────────
existing = []
if os.path.exists(manifest_path):
    with open(manifest_path) as f:
        for line in f:
            line = line.strip()
            if line:
                try:
                    existing.append(json.loads(line))
                except json.JSONDecodeError:
                    pass

# ── 2. Hash existing pool files for deduplication ────────────────────────────
def file_hash(path):
    with open(path, 'rb') as fh:
        return hashlib.sha256(fh.read()).hexdigest()

existing_hashes = set()
for entry in existing:
    fpath = os.path.join(seeds_dir, entry.get('file', ''))
    if os.path.exists(fpath):
        existing_hashes.add(file_hash(fpath))

# ── 3. Collect qualifying candidates from generation JSONL files ──────────────
qualifying = []  # (fitness, push3_path, gen_idx, cand_str)

for fname in sorted(os.listdir(output_dir)):
    if not (fname.startswith('generation_') and fname.endswith('.jsonl')):
        continue
    try:
        gen_idx = int(fname[len('generation_'):-len('.jsonl')])  # validate integer suffix
    except ValueError:
        continue
    with open(os.path.join(output_dir, fname)) as f:
        for line in f:
            try:
                d       = json.loads(line)
                cid     = d.get('candidate_id', '')
                fitness = int(d.get('fitness', 0))
                if fitness < threshold:
                    continue
                # cid format: "candidate_XXX" (gen_idx derived from enclosing filename)
                if not cid.startswith('candidate_'):
                    continue
                cand_str  = cid[len('candidate_'):]      # numeric suffix, e.g. "001"
                push3_path = os.path.join(
                    work_dir, f'gen_{gen_idx}',
                    f'candidate_{int(cand_str):03d}.push3'
                )
                if os.path.exists(push3_path):
                    qualifying.append((fitness, push3_path, gen_idx, cand_str))
            except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
                pass

qualifying.sort(key=lambda x: x[0], reverse=True)

# ── 4. Deduplicate and assign filenames (resolve --run-id reuse collisions) ───
new_items = []     # (fitness, push3_path, manifest_entry)
seen      = set(existing_hashes)

for fitness, push3_path, gen_idx, cand_str in qualifying:
    h = file_hash(push3_path)
    if h in seen:
        continue
    seen.add(h)
    # Canonical name: run{run_id}_gen{gen_idx:03d}_c{cand_str}.push3
    # If a different file already occupies that name (same run-id reused), add
    # a counter suffix (_r2, _r3, …) until we find an unused or same-content slot.
    base     = f'run{run_id}_gen{gen_idx:03d}_c{cand_str}'
    filename = f'{base}.push3'
    dest     = os.path.join(seeds_dir, filename)
    if os.path.exists(dest) and file_hash(dest) != h:
        counter = 2
        while True:
            filename = f'{base}_r{counter}.push3'
            dest     = os.path.join(seeds_dir, filename)
            if not os.path.exists(dest) or file_hash(dest) == h:
                break
            counter += 1
    entry = {
        'file':       filename,
        'fitness':    fitness,
        'origin':     'evolved',
        'run':        run_id,
        'generation': gen_idx,
        'date':       today,
    }
    new_items.append((fitness, push3_path, entry))

if not new_items:
    print(f'No new qualifying candidates from run {run_id} '
          f'(threshold={threshold}, scanned {len(qualifying)} above-threshold hits)')
    sys.exit(0)

# ── 5. Separate pinned (hand-written) from evolved; top-100 cap on evolved only
#
# NOTE: raw fitness values are only comparable within the same evaluation run.
# Entries with fitness_flags='token_value_inflation' (or other flags) are ranked
# as fitness=0 so that inflated scores do not bias pool admission or eviction.
def effective_fitness(entry):
    """Return the integer fitness used to rank a manifest entry in the pool.

    Args:
        entry: manifest entry (dict); the 'fitness_flags' and 'fitness' keys
            are read, both optional.

    Returns:
        0 when 'fitness_flags' contains 'token_value_inflation', when 'fitness'
        is missing or falsy, or when 'fitness' cannot be converted to an
        integer; otherwise int(entry['fitness']).
    """
    flags = entry.get('fitness_flags') or ''
    if 'token_value_inflation' in flags:
        return 0
    try:
        return int(entry.get('fitness') or 0)
    except (TypeError, ValueError, OverflowError):
        # A malformed fitness value (e.g. "n/a", NaN, inf, a list) must not
        # abort the whole admission pass; rank the entry last instead.
        return 0

# Bucket every existing manifest entry as (rank, entry, None).  The trailing
# None marks an entry with no source file to copy; new candidates carry their
# source path in that slot instead.
pinned, evolved = [], []
for manifest_entry in existing:
    bucket = evolved if manifest_entry.get('origin') == 'evolved' else pinned
    bucket.append((effective_fitness(manifest_entry), manifest_entry, None))

# New candidates compete with the existing evolved entries only.
evolved.extend((new_rank, new_entry, new_src)
               for new_rank, new_src, new_entry in new_items)

# Highest rank first; everything past the MAX_EVOLVED cut-off is evicted.
evolved.sort(key=lambda item: item[0], reverse=True)
admitted_evolved, evicted_evolved = evolved[:MAX_EVOLVED], evolved[MAX_EVOLVED:]

# ── 6. Copy admitted new files; remove evicted evolved files ─────────────────
# Each tuple is (rank, manifest_entry, src_path).  src_path is None for entries
# that came from the existing manifest and a file path for new candidates.
admitted_count = 0
for _, entry, src_path in admitted_evolved:
    if src_path is None:              # already in the pool — nothing to copy
        continue
    dest = os.path.join(seeds_dir, entry['file'])
    shutil.copy2(src_path, dest)
    print(f'  admitted: {entry["file"]}  fitness={entry["fitness"]}')
    admitted_count += 1

for _, entry, src_path in evicted_evolved:
    if src_path is not None:          # rejected before being copied
        print(f'  rejected (below pool floor): {entry["file"]}  fitness={entry["fitness"]}')
        continue
    # Existing evolved entry pushed out of the cap.  A manifest entry with a
    # missing/empty 'file' would resolve to seeds_dir itself, and os.remove()
    # on a directory raises — so skip nameless entries and only ever delete
    # regular files.
    name = entry.get('file') or ''
    if not name:
        continue
    fpath = os.path.join(seeds_dir, name)
    if os.path.isfile(fpath):
        os.remove(fpath)
        print(f'  evicted from pool: {name}  fitness={entry.get("fitness")}')

# Warn about pinned (hand-written) entries that rank at or below the best
# evicted evolved candidate (evicted_evolved is in descending rank order, so
# index 0 is its highest-ranked element).
if evicted_evolved and pinned:
    pool_floor = evicted_evolved[0][0]
    for fit, entry, _ in pinned:
        if fit <= pool_floor:
            print(f'  WARNING: pinned seed "{entry.get("file")}" (fitness={fit}) '
                  f'ranks below evolved pool floor ({pool_floor}) — kept in manifest regardless')

# ── 7. Rewrite manifest.jsonl atomically via temp-file + rename ──────────────
# The manifest lists admitted evolved entries plus all pinned entries, one JSON
# object per line, highest rank first.
admitted = admitted_evolved + pinned
admitted.sort(key=lambda x: x[0], reverse=True)

# The temp file is created next to the manifest so that os.replace() stays on
# the same filesystem.
manifest_dir = os.path.dirname(manifest_path)
tmp_path = None
try:
    with tempfile.NamedTemporaryFile('w', dir=manifest_dir, delete=False, suffix='.tmp') as tmp:
        tmp_path = tmp.name
        for _, entry, _ in admitted:
            tmp.write(json.dumps(entry) + '\n')
    os.replace(tmp_path, manifest_path)
except BaseException:
    # delete=False means nobody else cleans this up: remove the partial temp
    # file so a failed write or rename does not leave stray *.tmp files next to
    # the manifest, then let the original error propagate.
    if tmp_path is not None:
        try:
            os.remove(tmp_path)
        except OSError:
            pass
    raise

print(f'Pool updated: {len(admitted)} entries total '
      f'({len(admitted_evolved)} evolved + {len(pinned)} pinned), '
      f'+{admitted_count} from run {run_id}')
PYEOF

# Relay every line of the file named by $_ADMISSION_OUT into the run log.
# The `|| [[ -n "$_line" ]]` clause keeps a final line that lacks a trailing
# newline, which a bare `read` loop would silently drop.
while IFS= read -r _line || [[ -n "$_line" ]]; do
  log "  $_line"
done < "$_ADMISSION_OUT"
# NOTE(review): $_ADMISSION_RC is set outside this section — presumably the
# exit status of the admission step; confirm against the caller.
if [ "$_ADMISSION_RC" -ne 0 ]; then
  log "  WARNING: seed pool admission failed (exit $_ADMISSION_RC) — pool unchanged"
fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								#!/usr/bin/env bash
 								# =============================================================================
 								# evolve.sh — Push3 evolution orchestrator
 								#
 								# Outer evolutionary loop: generate candidates → score → select → repeat.
 								#
 								# Usage:
 								#   ./tools/push3-evolution/evolve.sh \
 								#     --seed optimizer_v3.push3 \
 								#     --population 10 \
 								#     --generations 5 \
 								#     --mutation-rate 2 \
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								#     --elites 2 \
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								#     [--output evolved/] \
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								#     [--diverse-seeds] \
 								#     [--run-id <N>]
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								#
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								# --diverse-seeds  Initialise gen_0 with diverse candidates.  When the
 								#                  persistent seeds pool (tools/push3-evolution/seeds/) is
 								#                  non-empty, a random sample from the pool is used (crossover
 								#                  between hand-written and evolved programs).  When the pool is
 								#                  empty, falls back to the parametric seed-gen-cli variants.
 								#                  Any shortfall (pool or variants < --population) is filled by
 								#                  mutating the main seed.
 								#
 								# --run-id <N>     Integer identifier for this run, used to name candidates
 								#                  admitted to the seeds pool (e.g. run003_gen2_c005.push3).
 								#                  Auto-incremented from the highest existing run in the pool
 								#                  manifest when omitted.
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								#
 								# Algorithm:
 								#   1. Initialize population: N copies of seed, each with M random mutations.
 								#   2. For each generation:
 								#      a. Score all candidates via fitness.sh
 								#      b. Log generation stats (min/max/mean fitness, best candidate)
 								#      c. Select k survivors via tournament selection (k = population/2)
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								#      d. Elitism: copy top N candidates unchanged into next generation
 								#      e. Generate next population: mutate survivors + crossover pairs
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								#   3. Output best candidate as Push3 file.
 								#   4. Show diff: original vs evolved (which constants changed, by how much).
 								#
 								# Output:
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								#   <output>/run_NNN/        NNN auto-increments from the highest existing run dir
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								#     generation_0.jsonl   {candidate_id, fitness, mutations_applied}
 								#     generation_1.jsonl
 								#     ...
 								#     best.push3           highest-fitness program
 								#     diff.txt             parameter changes vs original
 								#     evolution.log        full run log
 								#
 								# Environment:
 								#   ANVIL_FORK_URL  Passed through to fitness.sh when Anvil is not running.
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								#
 								# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								# =============================================================================
 								set -euo pipefail
-												fix: Evolution pipeline UUPS upgrade + Foundry PATH (#593)

- Add virtual to Optimizer.calculateParams() for UUPS override
- Create OptimizerV3.sol: UUPS-upgradeable optimizer with transpiled Push3 logic
- Update deploy-optimizer.sh to deploy OptimizerV3 instead of Optimizer
- Add ~/.foundry/bin to PATH in evolve.sh, fitness.sh, deploy-optimizer.sh

											
										
										
											2026-03-12 06:47:35 +00:00
+								# Foundry tools (forge, cast, anvil)
 								export PATH="${HOME}/.foundry/bin:${PATH}"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 								FITNESS_SH="$SCRIPT_DIR/fitness.sh"
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								SEED_GEN_CLI="$SCRIPT_DIR/seed-gen-cli.ts"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								# EVAL_MODE controls which fitness backend is used:
-												fix: fix: EVAL_MODE defaults to anvil — should default to revm (#751)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:56:52 +00:00
+								#   revm   (default) — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
 								#                      Requires BASE_RPC_URL env var.  10-100× faster at scale.
 								#   anvil            — per-candidate Anvil+forge-script pipeline (fitness.sh)
 								EVAL_MODE="${EVAL_MODE:-revm}"
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								# =============================================================================
 								# Argument parsing
 								# =============================================================================
 								SEED=""
 								POPULATION=10
 								GENERATIONS=5
 								MUTATION_RATE=2
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								ELITES=2
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								OUTPUT_DIR="evolved"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								DIVERSE_SEEDS=false
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								RUN_ID=""
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
 								while [[ $# -gt 0 ]]; do
 								  case $1 in
 								    --seed)          SEED="$2";          shift 2 ;;
 								    --population)    POPULATION="$2";    shift 2 ;;
 								    --generations)   GENERATIONS="$2";   shift 2 ;;
 								    --mutation-rate) MUTATION_RATE="$2"; shift 2 ;;
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								    --elites)        ELITES="$2";        shift 2 ;;
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    --output)        OUTPUT_DIR="$2";    shift 2 ;;
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								    --diverse-seeds) DIVERSE_SEEDS=true; shift   ;;
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								    --run-id)        RUN_ID="$2";        shift 2 ;;
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    *) echo "Unknown option: $1" >&2; exit 2 ;;
 								  esac
 								done
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								if [ -z "$SEED" ];   then echo "Error: --seed required" >&2;              exit 2; fi
 								if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
 								# Validate numeric args: --population, --generations and --mutation-rate must
 								# each be a positive integer (>= 1); anything else aborts with exit status 2.
 								# Each loop entry is a "name:value" pair. The value is taken as everything
 								# after the FIRST colon so that a value which itself contains a colon
 								# (e.g. "a:5") is validated whole rather than only its trailing segment.
 								for _name_val in "population:$POPULATION" "generations:$GENERATIONS" "mutation-rate:$MUTATION_RATE"; do
 								  _name="${_name_val%%:*}"
 								  _val="${_name_val#*:}"
 								  if ! [[ "$_val" =~ ^[0-9]+$ ]] || [ "$_val" -lt 1 ]; then
 								    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
 								    exit 2
 								  fi
 								done
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								if ! [[ "$ELITES" =~ ^[0-9]+$ ]]; then
 								  echo "Error: --elites must be a non-negative integer (got: $ELITES)" >&2
 								  exit 2
 								fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								# Canonicalize paths
 								SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
 								# Resolve base output dir and create run_NNN subdirectory
 								mkdir -p "$OUTPUT_DIR"
 								BASE_DIR="$(cd "$OUTPUT_DIR" && pwd)"
 								# Auto-increment: find the highest run_NNN directory under BASE_DIR and add 1
 								RUN_NUM=$(python3 - "$BASE_DIR" <<'PYEOF'
 								import sys, os, re
 								base = sys.argv[1]
 								max_n = 0
 								if os.path.isdir(base):
 								    for name in os.listdir(base):
-												fix: address review findings for #752 — regex and STATE.md cleanup

- Fix run_NNN scan regex: r'run(\d+)' → r'run_(\d+)' so it correctly
  matches the underscore-separated directory names the script creates
  (previously always resolved to 001, overwriting the same dir each run)
- Remove [in-progress] tag from STATE.md entry for #752

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:27:53 +00:00
+								        m = re.fullmatch(r'run_(\d+)', name)
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								        if m and os.path.isdir(os.path.join(base, name)):
 								            max_n = max(max_n, int(m.group(1)))
 								print(f"{max_n + 1:03d}")
 								PYEOF
 								)
 								OUTPUT_DIR="$BASE_DIR/run_${RUN_NUM}"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								mkdir -p "$OUTPUT_DIR"
 								LOG="$OUTPUT_DIR/evolution.log"
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								# Seeds pool — persistent candidate pool across all runs
 								SEEDS_DIR="$SCRIPT_DIR/seeds"
 								POOL_MANIFEST="$SEEDS_DIR/manifest.jsonl"
 								ADMISSION_THRESHOLD=6000000000000000000000  # 6e21 wei
 								# Validate/auto-compute RUN_ID
 								# - When --run-id was given: it must be a positive integer; it is then
 								#   zero-padded to three digits.
 								# - When omitted: take the highest "run" value found in the pool manifest
 								#   and add 1 (falling back to "001" if the manifest is missing or the
 								#   python helper fails).
 								if [ -n "$RUN_ID" ]; then
 								  if ! [[ "$RUN_ID" =~ ^[0-9]+$ ]] || [ "$RUN_ID" -lt 1 ]; then
 								    echo "Error: --run-id must be a positive integer (got: $RUN_ID)" >&2
 								    exit 2
 								  fi
 								  # Force base 10 before formatting: printf's %d treats a leading zero as
 								  # octal, so "010" would become 008 and "08" would be rejected outright.
 								  RUN_ID=$(printf '%03d' "$((10#$RUN_ID))")
 								else
 								  # Auto-increment: find the highest run ID in the manifest and add 1
 								  if [ -f "$POOL_MANIFEST" ]; then
 								    RUN_ID=$(python3 - "$POOL_MANIFEST" <<'PYEOF'
 								import json, sys
 								max_run = 0
 								with open(sys.argv[1]) as f:
 								    for line in f:
 								        line = line.strip()
 								        if not line:
 								            continue
 								        try:
 								            d = json.loads(line)
 								            r = d.get("run")
 								            if r is not None:
 								                max_run = max(max_run, int(r))
 								        except (json.JSONDecodeError, ValueError, TypeError):
 								            pass
 								print(f"{max_run + 1:03d}")
 								PYEOF
 								) || RUN_ID="001"
 								  else
 								    RUN_ID="001"
 								  fi
 								fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								# =============================================================================
 								# Helpers
 								# =============================================================================
 								# Write one "[evolve]"-prefixed line built from all arguments to stderr,
 								# and append the same line to the run log file ($LOG).
 								log() {
 								  local line
 								  line="[evolve] $*"
 								  printf '%s\n' "$line" >&2
 								  printf '%s\n' "$line" >> "$LOG"
 								}
 								# Report a fatal error through log() as "ERROR: <message>" and terminate
 								# the script with exit status 2.
 								fail() {
 								  local reason="$*"
 								  log "ERROR: ${reason}"
 								  exit 2
 								}
 								# Pick the command used to execute the TypeScript CLIs and print it on stdout.
 								# Candidates are probed in priority order; the first hit wins:
 								#   1. tsx found on PATH                     -> "tsx"
 								#   2. $SCRIPT_DIR/node_modules/.bin/tsx     -> that path
 								#   3. npx found on PATH                     -> "npx tsx"
 								# Returns 1 (printing nothing) when none of them is available.
 								find_tsx_cmd() {
 								  if command -v tsx >/dev/null 2>&1; then
 								    printf '%s\n' "tsx"
 								    return
 								  fi
 								  local bundled_tsx="$SCRIPT_DIR/node_modules/.bin/tsx"
 								  if [ -x "$bundled_tsx" ]; then
 								    printf '%s\n' "$bundled_tsx"
 								    return
 								  fi
 								  if command -v npx >/dev/null 2>&1; then
 								    printf '%s\n' "npx tsx"
 								    return
 								  fi
 								  return 1
 								}
 								# Invoke mutate-cli.ts through the TypeScript runner, forwarding every argument.
 								# The function body is a subshell: the cd into SCRIPT_DIR (needed so relative
 								# TS imports resolve) never changes the caller's working directory.
 								run_mutate_cli() (
 								  cd "$SCRIPT_DIR" || exit
 								  # TSX_CMD is left unquoted on purpose so a two-word runner is word-split.
 								  $TSX_CMD "$MUTATE_CLI" "$@"
 								)
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								# Run the seed-gen-cli.ts with the given arguments.
 								run_seed_gen_cli() (
 								  # Subshell body: the cd into SCRIPT_DIR stays local to this call.
 								  cd "$SCRIPT_DIR" || exit
 								  # TSX_CMD is left unquoted on purpose so a two-word runner is word-split.
 								  $TSX_CMD "$SEED_GEN_CLI" "$@"
 								)
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
 								py_stats() {
-												fix: evolution scoring — 3 bugs made all candidates report fitness=0 (#665)

## Three bugs in evolve.sh

1. **Heredoc stdin conflict** — `py_stats()` used `<<PYEOF` heredoc which stole stdin from the pipe, so python never received score values → stats always `min=0 max=0 mean=0`

2. **Bash integer overflow** — global best comparison used `[ $MAX -gt $GLOBAL_BEST_FITNESS ]` which overflows on uint256 wei values (>9.2e18) → best always tracked as 0

3. **candidate_id mismatch** — evolve.sh looked up `gen0_c000` but batch-eval produces `candidate_000` (derived from filename) → score lookup always returned default 0

All 3 previous evolution runs (150+ candidates) reported all zeros despite batch-eval correctly scoring them at ~8.26e21 wei.

## Fix
- `py_stats`: heredoc → `python3 -c` inline
- Global best: bash `[ -gt ]` → `python3` big number comparison
- Score lookup: use `basename $CAND_FILE` instead of synthetic CID

Co-authored-by: root <root@debian-g-2vcpu-8gb-ams3-01>
Reviewed-on: https://codeberg.org/johba/harb/pulls/665
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 10:02:24 +01:00
+								  python3 -c "import sys; nums = [int(x) for x in sys.stdin.read().split()]; print(min(nums) if nums else 0, max(nums) if nums else 0, round(sum(nums)/len(nums)) if nums else 0)"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								}
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								# Top-N selection: return filepaths of the N highest-scoring candidates (descending).
 								# Arguments:
 								#   $1 - n: how many paths to print
 								#   $2 - scores file: tab-separated lines; the 2nd field is the integer score
 								#        and the 3rd field is the candidate file path
 								# Outputs: up to n file paths on stdout, one per line, highest score first.
 								py_top_n() {
 								  local n="$1"
 								  local scores_file="$2"
 								  # The program is fed to python3 on stdin ("-"); the quoted PYEOF delimiter
 								  # keeps the shell from expanding anything inside the Python source.
 								  python3 - "$n" "$scores_file" <<'PYEOF'
 								import sys
 								n = int(sys.argv[1])
 								entries = []
 								with open(sys.argv[2]) as f:
 								    for line in f:
 								        parts = line.rstrip('\n').split('\t')
 								        # Lines with fewer than three tab-separated fields are skipped.
 								        if len(parts) >= 3:
 								            entries.append((int(parts[1]), parts[2]))
 								# Sort on the score only, descending.
 								entries.sort(key=lambda x: x[0], reverse=True)
 								for _, path in entries[:n]:
 								    print(path)
 								PYEOF
 								}
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								# Tournament selection: given a scores file (one "idx score filepath" per line),
 								# run k tournaments of size 2 and return winner filepaths (one per line).
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								py_tournament() {
 								  local k="$1"
 								  local scores_file="$2"
 								  python3 - "$k" "$scores_file" <<'PYEOF'
 								import sys, random
 								k = int(sys.argv[1])
 								entries = []
 								with open(sys.argv[2]) as f:
 								    for line in f:
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 22:06:18 +00:00
+								        parts = line.rstrip('\n').split('\t')
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								        if len(parts) >= 3:
 								            entries.append((int(parts[0]), int(parts[1]), parts[2]))
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								if not entries:
 								    sys.exit(1)
 								for _ in range(k):
 								    a = random.choice(entries)
 								    b = random.choice(entries)
 								    winner = a if a[1] >= b[1] else b
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								    print(winner[2])
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								PYEOF
 								}
 								# =============================================================================
 								# Tool checks
 								# =============================================================================
 								# Abort via fail() when a required interpreter or helper script is missing;
 								# fitness.sh is made executable if it is not already.
 								for _tool in python3 node; do
 								  if ! command -v "$_tool" >/dev/null 2>&1; then
 								    fail "$_tool not found in PATH"
 								  fi
 								done
 								if [ ! -f "$FITNESS_SH" ]; then
 								  fail "fitness.sh not found at $FITNESS_SH"
 								fi
 								if [ ! -f "$MUTATE_CLI" ]; then
 								  fail "mutate-cli.ts not found at $MUTATE_CLI"
 								fi
 								if [ ! -x "$FITNESS_SH" ]; then
 								  chmod +x "$FITNESS_SH"
 								fi
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								if [ "$EVAL_MODE" = "revm" ]; then
 								  [ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH"
 								  [ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH"
 								  [ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
 								  command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)"
 								elif [ "$EVAL_MODE" != "anvil" ]; then
 								  fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
 								fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								TSX_CMD="$(find_tsx_cmd)" || fail \
 								  "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
 								# =============================================================================
 								# Work directory — holds all candidate .push3 files across generations
 								# =============================================================================
 								# Abort immediately if the temp dir cannot be created: an empty WORK_DIR
 								# would make later "$WORK_DIR/..." paths resolve under the filesystem root.
 								WORK_DIR="$(mktemp -d)" || fail "mktemp -d failed — cannot create work directory"
 								# Remove the work directory on any exit path. "--" stops option parsing and
 								# ":?" refuses to run rm with an empty/unset path.
 								cleanup() { rm -rf -- "${WORK_DIR:?}"; }
 								trap cleanup EXIT
 								# =============================================================================
 								# Log run header
 								# =============================================================================
 								log "========================================================"
 								log "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								log "  Seed:           $SEED"
 								log "  Population:     $POPULATION"
 								log "  Generations:    $GENERATIONS"
 								log "  Mutation rate:  $MUTATION_RATE"
 								log "  Elites:         $ELITES"
 								log "  Diverse seeds:  $DIVERSE_SEEDS"
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								log "  Run ID:         $RUN_ID"
-												fix: feat: evolve.sh auto-incrementing per-run results directory (#752)

- --output now accepts a base dir (default: evolved/) instead of requiring
  an explicit path each run
- On each invocation, scan base dir for existing run_NNN/ subdirectories,
  find the highest N, and create run_(N+1)/ for this run's outputs
- All generation JSONL files, best.push3, diff.txt, and evolution.log are
  written to the new run dir — previous runs are never overwritten
- Log header now shows both Base dir and Output (run dir) for clarity

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 11:08:04 +00:00
+								log "  Base dir:       $BASE_DIR"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								log "  Output:         $OUTPUT_DIR"
 								log "  TSX:            $TSX_CMD"
 								log "  Eval mode:      $EVAL_MODE"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								log "========================================================"
 								# =============================================================================
 								# Step 1 — Initialize generation 0
 								#
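 								# Default: N copies of the seed, each independently mutated MUTATION_RATE times.
 								# With --diverse-seeds, gen_0 is instead filled from the persistent seeds pool
 								# or, failing that, from seed-gen-cli (see the branch below).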
 								# =============================================================================
 								log ""
 								log "=== Initializing population ==="
 								GEN_DIR="$WORK_DIR/gen_0"
 								mkdir -p "$GEN_DIR"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								if [ "$DIVERSE_SEEDS" = "true" ]; then
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								  # --- Diverse-seeds mode: prefer persistent pool; fall back to seed-gen-cli ---
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								  VARIANT_IDX=0
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								  # Build a random sample list from the pool in one pass (also determines if
 								  # the pool has any usable entries, avoiding a second manifest parse).
 								  POOL_SAMPLE_LIST="$WORK_DIR/pool_sample.txt"
 								  POOL_COUNT=0
 								  if [ -f "$POOL_MANIFEST" ]; then
 								    python3 - "$POOL_MANIFEST" "$SEEDS_DIR" "$POPULATION" > "$POOL_SAMPLE_LIST" <<'PYEOF'
 								import json, sys, os, random
 								manifest_path, seeds_dir, n = sys.argv[1], sys.argv[2], int(sys.argv[3])
 								entries = []
 								with open(manifest_path) as f:
 								    for line in f:
 								        line = line.strip()
 								        if not line:
 								            continue
 								        try:
 								            d = json.loads(line)
 								            fpath = os.path.join(seeds_dir, d.get('file', ''))
 								            if os.path.exists(fpath):
 								                entries.append(fpath)
 								        except json.JSONDecodeError:
 								            pass
 								random.shuffle(entries)
 								for path in entries[:n]:
 								    print(path)
 								PYEOF
 								    POOL_COUNT=$(wc -l < "$POOL_SAMPLE_LIST" 2>/dev/null || echo 0)
 								  fi
 								  if [ "$POOL_COUNT" -gt 0 ]; then
 								    # --- Pool mode: random sample from the seeds pool ---
 								    log "  diverse-seeds: sampling up to $POPULATION candidates from pool ($POOL_COUNT available)"
-												fix: address review findings for diverse seed population (#638)

- evolve.sh: fix fail-in-subshell bug — run seed-gen-cli as a direct
  command so its exit code is checked by the parent shell and fail()
  aborts the script correctly; redirect stderr to log file instead of
  discarding it with 2>/dev/null
- seed-generator.ts: reorder enumerateVariants() to put
  STAKED_THRESHOLDS outermost (192 entries/block) so that
  selectVariants(6) with stride=192 covers all 6 staked% thresholds;
  remove false doc claim about "first variant is current seed config";
  add comments explaining CI=0n is intentional in all presets
- seed-gen-cli.ts: emit a stderr diagnostic when count exceeds the
  1152-variant cap so the cap is visible rather than silently producing
  fewer files than requested
- test: strengthen n=6 test to assert all STAKED_THRESHOLDS values are
  represented in the selected variants

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 05:21:05 +00:00
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								    while IFS= read -r POOL_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
 								      CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' $VARIANT_IDX).push3"
 								      cp "$POOL_FILE" "$CAND_FILE"
 								      printf '0\n' > "${CAND_FILE%.push3}.ops"
 								      VARIANT_IDX=$((VARIANT_IDX + 1))
 								    done < "$POOL_SAMPLE_LIST"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								    log "  diverse-seeds: seeded $VARIANT_IDX candidate(s) from pool"
 								  else
 								    # --- Fallback: parametric variants from seed-gen-cli (pool is empty) ---
 								    log "  diverse-seeds: pool empty, falling back to seed-gen-cli parametric variants"
 								    if [ ! -f "$SEED_GEN_CLI" ]; then
 								      fail "seed-gen-cli.ts not found at $SEED_GEN_CLI"
 								    fi
 								    SEED_VARIANTS_DIR="${WORK_DIR}/seed_variants"
 								    SEED_VARIANTS_LIST="${WORK_DIR}/seed_variants_list.txt"
 								    # The generator is invoked as a plain command rather than through process
 								    # substitution, so a non-zero exit is visible here and fail() can stop the
 								    # run.  Its stdout is captured in the list file; stderr is appended to the log.
 								    if ! run_seed_gen_cli --count "$POPULATION" --output-dir "$SEED_VARIANTS_DIR" \
 								      > "$SEED_VARIANTS_LIST" 2>>"$LOG"; then
 								      fail "seed-gen-cli.ts failed to generate variants"
 								    fi
 								    # Copy listed variants into $GEN_DIR until either the list or the
 								    # population size is exhausted.
 								    while IFS= read -r VARIANT_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
 								      CAND_FILE="${GEN_DIR}/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
 								      cp "$VARIANT_FILE" "$CAND_FILE"
 								      # Sidecar .ops file: these candidates are recorded with 0 mutations.
 								      echo 0 > "${CAND_FILE%.push3}.ops"
 								      VARIANT_IDX=$(( VARIANT_IDX + 1 ))
 								    done < "$SEED_VARIANTS_LIST"
 								  fi
 								  # Fill any remaining slots with mutations of the seed
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								  while [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
 								    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' $VARIANT_IDX).push3"
 								    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
 								      || fail "Failed to mutate seed for fallback candidate $VARIANT_IDX"
 								    printf '%s\n' "$MUTATED" > "$CAND_FILE"
 								    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
 								    VARIANT_IDX=$((VARIANT_IDX + 1))
 								  done
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								  log "Initialized ${POPULATION} candidates in gen_0 (diverse-seeds, pool=$POOL_COUNT)"
-												fix: feat: Push3 evolution — diverse seed population (#638)

Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.

Variants systematically cover:
  - Staked% thresholds: 80, 85, 88, 91, 94, 97
  - Penalty thresholds: 30, 50, 70, 100
  - Bull params: 4 presets (aggressive → mild)
  - Bear params: 4 presets (standard → very mild)
  - Tax distributions: exponential (seed), linear, sqrt

Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.

evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.

All generated programs pass transpiler stack validation (33 new tests).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-13 04:48:04 +00:00
+								else
 								  # --- Default mode: N copies of the seed, each independently mutated ---
 								  # Arithmetic for-loop instead of $(seq ...): no extra process, and no
 								  # dependence on how the platform's seq treats an empty range.
 								  for ((i = 0; i < POPULATION; i++)); do
 								    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
 								    # Capture the mutated program first so a mutate failure aborts before
 								    # anything is written for this candidate.
 								    MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
 								      || fail "Failed to mutate seed for initial candidate $i"
 								    printf '%s\n' "$MUTATED" > "$CAND_FILE"
 								    # Sidecar .ops file records the mutation count passed to the mutator.
 								    printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
 								  done
 								  log "Initialized ${POPULATION} candidates in gen_0"
 								fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
 								# =============================================================================
 								# Step 2 — Evolution loop
 								# =============================================================================
 								GLOBAL_BEST_FITNESS=-1
 								GLOBAL_BEST_GEN=-1
 								GLOBAL_BEST_CAND=""
 								CURRENT_GEN_DIR="$GEN_DIR"
 								for gen in $(seq 0 $((GENERATIONS - 1))); do
 								  log ""
 								  log "=== Generation $((gen + 1)) / $GENERATIONS ==="
 								  JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
 								  SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"
 								  # --- a. Score all candidates ---
 								  SCORE_VALUES=""
 								  CAND_COUNT=0
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								  # In revm mode, batch-score all candidates in one forge test invocation before
 								  # the per-candidate loop.  Scores are written to a temp JSONL file that the
 								  # loop reads with a fast Python lookup.
 								  BATCH_SCORES_FILE="${WORK_DIR}/batch_scores_gen_${gen}.jsonl"
 								  if [[ "$EVAL_MODE" == "revm" ]]; then
 								    # Gather this generation's candidate files; the -f test skips anything
 								    # the glob yields that is not a regular file.
 								    declare -a _BATCH_FILES=()
 								    for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
 								      if [[ -f "$_CF" ]]; then
 								        _BATCH_FILES+=("$_CF")
 								      fi
 								    done
 								    if (( ${#_BATCH_FILES[@]} > 0 )); then
 								      # One batch-eval.sh call for the whole generation: stdout is saved as
 								      # the scores file, stderr is discarded, and the exit status is kept.
 								      if bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null; then
 								        BATCH_EC=0
 								      else
 								        BATCH_EC=$?
 								      fi
 								      # Exit status 2 is treated as an infrastructure error and is fatal;
 								      # any other status is only logged.
 								      if (( BATCH_EC == 2 )); then
 								        fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
 								      fi
 								      log "  revm batch scoring complete (exit $BATCH_EC)"
 								    fi
 								  fi
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								  for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
 								    [ -f "$CAND_FILE" ] || continue
 								    CAND_IDX="${CAND_FILE##*candidate_}"
 								    CAND_IDX="${CAND_IDX%.push3}"
-												fix: generation_N.jsonl candidate_id format mismatch vs filenames (#669)

											
										
										
											2026-03-14 04:07:00 +00:00
+								    # Canonical candidate_id format: "candidate_XXX" (matches source filename and batch-eval IDs).
 								    CID="candidate_${CAND_IDX}"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								    # Read mutations_applied from sidecar; default 0 if missing.
 								    OPS_FILE="${CAND_FILE%.push3}.ops"
 								    MUTATIONS_APPLIED=0
 								    [ -f "$OPS_FILE" ] && MUTATIONS_APPLIED=$(cat "$OPS_FILE")
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    SCORE=0
 								    FITNESS_EC=0
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								    if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
 								      # Look up pre-computed score from batch-eval.sh output.
-												fix: evolution scoring — 3 bugs made all candidates report fitness=0 (#665)

## Three bugs in evolve.sh

1. **Heredoc stdin conflict** — `py_stats()` used `<<PYEOF` heredoc which stole stdin from the pipe, so python never received score values → stats always `min=0 max=0 mean=0`

2. **Bash integer overflow** — global best comparison used `[ $MAX -gt $GLOBAL_BEST_FITNESS ]` which overflows on uint256 wei values (>9.2e18) → best always tracked as 0

3. **candidate_id mismatch** — evolve.sh looked up `gen0_c000` but batch-eval produces `candidate_000` (derived from filename) → score lookup always returned default 0

All 3 previous evolution runs (150+ candidates) reported all zeros despite batch-eval correctly scoring them at ~8.26e21 wei.

## Fix
- `py_stats`: heredoc → `python3 -c` inline
- Global best: bash `[ -gt ]` → `python3` big number comparison
- Score lookup: use `basename $CAND_FILE` instead of synthetic CID

Co-authored-by: root <root@debian-g-2vcpu-8gb-ams3-01>
Reviewed-on: https://codeberg.org/johba/harb/pulls/665
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 10:02:24 +01:00
+								      # batch-eval uses filename as candidate_id (e.g. "candidate_000")
 								      BATCH_CID="$(basename "$CAND_FILE" .push3)"
 								      SCORE=$(python3 - "$BATCH_CID" "$BATCH_SCORES_FILE" <<'PYEOF'
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								import json, sys
 								cid = sys.argv[1]
 								with open(sys.argv[2]) as f:
 								    for line in f:
 								        try:
 								            d = json.loads(line)
 								            if d.get("candidate_id") == cid:
 								                print(d["fitness"])
 								                sys.exit(0)
 								        except (json.JSONDecodeError, KeyError):
 								            pass
 								print(0)
 								PYEOF
 								) || SCORE=0
 								    else
 								      # Anvil mode (or revm fallback): evaluate this candidate on its own.
 								      # fitness.sh stdout becomes SCORE (stderr discarded); a non-zero exit
 								      # status is stored in FITNESS_EC, which is otherwise left untouched.
 								      if SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null); then
 								        :
 								      else
 								        FITNESS_EC=$?
 								      fi
 								      # Exit status 2 signals an infrastructure error (Anvil down, missing
 								      # tools): stop the whole run instead of scoring further candidates.
 								      if (( FITNESS_EC == 2 )); then
 								        fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
 								      fi
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								    fi
 								    # Validate that score is a non-negative integer; treat any other output as invalid.
-												fix: feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 11:54:41 +00:00
+								    if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
 								      log "  $CID: invalid/missing score, using 0"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								      SCORE=0
 								    else
 								      log "  $CID: fitness=$SCORE"
 								    fi
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								    # Append to JSONL — use the actual operations recorded for this candidate.
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								      "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 22:06:18 +00:00
+								    # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
 								    printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
 								    SCORE_VALUES="$SCORE_VALUES $SCORE"
 								    CAND_COUNT=$((CAND_COUNT + 1))
 								  done
 								  # A generation directory with zero scored candidates is fatal.
 								  case "$CAND_COUNT" in
 								    0) fail "No candidates found in $CURRENT_GEN_DIR" ;;
 								  esac
 								  # --- b. Log generation stats ---
 								  # The accumulated scores are piped to py_stats; its output line is split
 								  # into MIN, MAX and MEAN.
 								  read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
 								  log "  Stats: min=$MIN  max=$MAX  mean=$MEAN  candidates=$CAND_COUNT"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								  # Find best candidate for this generation (filepath returned directly).
 								  BEST_FILE_THIS_GEN=$(python3 - "$SCORES_FILE" <<'PYEOF'
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								import sys
 								entries = []
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								with open(sys.argv[1]) as f:
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    for line in f:
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 22:06:18 +00:00
+								        parts = line.rstrip('\n').split('\t')
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								        if len(parts) >= 3:
 								            entries.append((int(parts[1]), parts[2]))
 								if not entries:
 								    sys.exit(1)
 								print(max(entries, key=lambda x: x[0])[1])
 								PYEOF
 								) || fail "Could not determine best candidate from $SCORES_FILE"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
-												fix: evolution scoring — 3 bugs made all candidates report fitness=0 (#665)

## Three bugs in evolve.sh

1. **Heredoc stdin conflict** — `py_stats()` used `<<PYEOF` heredoc which stole stdin from the pipe, so python never received score values → stats always `min=0 max=0 mean=0`

2. **Bash integer overflow** — global best comparison used `[ $MAX -gt $GLOBAL_BEST_FITNESS ]` which overflows on uint256 wei values (>9.2e18) → best always tracked as 0

3. **candidate_id mismatch** — evolve.sh looked up `gen0_c000` but batch-eval produces `candidate_000` (derived from filename) → score lookup always returned default 0

All 3 previous evolution runs (150+ candidates) reported all zeros despite batch-eval correctly scoring them at ~8.26e21 wei.

## Fix
- `py_stats`: heredoc → `python3 -c` inline
- Global best: bash `[ -gt ]` → `python3` big number comparison
- Score lookup: use `basename $CAND_FILE` instead of synthetic CID

Co-authored-by: root <root@debian-g-2vcpu-8gb-ams3-01>
Reviewed-on: https://codeberg.org/johba/harb/pulls/665
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 10:02:24 +01:00
+								  if python3 -c "import sys; sys.exit(0 if int(sys.argv[1]) > int(sys.argv[2]) else 1)" "$MAX" "$GLOBAL_BEST_FITNESS" || [ "$GLOBAL_BEST_FITNESS" = "-1" ]; then
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    GLOBAL_BEST_FITNESS="$MAX"
 								    GLOBAL_BEST_GEN="$gen"
 								    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 22:06:18 +00:00
+								    log "  New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								  fi
 								  # Skip next-generation creation after the final generation
 								  [ "$gen" -eq "$((GENERATIONS - 1))" ] && break
 								  # --- c. Tournament selection (k = population / 2) ---
 								  K=$((POPULATION / 2))
 								  [ "$K" -lt 1 ] && K=1
 								  SURVIVOR_FILES=()
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								  while IFS= read -r WIN_FILE; do
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    SURVIVOR_FILES+=("$WIN_FILE")
 								  done < <(py_tournament "$K" "$SCORES_FILE")
 								  log "  Selected ${#SURVIVOR_FILES[@]} survivors via tournament"
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								  # --- d/e. Generate next population (elitism + offspring) ---
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
 								  NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
 								  mkdir -p "$NEXT_GEN_DIR"
 								  NEXT_IDX=0
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								  # --- d. Elitism: copy top ELITES candidates unchanged ---
 								  if [ "$ELITES" -gt 0 ]; then
 								    ELITE_FILES=()
 								    while IFS= read -r ELITE_FILE; do
 								      [ -f "$ELITE_FILE" ] && ELITE_FILES+=("$ELITE_FILE")
 								    done < <(py_top_n "$ELITES" "$SCORES_FILE")
 								    for ELITE_FILE in "${ELITE_FILES[@]}"; do
 								      DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
 								      cp "$ELITE_FILE" "$DEST"
 								      printf '0\n' > "${DEST%.push3}.ops"
 								      NEXT_IDX=$((NEXT_IDX + 1))
 								    done
 								    log "  Elitism: carried over ${#ELITE_FILES[@]} top candidate(s) unchanged"
 								  fi
 								  # --- e. Fill remaining slots with mutation and crossover offspring ---
 								  NON_ELITE=$((POPULATION - NEXT_IDX))
 								  HALF=$((NON_ELITE / 2))
 								  # First half of remaining: mutate random survivors
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								  for _i in $(seq 1 $HALF); do
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
 								    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
 								    if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
 								      printf '%s\n' "$MUTATED" > "$DEST"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    else
 								      # Fallback: copy the survivor as-is to keep population size stable
 								      cp "$SUR" "$DEST"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								      printf '0\n' > "${DEST%.push3}.ops"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    fi
 								    NEXT_IDX=$((NEXT_IDX + 1))
 								  done
-												fix: feat: Push3 evolution — elitism (top N survive unchanged) (#640)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-12 22:29:23 +00:00
+								  # Second half of remaining: crossover random survivor pairs
 								  REMAINING=$((POPULATION - NEXT_IDX))
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								  for _i in $(seq 1 $REMAINING); do
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
 								    SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
 								    DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
 								    if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
 								      printf '%s\n' "$CROSSED" > "$DEST"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								      printf '0\n' > "${DEST%.push3}.ops"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								    else
 								      # Fallback: mutate one survivor
 								      if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
 								        printf '%s\n' "$MUTATED" > "$DEST"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								        printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								      else
 								        cp "$SUR_A" "$DEST"
-												fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 21:29:14 +00:00
+								        printf '0\n' > "${DEST%.push3}.ops"
-												fix: Push3 evolution: selection loop orchestrator (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 20:56:19 +00:00
+								      fi
 								    fi
 								    NEXT_IDX=$((NEXT_IDX + 1))
 								  done
 								  log "  Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
 								  CURRENT_GEN_DIR="$NEXT_GEN_DIR"
 								done
 								# =============================================================================
 								# Step 3 — Output best candidate
 								# =============================================================================
 								# Refuse to continue unless a best candidate was recorded and its file exists.
 								if [[ -z "$GLOBAL_BEST_CAND" || ! -f "$GLOBAL_BEST_CAND" ]]; then
 								  fail "No valid best candidate recorded — evolution produced no scorable output"
 								fi
 								# Copy the winning program to a fixed name inside $OUTPUT_DIR.
 								BEST_OUTPUT="${OUTPUT_DIR}/best.push3"
 								cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"
 								log ""
 								log "Best candidate → $BEST_OUTPUT"
 								log "  Fitness: $GLOBAL_BEST_FITNESS  (generation $GLOBAL_BEST_GEN)"
 								# =============================================================================
 								# Step 4 — Diff: original vs evolved constants
 								# =============================================================================
 								# Compare the large integer literals of the seed program with those of the
 								# best candidate and write a human-readable report to $OUTPUT_DIR/diff.txt.
 								# The report is also echoed to stderr once it has been written.
 								DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"
 								python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
 								import sys, re
 								def extract_ints(path):
 								    """Extract all large integer literals (≥6 digits) from a Push3 file."""
 								    # Context manager so the handle is closed as soon as the text is read.
 								    with open(path) as fh:
 								        text = fh.read()
 								    text = re.sub(r';;[^\n]*', '', text)   # strip comments
 								    return [int(m) for m in re.findall(r'\b(\d{6,})\b', text)]
 								seed_path, best_path = sys.argv[1], sys.argv[2]
 								orig = extract_ints(seed_path)
 								best = extract_ints(best_path)
 								print("=== Push3 Evolution Diff ===")
 								print(f"Seed: {seed_path}")
 								print(f"Best: {best_path}")
 								print()
 								# Constants are paired by position: the i-th literal of the seed is compared
 								# with the i-th literal of the best candidate.
 								changed = 0
 								for i, (o, b) in enumerate(zip(orig, best)):
 								    if o != b:
 								        # A literal such as 000000 parses to 0; avoid dividing by it.
 								        pct = (b - o) / o * 100 if o != 0 else float('inf')
 								        print(f"  const[{i:3d}]: {o:>25d}  →  {b:>25d}  (Δ={b - o:+d}, {pct:+.2f}%)")
 								        changed += 1
 								# zip() stops at the shorter list, so report any length difference separately.
 								if len(orig) != len(best):
 								    added = len(best) - len(orig)
 								    if added > 0:
 								        for i, val in enumerate(best[len(orig):]):
 								            print(f"  const[{len(orig) + i:3d}]: {'(new)':>25s}  →  {val:>25d}")
 								    else:
 								        print(f"  ({-added} constant(s) removed from end)")
 								print()
 								if changed == 0 and len(orig) == len(best):
 								    print("No constant changes — evolution applied structural mutations only.")
 								else:
 								    total = min(len(orig), len(best))
 								    print(f"Summary: {changed} of {total} constant(s) changed.")
 								PYEOF
 								log "Diff written to $DIFF_OUTPUT"
 								log ""
 								cat "$DIFF_OUTPUT" >&2
 								# Closing banner: one log call per line, framed by the same rule at both ends.
 								_SUMMARY_RULE="========================================================"
 								for _summary_line in \
 								  "$_SUMMARY_RULE" \
 								  "Evolution complete." \
 								  "  Generations run:  $GENERATIONS" \
 								  "  Best fitness:     $GLOBAL_BEST_FITNESS" \
 								  "  Best from gen:    $GLOBAL_BEST_GEN" \
 								  "  Output directory: $OUTPUT_DIR" \
 								  "$_SUMMARY_RULE"; do
 								  log "$_summary_line"
 								done
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
 								# =============================================================================
 								# Step 5 — Seed pool admission
 								#
 								# Scan all generation JSONL files for candidates that scored at or above the
 								# admission threshold (6e21).  Deduplicate by Push3 content hash against the
 								# existing pool.  Admit qualifying candidates into seeds/ and rewrite
 								# manifest.jsonl, keeping at most the top-100 evolved entries by fitness
 								# (hand-written entries are always kept).
 								# =============================================================================
 								log ""
 								log "=== Seed pool admission (run=$RUN_ID, threshold=$ADMISSION_THRESHOLD) ==="
 								mkdir -p "$SEEDS_DIR"
 								# File that receives everything the Python script prints (stdout and stderr).
 								_ADMISSION_OUT="$WORK_DIR/admission_output.txt"
 								# Exit status of the Python script; stays 0 unless the script fails.
 								_ADMISSION_RC=0
 								# Run the admission script from the heredoc below.  Positional arguments, in
 								# order: output dir, work dir, seeds dir, admission threshold, run id, and
 								# today's UTC date (YYYY-MM-DD).  The trailing `|| _ADMISSION_RC=$?` records a
 								# non-zero exit status in _ADMISSION_RC rather than leaving it unhandled.
 								python3 - "$OUTPUT_DIR" "$WORK_DIR" "$SEEDS_DIR" \
 								  "$ADMISSION_THRESHOLD" "$RUN_ID" "$(date -u '+%Y-%m-%d')" \
 								  > "$_ADMISSION_OUT" 2>&1 <<'PYEOF' || _ADMISSION_RC=$?
 								import json, sys, os, hashlib, shutil, tempfile
 								# Positional arguments supplied by the shell wrapper:
 								#   1-3  output dir (generation_*.jsonl), work dir (gen_N/ candidate files), seeds dir
 								#   4    admission threshold, parsed as an arbitrary-precision int
 								#   5    run id, kept as a string and embedded in new seed filenames
 								#   6    date string stored in each new manifest entry
 								output_dir, work_dir, seeds_dir = sys.argv[1], sys.argv[2], sys.argv[3]
 								threshold  = int(sys.argv[4])
 								run_id     = sys.argv[5]
 								today      = sys.argv[6]
 								MAX_EVOLVED = 100  # cap applies to evolved entries only; hand-written are always pinned
 								# The manifest lives inside the seeds directory, one JSON object per line.
 								manifest_path = os.path.join(seeds_dir, 'manifest.jsonl')
 								# ── 1. Read existing manifest ─────────────────────────────────────────────────
 								# Each non-blank line is expected to hold one JSON object.  Unusable lines are
 								# skipped but reported, because they are lost if the manifest is rewritten
 								# later in this script.
 								existing = []
 								if os.path.exists(manifest_path):
 								    with open(manifest_path) as f:
 								        for lineno, line in enumerate(f, 1):
 								            line = line.strip()
 								            if not line:
 								                continue
 								            try:
 								                record = json.loads(line)
 								            except json.JSONDecodeError:
 								                print(f'  WARNING: manifest line {lineno} is not valid JSON — skipped')
 								                continue
 								            if not isinstance(record, dict):
 								                # Later steps call .get() on every entry; a bare list,
 								                # number or string would make them raise.
 								                print(f'  WARNING: manifest line {lineno} is not a JSON object — skipped')
 								                continue
 								            existing.append(record)
 								# ── 2. Hash existing pool files for deduplication ────────────────────────────
 								def file_hash(path):
 								    """Return the hex-encoded SHA-256 digest of the bytes stored at `path`."""
 								    digest = hashlib.sha256()
 								    with open(path, 'rb') as stream:
 								        digest.update(stream.read())
 								    return digest.hexdigest()
 								# Content hashes of every manifest entry whose file is present in seeds/.
 								existing_hashes = set()
 								for entry in existing:
 								    name = entry.get('file')
 								    # A missing or empty name would resolve to the seeds directory itself,
 								    # which exists but cannot be opened for hashing, so skip such entries.
 								    if not isinstance(name, str) or not name:
 								        continue
 								    fpath = os.path.join(seeds_dir, name)
 								    if os.path.isfile(fpath):
 								        existing_hashes.add(file_hash(fpath))
 								# ── 3. Collect qualifying candidates from generation JSONL files ──────────────
 								qualifying = []  # (fitness, push3_path, gen_idx, cand_str)
 								for fname in sorted(os.listdir(output_dir)):
 								    if not (fname.startswith('generation_') and fname.endswith('.jsonl')):
 								        continue
 								    try:
-												fix: generation_N.jsonl candidate_id format mismatch vs filenames (#669)

											
										
										
											2026-03-14 04:27:59 +00:00
+								        gen_idx = int(fname[len('generation_'):-len('.jsonl')])  # validate integer suffix
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								    except ValueError:
 								        continue
 								    with open(os.path.join(output_dir, fname)) as f:
 								        for line in f:
 								            try:
 								                d       = json.loads(line)
 								                cid     = d.get('candidate_id', '')
 								                fitness = int(d.get('fitness', 0))
 								                if fitness < threshold:
 								                    continue
-												fix: generation_N.jsonl candidate_id format mismatch vs filenames (#669)

											
										
										
											2026-03-14 04:27:59 +00:00
+								                # cid format: "candidate_XXX" (gen_idx derived from enclosing filename)
 								                if not cid.startswith('candidate_'):
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								                    continue
-												fix: generation_N.jsonl candidate_id format mismatch vs filenames (#669)

											
										
										
											2026-03-14 04:27:59 +00:00
+								                cand_str  = cid[len('candidate_'):]      # numeric suffix, e.g. "001"
 								                push3_path = os.path.join(
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								                    work_dir, f'gen_{gen_idx}',
 								                    f'candidate_{int(cand_str):03d}.push3'
 								                )
 								                if os.path.exists(push3_path):
 								                    qualifying.append((fitness, push3_path, gen_idx, cand_str))
 								            except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
 								                pass
 								qualifying.sort(key=lambda x: x[0], reverse=True)
 								# ── 4. Deduplicate and assign filenames (resolve --run-id reuse collisions) ───
 								# Walk the qualifying candidates in their current order, drop any whose
 								# content hash is already in the pool or was already picked in this pass,
 								# and give each remaining candidate a usable filename in seeds/.
 								new_items = []     # (fitness, push3_path, manifest_entry)
 								seen      = set(existing_hashes)
 								for fitness, push3_path, gen_idx, cand_str in qualifying:
 								    digest = file_hash(push3_path)
 								    if digest in seen:
 								        continue
 								    seen.add(digest)
 								    # Preferred name: run{run_id}_gen{gen_idx:03d}_c{cand_str}.push3.
 								    # When a file with different content already holds that name (same
 								    # run-id reused), try the suffixes _r2, _r3, … until the name is either
 								    # unused or already holds identical content.
 								    base    = f'run{run_id}_gen{gen_idx:03d}_c{cand_str}'
 								    attempt = 1
 								    while True:
 								        suffix   = '' if attempt == 1 else f'_r{attempt}'
 								        filename = f'{base}{suffix}.push3'
 								        dest     = os.path.join(seeds_dir, filename)
 								        if not os.path.exists(dest) or file_hash(dest) == digest:
 								            break
 								        attempt += 1
 								    new_items.append((fitness, push3_path, dict(
 								        file=filename,
 								        fitness=fitness,
 								        origin='evolved',
 								        run=run_id,
 								        generation=gen_idx,
 								        date=today,
 								    )))
 								# Nothing new to admit: report and stop before the pool is touched.
 								if len(new_items) == 0:
 								    print(f'No new qualifying candidates from run {run_id} '
 								          f'(threshold={threshold}, scanned {len(qualifying)} above-threshold hits)')
 								    sys.exit(0)
 								# ── 5. Separate pinned (hand-written) from evolved; top-100 cap on evolved only
-												fix: evo_run004_champion fitness inflated by token value (#670) (#704)

- Add fitness_flags="token_value_inflation" to evo_run004_champion in
  manifest.jsonl so callers can detect the inflated value without
  discarding the entry entirely.
- Add effective_fitness() helper in evolve.sh pool admission (step 5)
  that returns 0 for any entry with a token_value_inflation flag,
  preventing inflated scores from biasing the top-100 evolved pool
  ranking or eviction decisions.
- Document in evolve.sh that raw fitness values are only comparable
  within the same evaluation run.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 01:08:13 +00:00
+								#
 								# NOTE: raw fitness values are only comparable within the same evaluation run.
 								# Entries whose fitness_flags contain 'token_value_inflation' are ranked as
 								# fitness=0 so that inflated scores do not bias pool admission or eviction.
 								def effective_fitness(entry):
 								    """Return the integer fitness used to rank a manifest entry.
 								    Returns 0 when the entry is flagged 'token_value_inflation', when it has
 								    no fitness, or when its fitness cannot be converted to an int.  A
 								    malformed manifest entry therefore ranks last instead of raising.
 								    """
 								    flags = entry.get('fitness_flags') or ''
 								    try:
 								        inflated = 'token_value_inflation' in flags
 								    except TypeError:
 								        # flags is not a string or container (e.g. a number): treat as unflagged.
 								        inflated = False
 								    if inflated:
 								        return 0
 								    try:
 								        return int(entry.get('fitness') or 0)
 								    except (TypeError, ValueError, OverflowError):
 								        return 0
 								pinned  = [(effective_fitness(e), e, None) for e in existing
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								           if e.get('origin') != 'evolved']
-												fix: evo_run004_champion fitness inflated by token value (#670) (#704)

- Add fitness_flags="token_value_inflation" to evo_run004_champion in
  manifest.jsonl so callers can detect the inflated value without
  discarding the entry entirely.
- Add effective_fitness() helper in evolve.sh pool admission (step 5)
  that returns 0 for any entry with a token_value_inflation flag,
  preventing inflated scores from biasing the top-100 evolved pool
  ranking or eviction decisions.
- Document in evolve.sh that raw fitness values are only comparable
  within the same evaluation run.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-14 01:08:13 +00:00
+								evolved = [(effective_fitness(e), e, None) for e in existing
-												fix: feat: Seed kindergarten — persistent top-100 candidate pool (#667) (#683)

Fixes #667

## Changes
## Summary

Implemented persistent top-100 candidate pool in `tools/push3-evolution/evolve.sh`:

### Changes

**`--run-id <N>` flag** (line 96)
- Optional integer; auto-increments from highest `run` field in `manifest.jsonl` when omitted
- Zero-padded to 3 digits (`001`, `002`, …)

**Seeds pool constants** (after path canonicalization)
- `SEEDS_DIR` → `$SCRIPT_DIR/seeds/`
- `POOL_MANIFEST` → `seeds/manifest.jsonl`
- `ADMISSION_THRESHOLD` → `6000000000000000000000` (6e21 wei)

**`--diverse-seeds` mode** now has two paths:
1. **Pool mode** (pool non-empty): random-shuffles the pool and takes up to `POPULATION` candidates — real evolved diversity, not parametric clones
2. **Fallback** (pool empty): original `seed-gen-cli` parametric variant behavior
- Both paths fall back to mutating `--seed` to fill any shortfall

**Step 5 — End-of-run admission** (after the diff step):
1. Scans all `generation_*.jsonl` in `OUTPUT_DIR` for candidates with `fitness ≥ 6e21`
2. Maps `candidate_id` (e.g. `gen2_c005`) back to `.push3` files in `WORK_DIR` (still exists since cleanup fires on EXIT)
3. Deduplicates by SHA-256 content hash against existing pool
4. Names new files `run{RUN_ID}_gen{N}_c{MMM}.push3`
5. Merges with existing pool, sorts by fitness descending, keeps top 100
6. Copies admitted files to `seeds/`, removes evicted evolved files (never hand-written), rewrites `manifest.jsonl`

Co-authored-by: openhands <openhands@all-hands.dev>
Reviewed-on: https://codeberg.org/johba/harb/pulls/683
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>

											
										
										
											2026-03-13 20:45:03 +01:00
+								           if e.get('origin') == 'evolved']
 								# Add this run's candidates to the evolved list as (fitness, entry, src_path)
 								# tuples, rank by fitness (highest first), and split at the MAX_EVOLVED cap.
 								evolved.extend((fitness, entry, push3_path)
 								               for fitness, push3_path, entry in new_items)
 								evolved.sort(key=lambda item: item[0], reverse=True)
 								admitted_evolved, evicted_evolved = evolved[:MAX_EVOLVED], evolved[MAX_EVOLVED:]
 								# ── 6. Copy admitted new files; remove evicted evolved files ─────────────────
 								# In both lists src_path is None for entries that were already in the manifest
 								# and is the source file path for candidates produced by this run.
 								admitted_count = 0
 								for _, entry, src_path in admitted_evolved:
 								    if src_path is not None:          # new candidate
 								        dest = os.path.join(seeds_dir, entry['file'])
 								        shutil.copy2(src_path, dest)
 								        print(f'  admitted: {entry["file"]}  fitness={entry["fitness"]}')
 								        admitted_count += 1
 								for _, entry, src_path in evicted_evolved:
 								    if src_path is not None:          # rejected before being copied
 								        print(f'  rejected (below pool floor): {entry["file"]}  fitness={entry["fitness"]}')
 								        continue
 								    # Existing evolved entry pushed out of the pool.
 								    name = entry.get('file')
 								    if not isinstance(name, str) or not name:
 								        # No usable filename: joining it would point at the seeds directory
 								        # itself, and os.remove() on a directory raises.
 								        continue
 								    fpath = os.path.join(seeds_dir, name)
 								    if os.path.isfile(fpath):
 								        os.remove(fpath)
 								        # .get() so a manifest entry without a fitness field cannot raise
 								        # after its file has already been deleted.
 								        print(f'  evicted from pool: {name}  fitness={entry.get("fitness")}')
 								# Warn about pinned (hand-written) entries that rank at or below the pool
 								# floor.  The floor used here is the fitness of the first evicted evolved
 								# entry, so nothing is reported unless something was evicted.
 								if pinned and evicted_evolved:
 								    pool_floor = evicted_evolved[0][0]
 								    outranked = [(fit, entry) for fit, entry, _ in pinned if fit <= pool_floor]
 								    for fit, entry in outranked:
 								        print(f'  WARNING: pinned seed "{entry.get("file")}" (fitness={fit}) '
 								              f'ranks below evolved pool floor ({pool_floor}) — kept in manifest regardless')
 								# ── 7. Rewrite manifest.jsonl atomically via temp-file + rename ──────────────
 								# The new manifest holds the admitted evolved entries plus every pinned entry,
 								# ordered by ranking fitness (highest first).
 								admitted = admitted_evolved + pinned
 								admitted.sort(key=lambda x: x[0], reverse=True)
 								manifest_dir = os.path.dirname(manifest_path)
 								# Write to a temp file in the manifest's own directory, then rename it over
 								# the manifest so readers never see a half-written file.
 								tmp_path = None
 								try:
 								    with tempfile.NamedTemporaryFile('w', dir=manifest_dir, delete=False, suffix='.tmp') as tmp:
 								        tmp_path = tmp.name
 								        for _, entry, _ in admitted:
 								            tmp.write(json.dumps(entry) + '\n')
 								    os.replace(tmp_path, manifest_path)
 								except BaseException:
 								    # delete=False means nobody else removes the temp file; do not leave a
 								    # stray *.tmp behind when the write or the rename fails.
 								    if tmp_path is not None and os.path.exists(tmp_path):
 								        os.remove(tmp_path)
 								    raise
 								print(f'Pool updated: {len(admitted)} entries total '
 								      f'({len(admitted_evolved)} evolved + {len(pinned)} pinned), '
 								      f'+{admitted_count} from run {run_id}')
 								PYEOF
 								# Relay every line captured in $_ADMISSION_OUT through log.
 								# `|| [ -n "$_line" ]` keeps a final line that has no trailing newline (for
 								# example output cut short by a crash); a plain `read` loop would drop it.
 								while IFS= read -r _line || [ -n "$_line" ]; do
 								  log "  $_line"
 								done < "$_ADMISSION_OUT"
 								# A non-zero exit status is reported as a warning only.
 								if [ "$_ADMISSION_RC" -ne 0 ]; then
 								  log "  WARNING: seed pool admission failed (exit $_ADMISSION_RC) — pool unchanged"
 								fi