#!/usr/bin/env bash
# =============================================================================
# evolve.sh — Push3 evolution orchestrator
#
# Outer evolutionary loop: generate candidates → score → select → repeat.
#
# Usage:
#   ./tools/push3-evolution/evolve.sh \
#     --seed optimizer_v3.push3 \
#     --population 10 \
#     --generations 5 \
#     --mutation-rate 2 \
#     --elites 2 \
#     [--output evolved/] \
#     [--diverse-seeds] \
#     [--run-id <N>]
#
# --diverse-seeds Initialise gen_0 with diverse candidates. When the
# persistent seeds pool (tools/push3-evolution/seeds/) is
# non-empty, a random sample from the pool is used (crossover
# between hand-written and evolved programs). When the pool is
# empty, falls back to the parametric seed-gen-cli variants.
# Any shortfall (pool or variants < --population) is filled by
# mutating the main seed.
#
# --run-id <N> Integer identifier for this run, used to name candidates
# admitted to the seeds pool (e.g. run003_gen2_c005.push3).
# Auto-incremented from the highest existing run in the pool
# manifest when omitted.
#
# Algorithm:
#   1. Initialize population: N copies of seed, each with M random mutations.
#   2. For each generation:
#      a. Score all candidates via fitness.sh
#      b. Log generation stats (min/max/mean fitness, best candidate)
#      c. Select k survivors via tournament selection (k = population/2)
#      d. Elitism: copy top N candidates unchanged into next generation
#      e. Generate next population: mutate survivors + crossover pairs
#   3. Output best candidate as Push3 file.
#   4. Show diff: original vs evolved (which constants changed, by how much).
#
# Output:
#   <output>/run_NNN/        NNN auto-increments from the highest existing run dir
#     generation_0.jsonl     {candidate_id, fitness, mutations_applied}
#     generation_1.jsonl
#     ...
#     best.push3             highest-fitness program
#     diff.txt               parameter changes vs original
#     evolution.log          full run log
#
# Environment:
#   ANVIL_FORK_URL   Passed through to fitness.sh when Anvil is not running.
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Foundry tools (forge, cast, anvil)
export PATH="${HOME}/.foundry/bin:${PATH}"

# Absolute directory of this script; sibling tools are resolved relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh"
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
SEED_GEN_CLI="$SCRIPT_DIR/seed-gen-cli.ts"

# EVAL_MODE controls which fitness backend is used:
#   revm (default) — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
#                    Requires BASE_RPC_URL env var. 10-100× faster at scale.
#   anvil          — per-candidate Anvil+forge-script pipeline (fitness.sh)
EVAL_MODE="${EVAL_MODE:-revm}"
fix: feat: revm-based fitness evaluator for evolution at scale (#604)
Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.
New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
and emits one {"candidate_id","fitness"} JSON line per candidate.
Skips gracefully when BASE_RPC_URL is unset (CI-safe).
- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
transpiles+compiles each candidate sequentially, writes a two-file
manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
in a single forge test run and parses the score JSON from stdout.
Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
When EVAL_MODE=revm, batch-scores every candidate in a generation with
one batch-eval.sh call instead of N sequential fitness.sh processes;
scores are looked up from the JSONL output in the per-candidate loop.
Default remains EVAL_MODE=anvil for backward compatibility.
Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 11:54:41 +00:00
2026-03-11 20:56:19 +00:00
# =============================================================================
# Argument parsing
# =============================================================================

# Defaults; each may be overridden by the matching command-line option.
SEED=""
POPULATION=10
GENERATIONS=5
MUTATION_RATE=2
ELITES=2
OUTPUT_DIR="evolved"
DIVERSE_SEEDS=false
RUN_ID=""

# Parse command-line options into the globals above.
# Arguments: the script's command-line arguments.
# Exits with status 2 on an unknown option or an option missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --diverse-seeds)
        # Boolean flag: takes no value.
        DIVERSE_SEEDS=true
        shift
        continue
        ;;
      --seed|--population|--generations|--mutation-rate|--elites|--output|--run-id)
        # Report a missing value explicitly instead of tripping `set -u` on $2.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          exit 2
        fi
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 2
        ;;
    esac

    case "$1" in
      --seed)          SEED="$2" ;;
      --population)    POPULATION="$2" ;;
      --generations)   GENERATIONS="$2" ;;
      --mutation-rate) MUTATION_RATE="$2" ;;
      --elites)        ELITES="$2" ;;
      --output)        OUTPUT_DIR="$2" ;;
      --run-id)        RUN_ID="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"
2026-03-14 11:08:04 +00:00
# A seed program is mandatory and must exist on disk.
if [ -z "$SEED" ]; then echo "Error: --seed required" >&2; exit 2; fi
if [ ! -f "$SEED" ]; then echo "Error: seed file not found: $SEED" >&2; exit 2; fi

# Validate numeric args: each must consist of digits only and be non-zero.
for _name_val in "population:$POPULATION" "generations:$GENERATIONS" "mutation-rate:$MUTATION_RATE"; do
  _name="${_name_val%%:*}"
  # Strip only the leading "name:" so a value that itself contains ':' is
  # validated in full rather than by its last segment.
  _val="${_name_val#*:}"
  # The all-zeros pattern rejects 0 without an arithmetic comparison, which
  # would misbehave on values beyond the shell's integer range.
  if ! [[ "$_val" =~ ^[0-9]+$ ]] || [[ "$_val" =~ ^0+$ ]]; then
    echo "Error: --${_name} must be a positive integer (got: $_val)" >&2
    exit 2
  fi
done

# Elites may be zero (elitism disabled) but must still be an integer.
if ! [[ "$ELITES" =~ ^[0-9]+$ ]]; then
  echo "Error: --elites must be a non-negative integer (got: $ELITES)" >&2
  exit 2
fi
2026-03-11 20:56:19 +00:00
# Canonicalize paths
SEED="$(cd "$(dirname "$SEED")" && pwd)/$(basename "$SEED")"

# Resolve base output dir and create run_NNN subdirectory
mkdir -p "$OUTPUT_DIR"
BASE_DIR="$(cd "$OUTPUT_DIR" && pwd)"

# Auto-increment: find the highest run_NNN directory under BASE_DIR and add 1.
# Only directories whose whole name matches run_<digits> are counted.
RUN_NUM=$(python3 - "$BASE_DIR" <<'PYEOF'
import sys, os, re
base = sys.argv[1]
max_n = 0
if os.path.isdir(base):
    for name in os.listdir(base):
        m = re.fullmatch(r'run_(\d+)', name)
        if m and os.path.isdir(os.path.join(base, name)):
            max_n = max(max_n, int(m.group(1)))
print(f"{max_n + 1:03d}")
PYEOF
)

# From here on OUTPUT_DIR refers to this run's own directory.
OUTPUT_DIR="$BASE_DIR/run_${RUN_NUM}"
mkdir -p "$OUTPUT_DIR"
LOG="$OUTPUT_DIR/evolution.log"
2026-03-13 20:45:03 +01:00
# Seeds pool — persistent candidate pool across all runs
SEEDS_DIR="$SCRIPT_DIR/seeds"
POOL_MANIFEST="$SEEDS_DIR/manifest.jsonl"
ADMISSION_THRESHOLD=6000000000000000000000  # 6e21 wei

# Validate/auto-compute RUN_ID
if [ -n "$RUN_ID" ]; then
  if ! [[ "$RUN_ID" =~ ^[0-9]+$ ]] || [[ "$RUN_ID" =~ ^0+$ ]]; then
    echo "Error: --run-id must be a positive integer (got: $RUN_ID)" >&2
    exit 2
  fi
  # Force base 10: a leading zero would otherwise make printf read the value
  # as octal ("010" -> 8, "08" -> error).
  RUN_ID=$(printf '%03d' "$((10#$RUN_ID))")
else
  # Auto-increment: find the highest run ID in the manifest and add 1
  if [ -f "$POOL_MANIFEST" ]; then
    # Malformed lines are skipped; if python itself fails, fall back to "001".
    RUN_ID=$(python3 - "$POOL_MANIFEST" <<'PYEOF'
import json, sys
max_run = 0
with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
            r = d.get("run")
            if r is not None:
                max_run = max(max_run, int(r))
        except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
            # AttributeError: valid JSON that is not an object (no .get).
            pass
print(f"{max_run + 1:03d}")
PYEOF
    ) || RUN_ID="001"
  else
    RUN_ID="001"
  fi
fi
2026-03-11 20:56:19 +00:00
# =============================================================================
# Helpers
# =============================================================================
# Write a "[evolve] "-prefixed message to stderr and append it to the run log.
# Globals:   LOG (read) — path of the log file appended to
# Arguments: message words, joined with spaces
log() {
  local msg="[evolve] $*"
  printf '%s\n' "$msg" >&2
  printf '%s\n' "$msg" >> "$LOG"
}
# Log an error message via log() and terminate the script with status 2.
# Arguments: message words, joined with spaces
fail() {
  log "ERROR: $*"
  exit 2
}
# Locate a tsx runner (TypeScript executor for mutate-cli.ts).
# Tries: tsx in PATH → local node_modules → npx tsx.
# Globals: SCRIPT_DIR (read)
# Outputs: the runner command line on stdout
# Returns: 1 when no runner is available
find_tsx_cmd() {
  if command -v tsx &>/dev/null; then
    echo "tsx"
  elif [ -x "$SCRIPT_DIR/node_modules/.bin/tsx" ]; then
    echo "$SCRIPT_DIR/node_modules/.bin/tsx"
  elif command -v npx &>/dev/null; then
    # Two words: callers expand the result unquoted so it splits into
    # command + argument.
    echo "npx tsx"
  else
    return 1
  fi
}
# Run the mutate-cli.ts with the given arguments.
# All mutation operations run from SCRIPT_DIR so relative TS imports resolve.
# Globals:   SCRIPT_DIR, TSX_CMD, MUTATE_CLI (read)
# Arguments: forwarded verbatim to mutate-cli.ts
run_mutate_cli() {
  # TSX_CMD is intentionally unquoted: it may hold two words ("npx tsx").
  # The subshell keeps the cd from leaking into the caller.
  # shellcheck disable=SC2086
  ( cd "$SCRIPT_DIR" && $TSX_CMD "$MUTATE_CLI" "$@" )
}
fix: feat: Push3 evolution — diverse seed population (#638)
Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.
Variants systematically cover:
- Staked% thresholds: 80, 85, 88, 91, 94, 97
- Penalty thresholds: 30, 50, 70, 100
- Bull params: 4 presets (aggressive → mild)
- Bear params: 4 presets (standard → very mild)
- Tax distributions: exponential (seed), linear, sqrt
Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.
evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.
All generated programs pass transpiler stack validation (33 new tests).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 04:48:04 +00:00
# Run the seed-gen-cli.ts with the given arguments.
# Executed from SCRIPT_DIR, in a subshell so the caller's cwd is unchanged.
# Globals:   SCRIPT_DIR, TSX_CMD, SEED_GEN_CLI (read)
# Arguments: forwarded verbatim to seed-gen-cli.ts
run_seed_gen_cli() {
  # TSX_CMD is intentionally unquoted: it may hold two words ("npx tsx").
  # shellcheck disable=SC2086
  ( cd "$SCRIPT_DIR" && $TSX_CMD "$SEED_GEN_CLI" "$@" )
}
2026-03-11 20:56:19 +00:00
# Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
# Input:   whitespace-separated integers on stdin
# Outputs: "min max mean" on one line; "0 0 0" when stdin has no numbers
py_stats() {
  # The mean is computed with exact rational arithmetic: float division loses
  # precision on wei-scale values. round() on a Fraction rounds half to even,
  # the same rule round() applies to a float.
  python3 -c '
import sys
from fractions import Fraction

nums = [int(x) for x in sys.stdin.read().split()]
if nums:
    print(min(nums), max(nums), round(Fraction(sum(nums), len(nums))))
else:
    print(0, 0, 0)
'
}
2026-03-12 22:29:23 +00:00
# Top-N selection: return filepaths of the N highest-scoring candidates (descending).
# Arguments: $1 - number of candidates to return
#            $2 - scores file, one tab-separated "idx score filepath" per line
# Outputs:   up to N filepaths, one per line, best score first
py_top_n() {
  local n="$1"
  local scores_file="$2"
  python3 - "$n" "$scores_file" <<'PYEOF'
import sys
n = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        # Lines with fewer than three fields are ignored.
        if len(parts) >= 3:
            entries.append((int(parts[1]), parts[2]))
# Stable sort: candidates with equal scores keep their file order.
entries.sort(key=lambda x: x[0], reverse=True)
for _, path in entries[:n]:
    print(path)
PYEOF
}
2026-03-11 21:29:14 +00:00
# Tournament selection: given a scores file (one "idx score filepath" per line),
# run k tournaments of size 2 and return winner filepaths (one per line).
# Arguments: $1 - number of tournaments (k)
#            $2 - scores file with tab-separated fields
# Returns:   non-zero when the scores file holds no usable entries
py_tournament() {
  local k="$1"
  local scores_file="$2"
  python3 - "$k" "$scores_file" <<'PYEOF'
import sys, random
k = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
    for line in f:
        parts = line.rstrip('\n').split('\t')
        if len(parts) >= 3:
            entries.append((int(parts[0]), int(parts[1]), parts[2]))
if not entries:
    sys.exit(1)
for _ in range(k):
    # Contestants are drawn with replacement, so a candidate may face itself.
    a = random.choice(entries)
    b = random.choice(entries)
    # Ties go to the first contestant drawn.
    winner = a if a[1] >= b[1] else b
    print(winner[2])
PYEOF
}
# =============================================================================
# Tool checks
# =============================================================================

# Interpreters needed by every run.
for _tool in python3 node; do
  command -v "$_tool" &>/dev/null || fail "$_tool not found in PATH"
done

[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
# Make the helper script executable if it is not already.
[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"

# Backend-specific requirements; any EVAL_MODE other than revm/anvil is rejected.
if [ "$EVAL_MODE" = "revm" ]; then
  [ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH"
  [ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH"
  [ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
  command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)"
elif [ "$EVAL_MODE" != "anvil" ]; then
  fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
fi

# Command line used to execute the TypeScript CLIs.
TSX_CMD="$(find_tsx_cmd)" || fail \
  "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
# =============================================================================
# Work directory — holds all candidate .push3 files across generations
# =============================================================================

WORK_DIR="$(mktemp -d)"
# Remove the work directory on any exit path. `--` guards against a path that
# starts with '-', and `:?` aborts rather than running rm -rf on an empty path.
cleanup() { rm -rf -- "${WORK_DIR:?}"; }
trap cleanup EXIT
# =============================================================================
# Log run header
# =============================================================================

# Record the effective configuration so each evolution.log is self-describing.
log "========================================================"
log "Push3 Evolution — $(date -u '+%Y-%m-%dT%H:%M:%SZ')"
log "  Seed:          $SEED"
log "  Population:    $POPULATION"
log "  Generations:   $GENERATIONS"
log "  Mutation rate: $MUTATION_RATE"
log "  Elites:        $ELITES"
log "  Diverse seeds: $DIVERSE_SEEDS"
log "  Run ID:        $RUN_ID"
log "  Base dir:      $BASE_DIR"
log "  Output:        $OUTPUT_DIR"
log "  TSX:           $TSX_CMD"
log "  Eval mode:     $EVAL_MODE"
log "========================================================"
# =============================================================================
# Step 1 — Initialize generation 0
#
# N copies of the seed, each independently mutated MUTATION_RATE times.
# =============================================================================

log ""
log "=== Initializing population ==="

# Generation 0 candidates live under the temporary work directory.
GEN_DIR="$WORK_DIR/gen_0"
mkdir -p "$GEN_DIR"
fix: feat: Push3 evolution — diverse seed population (#638)
Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.
Variants systematically cover:
- Staked% thresholds: 80, 85, 88, 91, 94, 97
- Penalty thresholds: 30, 50, 70, 100
- Bull params: 4 presets (aggressive → mild)
- Bear params: 4 presets (standard → very mild)
- Tax distributions: exponential (seed), linear, sqrt
Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.
evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.
All generated programs pass transpiler stack validation (33 new tests).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 04:48:04 +00:00
if [ " $DIVERSE_SEEDS " = "true" ] ; then
2026-03-13 20:45:03 +01:00
# --- Diverse-seeds mode: prefer persistent pool; fall back to seed-gen-cli ---
fix: feat: Push3 evolution — diverse seed population (#638)
Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.
Variants systematically cover:
- Staked% thresholds: 80, 85, 88, 91, 94, 97
- Penalty thresholds: 30, 50, 70, 100
- Bull params: 4 presets (aggressive → mild)
- Bear params: 4 presets (standard → very mild)
- Tax distributions: exponential (seed), linear, sqrt
Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.
evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.
All generated programs pass transpiler stack validation (33 new tests).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 04:48:04 +00:00
# Next free gen_0 slot index; every diverse-seeds stage below advances it.
VARIANT_IDX=0

# Build a random sample list from the pool in one pass (also determines if
# the pool has any usable entries, avoiding a second manifest parse).
# Output: up to $POPULATION seed paths, one per line, in $POOL_SAMPLE_LIST.
POOL_SAMPLE_LIST="$WORK_DIR/pool_sample.txt"
POOL_COUNT=0
if [ -f "$POOL_MANIFEST" ]; then
  python3 - "$POOL_MANIFEST" "$SEEDS_DIR" "$POPULATION" > "$POOL_SAMPLE_LIST" <<'PYEOF'
import json, sys, os, random

manifest_path, seeds_dir, n = sys.argv[1], sys.argv[2], int(sys.argv[3])
entries = []
with open(manifest_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
        except json.JSONDecodeError:
            continue  # malformed manifest line: ignore it
        # Only JSON objects carrying a non-empty string 'file' are usable.
        name = d.get('file') if isinstance(d, dict) else None
        if not isinstance(name, str) or not name:
            continue
        fpath = os.path.join(seeds_dir, name)
        # isfile (not exists): an empty/odd name must never resolve to a
        # directory such as seeds_dir itself, which cannot be copied as a seed.
        if os.path.isfile(fpath):
            entries.append(fpath)
random.shuffle(entries)
for path in entries[:n]:
    print(path)
PYEOF
  # Number of sampled paths; 0 when the list is missing or empty.
  POOL_COUNT=$(wc -l < "$POOL_SAMPLE_LIST" 2>/dev/null || echo 0)
fi
if [ "$POOL_COUNT" -gt 0 ]; then
  # --- Pool mode: random sample from the seeds pool ---
  log " diverse-seeds: sampling up to $POPULATION candidates from pool ($POOL_COUNT available)"

  while IFS= read -r POOL_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
    # Claim the slot only when the copy succeeded. A seed that vanished or is
    # unreadable is skipped, so the slot index is reused and the mutation fill
    # that follows can still top gen_0 up to the full population.
    if cp "$POOL_FILE" "$CAND_FILE"; then
      printf '0\n' > "${CAND_FILE%.push3}.ops"   # copied verbatim: 0 mutations
      VARIANT_IDX=$((VARIANT_IDX + 1))
    else
      log " diverse-seeds: could not copy pool seed $POOL_FILE, skipping"
    fi
  done < "$POOL_SAMPLE_LIST"

  log " diverse-seeds: seeded $VARIANT_IDX candidate(s) from pool"
else
  # --- Fallback: parametric variants from seed-gen-cli (pool is empty) ---
  log " diverse-seeds: pool empty, falling back to seed-gen-cli parametric variants"
  [ -f "$SEED_GEN_CLI" ] || fail "seed-gen-cli.ts not found at $SEED_GEN_CLI"

  SEED_VARIANTS_DIR="$WORK_DIR/seed_variants"
  SEED_VARIANTS_LIST="$WORK_DIR/seed_variants_list.txt"

  # Run seed-gen-cli as a direct command (not inside <(...)) so its exit code is
  # checked by the parent shell and fail() aborts the entire script on error.
  # Stderr goes to the log file for diagnostics rather than being discarded.
  run_seed_gen_cli --count "$POPULATION" --output-dir "$SEED_VARIANTS_DIR" \
    > "$SEED_VARIANTS_LIST" 2>>"$LOG" \
    || fail "seed-gen-cli.ts failed to generate variants"

  while IFS= read -r VARIANT_FILE && [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
    CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$VARIANT_IDX").push3"
    # Same rule as pool mode: a failed copy must not consume a slot.
    if cp "$VARIANT_FILE" "$CAND_FILE"; then
      printf '0\n' > "${CAND_FILE%.push3}.ops"
      VARIANT_IDX=$((VARIANT_IDX + 1))
    else
      log " diverse-seeds: could not copy variant $VARIANT_FILE, skipping"
    fi
  done < "$SEED_VARIANTS_LIST"
fi
# Top up gen_0: every slot not yet filled (indices VARIANT_IDX .. POPULATION-1)
# receives an independently mutated copy of the base seed. The .ops sidecar
# records the mutation rate that was applied.
while [ "$VARIANT_IDX" -lt "$POPULATION" ]; do
  printf -v CAND_FILE '%s/candidate_%03d.push3' "$GEN_DIR" "$VARIANT_IDX"

  if ! MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE"); then
    fail "Failed to mutate seed for fallback candidate $VARIANT_IDX"
  fi

  printf '%s\n' "$MUTATED" > "$CAND_FILE"
  printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"

  VARIANT_IDX=$((VARIANT_IDX + 1))
done

log " Initialized ${POPULATION} candidates in gen_0 (diverse-seeds, pool=$POOL_COUNT)"
fix: feat: Push3 evolution — diverse seed population (#638)
Add seed-generator.ts module and seed-gen-cli.ts CLI that produce
parametric Push3 variants for initial population seeding.
Variants systematically cover:
- Staked% thresholds: 80, 85, 88, 91, 94, 97
- Penalty thresholds: 30, 50, 70, 100
- Bull params: 4 presets (aggressive → mild)
- Bear params: 4 presets (standard → very mild)
- Tax distributions: exponential (seed), linear, sqrt
Total combination space: 6×4×4×4×3 = 1152 variants.
selectVariants(n) samples evenly so every axis is represented.
evolve.sh gains --diverse-seeds flag: when set, gen_0 is seeded with
parametric variants instead of N copies of the same mutated seed.
Remaining slots (if population > generated variants) fall back to
mutations of the base seed.
All generated programs pass transpiler stack validation (33 new tests).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 04:48:04 +00:00
else
# --- Default mode: N copies of the seed, each independently mutated ---
# Counted with a plain shell loop instead of `seq`: no external process, and no
# dependence on how a given `seq` treats an empty range (BSD seq counts
# downward when last < first, which would emit bogus candidate indices).
i=0
while [ "$i" -lt "$POPULATION" ]; do
  CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' "$i").push3"
  MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
    || fail "Failed to mutate seed for initial candidate $i"
  printf '%s\n' "$MUTATED" > "$CAND_FILE"
  # Sidecar records how many mutations produced this candidate.
  printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
  i=$((i + 1))
done
log " Initialized ${POPULATION} candidates in gen_0"
fi
2026-03-11 20:56:19 +00:00
# =============================================================================
# Step 2 — Evolution loop
# =============================================================================

# Best-so-far bookkeeping across all generations. -1 marks "nothing scored
# yet"; the loop replaces these as soon as a generation has been evaluated.
GLOBAL_BEST_CAND=""
GLOBAL_BEST_GEN=-1
GLOBAL_BEST_FITNESS=-1

# Directory of the population being scored; advanced after each generation.
CURRENT_GEN_DIR="$GEN_DIR"
for gen in $( seq 0 $(( GENERATIONS - 1 )) ) ; do
log ""
log " === Generation $((gen + 1)) / $GENERATIONS ==="

# Per-generation outputs: the JSONL result log (kept in the output directory)
# and the tab-delimited index/score/path table used for selection.
JSONL_FILE="$OUTPUT_DIR/generation_${gen}.jsonl"
SCORES_FILE="$WORK_DIR/scores_gen_${gen}.txt"

# --- a. Score all candidates ---
SCORE_VALUES=""
CAND_COUNT=0

# In revm mode, batch-score all candidates in one forge test invocation before
# the per-candidate loop. Scores are written to a temp JSONL file that the
# loop reads with a fast Python lookup.
BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"
if [ "$EVAL_MODE" = "revm" ]; then
  declare -a _BATCH_FILES=()
  for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    [ -f "$_CF" ] && _BATCH_FILES+=("$_CF")
  done

  if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
    BATCH_EC=0
    # Stderr is appended to the run log (same convention as the seed-gen-cli
    # call) so that an infrastructure abort below leaves diagnostics behind
    # instead of discarding them.
    bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>>"$LOG" \
      || BATCH_EC=$?

    # Exit 2 is treated as an infrastructure failure: abort the whole run.
    if [ "$BATCH_EC" -eq 2 ]; then
      fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
    fi
    log " revm batch scoring complete (exit $BATCH_EC)"
  fi
fi
2026-03-11 20:56:19 +00:00
# Score every candidate of the current generation. Per candidate this appends
# one JSON line to $JSONL_FILE and one "index<TAB>score<TAB>path" row to
# $SCORES_FILE, and accumulates the score in $SCORE_VALUES for the stats step.
for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
  [ -f "$CAND_FILE" ] || continue   # unmatched glob leaves the literal pattern

  CAND_IDX="${CAND_FILE##*candidate_}"
  CAND_IDX="${CAND_IDX%.push3}"
  # Canonical candidate_id format: "candidate_XXX" (matches source filename and batch-eval IDs).
  CID="candidate_${CAND_IDX}"

  # Read mutations_applied from sidecar; default 0 if missing.
  OPS_FILE="${CAND_FILE%.push3}.ops"
  MUTATIONS_APPLIED=0
  [ -f "$OPS_FILE" ] && MUTATIONS_APPLIED=$(cat "$OPS_FILE")
  # The value is later formatted with printf '%d'; anything that is not a plain
  # decimal integer (empty or corrupted sidecar) falls back to 0 so the JSONL
  # record stays well-formed.
  if ! [[ "$MUTATIONS_APPLIED" =~ ^(0|[1-9][0-9]*)$ ]]; then
    MUTATIONS_APPLIED=0
  fi

  SCORE=0
  FITNESS_EC=0
  if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
    # Look up the pre-computed score from batch-eval.sh output. batch-eval uses
    # the filename stem as candidate_id, which is exactly $CID.
    SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys

cid = sys.argv[1]
with open(sys.argv[2]) as f:
    for line in f:
        try:
            d = json.loads(line)
            if d.get("candidate_id") == cid:
                print(d["fitness"])
                sys.exit(0)
        except (json.JSONDecodeError, KeyError, AttributeError):
            # Non-JSON noise, a record without "fitness", or a JSON value that
            # is not an object: skip the line and keep searching.
            pass
print(0)
PYEOF
    ) || SCORE=0
  else
    # Anvil mode (or revm fallback): score candidate individually.
    SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
    # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
    if [ "$FITNESS_EC" -eq 2 ]; then
      fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
    fi
  fi

  # Validate that score is a non-negative integer; treat any other output as invalid.
  if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
    log " $CID: invalid/missing score, using 0"
    SCORE=0
  else
    log " $CID: fitness=$SCORE"
  fi

  # Append to JSONL — use the actual operations recorded for this candidate.
  # Fitness is written with %s: it is kept as a digit string, not shell arithmetic.
  printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
    "$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"

  # Record index, score, and filepath for selection (tab-delimited so paths with spaces are safe).
  printf '%d\t%s\t%s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"

  SCORE_VALUES="$SCORE_VALUES $SCORE"
  CAND_COUNT=$((CAND_COUNT + 1))
done

if [ "$CAND_COUNT" -eq 0 ]; then
  fail "No candidates found in $CURRENT_GEN_DIR"
fi
# --- b. Log generation stats ---
read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"

# Pick this generation's winner straight from the scores table; the helper
# prints the file path of the highest-scoring row (first one wins on ties).
if ! BEST_FILE_THIS_GEN=$(python3 - "$SCORES_FILE" <<'PYEOF'
import sys

best_score = None
best_path = None
with open(sys.argv[1]) as fh:
    for raw in fh:
        fields = raw.rstrip('\n').split('\t')
        if len(fields) < 3:
            continue
        score = int(fields[1])
        # Strict '>' keeps the earliest row when scores are equal.
        if best_score is None or score > best_score:
            best_score, best_path = score, fields[2]
if best_path is None:
    sys.exit(1)
print(best_path)
PYEOF
); then
  fail "Could not determine best candidate from $SCORES_FILE"
fi

# Promote to global best when this generation's max beats the record so far,
# or when nothing has been recorded yet (sentinel -1). The comparison is
# delegated to Python's integers rather than shell arithmetic.
if python3 -c "import sys; sys.exit(0 if int(sys.argv[1]) > int(sys.argv[2]) else 1)" \
     "$MAX" "$GLOBAL_BEST_FITNESS" \
   || [ "$GLOBAL_BEST_FITNESS" = "-1" ]; then
  GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"
  GLOBAL_BEST_GEN="$gen"
  GLOBAL_BEST_FITNESS="$MAX"
  log " New global best: gen=$gen fitness=$GLOBAL_BEST_FITNESS file=$(basename "$BEST_FILE_THIS_GEN")"
fi
# Skip next-generation creation after the final generation
[ "$gen" -eq "$((GENERATIONS - 1))" ] && break

# --- c. Tournament selection (k = population / 2) ---
K=$((POPULATION / 2))
[ "$K" -lt 1 ] && K=1   # always select at least one survivor

SURVIVOR_FILES=()
while IFS= read -r WIN_FILE; do
  [ -n "$WIN_FILE" ] || continue   # ignore blank lines from the selector
  SURVIVOR_FILES+=("$WIN_FILE")
done < <(py_tournament "$K" "$SCORES_FILE")

# The offspring step picks parents with RANDOM % ${#SURVIVOR_FILES[@]}; an
# empty list would be a division by zero there, so stop with a clear message.
[ "${#SURVIVOR_FILES[@]}" -gt 0 ] \
  || fail "Tournament selection returned no survivors for generation $gen"

log " Selected ${#SURVIVOR_FILES[@]} survivors via tournament"
2026-03-12 22:29:23 +00:00
# --- d/e. Generate next population (elitism + offspring) ---
# NEXT_IDX is the next free candidate slot in the new generation directory.
NEXT_IDX=0
NEXT_GEN_DIR="$WORK_DIR/gen_$((gen + 1))"
mkdir -p "$NEXT_GEN_DIR"

# --- d. Elitism: copy top ELITES candidates unchanged ---
if [ "$ELITES" -gt 0 ]; then
  # Collect the elite paths first, dropping any that no longer exist on disk.
  ELITE_FILES=()
  while IFS= read -r ELITE_FILE; do
    if [ -f "$ELITE_FILE" ]; then
      ELITE_FILES+=("$ELITE_FILE")
    fi
  done < <(py_top_n "$ELITES" "$SCORES_FILE")

  # Carry each elite over verbatim; its sidecar records zero mutations.
  for ELITE_FILE in "${ELITE_FILES[@]}"; do
    printf -v DEST '%s/candidate_%03d.push3' "$NEXT_GEN_DIR" "$NEXT_IDX"
    cp "$ELITE_FILE" "$DEST"
    printf '0\n' > "${DEST%.push3}.ops"
    NEXT_IDX=$((NEXT_IDX + 1))
  done

  log " Elitism: carried over ${#ELITE_FILES[@]} top candidate(s) unchanged"
fi
# --- e. Fill remaining slots with mutation and crossover offspring ---
# Both loops are counted with arithmetic `for` rather than `seq 1 N`: with N=0
# GNU seq prints nothing, but BSD seq counts down ("1 0") and would create
# extra candidates beyond the requested population.
NON_ELITE=$((POPULATION - NEXT_IDX))
HALF=$((NON_ELITE / 2))

# First half of remaining: mutate random survivors
for ((_i = 0; _i < HALF; _i++)); do
  SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
  DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
  if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
    printf '%s\n' "$MUTATED" > "$DEST"
    printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
  else
    # Fallback: copy the survivor as-is to keep population size stable
    cp "$SUR" "$DEST"
    printf '0\n' > "${DEST%.push3}.ops"
  fi
  NEXT_IDX=$((NEXT_IDX + 1))
done

# Second half of remaining: crossover random survivor pairs
# (REMAINING absorbs the odd slot when NON_ELITE is not even.)
REMAINING=$((POPULATION - NEXT_IDX))
for ((_i = 0; _i < REMAINING; _i++)); do
  SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
  SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
  DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' "$NEXT_IDX").push3"
  if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
    printf '%s\n' "$CROSSED" > "$DEST"
    printf '0\n' > "${DEST%.push3}.ops"   # crossover counts as 0 mutations
  else
    # Fallback: mutate one survivor
    if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
      printf '%s\n' "$MUTATED" > "$DEST"
      printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
    else
      # Last resort: carry the survivor over unchanged.
      cp "$SUR_A" "$DEST"
      printf '0\n' > "${DEST%.push3}.ops"
    fi
  fi
  NEXT_IDX=$((NEXT_IDX + 1))
done

log " Generated ${NEXT_IDX} candidates for generation $((gen + 1))"
CURRENT_GEN_DIR="$NEXT_GEN_DIR"
done
# =============================================================================
# Step 3 — Output best candidate
# =============================================================================

# A best candidate must have been recorded and must still exist on disk.
[[ -n "$GLOBAL_BEST_CAND" && -f "$GLOBAL_BEST_CAND" ]] \
  || fail "No valid best candidate recorded — evolution produced no scorable output"

# Publish the winner under a fixed name in the output directory.
BEST_OUTPUT="$OUTPUT_DIR/best.push3"
cp "$GLOBAL_BEST_CAND" "$BEST_OUTPUT"

log ""
log " Best candidate → $BEST_OUTPUT"
log " Fitness: $GLOBAL_BEST_FITNESS (generation $GLOBAL_BEST_GEN)"
# =============================================================================
# Step 4 — Diff: original vs evolved constants
# =============================================================================

# Compare the large integer literals of the seed and of the best candidate
# position by position and write a human-readable report to diff.txt.
DIFF_OUTPUT="$OUTPUT_DIR/diff.txt"
python3 - "$SEED" "$BEST_OUTPUT" > "$DIFF_OUTPUT" <<'PYEOF'
import sys, re

COMMENT_RE = re.compile(r';;[^\n]*')
BIG_INT_RE = re.compile(r'\b(\d{6,})\b')


def extract_ints(path):
    """Return every integer literal of 6+ digits in a Push3 file, in order.

    ';;' comments are removed first so numbers inside them are ignored.
    """
    with open(path) as src:
        code = COMMENT_RE.sub('', src.read())
    return [int(tok) for tok in BIG_INT_RE.findall(code)]


seed_path, best_path = sys.argv[1], sys.argv[2]
orig = extract_ints(seed_path)
best = extract_ints(best_path)
n_seed, n_best = len(orig), len(best)

print(f"=== Push3 Evolution Diff ===")
print(f"Seed: {seed_path}")
print(f"Best: {best_path}")
print()

# Constants present in both files: report each position whose value changed.
changed = 0
for idx in range(min(n_seed, n_best)):
    o, b = orig[idx], best[idx]
    if o == b:
        continue
    pct = (b - o) / o * 100 if o != 0 else float('inf')
    print(f" const[{idx:3d}]: {o:>25d} → {b:>25d} (Δ={b - o:+d}, {pct:+.2f}%)")
    changed += 1

# Length mismatch: list trailing additions, or note how many were dropped.
extra = n_best - n_seed
if extra > 0:
    for offset, val in enumerate(best[n_seed:]):
        print(f" const[{n_seed + offset:3d}]: {'(new)':>25s} → {val:>25d}")
elif extra < 0:
    print(f" ({-extra} constant(s) removed from end)")

print()
if changed == 0 and extra == 0:
    print("No constant changes — evolution applied structural mutations only.")
else:
    total = min(n_seed, n_best)
    print(f"Summary: {changed} of {total} constant(s) changed.")
PYEOF

log " Diff written to $DIFF_OUTPUT"
log ""
# Echo the report on stderr as well.
cat "$DIFF_OUTPUT" >&2

log "========================================================"
log "Evolution complete."
log " Generations run: $GENERATIONS"
log " Best fitness: $GLOBAL_BEST_FITNESS"
log " Best from gen: $GLOBAL_BEST_GEN"
log " Output directory: $OUTPUT_DIR"
log "========================================================"
2026-03-13 20:45:03 +01:00
# =============================================================================
# Step 5 — Seed pool admission
#
# Scan all generation JSONL files for candidates that scored at or above the
# admission threshold ($ADMISSION_THRESHOLD, nominally 6e21). Deduplicate by
# Push3 content hash against the existing pool. Admit qualifying candidates
# into seeds/ and rewrite manifest.jsonl, keeping at most the top-100 evolved
# entries by fitness; hand-written (non-evolved) entries are always retained.
# =============================================================================
log ""
log " === Seed pool admission (run= $RUN_ID , threshold= $ADMISSION_THRESHOLD ) === "
mkdir -p " $SEEDS_DIR "
_ADMISSION_OUT = " $WORK_DIR /admission_output.txt "
_ADMISSION_RC = 0
python3 - " $OUTPUT_DIR " " $WORK_DIR " " $SEEDS_DIR " \
" $ADMISSION_THRESHOLD " " $RUN_ID " " $( date -u '+%Y-%m-%d' ) " \
> " $_ADMISSION_OUT " 2>& 1 <<'PYEOF' || _ADMISSION_RC = $?
import json, sys, os, hashlib, shutil, tempfile
output_dir, work_dir, seeds_dir = sys.argv[ 1] , sys.argv[ 2] , sys.argv[ 3]
threshold = int( sys.argv[ 4] )
run_id = sys.argv[ 5]
today = sys.argv[ 6]
MAX_EVOLVED = 100 # cap applies to evolved entries only; hand-written are always pinned
manifest_path = os.path.join( seeds_dir, 'manifest.jsonl' )
# ── 1. Read existing manifest ─────────────────────────────────────────────────
existing = [ ]
if os.path.exists( manifest_path) :
with open( manifest_path) as f:
for line in f:
line = line.strip( )
if line:
try:
existing.append( json.loads( line) )
except json.JSONDecodeError:
pass
# ── 2. Hash existing pool files for deduplication ────────────────────────────
def file_hash( path) :
with open( path, 'rb' ) as fh:
return hashlib.sha256( fh.read( ) ) .hexdigest( )
existing_hashes = set( )
for entry in existing:
fpath = os.path.join( seeds_dir, entry.get( 'file' , '' ) )
if os.path.exists( fpath) :
existing_hashes.add( file_hash( fpath) )
# ── 3. Collect qualifying candidates from generation JSONL files ──────────────
qualifying = [ ] # (fitness, push3_path, gen_idx, cand_str)
for fname in sorted( os.listdir( output_dir) ) :
if not ( fname.startswith( 'generation_' ) and fname.endswith( '.jsonl' ) ) :
continue
try:
2026-03-14 04:27:59 +00:00
gen_idx = int( fname[ len( 'generation_' ) :-len( '.jsonl' ) ] ) # validate integer suffix
2026-03-13 20:45:03 +01:00
except ValueError:
continue
with open( os.path.join( output_dir, fname) ) as f:
for line in f:
try:
d = json.loads( line)
cid = d.get( 'candidate_id' , '' )
fitness = int( d.get( 'fitness' , 0) )
if fitness < threshold:
continue
2026-03-14 04:27:59 +00:00
# cid format: "candidate_XXX" (gen_idx derived from enclosing filename)
if not cid.startswith( 'candidate_' ) :
2026-03-13 20:45:03 +01:00
continue
2026-03-14 04:27:59 +00:00
cand_str = cid[ len( 'candidate_' ) :] # numeric suffix, e.g. "001"
push3_path = os.path.join(
2026-03-13 20:45:03 +01:00
work_dir, f'gen_{gen_idx}' ,
f'candidate_{int(cand_str):03d}.push3'
)
if os.path.exists( push3_path) :
qualifying.append( ( fitness, push3_path, gen_idx, cand_str) )
except ( json.JSONDecodeError, ValueError, TypeError, AttributeError) :
pass
qualifying.sort( key = lambda x: x[ 0] , reverse = True)
# ── 4. Deduplicate and assign filenames (resolve --run-id reuse collisions) ───
new_items = [ ] # (fitness, push3_path, manifest_entry)
seen = set( existing_hashes)
for fitness, push3_path, gen_idx, cand_str in qualifying:
h = file_hash( push3_path)
if h in seen:
continue
seen.add( h)
# Canonical name: run{run_id}_gen{gen_idx:03d}_c{cand_str}.push3
# If a different file already occupies that name (same run-id reused), add
# a counter suffix (_r2, _r3, …) until we find an unused or same-content slot.
base = f'run{run_id}_gen{gen_idx:03d}_c{cand_str}'
filename = f'{base}.push3'
dest = os.path.join( seeds_dir, filename)
if os.path.exists( dest) and file_hash( dest) != h:
counter = 2
while True:
filename = f'{base}_r{counter}.push3'
dest = os.path.join( seeds_dir, filename)
if not os.path.exists( dest) or file_hash( dest) = = h:
break
counter += 1
entry = {
'file' : filename,
'fitness' : fitness,
'origin' : 'evolved' ,
'run' : run_id,
'generation' : gen_idx,
'date' : today,
}
new_items.append( ( fitness, push3_path, entry) )
if not new_items:
print( f'No new qualifying candidates from run {run_id} '
f'(threshold={threshold}, scanned {len(qualifying)} above-threshold hits)' )
sys.exit( 0)
# ── 5. Separate pinned (hand-written) from evolved; top-100 cap on evolved only
2026-03-14 01:08:13 +00:00
#
# NOTE: raw fitness values are only comparable within the same evaluation run.
# Entries whose fitness_flags contain 'token_value_inflation' are ranked as
# fitness=0 so that inflated scores do not bias pool admission or eviction.
# NOTE(review): only 'token_value_inflation' is checked here; any other flag
# leaves the raw fitness in place — confirm whether more flags should zero it.
def effective_fitness(entry):
    """Return the integer fitness used to rank a manifest entry.

    Args:
        entry: manifest record (dict); reads the 'fitness_flags' and
            'fitness' keys, both of which may be missing or None.

    Returns:
        0 when 'fitness_flags' contains 'token_value_inflation', when the
        fitness is missing/None/zero, or when it cannot be converted to an
        integer; otherwise int(fitness).
    """
    flags = entry.get('fitness_flags') or ''
    if 'token_value_inflation' in flags:
        return 0
    try:
        return int(entry.get('fitness') or 0)
    except (TypeError, ValueError, OverflowError):
        # One malformed record (non-numeric string, list, NaN/Infinity) must
        # not abort the whole admission step; rank it at the bottom instead.
        return 0
# Split the existing manifest entries into two ranked lists of
# (rank_fitness, manifest_entry, src_path) tuples. src_path is None for
# entries that are already in the pool. Anything whose origin is not
# 'evolved' counts as pinned.
pinned = []
evolved = []
for _known in existing:
    _bucket = evolved if _known.get('origin') == 'evolved' else pinned
    _bucket.append((effective_fitness(_known), _known, None))

# New candidates join the evolved list with the fitness recorded for them
# and the path of the file that would have to be copied into the pool.
evolved.extend(
    (fitness, entry, push3_path) for fitness, push3_path, entry in new_items
)

# Best first. The sort is stable, so on equal fitness the existing entries
# (appended first) stay ahead of the new ones. Only the first MAX_EVOLVED
# evolved entries are kept; the remainder is evicted.
evolved = sorted(evolved, key=lambda ranked: ranked[0], reverse=True)
admitted_evolved, evicted_evolved = evolved[:MAX_EVOLVED], evolved[MAX_EVOLVED:]
# ── 6. Copy admitted new files; remove evicted evolved files ─────────────────
# Every tuple is (rank_fitness, manifest_entry, src_path). src_path is None for
# entries that were already in the pool, and the source file path for
# candidates that are new in this run.
admitted_count = 0
for _, entry, src_path in admitted_evolved:
    if src_path is None:  # already in the pool — nothing to copy
        continue
    dest = os.path.join(seeds_dir, entry['file'])
    shutil.copy2(src_path, dest)
    print(f' admitted: {entry["file"]} fitness={entry["fitness"]}')
    admitted_count += 1

for _, entry, src_path in evicted_evolved:
    if src_path is not None:  # rejected before being copied
        print(f' rejected (below pool floor): {entry["file"]} fitness={entry["fitness"]}')
        continue
    # Existing evolved entry pushed out of the pool. Removal is best-effort:
    # a manifest record without a 'file' name would otherwise resolve to
    # seeds_dir itself, and a failed delete would abort the script after new
    # files were copied but before the manifest is rewritten.
    evicted_name = entry.get('file')
    if evicted_name:
        fpath = os.path.join(seeds_dir, evicted_name)
        if os.path.isfile(fpath):
            try:
                os.remove(fpath)
            except OSError as exc:
                print(f' WARNING: could not remove evicted seed file {fpath}: {exc}')
    print(f' evicted from pool: {evicted_name} fitness={entry.get("fitness")}')

# Warn if any pinned (hand-written) entry ranks below the current pool floor.
# The floor used here is the rank fitness of the first evicted entry, so the
# warning is only produced when at least one evolved entry was evicted.
if evicted_evolved and pinned:
    pool_floor = evicted_evolved[0][0]
    for fit, entry, _ in pinned:
        if fit <= pool_floor:
            print(f' WARNING: pinned seed "{entry.get("file")}" (fitness={fit}) '
                  f'ranks below evolved pool floor ({pool_floor}) — kept in manifest regardless')
# ── 7. Rewrite manifest.jsonl atomically via temp-file + rename ──────────────
# The manifest lists the admitted evolved entries plus every pinned entry,
# ordered by rank fitness (highest first), one JSON object per line.
admitted = admitted_evolved + pinned
admitted.sort(key=lambda x: x[0], reverse=True)

# The temp file is created next to the manifest so that os.replace stays on
# one filesystem. If anything fails before the rename, the temp file is removed
# again instead of being left behind in the seeds directory.
manifest_dir = os.path.dirname(manifest_path)
tmp_path = None
try:
    with tempfile.NamedTemporaryFile('w', dir=manifest_dir, delete=False, suffix='.tmp') as tmp:
        tmp_path = tmp.name
        for _, entry, _ in admitted:
            tmp.write(json.dumps(entry) + '\n')
    # Temp files are created readable by the owner only; carry the existing
    # manifest's permission bits over so the rename does not tighten them.
    if os.path.exists(manifest_path):
        shutil.copymode(manifest_path, tmp_path)
    os.replace(tmp_path, manifest_path)
except BaseException:
    if tmp_path is not None and os.path.exists(tmp_path):
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # cleanup is best-effort; the original error is re-raised below
    raise

print(f'Pool updated: {len(admitted)} entries total '
      f'({len(admitted_evolved)} evolved + {len(pinned)} pinned), '
      f'+{admitted_count} from run {run_id}')
PYEOF
# Relay the admission step's captured output: each line read from the file
# named by _ADMISSION_OUT is handed to log, one call per line.
# NOTE(review): the spaces around '=' in the IFS assignment and just inside the
# double quotes look like extraction damage — confirm against the real file.
while IFS = read -r _line; do log " $_line " ; done < " $_ADMISSION_OUT "
# A non-zero _ADMISSION_RC is reported as a warning only; nothing in these
# lines exits or returns.
if [ " $_ADMISSION_RC " -ne 0 ] ; then
log " WARNING: seed pool admission failed (exit $_ADMISSION_RC ) — pool unchanged "
fi