# formulas/run-evolution.toml
#
# Push3 optimizer evolution pipeline — evaluate seed pool, evolve a population
# of candidates, admit survivors back to the pool, deliver champions via PR.
#
# Type: act. Produces git artifacts (new .push3 champions, updated
# manifest.jsonl and the evidence file, all delivered in one PR to main).
#
# Depends on: #973 (evidence/evolution/ directory structure)
# Identity and classification of this formula.
[formula]
id = "run-evolution"
name = "Push3 Optimizer Evolution"
description = "Evaluate seed pool, evolve Push3 optimizer population, admit survivors, deliver champions via PR."
# Formula types:
#   "sense" → read-only, produces metrics only
#   "act"   → produces git artifacts (cf. run-red-team, run-evolution)
type = "act"
# Issue numbers this formula depends on (#973: evidence/evolution/ directory structure).
depends_on = [973]
# ── Inputs ─────────────────────────────────────────────────────────────────────
# Starting program for evolve.sh (--seed); optional, defaults to optimizer_v3.
[inputs.seed]
type = "string"
required = false
default = "tools/push3-evolution/seeds/optimizer_v3.push3"
description = "Starting seed .push3 file (passed as --seed to evolve.sh). Serves as the fallback mutation source when the pool does not fill the full population."
# Candidates per generation; forwarded as --population.
[inputs.population]
type = "integer"
required = false
default = 10
description = "Number of candidates per generation (--population)."
# Length of the evolution run; forwarded as --generations.
[inputs.generations]
type = "integer"
required = false
default = 5
description = "Number of evolution generations to run (--generations)."
# A count of mutations (not a probability), hence the integer type; forwarded as --mutation-rate.
[inputs.mutation_rate]
type = "integer"
required = false
default = 2
description = "Mutations applied per candidate per generation (--mutation-rate)."
# Elitism size; forwarded as --elites.
[inputs.elites]
type = "integer"
required = false
default = 2
description = "Top-scoring candidates carried forward unchanged each generation (--elites)."
# The only required input: exported as BASE_RPC_URL in both the pre_invocation
# and the invocation of [execution].
[inputs.base_rpc_url]
type = "string"
required = true
description = """
Base network RPC endpoint forwarded as BASE_RPC_URL to both evaluate-seeds.sh
and evolve.sh. Required for the revm evaluator (default EVAL_MODE).
Example: https://mainnet.base.org or a fork URL from a running Anvil instance.
"""
# Optional run-ID override (no default).
# NOTE(review): the [execution] invocation does not pass --run-id to evolve.sh,
# so a value supplied here currently appears to have no effect — confirm
# whether it should be forwarded when set.
[inputs.run_id]
type = "integer"
required = false
description = """
Override the run ID used when naming candidates admitted to the seed pool
(e.g. run009_gen2_c005.push3). Auto-incremented from the highest existing
run in manifest.jsonl when omitted (recommended).
"""
# Forward spec only (status = "planned"): not consumed by evolve.sh yet.
[inputs.attack_dir]
type = "string"
required = false
default = "onchain/script/backtesting/attacks"
description = """
Directory of .jsonl adversarial attack scenarios. Intended as an adversarial
fitness input — candidates scored against these patterns in addition to the
revm fitness metric. Not yet forwarded to evolve.sh; documented here as a
forward spec.
"""
status = "planned"
# ── Execution ──────────────────────────────────────────────────────────────────
#
# Step 0 — evaluate-seeds.sh — runs before the main evolution loop.
# Scores any manifest.jsonl entries with fitness: null so the pool
# sampler has real fitness values when selecting gen_0 candidates.
#
# Steps 1-5 — evolve.sh — owns the full evolution lifecycle:
# 1. Initialise population: random sample from seed pool (--diverse-seeds).
# 2. Score candidates via revm batch evaluator (batch-eval.sh).
# 3. Tournament-select survivors; apply elitism + mutation / crossover.
# 4. Repeat for N generations; track global best.
# 5. Admit candidates above threshold (6e21 wei) into seeds/; rewrite manifest.
#
# evolve.sh always passes --diverse-seeds so gen_0 inherits pool diversity.
# --run-id is omitted to let evolve.sh auto-increment from manifest.jsonl.
# Two-phase run: pre_script / pre_invocation (evaluate-seeds.sh) comes first,
# then script / invocation (evolve.sh). Every {placeholder} below names an
# entry under [inputs]; the output directory is fixed at tmp/evolution.
[execution]
pre_script = "tools/push3-evolution/evaluate-seeds.sh"
pre_invocation = "BASE_RPC_URL={base_rpc_url} bash tools/push3-evolution/evaluate-seeds.sh"
script = "tools/push3-evolution/evolve.sh"
invocation = "BASE_RPC_URL={base_rpc_url} bash tools/push3-evolution/evolve.sh --seed {seed} --population {population} --generations {generations} --mutation-rate {mutation_rate} --elites {elites} --output tmp/evolution --diverse-seeds"
# Exit codes propagated by evolve.sh:
#   0  evolution complete; best candidate found and pool admission attempted
#   2  infrastructure error (RPC unreachable, missing tool, revm eval failed)
# ── Steps ──────────────────────────────────────────────────────────────────────
# Step 0: resolve null fitness values in the manifest before evolution starts.
[[steps]]
id = "evaluate-seeds"
description = """
Score manifest entries with fitness: null before the evolution loop begins.
tools/push3-evolution/evaluate-seeds.sh:
  - Reads tools/push3-evolution/seeds/manifest.jsonl.
  - For every entry where fitness is null, runs fitness.sh against the
    corresponding .push3 file and records the numeric score.
  - Rewrites manifest.jsonl atomically (temp-file rename).
  - Exits 0 when nothing to do (idempotent; safe to re-run).
  - Exits 2 on infrastructure error (eval stack unreachable).
Primary targets: LLM-generated seeds (origin=llm) and evolved entries whose
fitness was nulled due to scoring inflation (fitness_flags: token_value_inflation,
processExecIf_fix). Real fitness values allow --diverse-seeds to weight the
gen_0 sample correctly.
"""
script = "tools/push3-evolution/evaluate-seeds.sh"
# Main evolution loop; {population}, {seed}, {generations} and {elites} in the
# description refer to the entries of the same name under [inputs].
[[steps]]
id = "evolve"
description = """
Run the outer evolutionary loop via tools/push3-evolution/evolve.sh.
Initialisation (gen_0):
  A random sample of up to {population} candidates is drawn from the seed pool
  (tools/push3-evolution/seeds/); any shortfall is filled by mutating {seed}.
  Seeds with unevaluated fitness (null) are included in the sample with equal
  probability — evaluate-seeds (step 0) should have resolved most of these.
Per-generation loop ({generations} iterations):
  a. Score all candidates in a single forge test invocation via
     tools/push3-evolution/revm-evaluator/batch-eval.sh (EVAL_MODE=revm).
     Falls back to per-candidate fitness.sh (EVAL_MODE=anvil) if revm is
     unavailable.
  b. Log generation stats: min/max/mean fitness, best candidate file.
  c. Tournament-select survivors (k = population / 2).
  d. Elitism: carry the top {elites} candidates forward unchanged.
  e. Fill remaining slots: mutate random survivors (first half) and apply
     pairwise crossover (second half); fall back to copy on failure.
Output per run (tmp/evolution/run_NNN/):
  generation_0.jsonl … generation_N.jsonl   per-candidate fitness records
  best.push3                                global champion
  diff.txt                                  constant delta vs seed
  evolution.log                             full run transcript
Pool admission (after final generation):
  Candidates scoring above 6e21 wei are deduplicated by content hash and
  admitted to tools/push3-evolution/seeds/, named run{NNN}_gen{G}_c{C}.push3.
  manifest.jsonl is rewritten atomically; the evolved pool is capped at 100
  entries by fitness rank (hand-written / LLM seeds are always pinned).
"""
script = "tools/push3-evolution/evolve.sh"
output_dir = "tmp/evolution"
# Planned step (status = "planned"): adversarial scoring of the champion.
[[steps]]
id = "score-attacks"
description = """
[Planned] Score the champion against known adversarial attack scenarios in
{attack_dir}/*.jsonl via onchain/script/backtesting/AttackRunner.s.sol.
For each attack file:
  - Replay the op sequence against a fresh Anvil snapshot.
  - Record LM total ETH before and after.
  - Emit one fitness adjustment: penalise the candidate's score if the
    attack succeeds (floor broken), reward if the floor holds.
Results feed back into the adversarial fitness component — candidates that
survive all known attacks rank higher in the evidence record.
Skipped when {attack_dir} is empty or AttackRunner is unavailable.
"""
status = "planned"
attack_source = "{attack_dir}/*.jsonl"
forge_script = "onchain/script/backtesting/AttackRunner.s.sol"
# Aggregates the run outputs into the dated evidence JSON file.
[[steps]]
id = "collect"
description = """
Aggregate evolve.sh outputs into evidence/evolution/{date}.json.
Reads:
  - tmp/evolution/run_NNN/generation_N.jsonl      per-generation fitness records
  - tmp/evolution/run_NNN/best.push3              champion file
  - tools/push3-evolution/seeds/manifest.jsonl    admission results
Writes evidence/evolution/{date}.json conforming to the schema in
evidence/README.md ## Schema: evolution/YYYY-MM-DD.json.
Verdict: "improved" if best_fitness > best seed fitness in manifest before
the run; "no_improvement" otherwise.
"""
output = "evidence/evolution/{date}.json"
schema = "evidence/README.md"
# Disk-space hygiene: its description lists generation_*.jsonl among the files
# removed, as "already aggregated into evidence", which is why it is placed
# after the collect step.
[[steps]]
id = "cleanup"
description = """
Remove intermediate per-generation candidate files that are not part of the
final results. Only the following files are retained after this step:
  tmp/evolution/run_NNN/best.push3       global champion
  tmp/evolution/run_NNN/diff.txt         constant delta vs seed
  tmp/evolution/run_NNN/evolution.log    full run transcript
  tools/push3-evolution/seeds/run{NNN}_*.push3
                                         top-N newly admitted seeds
                                         (≤ elites per generation)
Files removed:
  tmp/evolution/run_NNN/generation_*.jsonl   per-candidate fitness records
                                             (already aggregated into evidence)
  tmp/evolution/run_NNN/candidate_*.push3    intermediate per-generation
                                             candidates that are not elites
Rationale: the evolution box reached 91% disk utilisation in run #1025 because
these intermediate files were never cleaned up. Aggregated fitness data is
preserved in evidence/evolution/{date}.json; the per-candidate .push3 files for
non-elite generations are not needed once the evidence file is written.
"""
# Delivery: branch → commit → push → PR → issue comment, strictly in that order.
#
# Sub-step 1 stages the result files BEFORE running `git checkout -- .`.
# That command restores tracked files from the index, so running it first
# would revert this run's own update to the tracked manifest.jsonl.
#
# NOTE(review): this step names "master" as base/target branch, while every
# [products.*] delivery says "PR to main" — confirm the default branch name.
# NOTE(review): the branch name uses {run_id}, an optional input with no
# default, whereas file names use the runtime-assigned {NNN} — confirm what
# {run_id} resolves to when the input is omitted.
# NOTE(review): products.champion_files notes the champion file is only
# created when a candidate exceeds the admission threshold, yet sub-step 2
# requires all three files — confirm the expected behaviour on such runs.
[[steps]]
id = "deliver"
description = """
Commit results to a branch, push, open PR, then post summary comment.
ORDERING IS MANDATORY — each sub-step must complete before the next begins.
Do NOT post to the issue before the PR URL is available.
1. CLEAN GIT STATE
   Stage only the files that belong to this run:
     - evidence/evolution/{date}.json
     - tools/push3-evolution/seeds/evo_run{NNN}_champion.push3
     - tools/push3-evolution/seeds/manifest.jsonl
   Then run `git checkout -- .` to discard any remaining working-tree
   modifications that are NOT part of the evolution results (e.g. .sol files
   left over from a prior session, scratch files). Stage first: the checkout
   restores tracked files from the index and would otherwise revert the
   manifest.jsonl update produced by this run.
   Verify `git diff --cached --check` passes before committing.
2. COMMIT TO BRANCH
   Create branch evidence/evolution-run-{run_id} from master.
   Commit the staged result files with message:
     "evo: run{NNN} results — fitness={best_fitness}"
   The commit MUST include all three files above.
3. PUSH AND CREATE PR
   Push the branch to origin.
   Open a Codeberg PR targeting master:
     Title: "evo: run{NNN} champion — fitness={best_fitness}"
     Body: generation-by-generation table (gen, best, mean, worst fitness),
           top-3 admitted candidates with fitness scores, constant diff vs
           seed (from diff.txt), link to evidence file.
   If `git push` or PR creation fails:
     a. Post an error comment to the originating issue with the failure reason
        and the path of the local evidence file.
     b. Leave the issue OPEN.
     c. Exit with a non-zero status — do NOT proceed to step 4.
4. POST SUMMARY COMMENT (only after PR URL is confirmed)
   Post a comment to the originating issue containing:
     - Verdict (improved / no_improvement).
     - Best fitness achieved and which generation it was found in.
     - Admission count: N candidates added to seed pool.
     - Link to the champion PR (required — do not post without it).
     - Link to evidence file committed in the PR.
     - If no_improvement: best fitness achieved and seed pool size.
   Do NOT close the issue in this step; closing is the orchestrator's
   responsibility once the PR is merged.
"""
# ── Products ───────────────────────────────────────────────────────────────────
# Evidence record; shipped in the same PR as the champion files.
[products.evidence_file]
path = "evidence/evolution/{date}.json"
delivery = "PR to main (same PR as champion_files, on branch evidence/evolution-run-{run_id})"
schema = "evidence/README.md" # see ## Schema: evolution/YYYY-MM-DD.json
# Champion program(s) added to the seed directory.
[products.champion_files]
# {NNN} is the auto-incremented run ID assigned by evolve.sh at runtime.
path = "tools/push3-evolution/seeds/evo_run{NNN}_champion.push3"
delivery = "PR to main"
note = "Only created when at least one candidate exceeds the admission threshold (6e21 wei)."
# Seed-pool manifest; travels in the same PR as the champion files.
[products.manifest]
path = "tools/push3-evolution/seeds/manifest.jsonl"
delivery = "PR to main (same PR as champion_files)"
note = "Updated with newly admitted entries and fitness scores from evaluate-seeds."
# Summary comment on the originating issue; always posted after the PR exists.
[products.issue_comment]
delivery = "post to originating issue AFTER PR is created and URL is confirmed"
content = "verdict (improved/no_improvement), best fitness, generation found, admission count, link to champion PR (mandatory), link to evidence file"
on_pr_failure = "post error comment with failure reason and local evidence path; leave issue OPEN; do not close"
on_run_failure = "include best fitness achieved, last generation completed, full log available in tmp/evolution/run_NNN/evolution.log; do not close issue"
ordering_note = "The comment MUST NOT be posted before the PR URL exists. Closing the issue is the orchestrator's responsibility after PR merge, not this formula's."
# ── Resources ──────────────────────────────────────────────────────────────────
# Scheduling hints: a heavy, exclusive job (see the concurrency entry for the
# shared-port reason).
[resources]
profile = "heavy"
compute = "CPU + RAM intensive — transpile + compile + deploy + revm eval per candidate"
rpc = "Base network RPC (BASE_RPC_URL) for revm fork; or Anvil (EVAL_MODE=anvil)"
concurrency = "exclusive — revm evaluator and optional Anvil share port 8545 with run-holdout and run-red-team"
# ── Notes ──────────────────────────────────────────────────────────────────────
# Free-form background notes; each key is an independent topic.
[notes]
no_uups_deployment = """
The evolution pipeline produces Push3 candidate files only — no UUPS proxy
deployment step is wired. Candidates are scored in simulation (revm or Anvil)
and admitted to the seed pool for future runs. Deployment to a live chain is
out of scope until the champion passes holdout and red-team gates.
"""
eval_mode = """
Default EVAL_MODE is revm (batch-eval.sh): all candidates in a generation are
scored in a single forge test invocation against a Base fork, 10-100× faster
than per-candidate Anvil. Set EVAL_MODE=anvil to fall back to fitness.sh
(slower, but does not require BASE_RPC_URL if Anvil is already running).
Gas limit: revm evaluator runs at ~25 candidates × 100 trades per batch.
For larger populations, increase the batch budget in batch-eval.sh.
"""
adversarial_fitness = """
Adversarial fitness against attack scenarios ({attack_dir}/*.jsonl) is planned
but not yet implemented (score-attacks step is status = planned). Currently the
only fitness signal is the revm / Anvil metric from batch-eval.sh / fitness.sh.
When implemented, attack survival will penalise candidates whose floor breaks
under known attack patterns, biasing the population toward safer programs.
"""
fee_fitness = """
Fee optimization against in-market pool data is planned as a second fitness
dimension. Not yet implemented; tracked as a follow-up issue.
"""
pool_cap = """
The evolved seed pool is capped at 100 entries by fitness rank. Hand-written
(origin=hand-written) and LLM-generated (origin=llm) seeds are always pinned
regardless of fitness. Evolved entries below the pool floor are evicted when
new higher-scoring candidates are admitted. Raw fitness values are only
comparable within the same evaluation run; entries with fitness_flags
(token_value_inflation, processExecIf_fix) are ranked as fitness = 0 for
admission and eviction purposes.
"""