From c42a1ca7687038a60343a10b1d7f37ae5eedf980 Mon Sep 17 00:00:00 2001
From: openhands
Date: Sat, 14 Mar 2026 01:08:13 +0000
Subject: [PATCH] fix: evo_run004_champion fitness inflated by token value (#670) (#704)

- Add fitness_flags="token_value_inflation" to evo_run004_champion in
  manifest.jsonl so callers can detect the inflated value without
  discarding the entry entirely.
- Add effective_fitness() helper in evolve.sh pool admission (step 5)
  that returns 0 for any entry with a token_value_inflation flag,
  preventing inflated scores from biasing the top-100 evolved pool
  ranking or eviction decisions.
- Document in evolve.sh that raw fitness values are only comparable
  within the same evaluation run.

Co-Authored-By: Claude Sonnet 4.6
---
 tools/push3-evolution/evolve.sh            | 14 ++++++++++++--
 tools/push3-evolution/seeds/manifest.jsonl |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tools/push3-evolution/evolve.sh b/tools/push3-evolution/evolve.sh
index 95d334e..89624ff 100755
--- a/tools/push3-evolution/evolve.sh
+++ b/tools/push3-evolution/evolve.sh
@@ -828,9 +828,19 @@ if not new_items:
     sys.exit(0)
 
 # ── 5. Separate pinned (hand-written) from evolved; top-100 cap on evolved only
-pinned = [(int(e.get('fitness') or 0), e, None) for e in existing
+#
+# NOTE: raw fitness values are only comparable within the same evaluation run.
+# Entries whose fitness_flags contains 'token_value_inflation' are ranked
+# as fitness=0 so that inflated scores do not bias pool admission or eviction.
+def effective_fitness(entry):
+    flags = entry.get('fitness_flags') or ''
+    if 'token_value_inflation' in flags:
+        return 0
+    return int(entry.get('fitness') or 0)
+
+pinned = [(effective_fitness(e), e, None) for e in existing
           if e.get('origin') != 'evolved']
-evolved = [(int(e.get('fitness') or 0), e, None) for e in existing
+evolved = [(effective_fitness(e), e, None) for e in existing
            if e.get('origin') == 'evolved']
 for fitness, push3_path, entry in new_items:
     evolved.append((fitness, entry, push3_path))
diff --git a/tools/push3-evolution/seeds/manifest.jsonl b/tools/push3-evolution/seeds/manifest.jsonl
index 6f0e0e0..097636c 100644
--- a/tools/push3-evolution/seeds/manifest.jsonl
+++ b/tools/push3-evolution/seeds/manifest.jsonl
@@ -1,4 +1,4 @@
 {"file":"optimizer_v3.push3","fitness":8259844243839650390792,"origin":"hand-written","run":null,"generation":null,"date":"2026-03-10","note":"Original seed optimizer"}
-{"file":"evo_run004_champion.push3","fitness":2307549972110081697617459,"origin":"evolved","run":"004","generation":3,"date":"2026-03-13","note":"First evolution champion. Fitness inflated by token value (#670). Always-bull strategy."}
+{"file":"evo_run004_champion.push3","fitness":2307549972110081697617459,"fitness_flags":"token_value_inflation","origin":"evolved","run":"004","generation":3,"date":"2026-03-13","note":"First evolution champion. Fitness inflated by token value (#670). Always-bull strategy."}
 {"file":"llm_momentum.push3","fitness":null,"origin":"llm","run":null,"generation":null,"date":"2026-03-13","note":"Momentum Follower: smooth sentiment-tracking via direct percentageStaked multiplication. Safety net: floor always >=20%. AnchorWidth scales with tax volatility."}
 {"file":"llm_fee_maximizer.push3","fitness":null,"origin":"llm","run":null,"generation":null,"date":"2026-03-13","note":"LLM seed: fee maximizer. Wide anchor + high share in all regimes. Bullish/high-tax modes push DD and AS to max. (#673)"}