fix: evolution scoring — 3 bugs made all candidates report fitness=0 (#665)

## Three bugs in evolve.sh

1. **Heredoc stdin conflict** — `py_stats()` used a `<<PYEOF` heredoc, which stole stdin from the pipe, so python never received the score values → stats were always `min=0 max=0 mean=0`.
2. **Bash integer overflow** — the global best comparison used `[ $MAX -gt $GLOBAL_BEST_FITNESS ]`, which overflows on uint256 wei values (>9.2e18) → best was always tracked as 0.
3. **candidate_id mismatch** — evolve.sh looked up `gen0_c000`, but batch-eval produces `candidate_000` (derived from the filename) → the score lookup always returned the default 0.

All 3 previous evolution runs (150+ candidates) reported all zeros despite batch-eval correctly scoring them at ~8.26e21 wei.

## Fix

- `py_stats`: heredoc → `python3 -c` inline
- Global best: bash `[ -gt ]` → `python3` big number comparison
- Score lookup: use `basename $CAND_FILE` instead of synthetic CID

Co-authored-by: root <root@debian-g-2vcpu-8gb-ams3-01>
Reviewed-on: https://codeberg.org/johba/harb/pulls/665
Reviewed-by: review_bot <review_bot@noreply.codeberg.org>
2026-03-13 10:02:24 +01:00 · 2026-03-13 10:02:24 +01:00 · 3f435f8459
commit 3f435f8459
parent 5127e96ab3
1 changed files with 5 additions and 11 deletions
--- a/tools/push3-evolution/evolve.sh
+++ b/tools/push3-evolution/evolve.sh
@@ -156,15 +156,7 @@ run_seed_gen_cli() {

 # Integer min/max/mean via python3 (bash arithmetic overflows on wei values).
 py_stats() {
-  # Args: space-separated integers on stdin as a Python list literal
-  python3 - "$@" <<'PYEOF'
-import sys
-nums = [int(x) for x in sys.stdin.read().split()]
-if not nums:
-    print("0 0 0")
-    sys.exit(0)
-print(min(nums), max(nums), round(sum(nums) / len(nums)))
-PYEOF
+  python3 -c "import sys; nums = [int(x) for x in sys.stdin.read().split()]; print(min(nums) if nums else 0, max(nums) if nums else 0, round(sum(nums)/len(nums)) if nums else 0)"
 }

 # Top-N selection: return filepaths of the N highest-scoring candidates (descending).
@@ -383,7 +375,9 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do

    if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
      # Look up pre-computed score from batch-eval.sh output.
-      SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
+      # batch-eval uses filename as candidate_id (e.g. "candidate_000")
+      BATCH_CID="$(basename "$CAND_FILE" .push3)"
+      SCORE=$(python3 - "$BATCH_CID" "$BATCH_SCORES_FILE" <<'PYEOF'
 import json, sys
 cid = sys.argv[1]
 with open(sys.argv[2]) as f:
@@ -451,7 +445,7 @@ print(max(entries, key=lambda x: x[0])[1])
 PYEOF
 ) || fail "Could not determine best candidate from $SCORES_FILE"

-  if [ "$MAX" -gt "$GLOBAL_BEST_FITNESS" ] || [ "$GLOBAL_BEST_FITNESS" -eq -1 ]; then
+  if python3 -c "import sys; sys.exit(0 if int(sys.argv[1]) > int(sys.argv[2]) else 1)" "$MAX" "$GLOBAL_BEST_FITNESS" || [ "$GLOBAL_BEST_FITNESS" = "-1" ]; then
    GLOBAL_BEST_FITNESS="$MAX"
    GLOBAL_BEST_GEN="$gen"
    GLOBAL_BEST_CAND="$BEST_FILE_THIS_GEN"