fix: address review findings in evolve.sh (#546)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-11 21:29:14 +00:00
parent 547e8beae8
commit 2ee7feb621

View file

@ -33,6 +33,8 @@
#
# Environment:
# ANVIL_FORK_URL Passed through to fitness.sh when Anvil is not running.
#
# TSX resolution order: tsx in PATH → node_modules/.bin/tsx → npx tsx.
# =============================================================================
set -euo pipefail
@ -130,8 +132,8 @@ print(min(nums), max(nums), round(sum(nums) / len(nums)))
PYEOF
}
# Tournament selection: given a scores file (one "idx score" per line),
# run T tournaments of size 2 and return winner indices (one per line).
# Tournament selection: given a scores file (one "idx score filepath" per line),
# run k tournaments of size 2 and print the winner filepaths (one per line).
# Lines are split on whitespace, so filepaths must not contain whitespace.
py_tournament() {
local k="$1"
local scores_file="$2"
@ -141,15 +143,16 @@ k = int(sys.argv[1])
entries = []
with open(sys.argv[2]) as f:
for line in f:
idx, score = line.split()
entries.append((int(idx), int(score)))
parts = line.split()
if len(parts) >= 3:
entries.append((int(parts[0]), int(parts[1]), parts[2]))
if not entries:
sys.exit(1)
for _ in range(k):
a = random.choice(entries)
b = random.choice(entries)
winner = a if a[1] >= b[1] else b
print(winner[0])
print(winner[2])
PYEOF
}
@ -204,10 +207,10 @@ mkdir -p "$GEN_DIR"
for i in $(seq 0 $((POPULATION - 1))); do
CAND_FILE="$GEN_DIR/candidate_$(printf '%03d' $i).push3"
MUTATED=""
MUTATED=$(run_mutate_cli mutate "$SEED" "$MUTATION_RATE") \
|| fail "Failed to mutate seed for initial candidate $i"
printf '%s\n' "$MUTATED" > "$CAND_FILE"
printf '%d\n' "$MUTATION_RATE" > "${CAND_FILE%.push3}.ops"
done
log "Initialized ${POPULATION} candidates in gen_0"
@ -242,23 +245,34 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
CAND_IDX="${CAND_IDX%.push3}"
CID="gen${gen}_c${CAND_IDX}"
# Read mutations_applied from sidecar; default 0 if missing.
OPS_FILE="${CAND_FILE%.push3}.ops"
MUTATIONS_APPLIED=0
[ -f "$OPS_FILE" ] && MUTATIONS_APPLIED=$(cat "$OPS_FILE")
SCORE=0
FITNESS_EC=0
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
if [ "$FITNESS_EC" -ne 0 ]; then
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
if [ "$FITNESS_EC" -eq 2 ]; then
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
fi
# Validate that score is a non-negative integer; treat any other output as invalid.
if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
SCORE=0
else
log " $CID: fitness=$SCORE"
fi
# Append to JSONL
# Append to JSONL — use the actual operations recorded for this candidate.
printf '{"candidate_id":"%s","fitness":%s,"mutations_applied":%d}\n' \
"$CID" "$SCORE" "$MUTATION_RATE" >> "$JSONL_FILE"
"$CID" "$SCORE" "$MUTATIONS_APPLIED" >> "$JSONL_FILE"
# Record index→score for selection
echo "$CAND_COUNT $SCORE" >> "$SCORES_FILE"
# Record index, score, and filepath for selection (filepath avoids index→file mapping issues).
printf '%d %s %s\n' "$CAND_COUNT" "$SCORE" "$CAND_FILE" >> "$SCORES_FILE"
SCORE_VALUES="$SCORE_VALUES $SCORE"
CAND_COUNT=$((CAND_COUNT + 1))
@ -273,20 +287,20 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
read -r MIN MAX MEAN < <(printf '%s' "$SCORE_VALUES" | py_stats)
log " Stats: min=$MIN max=$MAX mean=$MEAN candidates=$CAND_COUNT"
# Find best candidate index for this generation
BEST_IDX_THIS_GEN=$(python3 -c "
# Find best candidate for this generation (filepath returned directly).
BEST_FILE_THIS_GEN=$(python3 - "$SCORES_FILE" <<'PYEOF'
import sys
entries = []
with open('$SCORES_FILE') as f:
with open(sys.argv[1]) as f:
for line in f:
idx, score = line.split()
entries.append((int(idx), int(score)))
best = max(entries, key=lambda x: x[1])
print(best[0])
")
BEST_FILE_THIS_GEN=$(ls "$CURRENT_GEN_DIR"/candidate_*.push3 \
| sort | awk "NR==$((BEST_IDX_THIS_GEN + 1))")
parts = line.split()
if len(parts) >= 3:
entries.append((int(parts[1]), parts[2]))
if not entries:
sys.exit(1)
print(max(entries, key=lambda x: x[0])[1])
PYEOF
) || fail "Could not determine best candidate from $SCORES_FILE"
if [ "$MAX" -gt "$GLOBAL_BEST_FITNESS" ] || [ "$GLOBAL_BEST_FITNESS" -eq -1 ]; then
GLOBAL_BEST_FITNESS="$MAX"
@ -304,9 +318,7 @@ print(best[0])
[ "$K" -lt 1 ] && K=1
SURVIVOR_FILES=()
while IFS= read -r WIN_IDX; do
WIN_FILE=$(ls "$CURRENT_GEN_DIR"/candidate_*.push3 \
| sort | awk "NR==$((WIN_IDX + 1))")
while IFS= read -r WIN_FILE; do
SURVIVOR_FILES+=("$WIN_FILE")
done < <(py_tournament "$K" "$SCORES_FILE")
@ -321,34 +333,37 @@ print(best[0])
HALF=$((POPULATION / 2))
# First half: mutate random survivors
for _ in $(seq 1 $HALF); do
for _i in $(seq 1 $HALF); do
SUR="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
MUTATED=""
if MUTATED=$(run_mutate_cli mutate "$SUR" "$MUTATION_RATE" 2>/dev/null); then
printf '%s\n' "$MUTATED" > "$DEST"
printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
else
# Fallback: copy the survivor as-is to keep population size stable
cp "$SUR" "$DEST"
printf '0\n' > "${DEST%.push3}.ops"
fi
NEXT_IDX=$((NEXT_IDX + 1))
done
# Second half: crossover random survivor pairs
REMAINING=$((POPULATION - HALF))
for _ in $(seq 1 $REMAINING); do
for _i in $(seq 1 $REMAINING); do
SUR_A="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
SUR_B="${SURVIVOR_FILES[$((RANDOM % ${#SURVIVOR_FILES[@]}))]}"
DEST="$NEXT_GEN_DIR/candidate_$(printf '%03d' $NEXT_IDX).push3"
CROSSED=""
if CROSSED=$(run_mutate_cli crossover "$SUR_A" "$SUR_B" 2>/dev/null); then
printf '%s\n' "$CROSSED" > "$DEST"
printf '0\n' > "${DEST%.push3}.ops"
else
# Fallback: mutate one survivor
if MUTATED=$(run_mutate_cli mutate "$SUR_A" "$MUTATION_RATE" 2>/dev/null); then
printf '%s\n' "$MUTATED" > "$DEST"
printf '%d\n' "$MUTATION_RATE" > "${DEST%.push3}.ops"
else
cp "$SUR_A" "$DEST"
printf '0\n' > "${DEST%.push3}.ops"
fi
fi
NEXT_IDX=$((NEXT_IDX + 1))