From c508efa31fc09e16af5a8202e313c6f162f4be59 Mon Sep 17 00:00:00 2001
From: openhands
Date: Sun, 15 Mar 2026 03:29:47 +0000
Subject: [PATCH] fix: address review findings for evaluate-seeds.sh (#724)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace unquoted heredoc (shell-injection path) with a temp file: the
  shell loop now appends tab-separated filename/score lines to a temp
  file, which is passed as a plain path argument to the Python manifest-
  rewrite block. Python reads only file contents, never executes shell-
  expanded strings.
- Add early abort on fitness.sh exit code 2 (infra error: Anvil down,
  missing tool). Iterating past an infra failure produces no useful
  results; aborting immediately surfaces the real problem.
- Remove unused `os` import from the manifest-rewrite Python block.
- Fix inaccurate comment in evolve.sh --diverse-seeds sampling: the pool
  sampler does a flat random shuffle with no fitness weighting; null-
  fitness seeds are not "treated as 0" — they are sampled with equal
  probability to any other seed.

Co-Authored-By: Claude Sonnet 4.6
---
 tools/push3-evolution/evaluate-seeds.sh | 45 ++++++++++++++++---------
 tools/push3-evolution/evolve.sh         |  7 ++--
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/tools/push3-evolution/evaluate-seeds.sh b/tools/push3-evolution/evaluate-seeds.sh
index feeeb2a..ece31dc 100755
--- a/tools/push3-evolution/evaluate-seeds.sh
+++ b/tools/push3-evolution/evaluate-seeds.sh
@@ -100,8 +100,11 @@ fi
 
 FAILURES=0
 
-# scores: associative array file -> score (bash 4+)
-declare -A SCORES
+# Scores are accumulated in a temp file as tab-separated "filename\tscore"
+# lines. Using a file (rather than a shell associative array embedded in a
+# heredoc) avoids injecting values into Python source code.
+SCORES_FILE="$(mktemp)" +trap 'rm -f "$SCORES_FILE"' EXIT while IFS= read -r FNAME; do [ -z "$FNAME" ] && continue @@ -117,6 +120,13 @@ while IFS= read -r FNAME; do FITNESS_EC=0 SCORE=$("$FITNESS_SH" "$SEED_FILE") || FITNESS_EC=$? + if [ "$FITNESS_EC" -eq 2 ]; then + # Exit code 2 = infra error (Anvil down, missing tool, etc.). + # All subsequent evaluations will fail for the same reason; abort early. + log "ERROR: fitness.sh reported infra failure (exit 2) for $FNAME — aborting" + exit 2 + fi + if [ "$FITNESS_EC" -ne 0 ] || [ -z "$SCORE" ]; then log "WARNING: fitness.sh failed for $FNAME (exit $FITNESS_EC) — skipping" FAILURES=$((FAILURES + 1)) @@ -124,10 +134,10 @@ while IFS= read -r FNAME; do fi log " $FNAME → fitness=$SCORE" - SCORES["$FNAME"]="$SCORE" + printf '%s\t%s\n' "$FNAME" "$SCORE" >> "$SCORES_FILE" done <<< "$NULL_ENTRIES" -if [ "${#SCORES[@]}" -eq 0 ]; then +if [ ! -s "$SCORES_FILE" ]; then log "No seeds were successfully evaluated." exit 1 fi @@ -137,22 +147,25 @@ fi # ============================================================================= MANIFEST_TMP="$(mktemp "${MANIFEST}.XXXXXX")" -trap 'rm -f "$MANIFEST_TMP"' EXIT +# Update trap to clean up both temp files. +trap 'rm -f "$SCORES_FILE" "$MANIFEST_TMP"' EXIT -python3 - "$MANIFEST" "$MANIFEST_TMP" <