From a23064f576f47e02bdd3641f315d2da766c6bc53 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Mar 2026 06:09:18 +0000 Subject: [PATCH] fix: batch-eval.sh aborts entire generation on single candidate compile failure (#901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add skip_candidate() helper that emits fitness=0 JSON to stdout and tracks the failed score for the output-dir file, satisfying the downstream scorer's expectation of one JSON line per candidate. - Unify all failure paths (transpile, forge build, bytecode extract, empty bytecode) through skip_candidate() with a distinct error key. - Log message now reads "WARNING: $cid compile failed — scoring as 0" as required by the acceptance criteria. - Output-dir scores.jsonl now merges successful + failed scores so the file is complete even when some candidates fail to compile. - All-candidates-fail path (COMPILED_COUNT=0) still exits 2 (no viable population); true infra errors (missing tool, bad RPC) unchanged. Co-Authored-By: Claude Sonnet 4.6 --- .../revm-evaluator/batch-eval.sh | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tools/push3-evolution/revm-evaluator/batch-eval.sh b/tools/push3-evolution/revm-evaluator/batch-eval.sh index 14e3c48..6fa0a94 100755 --- a/tools/push3-evolution/revm-evaluator/batch-eval.sh +++ b/tools/push3-evolution/revm-evaluator/batch-eval.sh @@ -117,6 +117,17 @@ BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt" COMPILED_COUNT=0 FAILED_IDS="" +FAILED_SCORES="" + +# Emit a fitness=0 JSON line for a candidate that failed to compile, and track it. 
+skip_candidate() { + local cid="$1" reason="$2" + log "WARNING: $cid compile failed — scoring as 0" + local line='{"candidate_id":"'"$cid"'","fitness":0,"error":"'"$reason"'"}' + printf '%s\n' "$line" + FAILED_SCORES="${FAILED_SCORES:+$FAILED_SCORES$'\n'}$line" + FAILED_IDS="$FAILED_IDS $cid" +} for PUSH3_FILE in "${PUSH3_FILES[@]}"; do PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")" @@ -125,8 +136,7 @@ for PUSH3_FILE in "${PUSH3_FILES[@]}"; do # Transpile Push3 → Solidity, extract function body, inject into OptimizerV3.sol INJECT_SCRIPT="$REPO_ROOT/tools/push3-transpiler/inject.sh" if ! bash "$INJECT_SCRIPT" "$PUSH3_FILE" "$OPTIMIZERV3_SOL" >/dev/null 2>&1; then - log "WARNING: transpile/inject failed for $CANDIDATE_ID — skipping" - FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + skip_candidate "$CANDIDATE_ID" "transpile_failed" continue fi @@ -135,8 +145,7 @@ for PUSH3_FILE in "${PUSH3_FILES[@]}"; do (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$? 
if [ "$FORGE_EC" -ne 0 ]; then - log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping" - FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + skip_candidate "$CANDIDATE_ID" "compile_failed" continue fi @@ -151,11 +160,10 @@ if not bytecode.startswith("0x"): bytecode = "0x" + bytecode print(bytecode) PYEOF -)" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; } +)" || { skip_candidate "$CANDIDATE_ID" "bytecode_extract_failed"; continue; } if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then - log "WARNING: empty bytecode for $CANDIDATE_ID — skipping" - FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + skip_candidate "$CANDIDATE_ID" "empty_bytecode" continue fi @@ -213,17 +221,18 @@ if [ -z "$SCORES_JSONL" ]; then fail2 "No score lines found in forge test output" fi -# Emit scores to stdout +# Emit scores to stdout (failed candidates already emitted individually above) printf '%s\n' "$SCORES_JSONL" -# Optionally write to output directory +# Optionally write to output directory (merge successful + failed scores) if [ -n "$OUTPUT_DIR" ]; then mkdir -p "$OUTPUT_DIR" - printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl" + ALL_SCORES="${SCORES_JSONL}${FAILED_SCORES:+$'\n'$FAILED_SCORES}" + printf '%s\n' "$ALL_SCORES" > "$OUTPUT_DIR/scores.jsonl" log "Scores written to $OUTPUT_DIR/scores.jsonl" fi -# Warn about any candidates that were skipped (compile failures) +# Warn summary if any candidates were skipped (compile failures) if [ -n "$FAILED_IDS" ]; then log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS" exit 1