fix: batch-eval.sh aborts entire generation on single candidate compile failure (#901)

- Add skip_candidate() helper that emits fitness=0 JSON to stdout and
  tracks the failed score for the output-dir file, satisfying the
  downstream scorer's expectation of one JSON line per candidate.
- Unify all failure paths (transpile, forge build, bytecode extract,
  empty bytecode) through skip_candidate() with a distinct error key.
- Log message now reads "WARNING: <id> compile failed — scoring as 0"
  as required by the acceptance criteria.
- Output-dir scores.jsonl now merges successful + failed scores so the
  file is complete even when some candidates fail to compile.
- All-candidates-fail path (COMPILED_COUNT=0) still exits 2 (no viable
  population); true infra errors (missing tool, bad RPC) unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-17 06:09:18 +00:00
parent f685f9a237
commit a23064f576

View file

@ -117,6 +117,17 @@ BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt"
COMPILED_COUNT=0 COMPILED_COUNT=0
FAILED_IDS="" FAILED_IDS=""
FAILED_SCORES=""
# Escape a string so it can be embedded inside a JSON double-quoted literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
_skip_candidate_json_escape() {
  local text="$1"
  local bs='\' dq='"'
  # Backslashes are doubled first so the escapes added below are not re-escaped.
  text=${text//"$bs"/"$bs$bs"}
  text=${text//"$dq"/"$bs$dq"}
  # Raw line breaks / tabs would split or corrupt the one-line-per-candidate output.
  text=${text//$'\n'/"${bs}n"}
  text=${text//$'\r'/"${bs}r"}
  text=${text//$'\t'/"${bs}t"}
  printf '%s' "$text"
}

# Emit a fitness=0 JSON line for a candidate that failed to compile, and track it.
# Globals:   FAILED_SCORES (appended: newline-separated JSON lines)
#            FAILED_IDS    (appended: space-separated raw candidate ids)
# Arguments: $1 - candidate id
#            $2 - short error key stored in the JSON "error" field
# Outputs:   one JSON line on stdout; a warning via log()
skip_candidate() {
  local cid="$1" reason="$2"
  local line
  log "WARNING: $cid compile failed — scoring as 0"
  # Both values are JSON-escaped so an id containing quotes or backslashes
  # still yields exactly one well-formed JSON object per line.
  line='{"candidate_id":"'"$(_skip_candidate_json_escape "$cid")"'","fitness":0,"error":"'"$(_skip_candidate_json_escape "$reason")"'"}'
  printf '%s\n' "$line"
  # Only insert a separator when earlier failures were already recorded.
  FAILED_SCORES="${FAILED_SCORES:+$FAILED_SCORES$'\n'}$line"
  FAILED_IDS="$FAILED_IDS $cid"
}
for PUSH3_FILE in "${PUSH3_FILES[@]}"; do for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")" PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")"
@ -125,8 +136,7 @@ for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
# Transpile Push3 → Solidity, extract function body, inject into OptimizerV3.sol # Transpile Push3 → Solidity, extract function body, inject into OptimizerV3.sol
INJECT_SCRIPT="$REPO_ROOT/tools/push3-transpiler/inject.sh" INJECT_SCRIPT="$REPO_ROOT/tools/push3-transpiler/inject.sh"
if ! bash "$INJECT_SCRIPT" "$PUSH3_FILE" "$OPTIMIZERV3_SOL" >/dev/null 2>&1; then if ! bash "$INJECT_SCRIPT" "$PUSH3_FILE" "$OPTIMIZERV3_SOL" >/dev/null 2>&1; then
log "WARNING: transpile/inject failed for $CANDIDATE_ID — skipping" skip_candidate "$CANDIDATE_ID" "transpile_failed"
FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
continue continue
fi fi
@ -135,8 +145,7 @@ for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
(cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$? (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$?
if [ "$FORGE_EC" -ne 0 ]; then if [ "$FORGE_EC" -ne 0 ]; then
log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping" skip_candidate "$CANDIDATE_ID" "compile_failed"
FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
continue continue
fi fi
@ -151,11 +160,10 @@ if not bytecode.startswith("0x"):
bytecode = "0x" + bytecode bytecode = "0x" + bytecode
print(bytecode) print(bytecode)
PYEOF PYEOF
)" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; } )" || { skip_candidate "$CANDIDATE_ID" "bytecode_extract_failed"; continue; }
if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then
log "WARNING: empty bytecode for $CANDIDATE_ID — skipping" skip_candidate "$CANDIDATE_ID" "empty_bytecode"
FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
continue continue
fi fi
@ -213,17 +221,18 @@ if [ -z "$SCORES_JSONL" ]; then
fail2 "No score lines found in forge test output" fail2 "No score lines found in forge test output"
fi fi
# Emit scores to stdout # Emit scores to stdout (failed candidates already emitted individually above)
printf '%s\n' "$SCORES_JSONL" printf '%s\n' "$SCORES_JSONL"
# Optionally write to output directory # Optionally write to output directory (merge successful + failed scores)
if [ -n "$OUTPUT_DIR" ]; then if [ -n "$OUTPUT_DIR" ]; then
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl" ALL_SCORES="${SCORES_JSONL}${FAILED_SCORES:+$'\n'$FAILED_SCORES}"
printf '%s\n' "$ALL_SCORES" > "$OUTPUT_DIR/scores.jsonl"
log "Scores written to $OUTPUT_DIR/scores.jsonl" log "Scores written to $OUTPUT_DIR/scores.jsonl"
fi fi
# Warn about any candidates that were skipped (compile failures) # Warn summary if any candidates were skipped (compile failures)
if [ -n "$FAILED_IDS" ]; then if [ -n "$FAILED_IDS" ]; then
log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS" log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS"
exit 1 exit 1