feat: revm-based fitness evaluator for evolution at scale (#604)

Replace per-candidate Anvil+forge-script pipeline with in-process EVM
execution using Foundry's native revm backend, achieving 10-100× speedup
for evolutionary search at scale.

New files:
- onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once,
  deploys the full KRAIKEN stack, then for each candidate uses vm.etch to
  inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all
  attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead),
  and emits one {"candidate_id","fitness"} JSON line per candidate.
  Skips gracefully when BASE_RPC_URL is unset (CI-safe).

- tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that
  transpiles+compiles each candidate sequentially, writes a two-file
  manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol
  in a single forge test run and parses the score JSON from stdout.

Modified:
- tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm).
  When EVAL_MODE=revm, batch-scores every candidate in a generation with
  one batch-eval.sh call instead of N sequential fitness.sh processes;
  scores are looked up from the JSONL output in the per-candidate loop.
  Default remains EVAL_MODE=anvil for backward compatibility.

Key design decisions:
- Per-candidate Solidity compilation is unavoidable (each Push3 candidate
  produces different Solidity); the speedup is in the evaluation phase.
- vm.snapshot/revertTo in forge test are O(1) memory operations (true
  revm), not RPC calls — this is the core speedup vs Anvil.
- recenterAccess is set in bootstrap so TWAP stability checks are bypassed
  during attack sequences (mirrors the existing fitness.sh bootstrap).
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-12 11:54:41 +00:00
parent 4258045c8c
commit 26b8876691
3 changed files with 870 additions and 6 deletions

View file

@ -44,8 +44,15 @@ export PATH="${HOME}/.foundry/bin:${PATH}"
# Resolve helper paths relative to this script so it can be invoked from any CWD.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh"
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
# EVAL_MODE controls which fitness backend is used:
#   anvil (default) — per-candidate Anvil+forge-script pipeline (fitness.sh)
#   revm            — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
#                     Requires BASE_RPC_URL env var. 10-100× faster at scale.
EVAL_MODE="${EVAL_MODE:-anvil}"
# =============================================================================
# Argument parsing
# =============================================================================
@ -171,6 +178,15 @@ done
# Pre-flight checks: verify required tools and files before the run starts.
[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"
# Validate the selected fitness backend and its prerequisites.
case "$EVAL_MODE" in
  revm)
    [ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH"
    [ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH"
    [ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
    command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)"
    ;;
  anvil)
    ;;
  *)
    fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
    ;;
esac
TSX_CMD="$(find_tsx_cmd)" || fail \
  "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
@ -194,6 +210,7 @@ log " Generations: $GENERATIONS"
log " Mutation rate: $MUTATION_RATE"
log " Output: $OUTPUT_DIR"
log " TSX: $TSX_CMD"
# Surface the chosen fitness backend (anvil|revm) in the startup banner.
log " Eval mode: $EVAL_MODE"
log "========================================================"
# =============================================================================
@ -241,6 +258,29 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
SCORE_VALUES=""
CAND_COUNT=0
# In revm mode, batch-score all candidates in one forge test invocation before
# the per-candidate loop. Scores are written to a temp JSONL file that the
# loop reads with a fast Python lookup.
BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"
if [ "$EVAL_MODE" = "revm" ]; then
  declare -a _BATCH_FILES=()
  for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
    [ -f "$_CF" ] && _BATCH_FILES+=("$_CF")
  done
  if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
    BATCH_EC=0
    # Keep batch-eval.sh diagnostics in a sidecar log instead of discarding
    # them with 2>/dev/null — otherwise failures are undiagnosable.
    bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" \
      > "$BATCH_SCORES_FILE" 2> "${BATCH_SCORES_FILE%.jsonl}.log" \
      || BATCH_EC=$?
    # Exit 2 = infrastructure error (RPC down, missing tools): abort immediately.
    if [ "$BATCH_EC" -eq 2 ]; then
      fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
    fi
    # If the batch run produced no scores, remove the (empty) file. The
    # redirection above always creates it, which would otherwise defeat the
    # per-candidate loop's [ -f "$BATCH_SCORES_FILE" ] guard and silently
    # score every candidate 0 instead of falling back to fitness.sh.
    if [ ! -s "$BATCH_SCORES_FILE" ]; then
      rm -f -- "$BATCH_SCORES_FILE"
    fi
    log " revm batch scoring complete (exit $BATCH_EC)"
  fi
fi
# NOTE(review): this span is a rendered diff that interleaves REMOVED lines
# with their replacements — the direct fitness.sh call just below duplicates
# the one in the else-branch, and the FITNESS_EC validation near the end
# appears in both its old and new forms. It is not runnable exactly as
# displayed; verify against the committed evolve.sh. The Python heredoc has
# also lost its indentation in this rendering.
for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
[ -f "$CAND_FILE" ] || continue
@ -255,16 +295,37 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
SCORE=0
FITNESS_EC=0
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
if [ "$FITNESS_EC" -eq 2 ]; then
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
# revm path: scores were precomputed by batch-eval.sh before this loop; if the
# scores file is absent (batch failed), fall through to the per-candidate path.
if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
# Look up pre-computed score from batch-eval.sh output.
SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys
cid = sys.argv[1]
with open(sys.argv[2]) as f:
for line in f:
try:
d = json.loads(line)
if d.get("candidate_id") == cid:
print(d["fitness"])
sys.exit(0)
except (json.JSONDecodeError, KeyError):
pass
print(0)
PYEOF
) || SCORE=0
else
# Anvil mode (or revm fallback): score candidate individually.
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
if [ "$FITNESS_EC" -eq 2 ]; then
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
fi
fi
# Validate that score is a non-negative integer; treat any other output as invalid.
if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
log " $CID: invalid/missing score, using 0"
SCORE=0
else
log " $CID: fitness=$SCORE"

View file

@ -0,0 +1,226 @@
#!/usr/bin/env bash
# =============================================================================
# batch-eval.sh — revm-based batch fitness evaluator
#
# Replaces the per-candidate Anvil+forge-script pipeline with in-process EVM
# execution via Foundry's native revm backend (FitnessEvaluator.t.sol).
#
# Speedup: compiles each candidate once (unavoidable — different Solidity per
# candidate), then runs ALL attack sequences in a single in-process forge test
# with O(1) memory snapshot/revert instead of RPC calls per attack.
#
# Usage:
# ./tools/push3-evolution/revm-evaluator/batch-eval.sh \
# [--output-dir /tmp/scores] \
# candidate0.push3 candidate1.push3 ...
#
# Output (stdout):
# One JSON object per candidate:
# {"candidate_id":"gen0_c000","fitness":123456789}
#
# Exit codes:
# 0 Success.
# 1 Candidate-level error (transpile/compile failed for at least one candidate).
# 2 Infrastructure error (missing tool, BASE_RPC_URL not set, forge test failed).
#
# Environment:
# BASE_RPC_URL Required. Base network RPC endpoint for forking.
# ATTACKS_DIR Optional. Path to *.jsonl attack files.
# (default: <repo>/onchain/script/backtesting/attacks)
# OUTPUT_DIR Optional. Directory to copy scores.jsonl into (--output-dir overrides).
# =============================================================================
# Strict mode: abort on errors, unset variables, and mid-pipeline failures.
set -euo pipefail
# Ensure Foundry binaries (forge) are reachable even from minimal environments.
export PATH="${HOME}/.foundry/bin:${PATH}"
# This script lives at tools/push3-evolution/revm-evaluator/, three levels
# below the repo root.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
ONCHAIN_DIR="$REPO_ROOT/onchain"
TRANSPILER_DIR="$REPO_ROOT/tools/push3-transpiler"
# Every candidate is transpiled to this SAME output path, which is why the
# batch is processed strictly sequentially (see Step 2).
TRANSPILER_OUT="$ONCHAIN_DIR/src/OptimizerV3Push3.sol"
# Forge build artifact for the transpiled contract; bytecode is read from here.
ARTIFACT_PATH="$ONCHAIN_DIR/out/OptimizerV3Push3.sol/OptimizerV3Push3.json"
DEFAULT_ATTACKS_DIR="$ONCHAIN_DIR/script/backtesting/attacks"
# =============================================================================
# Argument parsing
#
# Accepts an optional --output-dir DIR followed by one or more .push3 files.
# Any unrecognized --flag is an infrastructure error (exit 2).
# =============================================================================
OUTPUT_DIR="${OUTPUT_DIR:-}"
declare -a PUSH3_FILES=()
while [[ $# -gt 0 ]]; do
  case $1 in
    --output-dir)
      # Guard against a missing value: under `set -u` a bare "$2" would abort
      # with a cryptic "unbound variable" error and the wrong exit status
      # (1 instead of the documented infrastructure code 2).
      [[ $# -ge 2 ]] || { echo "Missing value for --output-dir" >&2; exit 2; }
      OUTPUT_DIR="$2"; shift 2 ;;
    --*) echo "Unknown option: $1" >&2; exit 2 ;;
    *) PUSH3_FILES+=("$1"); shift ;;
  esac
done
if [ "${#PUSH3_FILES[@]}" -eq 0 ]; then
  echo "Usage: $0 [--output-dir DIR] candidate1.push3 ..." >&2
  exit 2
fi
# =============================================================================
# Environment checks
#
# Fail fast (exit 2 = infrastructure error) when the fork RPC endpoint or any
# required tool is missing.
# =============================================================================
BASE_RPC_URL="${BASE_RPC_URL:-}"
if [ -z "$BASE_RPC_URL" ]; then
  echo " [batch-eval] ERROR: BASE_RPC_URL env var required for Base network fork" >&2
  exit 2
fi
for _tool in forge node python3; do
  if ! command -v "$_tool" &>/dev/null; then
    echo " [batch-eval] ERROR: $_tool not found in PATH" >&2
    exit 2
  fi
done
# =============================================================================
# Helpers
# =============================================================================
# log: prefixed diagnostic line on stderr (stdout is reserved for score JSONL).
# printf is used instead of echo so arguments beginning with '-' or containing
# backslashes are printed verbatim.
log() { printf ' [batch-eval] %s\n' "$*" >&2; }
# fail2: log an ERROR and exit with the infrastructure-error code (2).
fail2() { printf ' [batch-eval] ERROR: %s\n' "$*" >&2; exit 2; }
# =============================================================================
# Step 1 — Ensure transpiler dependencies are installed
# =============================================================================
# One-time bootstrap: npm install runs only when node_modules is absent, so
# repeat invocations skip this step entirely.
if [ ! -d "$TRANSPILER_DIR/node_modules" ]; then
log "Installing transpiler dependencies…"
(cd "$TRANSPILER_DIR" && npm install --silent) || fail2 "npm install in push3-transpiler failed"
fi
# =============================================================================
# Step 2 — Transpile + compile each candidate, extract bytecodes into manifest
#
# The manifest is two parallel files: ids.txt (candidate ids, one per line)
# and bytecodes.txt (0x-prefixed hex, one per line, same order).
# Per-candidate failures are logged and skipped; only a fully-empty manifest
# aborts the run.
# =============================================================================
MANIFEST_DIR="$(mktemp -d)"
# Clean the manifest up on every exit path (it was previously leaked). The
# EXIT trap fires only after Step 3 has consumed the manifest via
# FITNESS_MANIFEST_DIR, so this is safe.
trap 'rm -rf -- "$MANIFEST_DIR"' EXIT
IDS_FILE="$MANIFEST_DIR/ids.txt"
BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt"
: > "$IDS_FILE"
: > "$BYTECODES_FILE"
COMPILED_COUNT=0
FAILED_IDS=""
for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
  # Normalize to an absolute path. Skip the candidate (like every other
  # per-candidate failure) rather than letting a failed `cd` abort the whole
  # batch under `set -e`.
  PUSH3_DIR="$(cd "$(dirname "$PUSH3_FILE")" 2>/dev/null && pwd)" || {
    log "WARNING: cannot resolve directory of $PUSH3_FILE — skipping"
    FAILED_IDS="$FAILED_IDS $(basename "$PUSH3_FILE" .push3)"
    continue
  }
  PUSH3_FILE="$PUSH3_DIR/$(basename "$PUSH3_FILE")"
  CANDIDATE_ID="$(basename "$PUSH3_FILE" .push3)"
  # Transpile Push3 → OptimizerV3Push3.sol. All candidates share this output
  # path, which is why the batch is inherently sequential.
  TRANSPILE_EC=0
  (
    cd "$TRANSPILER_DIR"
    npx ts-node src/index.ts "$PUSH3_FILE" "$TRANSPILER_OUT"
  ) >/dev/null 2>&1 || TRANSPILE_EC=$?
  if [ "$TRANSPILE_EC" -ne 0 ]; then
    log "WARNING: transpile failed for $CANDIDATE_ID (exit $TRANSPILE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi
  # Compile (forge's incremental build skips unchanged files quickly)
  FORGE_EC=0
  (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$?
  if [ "$FORGE_EC" -ne 0 ]; then
    log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi
  # Extract bytecode from the forge artifact, normalizing to a 0x prefix.
  # (A previous comment here said "strip leading 0x", but the code has always
  # ADDED the prefix — presumably the manifest consumer expects 0x-prefixed
  # hex; confirm against FitnessEvaluator.t.sol.)
  BYTECODE_HEX="$(python3 - "$ARTIFACT_PATH" <<'PYEOF'
import json, sys
with open(sys.argv[1]) as f:
    d = json.load(f)
bytecode = d["bytecode"]["object"]
# Ensure 0x prefix
if not bytecode.startswith("0x"):
    bytecode = "0x" + bytecode
print(bytecode)
PYEOF
)" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; }
  if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then
    log "WARNING: empty bytecode for $CANDIDATE_ID — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi
  printf '%s\n' "$CANDIDATE_ID" >> "$IDS_FILE"
  printf '%s\n' "$BYTECODE_HEX" >> "$BYTECODES_FILE"
  COMPILED_COUNT=$((COMPILED_COUNT + 1))
  log "Compiled $CANDIDATE_ID"
done
if [ "$COMPILED_COUNT" -eq 0 ]; then
  fail2 "No candidates compiled successfully — aborting"
fi
log "Compiled $COMPILED_COUNT / ${#PUSH3_FILES[@]} candidates"
# =============================================================================
# Step 3 — Run FitnessEvaluator.t.sol (in-process revm, all candidates at once)
# =============================================================================
ATTACKS_DIR="${ATTACKS_DIR:-$DEFAULT_ATTACKS_DIR}"
# NOTE(review): this logs the raw RPC URL, which for hosted providers often
# embeds an API key — consider redacting before logging.
log "Running FitnessEvaluator.t.sol (in-process revm, fork: $BASE_RPC_URL)…"
FORGE_TEST_EC=0
# Capture combined stdout+stderr so the score lines can be parsed in Step 4
# and the full output can be replayed for diagnosis on failure. The env vars
# below are how the Solidity test locates the manifest and attack fixtures.
# NOTE(review): --no-match-path "NOT_A_REAL_PATH" looks like a no-op
# placeholder — confirm whether it is needed or can be removed.
FORGE_OUTPUT="$(
cd "$ONCHAIN_DIR"
BASE_RPC_URL="$BASE_RPC_URL" \
FITNESS_MANIFEST_DIR="$MANIFEST_DIR" \
ATTACKS_DIR="$ATTACKS_DIR" \
forge test \
--match-contract FitnessEvaluator \
--match-test testBatchEvaluate \
-vv \
--no-match-path "NOT_A_REAL_PATH" \
2>&1
)" || FORGE_TEST_EC=$?
if [ "$FORGE_TEST_EC" -ne 0 ]; then
# Surface forge output on failure for diagnosis
printf '%s\n' "$FORGE_OUTPUT" >&2
fail2 "forge test failed (exit $FORGE_TEST_EC)"
fi
# =============================================================================
# Step 4 — Extract and emit score JSON lines
#
# forge test -vv wraps console.log output with leading spaces and a "Logs:"
# header; keep only the lines carrying the score JSON and drop the indent.
# =============================================================================
SCORES_JSONL="$(grep '"candidate_id"' <<<"$FORGE_OUTPUT" | sed 's/^[[:space:]]*//' || true)"
if [ -z "$SCORES_JSONL" ]; then
  printf '%s\n' "$FORGE_OUTPUT" >&2
  fail2 "No score lines found in forge test output"
fi
# Emit scores to stdout
printf '%s\n' "$SCORES_JSONL"
# Optionally write to output directory
if [ "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
  printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl"
  log "Scores written to $OUTPUT_DIR/scores.jsonl"
fi
# Warn about any candidates that were skipped (compile failures)
if [ "$FAILED_IDS" ]; then
  log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS"
  exit 1
fi
log "Done — scored $COMPILED_COUNT candidates"