fix: Push3 evolution: fitness scoring wrapper (transpile → deploy → attack → score) (#545)

Address review findings:
- Bug: add BASELINE_SNAP before bootstrap; cleanup reverts it on shared Anvil
  to undo setRecenterAccess/WETH-funding/recenter mutations (was dead code before)
- Bug: require ANVIL_FORK_URL when cold-starting Anvil — DeployLocal.sol needs
  live Base contracts (Uniswap V3 Factory, WETH) that don't exist on an unforked Anvil
- Warning: flag DIRTY and emit warning when anvil_revert fails instead of || true
- Warning: tee deploy-optimizer.sh output to both log file and stderr so progress
  is visible and preserved for post-failure diagnosis
- Nit: replace 50×evm_mine loop with single anvil_mine 0x32 (49 fewer RTTs)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-11 19:41:06 +00:00
parent a8db761de8
commit 0f91234dbe

View file

@ -14,6 +14,12 @@
# 0 Success — score printed to stdout.
# 1 Invalid candidate — Push3 program won't transpile, compile, or deploy.
# 2 Infra error — Anvil unavailable, missing tool, bootstrap failure.
#
# Environment:
# ANVIL_FORK_URL Required when Anvil is not already running. Must point to
# a Base RPC endpoint so Uniswap V3 Factory and WETH exist at
# their canonical addresses (e.g. https://mainnet.base.org or
# a local Base fork). Has no effect when Anvil is already up.
# =============================================================================
set -euo pipefail
@ -30,7 +36,7 @@ MNEMONIC="test test test test test test test test test test test junk"
RECENTER_PK="0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a"
# Account 8 — adversary (used to fund LM with WETH)
ADV_PK="0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"
# WETH address on the local Anvil Base fork
# WETH address on the Base network
WETH="0x4200000000000000000000000000000000000006"
# =============================================================================
@ -70,25 +76,52 @@ done
# =============================================================================
# Cleanup
#
# If we own Anvil (ANVIL_PID set), just kill it — no state cleanup needed.
# If we are using a shared Anvil (ANVIL_PID empty), revert to BASELINE_SNAP to
# undo bootstrap mutations (setRecenterAccess, WETH funding, initial recenter)
# so the chain is clean for the next caller.
# =============================================================================
ANVIL_PID=""
WORK_DIR="$(mktemp -d)"
BASELINE_SNAP="" # pre-bootstrap snapshot; used to clean up on shared Anvil
# cleanup — release everything this run created. Intended as the EXIT handler.
#
# Globals read:
#   ANVIL_PID      PID of the Anvil instance this script started ("" if none).
#   BASELINE_SNAP  Snapshot id to revert to when ANVIL_PID is empty ("" if none).
#   RPC_URL        RPC endpoint passed to `cast rpc`.
#   WORK_DIR       Scratch directory to delete.
#
# Behaviour:
#   - ANVIL_PID set        → kill that process (exactly once); no chain revert.
#   - else BASELINE_SNAP   → anvil_revert to it; a failed revert is reported on
#                            stderr instead of being silently swallowed, because
#                            the chain is then left with this run's mutations.
#   - always               → remove WORK_DIR.
#
# Every step is best-effort so the handler never fails on its own account.
cleanup() {
  if [ -n "$ANVIL_PID" ]; then
    # The process may already be gone; that is not an error.
    kill "$ANVIL_PID" 2>/dev/null || true
  elif [ -n "$BASELINE_SNAP" ]; then
    if ! cast rpc anvil_revert "$BASELINE_SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1; then
      printf 'WARNING: cleanup: anvil_revert to baseline snapshot %s failed — chain state at %s was not restored\n' \
        "$BASELINE_SNAP" "$RPC_URL" >&2
    fi
  fi
  # `--` guards against a WORK_DIR value that begins with a dash.
  rm -rf -- "$WORK_DIR"
}
trap cleanup EXIT
# =============================================================================
# Step 0 — Start Anvil (if not already running)
# Step 0 — Ensure Anvil is running
#
# DeployLocal.sol depends on live Base infrastructure: Uniswap V3 Factory at
# 0x4752ba5DBc23f44D87826276BF6Fd6b1C372aD24 and WETH at
# 0x4200000000000000000000000000000000000006. A plain (unforked) Anvil has
# neither, so cold-starting without --fork-url silently breaks the pipeline.
#
# When Anvil is already running (dev stack or CI), we use it as-is.
# When it is not running we require ANVIL_FORK_URL and start a forked instance.
# =============================================================================
if cast chain-id --rpc-url "$RPC_URL" >/dev/null 2>&1; then
log "Anvil already running at $RPC_URL"
else
ANVIL_FORK_URL="${ANVIL_FORK_URL:-}"
if [ -z "$ANVIL_FORK_URL" ]; then
fail2 "Anvil is not running at $RPC_URL and ANVIL_FORK_URL is not set.
DeployLocal.sol requires Base network contracts (Uniswap V3 Factory, WETH).
Either start a Base-forked Anvil externally, or set ANVIL_FORK_URL to a Base
RPC endpoint (e.g. ANVIL_FORK_URL=https://mainnet.base.org)."
fi
anvil --silent \
--fork-url "$ANVIL_FORK_URL" \
--mnemonic "$MNEMONIC" \
--port 8545 &
ANVIL_PID=$!
@ -99,14 +132,17 @@ else
[ $TRIES -gt 50 ] && fail2 "Anvil did not start within 50 attempts"
sleep 0.2
done
log "Anvil started (PID $ANVIL_PID)"
log "Anvil started (PID $ANVIL_PID, fork: $ANVIL_FORK_URL)"
fi
# =============================================================================
# Steps 1–3 — Transpile → compile → deploy fresh stack → UUPS upgrade
#
# deploy-optimizer.sh handles the full pipeline. With no OPTIMIZER_PROXY set it
# also runs DeployLocal.sol to produce the initial stack.
# deploy-optimizer.sh handles the full pipeline. With no OPTIMIZER_PROXY set
# it also runs DeployLocal.sol to produce the initial stack.
#
# Output is tee'd to both a log file and stderr so progress is visible to the
# caller while the log is preserved for post-failure diagnosis.
#
# Exit codes from deploy-optimizer.sh all map to exit 1 (invalid candidate)
# because transpile / compile / round-trip failures are candidate issues.
@ -116,11 +152,11 @@ log "Running deploy-optimizer.sh (transpile → compile → deploy → upgrade)
DEPLOY_LOG="$WORK_DIR/deploy.log"
DEPLOY_EC=0
"$REPO_ROOT/tools/deploy-optimizer.sh" "$PUSH3_FILE" >"$DEPLOY_LOG" 2>&1 || DEPLOY_EC=$?
"$REPO_ROOT/tools/deploy-optimizer.sh" "$PUSH3_FILE" 2>&1 \
| tee "$DEPLOY_LOG" >&2 \
|| DEPLOY_EC=${PIPESTATUS[0]}
if [ "$DEPLOY_EC" -ne 0 ]; then
# Surface the deploy log so operators can diagnose candidate failures.
cat "$DEPLOY_LOG" >&2
fail1 "deploy-optimizer.sh failed (exit $DEPLOY_EC)"
fi
@ -129,8 +165,8 @@ log "Optimizer deployed and upgraded"
# =============================================================================
# Step 4 — Read deployment addresses
#
# DeployLocal.sol writes to onchain/deployments-local.json; addresses are
# deterministic for a fresh Anvil + standard mnemonic.
# DeployLocal.sol writes deterministic addresses to deployments-local.json when
# run against a fresh Anvil + standard mnemonic.
# =============================================================================
DEPLOYMENTS="$ONCHAIN_DIR/deployments-local.json"
@ -148,13 +184,18 @@ log "LiquidityManager: $LM_ADDR"
# =============================================================================
# Step 5 — Bootstrap LM state
#
# a. Grant recenterAccess to the standard Anvil account 2 (impersonate feeDestination).
# b. Fund LM with 1000 WETH from the adversary account (account 8).
# c. Call recenter() to deploy the capital into Uniswap positions so attacks
# have something meaningful to work against. The LM needs at least some
# TWAP history; mine blocks and retry until recenter succeeds.
# a. Snapshot pre-bootstrap state for cleanup (BASELINE_SNAP).
# b. Grant recenterAccess to account 2 (impersonate feeDestination).
# c. Fund LM with 1000 WETH from account 8.
# d. Call recenter() to deploy capital into Uniswap positions.
# The LM needs TWAP history; mine blocks in batches and retry.
# =============================================================================
# a. Pre-bootstrap snapshot — reverted in cleanup to undo mutations on a shared Anvil.
BASELINE_SNAP=$(cast rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
log "Pre-bootstrap snapshot: $BASELINE_SNAP"
# b. Grant recenterAccess.
RECENTER_ADDR=$(cast wallet address --private-key "$RECENTER_PK")
FEE_DEST=$(cast call "$LM_ADDR" "feeDestination()(address)" \
@ -168,6 +209,7 @@ cast send --rpc-url "$RPC_URL" --from "$FEE_DEST" --unlocked \
|| fail2 "setRecenterAccess failed"
cast rpc --rpc-url "$RPC_URL" anvil_stopImpersonatingAccount "$FEE_DEST" >/dev/null
# c. Fund LM with 1000 WETH.
log "Funding LM with 1000 WETH"
cast send "$WETH" "deposit()" --value 1000ether \
--private-key "$ADV_PK" --rpc-url "$RPC_URL" >/dev/null 2>&1 \
@ -176,13 +218,12 @@ cast send "$WETH" "transfer(address,uint256)" "$LM_ADDR" 1000000000000000000000
--private-key "$ADV_PK" --rpc-url "$RPC_URL" >/dev/null 2>&1 \
|| fail2 "Failed to transfer WETH to LM"
# d. Initial recenter. Mine 50 blocks per attempt (single anvil_mine call) to
# build the TWAP history the LM needs before recenter() will succeed.
log "Initial recenter — deploying capital into positions"
RECENTERED=false
for _attempt in 1 2 3 4; do
# Mine 50 blocks each attempt to accumulate TWAP history.
for _b in $(seq 1 50); do
cast rpc evm_mine --rpc-url "$RPC_URL" >/dev/null 2>&1
done
cast rpc anvil_mine 0x32 --rpc-url "$RPC_URL" >/dev/null 2>&1
if cast send "$LM_ADDR" "recenter()" \
--private-key "$RECENTER_PK" --rpc-url "$RPC_URL" >/dev/null 2>&1; then
RECENTERED=true
@ -195,33 +236,28 @@ if ! $RECENTERED; then
fi
# =============================================================================
# Step 6 — Take base Anvil snapshot
# Steps 6–7 — Run each attack and accumulate lm_eth_total
#
# All attacks revert to this snapshot so they each start from the same state.
# =============================================================================
BASE_SNAP=$(cast rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
log "Base snapshot: $BASE_SNAP"
# =============================================================================
# Steps 7–8 — Run each attack and accumulate lm_eth_total
# Each attack starts from the same post-bootstrap state by taking a snapshot
# before the run and reverting to it afterwards. If a revert fails (wrong
# snapshot ID, Anvil restart, etc.) subsequent attacks would run against dirty
# state; we flag this so the caller can discard the score.
# =============================================================================
TOTAL_ETH=0
ATTACK_COUNT=0
DIRTY=false
for ATTACK_JSONL in "$ATTACKS_DIR"/*.jsonl; do
[ -f "$ATTACK_JSONL" ] || continue
ATTACK_NAME="$(basename "$ATTACK_JSONL" .jsonl)"
log "Running attack: $ATTACK_NAME"
# a. Take per-attack snapshot (identical to base on first iteration;
# on subsequent iterations the state is already back at base from the
# previous revert).
# a. Take per-attack snapshot.
ATK_SNAP=$(cast rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
# b. Run AttackRunner, capturing all output (console.log snapshots come via
# stdout when using --broadcast; stderr carries compilation noise).
# b. Run AttackRunner, capturing all output.
# console.log snapshot lines start with '{'; other forge output does not.
ATK_OUT="$WORK_DIR/atk-${ATTACK_NAME}.txt"
ATK_EC=0
(
@ -233,12 +269,14 @@ for ATTACK_JSONL in "$ATTACKS_DIR"/*.jsonl; do
if [ "$ATK_EC" -ne 0 ]; then
log " WARNING: AttackRunner failed for $ATTACK_NAME (exit $ATK_EC) — skipping"
cast rpc anvil_revert "$ATK_SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1 || true
if ! cast rpc anvil_revert "$ATK_SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1; then
log " WARNING: anvil_revert also failed — subsequent attacks run against dirty state"
DIRTY=true
fi
continue
fi
# c. Extract lm_eth_total from the final JSON snapshot.
# Snapshot lines are emitted by console.log and start with '{'.
# c. Extract lm_eth_total from the final JSON snapshot line.
ETH_RETAINED=$(python3 - "$ATK_OUT" <<'PYEOF'
import sys, json
snapshots = []
@ -251,7 +289,7 @@ with open(sys.argv[1]) as f:
except json.JSONDecodeError:
pass
if snapshots:
# lm_eth_total is a quoted integer string in the snapshot JSON.
# lm_eth_total is serialised as a quoted integer string.
val = snapshots[-1]['lm_eth_total']
print(int(val) if isinstance(val, str) else val)
else:
@ -263,9 +301,11 @@ PYEOF
TOTAL_ETH=$(python3 -c "print(int('$TOTAL_ETH') + int('$ETH_RETAINED'))")
ATTACK_COUNT=$((ATTACK_COUNT + 1))
# d. Revert to per-attack snapshot — resets Anvil state to post-bootstrap
# baseline so the next attack starts from the same conditions.
cast rpc anvil_revert "$ATK_SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1 || true
# d. Revert to per-attack snapshot to restore post-bootstrap baseline.
if ! cast rpc anvil_revert "$ATK_SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1; then
log " WARNING: anvil_revert failed for $ATTACK_NAME — subsequent attacks run against dirty state"
DIRTY=true
fi
done
# =============================================================================
@ -276,5 +316,9 @@ if [ "$ATTACK_COUNT" -eq 0 ]; then
fail2 "No attacks ran — check $ATTACKS_DIR for *.jsonl files"
fi
if $DIRTY; then
log "WARNING: one or more revert failures occurred — score may be inaccurate"
fi
log "Score: $TOTAL_ETH wei (sum of lm_eth_total across $ATTACK_COUNT attacks)"
echo "$TOTAL_ETH"