Merge pull request 'fix: feat: revm-based fitness evaluator for evolution at scale (#604)' (#613) from fix/issue-604 into master

This commit is contained in:
johba 2026-03-12 14:56:03 +01:00
commit 685a0e488e
3 changed files with 890 additions and 6 deletions

View file

@ -0,0 +1,597 @@
// SPDX-License-Identifier: GPL-3.0-or-later
pragma solidity ^0.8.19;
/**
* @title FitnessEvaluator
* @notice In-process (revm) batch fitness evaluator for Push3 evolution.
*
* Replaces the Anvil+forge-script pipeline with in-process EVM execution.
* Uses Foundry's native revm backend: vm.snapshot/revertTo are memory operations
* with no JSON-RPC overhead, giving 100-1000x speedup over per-candidate Anvil.
*
* Architecture:
* batch-eval.sh compiles each candidate (Push3 → Solidity → bytecode) and writes a
* two-file manifest (ids.txt + bytecodes.txt). This test reads the manifest,
* forks Base mainnet once, deploys the full KRAIKEN stack once, then for each
* candidate:
* 1. snapshot → etch candidate bytecode → UUPS upgrade proxy → bootstrap
* 2. For each attack: snapshot → execute → accumulate lm_eth_total → revert
* 3. Emit JSON score line
* 4. Revert to pre-bootstrap snapshot
*
* Required env vars:
* BASE_RPC_URL – Base network RPC endpoint (for fork)
* FITNESS_MANIFEST_DIR – Directory containing ids.txt and bytecodes.txt
*
* Optional env vars:
* ATTACKS_DIR – Path to *.jsonl attack files (default: script/backtesting/attacks)
*
* Run:
* BASE_RPC_URL=https://mainnet.base.org \
* FITNESS_MANIFEST_DIR=/tmp/manifest \
* forge test --match-contract FitnessEvaluator --match-test testBatchEvaluate -vv
*/
import "forge-std/Test.sol";
import { Kraiken } from "../src/Kraiken.sol";
import { Stake } from "../src/Stake.sol";
import { Optimizer } from "../src/Optimizer.sol";
import { LiquidityManager } from "../src/LiquidityManager.sol";
import { ERC1967Proxy } from "@openzeppelin/proxy/ERC1967/ERC1967Proxy.sol";
import { UUPSUpgradeable } from "@openzeppelin/proxy/utils/UUPSUpgradeable.sol";
import { IERC20 } from "@openzeppelin/token/ERC20/IERC20.sol";
import { IUniswapV3Factory } from "@uniswap-v3-core/interfaces/IUniswapV3Factory.sol";
import { IUniswapV3Pool } from "@uniswap-v3-core/interfaces/IUniswapV3Pool.sol";
import { FullMath } from "@aperture/uni-v3-lib/FullMath.sol";
import { LiquidityAmounts } from "@aperture/uni-v3-lib/LiquidityAmounts.sol";
import { TickMath } from "@aperture/uni-v3-lib/TickMath.sol";
import { UniswapHelpers } from "../src/helpers/UniswapHelpers.sol";
import { IWETH9 } from "../src/interfaces/IWETH9.sol";
// External interfaces (mirrors AttackRunner.s.sol)
/// @dev Minimal swap-router surface used by this test: only `exactInputSingle` is declared
///      because it is the only router function FitnessEvaluator calls (buy/sell ops).
interface ISwapRouter02 {
/// @dev Argument struct for `exactInputSingle`. The evaluator always passes
///      `amountOutMinimum = 0` and `sqrtPriceLimitX96 = 0`.
struct ExactInputSingleParams {
address tokenIn;
address tokenOut;
uint24 fee;
address recipient;
uint256 amountIn;
uint256 amountOutMinimum;
uint160 sqrtPriceLimitX96;
}
/// @dev The uint256 return value is ignored by the evaluator
///      (presumably the output amount — confirm against the router ABI).
function exactInputSingle(ExactInputSingleParams calldata params) external returns (uint256);
}
/// @dev Minimal LiquidityManager surface used by this test.
interface ILM {
/// @dev Declared for parity with the LiquidityManager ABI; not called in this file.
function getVWAP() external view returns (uint256);
/// @dev The evaluator queries stage 0 (FLOOR), 1 (ANCHOR) and 2 (DISCOVERY) when scoring.
function positions(uint8 stage) external view returns (uint128 liquidity, int24 tickLower, int24 tickUpper);
/// @dev Called under vm.prank(recenterAddr); callers here wrap it in try/catch.
function recenter() external returns (bool);
}
/// @dev Minimal Stake surface used by the "stake" and "unstake" attack ops.
interface IStake {
/// @dev The evaluator always passes an empty `positionsToSnatch` array and stores the
///      returned positionId so later "unstake" ops can refer to it by 1-based index.
function snatch(uint256 assets, address receiver, uint32 taxRate, uint256[] calldata positionsToSnatch)
external
returns (uint256 positionId);
/// @dev Called by the adversary with a positionId previously returned by `snatch`.
function exitPosition(uint256 positionId) external;
}
/// @dev Subset of the Uniswap V3 NonfungiblePositionManager ABI used by the
///      "mint_lp" and "burn_lp" attack ops.
interface INonfungiblePositionManager {
/// @dev Arguments for `mint`. The evaluator sets both `amount*Min` fields to 0 and
///      `deadline` to block.timestamp + 3600.
struct MintParams {
address token0;
address token1;
uint24 fee;
int24 tickLower;
int24 tickUpper;
uint256 amount0Desired;
uint256 amount1Desired;
uint256 amount0Min;
uint256 amount1Min;
address recipient;
uint256 deadline;
}
/// @dev Arguments for `decreaseLiquidity`. The evaluator removes the position's full liquidity.
struct DecreaseLiquidityParams {
uint256 tokenId;
uint128 liquidity;
uint256 amount0Min;
uint256 amount1Min;
uint256 deadline;
}
/// @dev Arguments for `collect`. The evaluator passes type(uint128).max for both maxima.
struct CollectParams {
uint256 tokenId;
address recipient;
uint128 amount0Max;
uint128 amount1Max;
}
/// @dev Only the returned `tokenId` is used by the evaluator.
function mint(MintParams calldata params) external payable returns (uint256 tokenId, uint128 liquidity, uint256 amount0, uint256 amount1);
/// @dev Only the `liquidity` field of the returned tuple is read by the evaluator (burn_lp).
function positions(uint256 tokenId)
external
view
returns (
uint96 nonce,
address operator,
address token0,
address token1,
uint24 fee,
int24 tickLower,
int24 tickUpper,
uint128 liquidity,
uint256 feeGrowthInside0LastX128,
uint256 feeGrowthInside1LastX128,
uint128 tokensOwed0,
uint128 tokensOwed1
);
/// @dev Return values are ignored by the evaluator.
function decreaseLiquidity(DecreaseLiquidityParams calldata params) external payable returns (uint256 amount0, uint256 amount1);
/// @dev Return values are ignored by the evaluator.
function collect(CollectParams calldata params) external payable returns (uint256 amount0, uint256 amount1);
}
// Main test contract
contract FitnessEvaluator is Test {
// Attaches UniswapHelpers functions (e.g. initializePoolFor1Cent, used in _deploy) to pool handles.
using UniswapHelpers for IUniswapV3Pool;
// Base network constants
/// @dev Fee tier passed to the factory, the router and the position manager (10_000 = 1% in Uniswap V3 units).
uint24 internal constant POOL_FEE = 10_000;
address internal constant WETH_ADDR = 0x4200000000000000000000000000000000000006;
address internal constant SWAP_ROUTER = 0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4;
address internal constant NPM_ADDR = 0x27F971cb582BF9E50F397e4d29a5C7A34f11faA2;
address internal constant V3_FACTORY = 0x4752ba5DBc23f44D87826276BF6Fd6b1C372aD24;
/// @dev Fee destination wired into Stake and LiquidityManager; impersonated in _bootstrap.
address internal constant FEE_DEST = 0xf6a3eef9088A255c32b6aD2025f83E57291D9011;
/// @dev Fixed address used with vm.etch to inject candidate bytecode.
/// Derived from keccak256 of a fixed label, so it is deterministic and a collision
/// with a live Base address is practically impossible.
address internal constant IMPL_SLOT = address(uint160(uint256(keccak256("fitness.impl.slot"))));
// Anvil test accounts (deterministic mnemonic)
/// @dev Account 8: adversary (10 000 ETH in Anvil; funded via vm.deal here).
uint256 internal constant ADV_PK = 0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97;
/// @dev Account 2: recenter caller (granted recenterAccess in bootstrap).
uint256 internal constant RECENTER_PK = 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a;
// Runtime state (populated by testBatchEvaluate and _deploy)
address internal lmAddr;
address internal krkAddr;
address internal stakeAddr;
address internal optProxy;
address internal advAddr;
address internal recenterAddr;
IUniswapV3Pool internal pool;
/// @dev True when WETH sorts below KRK by address, i.e. WETH is the pool's token0.
bool internal token0isWeth;
/// @dev Mirrors AttackRunner._stakedPositionIds: position IDs returned by stake ops.
/// vm.snapshot/revertTo reverts this array's storage between attacks.
uint256[] internal _stakedPositionIds;
/// @dev NPM tokenIds returned by mint_lp ops (in insertion order).
/// burn_lp references positions by 1-based index into this array so that
/// attack files are fork-block-independent (tokenIds vary by fork tip).
uint256[] internal _mintedNpmTokenIds;
// Entry point
/**
 * @notice Batch fitness evaluator: score all candidates in the manifest.
 *
 * Reads FITNESS_MANIFEST_DIR/{ids.txt,bytecodes.txt} line-by-line (line N of both
 * files describes the same candidate). Outputs one JSON line per candidate to stdout:
 *   {"candidate_id":"gen0_c000","fitness":1234567890}
 * or, when the candidate cannot be installed behind the Optimizer proxy:
 *   {"candidate_id":"gen0_c000","fitness":0,"error":"upgrade_failed"}
 *
 * Skipped (with a pass) if BASE_RPC_URL is not set, so CI without a Base
 * RPC key does not fail the test suite.
 *
 * @dev Reverts if FITNESS_MANIFEST_DIR is unset or ATTACKS_DIR contains no *.jsonl
 *      file: with zero attacks every candidate would silently score 0.
 */
function testBatchEvaluate() public {
    string memory rpcUrl = vm.envOr("BASE_RPC_URL", string(""));
    vm.skip(bytes(rpcUrl).length == 0);

    string memory manifestDir = vm.envOr("FITNESS_MANIFEST_DIR", string(""));
    require(bytes(manifestDir).length > 0, "FITNESS_MANIFEST_DIR env var required");
    string memory attacksDir = vm.envOr("ATTACKS_DIR", string("script/backtesting/attacks"));

    // Discover attack files once. The directory listing lives in memory, which
    // vm.revertTo does not touch, so it stays valid for every candidate.
    string[] memory attackFiles = _listAttackFiles(attacksDir);
    require(attackFiles.length > 0, "FitnessEvaluator: no *.jsonl attack files found in ATTACKS_DIR");

    // Fork Base mainnet so Uniswap V3, WETH, etc. exist at canonical addresses.
    vm.createSelectFork(rpcUrl);
    advAddr = vm.addr(ADV_PK);
    recenterAddr = vm.addr(RECENTER_PK);

    // Deploy the full KRAIKEN stack once on the fork.
    _deploy();

    // Snapshot after deployment (pre-bootstrap, pre-candidate-specific state).
    uint256 baseSnap = vm.snapshot();

    string memory idsFile = string.concat(manifestDir, "/ids.txt");
    string memory bytecodesFile = string.concat(manifestDir, "/bytecodes.txt");

    // Process candidates one at a time; an empty id line marks the end of the manifest.
    while (true) {
        string memory candidateId = vm.readLine(idsFile);
        string memory bytecodeHex = vm.readLine(bytecodesFile);
        if (bytes(candidateId).length == 0) break;

        // Revert to clean post-deploy state for each candidate. revertTo consumes
        // the snapshot, so take a fresh one immediately.
        vm.revertTo(baseSnap);
        baseSnap = vm.snapshot();

        // A malformed candidate (compiler bug, bad transpiler output, unparsable hex)
        // must not abort the entire batch: emit fitness=0 and continue. The revertTo
        // at the top of the next iteration cleans up any partial state.
        if (!_installCandidate(bytecodeHex)) {
            console.log(string.concat('{"candidate_id":"', candidateId, '","fitness":0,"error":"upgrade_failed"}'));
            continue;
        }

        // Bootstrap: fund LM, set recenterAccess, initial recenter.
        _bootstrap();

        // Score: sum lm_eth_total across all attack sequences, each run from the
        // same post-bootstrap state.
        uint256 totalFitness = 0;
        for (uint256 i = 0; i < attackFiles.length; i++) {
            uint256 atkSnap = vm.snapshot();
            totalFitness += _runAttack(attackFiles[i]);
            vm.revertTo(atkSnap);
        }

        // Emit score as a JSON line (parsed by batch-eval.sh).
        console.log(string.concat('{"candidate_id":"', candidateId, '","fitness":', _uint2str(totalFitness), "}"));
    }

    // Close manifest files.
    vm.closeFile(idsFile);
    vm.closeFile(bytecodesFile);
}

/**
 * @notice List the *.jsonl files directly inside `attacksDir`.
 * @param attacksDir Directory to scan; sub-directories and other extensions are ignored.
 * @return files Paths of the attack files, in the order vm.readDir reports them.
 */
function _listAttackFiles(string memory attacksDir) internal view returns (string[] memory files) {
    Vm.DirEntry[] memory entries = vm.readDir(attacksDir);

    uint256 count = 0;
    for (uint256 i = 0; i < entries.length; i++) {
        if (!entries[i].isDir && _endsWith(entries[i].path, ".jsonl")) count++;
    }

    files = new string[](count);
    uint256 next = 0;
    for (uint256 i = 0; i < entries.length; i++) {
        if (!entries[i].isDir && _endsWith(entries[i].path, ".jsonl")) {
            files[next] = entries[i].path;
            next++;
        }
    }
}

/**
 * @notice Point the Optimizer proxy at the candidate described by `bytecodeHex`.
 * @dev Two encodings are accepted:
 *      1. Runtime bytecode: etched at IMPL_SLOT and upgraded to directly (original behaviour).
 *      2. Creation bytecode (what a forge artifact's `bytecode.object` holds): if the
 *         etched code is rejected by the UUPS upgrade check, the bytes are deployed with
 *         CREATE so the constructor runs, and the proxy is upgraded to the deployed address.
 * @param bytecodeHex Hex string from bytecodes.txt.
 * @return ok False when the hex cannot be parsed, is empty, or neither path yields an
 *            implementation the proxy accepts.
 */
function _installCandidate(string memory bytecodeHex) internal returns (bool ok) {
    bytes memory code;
    try vm.parseBytes(bytecodeHex) returns (bytes memory parsed) {
        code = parsed;
    } catch {
        return false;
    }
    if (code.length == 0) return false;

    // Path 1: treat the bytes as runtime code.
    vm.etch(IMPL_SLOT, code);
    try UUPSUpgradeable(optProxy).upgradeTo(IMPL_SLOT) {
        return true;
    } catch { }

    // Path 2: treat the bytes as creation code. CREATE returns address(0) on failure.
    address deployed;
    assembly ("memory-safe") {
        deployed := create(0, add(code, 0x20), mload(code))
    }
    if (deployed == address(0)) return false;

    try UUPSUpgradeable(optProxy).upgradeTo(deployed) {
        return true;
    } catch {
        return false;
    }
}
// Deployment
/**
* @notice Deploy the full KRAIKEN stack (mirrors DeployLocal.sol).
* @dev All contracts are deployed as address(this) (the test contract),
* which becomes the UUPS admin for the Optimizer proxy.
* Populates krkAddr, token0isWeth, stakeAddr, pool, optProxy and lmAddr.
* Statement order matters: every `new` consumes a CREATE nonce of the test
* contract, so reordering the deployments changes krkAddr and therefore
* token0isWeth.
*/
function _deploy() internal {
// Deploy Kraiken token.
Kraiken kraiken = new Kraiken("Kraiken", "KRK");
krkAddr = address(kraiken);
// Uniswap V3 orders pool tokens by address; the lower address is token0.
token0isWeth = WETH_ADDR < krkAddr;
// Deploy Stake.
Stake stake = new Stake(krkAddr, FEE_DEST);
stakeAddr = address(stake);
kraiken.setStakingPool(stakeAddr);
// Get or create Uniswap V3 pool.
IUniswapV3Factory factory = IUniswapV3Factory(V3_FACTORY);
address poolAddr = factory.getPool(WETH_ADDR, krkAddr, POOL_FEE);
if (poolAddr == address(0)) {
poolAddr = factory.createPool(WETH_ADDR, krkAddr, POOL_FEE);
}
pool = IUniswapV3Pool(poolAddr);
// Initialize pool at 1-cent price if not already initialized
// (a zero sqrtPriceX96 in slot0 means the pool has not been initialized).
(uint160 sqrtPriceX96,,,,,,) = pool.slot0();
if (sqrtPriceX96 == 0) {
pool.initializePoolFor1Cent(token0isWeth);
}
// Deploy Optimizer implementation + UUPS proxy.
// address(this) (test contract) becomes the UUPS admin via initialize.
Optimizer optimizerImpl = new Optimizer();
bytes memory initData = abi.encodeWithSignature("initialize(address,address)", krkAddr, stakeAddr);
ERC1967Proxy proxy = new ERC1967Proxy(address(optimizerImpl), initData);
optProxy = address(proxy);
// Deploy LiquidityManager, pointing it at the Optimizer proxy (not the implementation)
// so later per-candidate upgrades take effect without redeploying the LM.
LiquidityManager lm = new LiquidityManager(V3_FACTORY, WETH_ADDR, krkAddr, optProxy);
lmAddr = address(lm);
// Wire contracts together.
lm.setFeeDestination(FEE_DEST);
kraiken.setLiquidityManager(lmAddr);
}
// Bootstrap
/**
 * @notice Prepare LiquidityManager and adversary state for one candidate
 *         (mirrors the bootstrap phase of fitness.sh).
 *
 * Sequence (same order as fitness.sh):
 *   a. FEE_DEST grants recenterAccess to recenterAddr.
 *   b. The adversary receives 10 000 ETH.
 *   c. The adversary wraps 1000 ETH and transfers the WETH to the LM.
 *   d. The adversary wraps the remaining 9000 ETH and approves router, NPM and Stake.
 *   e. recenterAddr performs the initial recenter, retried up to 5 times.
 *
 * @dev Reverts if no recenter attempt succeeds: continuing silently would make every
 *      candidate score identically (free WETH only, no positions).
 */
function _bootstrap() internal {
    // a. Impersonate the fee destination for the access grant.
    vm.prank(FEE_DEST);
    LiquidityManager(payable(lmAddr)).setRecenterAccess(recenterAddr);

    // b. Adversary funding.
    vm.deal(advAddr, 10_000 ether);

    IWETH9 wrapper = IWETH9(WETH_ADDR);
    IERC20 wethToken = IERC20(WETH_ADDR);
    IERC20 krkToken = IERC20(krkAddr);

    vm.startPrank(advAddr);
    // c. Seed the LM with 1000 WETH.
    wrapper.deposit{ value: 1_000 ether }();
    wethToken.transfer(lmAddr, 1_000 ether);
    // d. Trading capital plus unlimited allowances for every contract the attacks touch.
    wrapper.deposit{ value: 9_000 ether }();
    wethToken.approve(SWAP_ROUTER, type(uint256).max);
    wethToken.approve(NPM_ADDR, type(uint256).max);
    krkToken.approve(SWAP_ROUTER, type(uint256).max);
    krkToken.approve(stakeAddr, type(uint256).max);
    krkToken.approve(NPM_ADDR, type(uint256).max);
    vm.stopPrank();

    // e. Initial recenter. Per the original notes, no ANCHOR position exists yet so the
    //    amplitude check is skipped, and recenterAccess bypasses the TWAP stability check.
    //    Every retry after the first advances the chain by 50 blocks.
    bool recentered;
    uint256 attempt;
    while (!recentered && attempt < 5) {
        if (attempt != 0) vm.roll(block.number + 50);
        vm.prank(recenterAddr);
        try ILM(lmAddr).recenter() returns (bool) {
            recentered = true;
        } catch { }
        attempt++;
    }
    require(recentered, "FitnessEvaluator: bootstrap recenter failed after 5 attempts");
}
// Attack execution
/**
 * @notice Replay one attack file and report the LM's resulting lm_eth_total.
 * @dev Reading stops at the first empty line returned by vm.readLine, so a blank
 *      line inside a file ends the sequence just like end-of-file does.
 * @param attackFile Path to the *.jsonl attack file (one JSON op per line).
 * @return lm_eth_total as computed by _computeLmEthTotal after the last op.
 */
function _runAttack(string memory attackFile) internal returns (uint256) {
    // Rewind: closing the file resets the read cursor so replay starts at line 1.
    vm.closeFile(attackFile);

    // vm.revertTo() already restores these arrays between attacks; clearing them here
    // also covers the first attack (no preceding revert) and future call paths.
    delete _stakedPositionIds;
    delete _mintedNpmTokenIds;

    while (true) {
        string memory opLine = vm.readLine(attackFile);
        if (bytes(opLine).length == 0) break;
        _executeOp(opLine);
    }

    return _computeLmEthTotal();
}
/**
 * @notice Execute a single attack operation (mirrors AttackRunner._execute).
 * @dev `line` is one JSON object whose ".op" field selects the action:
 *      buy / sell  – swap WETH→KRK or KRK→WETH as the adversary (sell accepts "all")
 *      recenter    – best-effort LM recenter as recenterAddr (failures ignored)
 *      stake       – Stake.snatch; the returned id is appended to _stakedPositionIds
 *      unstake     – exit the stake at 1-based index ".positionId"
 *      mine        – advance block.number by ".blocks"
 *      mint_lp     – mint an NPM position; tokenId appended to _mintedNpmTokenIds
 *      burn_lp     – remove all liquidity of the mint at 1-based index ".tokenId"
 *      Any other op is silently ignored.
 * @param line One JSON-encoded operation.
 */
function _executeOp(string memory line) internal {
    bytes32 opHash = keccak256(bytes(vm.parseJsonString(line, ".op")));

    if (opHash == keccak256("buy")) {
        uint256 wethIn = vm.parseUint(vm.parseJsonString(line, ".amount"));
        _swapAsAdversary(WETH_ADDR, krkAddr, wethIn);
        return;
    }

    if (opHash == keccak256("sell")) {
        string memory rawAmount = vm.parseJsonString(line, ".amount");
        uint256 krkIn;
        if (_eq(rawAmount, "all")) {
            krkIn = IERC20(krkAddr).balanceOf(advAddr);
        } else {
            krkIn = vm.parseUint(rawAmount);
        }
        // Nothing to sell: skip instead of sending a zero-amount swap.
        if (krkIn == 0) return;
        _swapAsAdversary(krkAddr, WETH_ADDR, krkIn);
        return;
    }

    if (opHash == keccak256("recenter")) {
        vm.prank(recenterAddr);
        try ILM(lmAddr).recenter() { } catch { }
        return;
    }

    if (opHash == keccak256("stake")) {
        uint256 stakeAmount = vm.parseUint(vm.parseJsonString(line, ".amount"));
        uint32 taxRateIndex = uint32(vm.parseJsonUint(line, ".taxRateIndex"));
        vm.prank(advAddr);
        uint256 newPositionId = IStake(stakeAddr).snatch(stakeAmount, advAddr, taxRateIndex, new uint256[](0));
        _stakedPositionIds.push(newPositionId);
        return;
    }

    if (opHash == keccak256("unstake")) {
        // ".positionId" is a 1-based index into the stakes made during this run.
        uint256 oneBased = vm.parseJsonUint(line, ".positionId");
        require(
            oneBased >= 1 && oneBased <= _stakedPositionIds.length,
            "FitnessEvaluator: unstake positionId out of range"
        );
        uint256 stakedId = _stakedPositionIds[oneBased - 1];
        vm.prank(advAddr);
        IStake(stakeAddr).exitPosition(stakedId);
        return;
    }

    if (opHash == keccak256("mine")) {
        vm.roll(block.number + vm.parseJsonUint(line, ".blocks"));
        return;
    }

    if (opHash == keccak256("mint_lp")) {
        int24 lowerTick = int24(vm.parseJsonInt(line, ".tickLower"));
        int24 upperTick = int24(vm.parseJsonInt(line, ".tickUpper"));
        uint256 desired0 = vm.parseUint(vm.parseJsonString(line, ".amount0"));
        uint256 desired1 = vm.parseUint(vm.parseJsonString(line, ".amount1"));

        INonfungiblePositionManager.MintParams memory mintParams = INonfungiblePositionManager.MintParams({
            token0: token0isWeth ? WETH_ADDR : krkAddr,
            token1: token0isWeth ? krkAddr : WETH_ADDR,
            fee: POOL_FEE,
            tickLower: lowerTick,
            tickUpper: upperTick,
            amount0Desired: desired0,
            amount1Desired: desired1,
            amount0Min: 0,
            amount1Min: 0,
            recipient: advAddr,
            deadline: block.timestamp + 3600
        });

        vm.prank(advAddr);
        // Remember the tokenId so burn_lp can address it by 1-based index, which keeps
        // attack files independent of the fork block (NPM tokenIds depend on fork tip).
        (uint256 newTokenId,,,) = INonfungiblePositionManager(NPM_ADDR).mint(mintParams);
        _mintedNpmTokenIds.push(newTokenId);
        return;
    }

    if (opHash == keccak256("burn_lp")) {
        // ".tokenId" is a 1-based index into _mintedNpmTokenIds, not a raw NPM tokenId.
        uint256 oneBasedMint = vm.parseJsonUint(line, ".tokenId");
        require(
            oneBasedMint >= 1 && oneBasedMint <= _mintedNpmTokenIds.length,
            "FitnessEvaluator: burn_lp tokenId out of range (must be 1-based index of a prior mint_lp op)"
        );
        uint256 npmTokenId = _mintedNpmTokenIds[oneBasedMint - 1];
        INonfungiblePositionManager npm = INonfungiblePositionManager(NPM_ADDR);

        (,,,,,,, uint128 remaining,,,,) = npm.positions(npmTokenId);
        // Already emptied: nothing to withdraw.
        if (remaining == 0) return;

        vm.startPrank(advAddr);
        npm.decreaseLiquidity(
            INonfungiblePositionManager.DecreaseLiquidityParams({
                tokenId: npmTokenId,
                liquidity: remaining,
                amount0Min: 0,
                amount1Min: 0,
                deadline: block.timestamp + 3600
            })
        );
        npm.collect(
            INonfungiblePositionManager.CollectParams({
                tokenId: npmTokenId,
                recipient: advAddr,
                amount0Max: type(uint128).max,
                amount1Max: type(uint128).max
            })
        );
        vm.stopPrank();
        return;
    }

    // Unknown ops are silently ignored (mirrors AttackRunner behaviour).
}

/**
 * @dev Swap exactly `amountIn` of `tokenIn` for `tokenOut` through the router as the
 *      adversary, with no minimum output and no price limit.
 */
function _swapAsAdversary(address tokenIn, address tokenOut, uint256 amountIn) private {
    vm.prank(advAddr);
    ISwapRouter02(SWAP_ROUTER).exactInputSingle(
        ISwapRouter02.ExactInputSingleParams({
            tokenIn: tokenIn,
            tokenOut: tokenOut,
            fee: POOL_FEE,
            recipient: advAddr,
            amountIn: amountIn,
            amountOutMinimum: 0,
            sqrtPriceLimitX96: 0
        })
    );
}
// Score computation
/**
 * @notice lm_eth_total = LM's native ETH + LM's WETH + ETH value of its three positions.
 *         Mirrors the lm_eth_total calculation in AttackRunner._logSnapshot.
 * @return total Sum in wei, valued at the pool's current sqrtPriceX96.
 */
function _computeLmEthTotal() internal view returns (uint256 total) {
    (uint160 sqrtPriceX96,,,,,,) = pool.slot0();

    total = lmAddr.balance + IERC20(WETH_ADDR).balanceOf(lmAddr);

    // Stage ids: 0 = FLOOR, 1 = ANCHOR, 2 = DISCOVERY.
    for (uint8 stage = 0; stage < 3; stage++) {
        (uint128 stageLiquidity, int24 lower, int24 upper) = ILM(lmAddr).positions(stage);
        total += _positionEthValue(sqrtPriceX96, lower, upper, stageLiquidity);
    }
}
/**
 * @notice ETH-equivalent value of a Uniswap V3 position at the current price
 *         (same formula as AttackRunner._positionEthValue).
 * @param sqrtPriceX96 Current pool price as a Q64.96 square root.
 * @param tickLower    Lower tick of the position.
 * @param tickUpper    Upper tick of the position.
 * @param liquidity    Position liquidity; 0 yields 0.
 * @return WETH held by the position plus its KRK converted to WETH at the spot price.
 */
function _positionEthValue(
    uint160 sqrtPriceX96,
    int24 tickLower,
    int24 tickUpper,
    uint128 liquidity
)
    internal
    view
    returns (uint256)
{
    if (liquidity == 0) return 0;

    (uint256 held0, uint256 held1) = LiquidityAmounts.getAmountsForLiquidity(
        sqrtPriceX96, TickMath.getSqrtRatioAtTick(tickLower), TickMath.getSqrtRatioAtTick(tickUpper), liquidity
    );

    uint256 ethAmount;
    uint256 krkAmount;
    if (token0isWeth) {
        ethAmount = held0;
        krkAmount = held1;
    } else {
        ethAmount = held1;
        krkAmount = held0;
    }

    // No KRK to convert, or no usable price: the WETH side is the whole value.
    if (krkAmount == 0 || sqrtPriceX96 == 0) return ethAmount;

    uint256 q96 = 1 << 96;
    // The conversion is applied as two mulDiv steps, one sqrtPrice factor at a time.
    //   token0 = WETH, token1 = KRK: 1 KRK = 2^192 / sqrtP^2 WETH
    //   token0 = KRK, token1 = WETH: 1 KRK = sqrtP^2 / 2^192 WETH
    uint256 krkInEth = token0isWeth
        ? FullMath.mulDiv(FullMath.mulDiv(krkAmount, q96, sqrtPriceX96), q96, sqrtPriceX96)
        : FullMath.mulDiv(FullMath.mulDiv(krkAmount, sqrtPriceX96, q96), sqrtPriceX96, q96);

    return ethAmount + krkInEth;
}
// Utilities
/// @dev Byte-wise string equality: true iff `a` and `b` have identical contents.
function _eq(string memory a, string memory b) internal pure returns (bool) {
    bytes memory left = bytes(a);
    bytes memory right = bytes(b);
    // Different lengths can never match; otherwise compare content hashes.
    if (left.length != right.length) return false;
    return keccak256(left) == keccak256(right);
}
/// @dev True iff `str` ends with `suffix` (byte-wise). An empty suffix always matches.
function _endsWith(string memory str, string memory suffix) internal pure returns (bool) {
    bytes memory haystack = bytes(str);
    bytes memory tail = bytes(suffix);
    uint256 tailLen = tail.length;
    if (tailLen > haystack.length) return false;

    // Walk the suffix right-to-left against the end of the string.
    uint256 shift = haystack.length - tailLen;
    for (uint256 k = tailLen; k > 0; k--) {
        if (haystack[shift + k - 1] != tail[k - 1]) return false;
    }
    return true;
}
/// @dev Decimal ASCII representation of `n`, without leading zeros ("0" for zero).
function _uint2str(uint256 n) internal pure returns (string memory) {
    if (n == 0) return "0";

    // First pass: count decimal digits.
    uint256 length;
    for (uint256 probe = n; probe != 0; probe /= 10) {
        length++;
    }

    // Second pass: write digits from least to most significant, right to left.
    bytes memory out = new bytes(length);
    for (uint256 pos = length; pos > 0; pos--) {
        out[pos - 1] = bytes1(uint8(48 + (n % 10)));
        n /= 10;
    }
    return string(out);
}
}

View file

@ -44,8 +44,15 @@ export PATH="${HOME}/.foundry/bin:${PATH}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh"
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
# EVAL_MODE controls which fitness backend is used:
# anvil (default) — per-candidate Anvil+forge-script pipeline (fitness.sh)
# revm — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
# Requires BASE_RPC_URL env var. 10-100× faster at scale.
EVAL_MODE="${EVAL_MODE:-anvil}"
# =============================================================================
# Argument parsing
# =============================================================================
@ -171,6 +178,15 @@ done
[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"
if [ "$EVAL_MODE" = "revm" ]; then
[ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH"
[ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH"
[ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)"
elif [ "$EVAL_MODE" != "anvil" ]; then
fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
fi
TSX_CMD="$(find_tsx_cmd)" || fail \
"No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
@ -194,6 +210,7 @@ log " Generations: $GENERATIONS"
log " Mutation rate: $MUTATION_RATE"
log " Output: $OUTPUT_DIR"
log " TSX: $TSX_CMD"
log " Eval mode: $EVAL_MODE"
log "========================================================"
# =============================================================================
@ -241,6 +258,29 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
SCORE_VALUES=""
CAND_COUNT=0
# In revm mode, batch-score all candidates in one forge test invocation before
# the per-candidate loop. Scores are written to a temp JSONL file that the
# loop reads with a fast Python lookup.
BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"
if [ "$EVAL_MODE" = "revm" ]; then
declare -a _BATCH_FILES=()
for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
[ -f "$_CF" ] && _BATCH_FILES+=("$_CF")
done
if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
BATCH_EC=0
bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null \
|| BATCH_EC=$?
if [ "$BATCH_EC" -eq 2 ]; then
fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
fi
log " revm batch scoring complete (exit $BATCH_EC)"
fi
fi
for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
[ -f "$CAND_FILE" ] || continue
@ -255,16 +295,37 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
SCORE=0
FITNESS_EC=0
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
if [ "$FITNESS_EC" -eq 2 ]; then
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
# Look up pre-computed score from batch-eval.sh output.
SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
import json, sys
cid = sys.argv[1]
with open(sys.argv[2]) as f:
for line in f:
try:
d = json.loads(line)
if d.get("candidate_id") == cid:
print(d["fitness"])
sys.exit(0)
except (json.JSONDecodeError, KeyError):
pass
print(0)
PYEOF
) || SCORE=0
else
# Anvil mode (or revm fallback): score candidate individually.
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
if [ "$FITNESS_EC" -eq 2 ]; then
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
fi
fi
# Validate that score is a non-negative integer; treat any other output as invalid.
if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
log " $CID: invalid/missing score, using 0"
SCORE=0
else
log " $CID: fitness=$SCORE"

View file

@ -0,0 +1,226 @@
#!/usr/bin/env bash
# =============================================================================
# batch-eval.sh — revm-based batch fitness evaluator
#
# Replaces the per-candidate Anvil+forge-script pipeline with in-process EVM
# execution via Foundry's native revm backend (FitnessEvaluator.t.sol).
#
# Speedup: compiles each candidate once (unavoidable — different Solidity per
# candidate), then runs ALL attack sequences in a single in-process forge test
# with O(1) memory snapshot/revert instead of RPC calls per attack.
#
# Usage:
# ./tools/push3-evolution/revm-evaluator/batch-eval.sh \
# [--output-dir /tmp/scores] \
# candidate0.push3 candidate1.push3 ...
#
# Output (stdout):
# One JSON object per candidate:
# {"candidate_id":"gen0_c000","fitness":123456789}
#
# Exit codes:
# 0 Success.
# 1 Candidate-level error (transpile/compile failed for at least one candidate).
# 2 Infrastructure error (missing tool, BASE_RPC_URL not set, forge test failed).
#
# Environment:
# BASE_RPC_URL Required. Base network RPC endpoint for forking.
# ATTACKS_DIR Optional. Path to *.jsonl attack files.
# (default: <repo>/onchain/script/backtesting/attacks)
# OUTPUT_DIR Optional. Directory to copy scores.jsonl into (--output-dir overrides).
# =============================================================================
# Abort on any unhandled command failure, unset variable, or failing pipeline stage.
set -euo pipefail
# Make a default foundryup installation (forge) discoverable.
export PATH="${HOME}/.foundry/bin:${PATH}"
# Directory containing this script; the repo root is three levels up.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
ONCHAIN_DIR="$REPO_ROOT/onchain"
TRANSPILER_DIR="$REPO_ROOT/tools/push3-transpiler"
# The transpiler writes every candidate to this single Solidity file (overwritten per candidate).
TRANSPILER_OUT="$ONCHAIN_DIR/src/OptimizerV3Push3.sol"
# forge build artifact for the transpiled contract; the candidate bytecode is read from it.
ARTIFACT_PATH="$ONCHAIN_DIR/out/OptimizerV3Push3.sol/OptimizerV3Push3.json"
# Used when ATTACKS_DIR is not provided in the environment.
DEFAULT_ATTACKS_DIR="$ONCHAIN_DIR/script/backtesting/attacks"
# =============================================================================
# Argument parsing
# =============================================================================
# OUTPUT_DIR may be preset in the environment; --output-dir overrides it.
# Every non-option argument is collected as a candidate .push3 file.
OUTPUT_DIR="${OUTPUT_DIR:-}"
declare -a PUSH3_FILES=()
while [[ $# -gt 0 ]]; do
  case $1 in
    --output-dir)
      # Without this guard a trailing --output-dir trips `set -u` ("$2: unbound
      # variable") and exits 1 instead of the documented usage/infrastructure code 2.
      if [[ $# -lt 2 ]]; then
        echo "Option --output-dir requires a directory argument" >&2
        exit 2
      fi
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --*) echo "Unknown option: $1" >&2; exit 2 ;;
    *) PUSH3_FILES+=("$1"); shift ;;
  esac
done
if [ "${#PUSH3_FILES[@]}" -eq 0 ]; then
  echo "Usage: $0 [--output-dir DIR] candidate1.push3 ..." >&2
  exit 2
fi
# =============================================================================
# Environment checks
# =============================================================================
# Both checks exit 2 (infrastructure error) before any candidate is touched.
BASE_RPC_URL="${BASE_RPC_URL:-}"
[ -n "$BASE_RPC_URL" ] || {
  echo " [batch-eval] ERROR: BASE_RPC_URL env var required for Base network fork" >&2
  exit 2
}
# forge builds and tests, node runs the transpiler, python3 extracts bytecode.
for _tool in forge node python3; do
  if ! command -v "$_tool" &>/dev/null; then
    echo " [batch-eval] ERROR: $_tool not found in PATH" >&2
    exit 2
  fi
done
# =============================================================================
# Helpers
# =============================================================================
# log MESSAGE...   — write a prefixed diagnostic line to stderr (stdout is reserved for scores).
log() {
  printf ' [batch-eval] %s\n' "$*" >&2
}
# fail2 MESSAGE... — report an infrastructure error on stderr and exit with status 2.
fail2() {
  printf ' [batch-eval] ERROR: %s\n' "$*" >&2
  exit 2
}
# =============================================================================
# Step 1 — Ensure transpiler dependencies are installed
# =============================================================================
# node_modules is used as the "already installed" marker; npm runs only when it is absent.
if ! [ -d "$TRANSPILER_DIR/node_modules" ]; then
  log "Installing transpiler dependencies…"
  if ! (cd "$TRANSPILER_DIR" && npm install --silent); then
    fail2 "npm install in push3-transpiler failed"
  fi
fi
# =============================================================================
# Step 2 — Transpile + compile each candidate, extract bytecodes into manifest
#
# The manifest is two parallel files: line N of ids.txt names the candidate
# whose bytecode is on line N of bytecodes.txt. Candidates that fail at any
# stage are recorded in FAILED_IDS and left out of the manifest.
# =============================================================================
MANIFEST_DIR="$(mktemp -d)"
# The manifest is only needed while this script runs; remove it on every exit
# path so repeated evolution runs do not accumulate temp directories.
trap 'rm -rf "$MANIFEST_DIR"' EXIT
IDS_FILE="$MANIFEST_DIR/ids.txt"
BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt"
: > "$IDS_FILE"
: > "$BYTECODES_FILE"
COMPILED_COUNT=0
FAILED_IDS=""
for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
  CANDIDATE_ID="$(basename "$PUSH3_FILE" .push3)"

  # A missing file is a candidate-level problem: skip it instead of letting the
  # path resolution below fail under `set -e` and abort the whole batch.
  if [ ! -f "$PUSH3_FILE" ]; then
    log "WARNING: candidate file not found for $CANDIDATE_ID ($PUSH3_FILE) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Absolute path, because the transpiler runs from its own directory.
  PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")"

  # Transpile Push3 → OptimizerV3Push3.sol
  TRANSPILE_EC=0
  (
    cd "$TRANSPILER_DIR"
    npx ts-node src/index.ts "$PUSH3_FILE" "$TRANSPILER_OUT"
  ) >/dev/null 2>&1 || TRANSPILE_EC=$?
  if [ "$TRANSPILE_EC" -ne 0 ]; then
    log "WARNING: transpile failed for $CANDIDATE_ID (exit $TRANSPILE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Compile (forge's incremental build skips unchanged files quickly)
  FORGE_EC=0
  (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$?
  if [ "$FORGE_EC" -ne 0 ]; then
    log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Extract the artifact's bytecode.object (creation bytecode), normalised to
  # carry a 0x prefix.
  BYTECODE_HEX="$(python3 - "$ARTIFACT_PATH" <<'PYEOF'
import json, sys
with open(sys.argv[1]) as f:
    d = json.load(f)
bytecode = d["bytecode"]["object"]
# Ensure 0x prefix
if not bytecode.startswith("0x"):
    bytecode = "0x" + bytecode
print(bytecode)
PYEOF
  )" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; }
  if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then
    log "WARNING: empty bytecode for $CANDIDATE_ID — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Append to both manifest files together so their line numbers stay aligned.
  printf '%s\n' "$CANDIDATE_ID" >> "$IDS_FILE"
  printf '%s\n' "$BYTECODE_HEX" >> "$BYTECODES_FILE"
  COMPILED_COUNT=$((COMPILED_COUNT + 1))
  log "Compiled $CANDIDATE_ID"
done
if [ "$COMPILED_COUNT" -eq 0 ]; then
  fail2 "No candidates compiled successfully — aborting"
fi
log "Compiled $COMPILED_COUNT / ${#PUSH3_FILES[@]} candidates"
# =============================================================================
# Step 3 — Run FitnessEvaluator.t.sol (in-process revm, all candidates at once)
# =============================================================================
ATTACKS_DIR="${ATTACKS_DIR:-$DEFAULT_ATTACKS_DIR}"
# RPC URLs commonly embed an API key in the path, query string or userinfo, so
# only the host[:port] part is written to the log.
_RPC_HOST="${BASE_RPC_URL#*://}"
_RPC_HOST="${_RPC_HOST%%/*}"
_RPC_HOST="${_RPC_HOST##*@}"
_RPC_HOST="${_RPC_HOST%%\?*}"
log "Running FitnessEvaluator.t.sol (in-process revm, fork: $_RPC_HOST)…"
FORGE_TEST_EC=0
# stderr is merged into the capture so a failure can be replayed in full below.
# NOTE(review): --no-match-path with a dummy value presumably overrides a
# no_match_path default in foundry.toml that excludes this test — confirm.
FORGE_OUTPUT="$(
  cd "$ONCHAIN_DIR"
  BASE_RPC_URL="$BASE_RPC_URL" \
  FITNESS_MANIFEST_DIR="$MANIFEST_DIR" \
  ATTACKS_DIR="$ATTACKS_DIR" \
  forge test \
    --match-contract FitnessEvaluator \
    --match-test testBatchEvaluate \
    -vv \
    --no-match-path "NOT_A_REAL_PATH" \
    2>&1
)" || FORGE_TEST_EC=$?
if [ "$FORGE_TEST_EC" -ne 0 ]; then
  # Surface forge output on failure for diagnosis
  printf '%s\n' "$FORGE_OUTPUT" >&2
  fail2 "forge test failed (exit $FORGE_TEST_EC)"
fi
# =============================================================================
# Step 4 — Extract and emit score JSON lines
#
# forge test -vv prints console.log output indented beneath a "Logs:" header.
# Keep only the lines that carry a "candidate_id" key and drop their leading
# whitespace, leaving one JSON object per line.
# =============================================================================
SCORES_JSONL="$(printf '%s\n' "$FORGE_OUTPUT" | sed -n '/"candidate_id"/s/^[[:space:]]*//p' || true)"
if [ -z "$SCORES_JSONL" ]; then
  # No scores at all (e.g. the test was skipped): show forge's output and treat
  # it as an infrastructure failure.
  printf '%s\n' "$FORGE_OUTPUT" >&2
  fail2 "No score lines found in forge test output"
fi

# Emit scores to stdout
printf '%s\n' "$SCORES_JSONL"

# Optionally write to output directory
if [ -n "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
  printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl"
  log "Scores written to $OUTPUT_DIR/scores.jsonl"
fi

# Candidates that never reached the manifest have no score line; report them and
# signal a candidate-level error (exit 1) even though the others were scored.
if [ -n "$FAILED_IDS" ]; then
  log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS"
  exit 1
fi
log "Done — scored $COMPILED_COUNT candidates"