From 26b88766918523bd03f6bd21f87a73a18ee87f56 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 12 Mar 2026 11:54:41 +0000 Subject: [PATCH 1/2] fix: feat: revm-based fitness evaluator for evolution at scale (#604) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace per-candidate Anvil+forge-script pipeline with in-process EVM execution using Foundry's native revm backend, achieving 10-100× speedup for evolutionary search at scale. New files: - onchain/test/FitnessEvaluator.t.sol — Forge test that forks Base once, deploys the full KRAIKEN stack, then for each candidate uses vm.etch to inject the compiled optimizer bytecode, UUPS-upgrades the proxy, runs all attack sequences with in-memory vm.snapshot/revertTo (no RPC overhead), and emits one {"candidate_id","fitness"} JSON line per candidate. Skips gracefully when BASE_RPC_URL is unset (CI-safe). - tools/push3-evolution/revm-evaluator/batch-eval.sh — Wrapper that transpiles+compiles each candidate sequentially, writes a two-file manifest (ids.txt + bytecodes.txt), then invokes FitnessEvaluator.t.sol in a single forge test run and parses the score JSON from stdout. Modified: - tools/push3-evolution/evolve.sh — Adds EVAL_MODE env var (anvil|revm). When EVAL_MODE=revm, batch-scores every candidate in a generation with one batch-eval.sh call instead of N sequential fitness.sh processes; scores are looked up from the JSONL output in the per-candidate loop. Default remains EVAL_MODE=anvil for backward compatibility. Key design decisions: - Per-candidate Solidity compilation is unavoidable (each Push3 candidate produces different Solidity); the speedup is in the evaluation phase. - vm.snapshot/revertTo in forge test are O(1) memory operations (true revm), not RPC calls — this is the core speedup vs Anvil. - recenterAccess is set in bootstrap so TWAP stability checks are bypassed during attack sequences (mirrors the existing fitness.sh bootstrap). 
- Test skips cleanly when BASE_RPC_URL is absent, keeping CI green. Co-Authored-By: Claude Sonnet 4.6 --- onchain/test/FitnessEvaluator.t.sol | 577 ++++++++++++++++++ tools/push3-evolution/evolve.sh | 73 ++- .../revm-evaluator/batch-eval.sh | 226 +++++++ 3 files changed, 870 insertions(+), 6 deletions(-) create mode 100644 onchain/test/FitnessEvaluator.t.sol create mode 100755 tools/push3-evolution/revm-evaluator/batch-eval.sh diff --git a/onchain/test/FitnessEvaluator.t.sol b/onchain/test/FitnessEvaluator.t.sol new file mode 100644 index 0000000..379bf68 --- /dev/null +++ b/onchain/test/FitnessEvaluator.t.sol @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +pragma solidity ^0.8.19; + +/** + * @title FitnessEvaluator + * @notice In-process (revm) batch fitness evaluator for Push3 evolution. + * + * Replaces the Anvil+forge-script pipeline with in-process EVM execution. + * Uses Foundry's native revm backend: vm.snapshot/revertTo are memory operations + * with no JSON-RPC overhead, giving 100-1000x speedup over per-candidate Anvil. + * + * Architecture: + * batch-eval.sh compiles each candidate (Push3→Solidity→bytecode) and writes a + * two-file manifest (ids.txt + bytecodes.txt). This test reads the manifest, + * forks Base mainnet once, deploys the full KRAIKEN stack once, then for each + * candidate: + * 1. snapshot → etch candidate bytecode → UUPS upgrade proxy → bootstrap + * 2. For each attack: snapshot → execute → accumulate lm_eth_total → revert + * 3. Emit JSON score line + * 4. 
Revert to pre-bootstrap snapshot + * + * Required env vars: + * BASE_RPC_URL Base network RPC endpoint (for fork) + * FITNESS_MANIFEST_DIR Directory containing ids.txt and bytecodes.txt + * + * Optional env vars: + * ATTACKS_DIR Path to *.jsonl attack files (default: script/backtesting/attacks) + * + * Run: + * BASE_RPC_URL=https://mainnet.base.org \ + * FITNESS_MANIFEST_DIR=/tmp/manifest \ + * forge test --match-contract FitnessEvaluator --match-test testBatchEvaluate -vv + */ + +import "forge-std/Test.sol"; +import { Kraiken } from "../src/Kraiken.sol"; +import { Stake } from "../src/Stake.sol"; +import { Optimizer } from "../src/Optimizer.sol"; +import { LiquidityManager } from "../src/LiquidityManager.sol"; +import { ERC1967Proxy } from "@openzeppelin/proxy/ERC1967/ERC1967Proxy.sol"; +import { UUPSUpgradeable } from "@openzeppelin/proxy/utils/UUPSUpgradeable.sol"; +import { IERC20 } from "@openzeppelin/token/ERC20/IERC20.sol"; +import { IUniswapV3Factory } from "@uniswap-v3-core/interfaces/IUniswapV3Factory.sol"; +import { IUniswapV3Pool } from "@uniswap-v3-core/interfaces/IUniswapV3Pool.sol"; +import { FullMath } from "@aperture/uni-v3-lib/FullMath.sol"; +import { LiquidityAmounts } from "@aperture/uni-v3-lib/LiquidityAmounts.sol"; +import { TickMath } from "@aperture/uni-v3-lib/TickMath.sol"; +import { UniswapHelpers } from "../src/helpers/UniswapHelpers.sol"; +import { IWETH9 } from "../src/interfaces/IWETH9.sol"; + +// ─── External interfaces (mirrors AttackRunner.s.sol) ───────────────────────── + +interface ISwapRouter02 { + struct ExactInputSingleParams { + address tokenIn; + address tokenOut; + uint24 fee; + address recipient; + uint256 amountIn; + uint256 amountOutMinimum; + uint160 sqrtPriceLimitX96; + } + + function exactInputSingle(ExactInputSingleParams calldata params) external returns (uint256); +} + +interface ILM { + function getVWAP() external view returns (uint256); + function positions(uint8 stage) external view returns (uint128 liquidity, 
int24 tickLower, int24 tickUpper); + function recenter() external returns (bool); +} + +interface IStake { + function snatch(uint256 assets, address receiver, uint32 taxRate, uint256[] calldata positionsToSnatch) + external + returns (uint256 positionId); + function exitPosition(uint256 positionId) external; +} + +interface INonfungiblePositionManager { + struct MintParams { + address token0; + address token1; + uint24 fee; + int24 tickLower; + int24 tickUpper; + uint256 amount0Desired; + uint256 amount1Desired; + uint256 amount0Min; + uint256 amount1Min; + address recipient; + uint256 deadline; + } + + struct DecreaseLiquidityParams { + uint256 tokenId; + uint128 liquidity; + uint256 amount0Min; + uint256 amount1Min; + uint256 deadline; + } + + struct CollectParams { + uint256 tokenId; + address recipient; + uint128 amount0Max; + uint128 amount1Max; + } + + function mint(MintParams calldata params) external payable returns (uint256 tokenId, uint128 liquidity, uint256 amount0, uint256 amount1); + function positions(uint256 tokenId) + external + view + returns ( + uint96 nonce, + address operator, + address token0, + address token1, + uint24 fee, + int24 tickLower, + int24 tickUpper, + uint128 liquidity, + uint256 feeGrowthInside0LastX128, + uint256 feeGrowthInside1LastX128, + uint128 tokensOwed0, + uint128 tokensOwed1 + ); + function decreaseLiquidity(DecreaseLiquidityParams calldata params) external payable returns (uint256 amount0, uint256 amount1); + function collect(CollectParams calldata params) external payable returns (uint256 amount0, uint256 amount1); +} + +// ─── Main test contract ──────────────────────────────────────────────────────── + +contract FitnessEvaluator is Test { + using UniswapHelpers for IUniswapV3Pool; + + // ─── Base network constants ─────────────────────────────────────────────── + + uint24 internal constant POOL_FEE = 10_000; + address internal constant WETH_ADDR = 0x4200000000000000000000000000000000000006; + address internal constant 
SWAP_ROUTER = 0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4; + address internal constant NPM_ADDR = 0x27F971cb582BF9E50F397e4d29a5C7A34f11faA2; + address internal constant V3_FACTORY = 0x4752ba5DBc23f44D87826276BF6Fd6b1C372aD24; + address internal constant FEE_DEST = 0xf6a3eef9088A255c32b6aD2025f83E57291D9011; + + /// @dev Fixed address used with vm.etch to inject candidate bytecode. + /// Chosen to be deterministic and not collide with real Base addresses. + address internal constant IMPL_SLOT = address(uint160(uint256(keccak256("fitness.impl.slot")))); + + // ─── Anvil test accounts (deterministic mnemonic) ──────────────────────── + + /// @dev Account 8 — adversary (10 000 ETH in Anvil; funded via vm.deal here) + uint256 internal constant ADV_PK = 0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97; + /// @dev Account 2 — recenter caller (granted recenterAccess in bootstrap) + uint256 internal constant RECENTER_PK = 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a; + + // ─── Runtime state ──────────────────────────────────────────────────────── + + address internal lmAddr; + address internal krkAddr; + address internal stakeAddr; + address internal optProxy; + address internal advAddr; + address internal recenterAddr; + IUniswapV3Pool internal pool; + bool internal token0isWeth; + + /// @dev Mirrors AttackRunner._stakedPositionIds: position IDs returned by stake ops. + /// vm.snapshot/revertTo reverts this array's storage between attacks. + uint256[] internal _stakedPositionIds; + + // ─── Entry point ───────────────────────────────────────────────────────── + + /** + * @notice Batch fitness evaluator: score all candidates in the manifest. + * + * Reads FITNESS_MANIFEST_DIR/{ids.txt,bytecodes.txt} line-by-line. 
+ * Outputs one JSON line per candidate to stdout: + * {"candidate_id":"gen0_c000","fitness":1234567890} + * + * Skipped (with a pass) if BASE_RPC_URL is not set, so CI without a Base + * RPC key does not fail the test suite. + */ + function testBatchEvaluate() public { + string memory rpcUrl = vm.envOr("BASE_RPC_URL", string("")); + vm.skip(bytes(rpcUrl).length == 0); + + string memory manifestDir = vm.envOr("FITNESS_MANIFEST_DIR", string("")); + require(bytes(manifestDir).length > 0, "FITNESS_MANIFEST_DIR env var required"); + + string memory attacksDir = vm.envOr("ATTACKS_DIR", string("script/backtesting/attacks")); + + // Fork Base mainnet so Uniswap V3, WETH, etc. exist at canonical addresses. + vm.createSelectFork(rpcUrl); + + advAddr = vm.addr(ADV_PK); + recenterAddr = vm.addr(RECENTER_PK); + + // Deploy the full KRAIKEN stack once on the fork. + _deploy(); + + // Snapshot after deployment (pre-bootstrap, pre-candidate-specific state). + uint256 baseSnap = vm.snapshot(); + + // Manifest files: one candidate per line, ids.txt and bytecodes.txt are read in lockstep. + string memory idsFile = string.concat(manifestDir, "/ids.txt"); + string memory bytecodesFile = string.concat(manifestDir, "/bytecodes.txt"); + + // Process candidates one at a time. + while (true) { + string memory candidateId = vm.readLine(idsFile); + string memory bytecodeHex = vm.readLine(bytecodesFile); + if (bytes(candidateId).length == 0) break; + + // Revert to clean post-deploy state for each candidate. + vm.revertTo(baseSnap); + baseSnap = vm.snapshot(); + + // Etch candidate optimizer bytecode and upgrade proxy. + bytes memory candidateBytecode = vm.parseBytes(bytecodeHex); + vm.etch(IMPL_SLOT, candidateBytecode); + // Test contract is the UUPS admin (set during initialize in _deploy). + UUPSUpgradeable(optProxy).upgradeTo(IMPL_SLOT); + + // Bootstrap: fund LM, set recenterAccess, initial recenter. + _bootstrap(); + + // Post-bootstrap snapshot used to reset state between attacks. 
+ uint256 atkBaseSnap = vm.snapshot(); + + // Score: sum lm_eth_total across all attack sequences. + uint256 totalFitness = 0; + Vm.DirEntry[] memory entries = vm.readDir(attacksDir); + for (uint256 i = 0; i < entries.length; i++) { + if (entries[i].isDir || !_endsWith(entries[i].path, ".jsonl")) continue; + + uint256 atkSnap = vm.snapshot(); + uint256 score = _runAttack(entries[i].path); + totalFitness += score; + vm.revertTo(atkSnap); + atkSnap = vm.snapshot(); // refresh for next attack + } + + // Emit score as a JSON line (parsed by batch-eval.sh). + console.log(string.concat('{"candidate_id":"', candidateId, '","fitness":', _uint2str(totalFitness), "}")); + + // atkBaseSnap is never reverted to: each attack restores its own atkSnap, and + // the next candidate starts from baseSnap at the top of the loop. + (atkBaseSnap); // suppress unused variable warning + } + + // Close manifest files. + vm.closeFile(idsFile); + vm.closeFile(bytecodesFile); + } + + // ─── Deployment ─────────────────────────────────────────────────────────── + + /** + * @notice Deploy the full KRAIKEN stack (mirrors DeployLocal.sol). + * @dev All contracts are deployed as address(this) (the test contract), + * which becomes the UUPS admin for the Optimizer proxy. + */ + function _deploy() internal { + // Deploy Kraiken token. + Kraiken kraiken = new Kraiken("Kraiken", "KRK"); + krkAddr = address(kraiken); + token0isWeth = WETH_ADDR < krkAddr; + + // Deploy Stake. + Stake stake = new Stake(krkAddr, FEE_DEST); + stakeAddr = address(stake); + kraiken.setStakingPool(stakeAddr); + + // Get or create Uniswap V3 pool. + IUniswapV3Factory factory = IUniswapV3Factory(V3_FACTORY); + address poolAddr = factory.getPool(WETH_ADDR, krkAddr, POOL_FEE); + if (poolAddr == address(0)) { + poolAddr = factory.createPool(WETH_ADDR, krkAddr, POOL_FEE); + } + pool = IUniswapV3Pool(poolAddr); + + // Initialize pool at 1-cent price if not already initialized. 
+ (uint160 sqrtPriceX96,,,,,,) = pool.slot0(); + if (sqrtPriceX96 == 0) { + pool.initializePoolFor1Cent(token0isWeth); + } + + // Deploy Optimizer implementation + UUPS proxy. + // address(this) (test contract) becomes the UUPS admin via initialize. + Optimizer optimizerImpl = new Optimizer(); + bytes memory initData = abi.encodeWithSignature("initialize(address,address)", krkAddr, stakeAddr); + ERC1967Proxy proxy = new ERC1967Proxy(address(optimizerImpl), initData); + optProxy = address(proxy); + + // Deploy LiquidityManager. + LiquidityManager lm = new LiquidityManager(V3_FACTORY, WETH_ADDR, krkAddr, optProxy); + lmAddr = address(lm); + + // Wire contracts together. + lm.setFeeDestination(FEE_DEST); + kraiken.setLiquidityManager(lmAddr); + } + + // ─── Bootstrap ──────────────────────────────────────────────────────────── + + /** + * @notice Bootstrap LM state for a candidate evaluation (mirrors fitness.sh bootstrap). + * + * Steps (same order as fitness.sh): + * a. Grant recenterAccess to recenterAddr (impersonate feeDestination). + * b. Fund adversary account and wrap ETH → WETH. + * c. Transfer 1000 WETH to LM. + * d. Wrap 9000 WETH for adversary trades + set approvals. + * e. Initial recenter (succeeds immediately: recenterAccess set, no ANCHOR liquidity yet). + */ + function _bootstrap() internal { + // a. Grant recenterAccess (feeDestination call, no ETH needed with gas_price=0). + vm.prank(FEE_DEST); + LiquidityManager(payable(lmAddr)).setRecenterAccess(recenterAddr); + + // b. Fund adversary with ETH. + vm.deal(advAddr, 10_000 ether); + + // c. Wrap 1000 ETH → WETH and send to LM. + vm.startPrank(advAddr); + IWETH9(WETH_ADDR).deposit{ value: 1_000 ether }(); + IERC20(WETH_ADDR).transfer(lmAddr, 1_000 ether); + vm.stopPrank(); + + // d. Wrap remaining 9000 ETH for trade operations + set approvals. 
+ vm.startPrank(advAddr); + IWETH9(WETH_ADDR).deposit{ value: 9_000 ether }(); + IERC20(WETH_ADDR).approve(SWAP_ROUTER, type(uint256).max); + IERC20(WETH_ADDR).approve(NPM_ADDR, type(uint256).max); + IERC20(krkAddr).approve(SWAP_ROUTER, type(uint256).max); + IERC20(krkAddr).approve(stakeAddr, type(uint256).max); + IERC20(krkAddr).approve(NPM_ADDR, type(uint256).max); + vm.stopPrank(); + + // e. Initial recenter: no ANCHOR position exists yet so amplitude check is skipped; + // recenterAccess is set so TWAP stability check is also skipped. + vm.prank(recenterAddr); + try ILM(lmAddr).recenter() { } + catch { + // If recenter fails on first attempt (e.g. pool not ready), mine some blocks and retry. + for (uint256 _attempt = 0; _attempt < 4; _attempt++) { + vm.roll(block.number + 50); + vm.prank(recenterAddr); + try ILM(lmAddr).recenter() returns (bool) { + break; + } catch { } + } + } + } + + // ─── Attack execution ───────────────────────────────────────────────────── + + /** + * @notice Execute one attack sequence and return the final lm_eth_total. + * @param attackFile Path to the *.jsonl attack file. + */ + function _runAttack(string memory attackFile) internal returns (uint256) { + // Reset file read position so each call to _runAttack starts from line 1. + vm.closeFile(attackFile); + // Clear any staked positions from a prior attack (snapshot revert handles on-chain state, + // but the dynamic array in test storage also needs resetting). + delete _stakedPositionIds; + + string memory line = vm.readLine(attackFile); + while (bytes(line).length > 0) { + _executeOp(line); + line = vm.readLine(attackFile); + } + + return _computeLmEthTotal(); + } + + /** + * @notice Execute a single attack operation (mirrors AttackRunner._execute). 
+ */ + function _executeOp(string memory line) internal { + string memory op = vm.parseJsonString(line, ".op"); + + if (_eq(op, "buy")) { + uint256 amount = vm.parseUint(vm.parseJsonString(line, ".amount")); + vm.prank(advAddr); + ISwapRouter02(SWAP_ROUTER).exactInputSingle( + ISwapRouter02.ExactInputSingleParams({ + tokenIn: WETH_ADDR, + tokenOut: krkAddr, + fee: POOL_FEE, + recipient: advAddr, + amountIn: amount, + amountOutMinimum: 0, + sqrtPriceLimitX96: 0 + }) + ); + } else if (_eq(op, "sell")) { + string memory amtStr = vm.parseJsonString(line, ".amount"); + uint256 amount = _eq(amtStr, "all") ? IERC20(krkAddr).balanceOf(advAddr) : vm.parseUint(amtStr); + if (amount == 0) return; + vm.prank(advAddr); + ISwapRouter02(SWAP_ROUTER).exactInputSingle( + ISwapRouter02.ExactInputSingleParams({ + tokenIn: krkAddr, + tokenOut: WETH_ADDR, + fee: POOL_FEE, + recipient: advAddr, + amountIn: amount, + amountOutMinimum: 0, + sqrtPriceLimitX96: 0 + }) + ); + } else if (_eq(op, "recenter")) { + vm.prank(recenterAddr); + try ILM(lmAddr).recenter() { } catch { } + } else if (_eq(op, "stake")) { + uint256 amount = vm.parseUint(vm.parseJsonString(line, ".amount")); + uint32 taxRate = uint32(vm.parseJsonUint(line, ".taxRateIndex")); + vm.prank(advAddr); + uint256 posId = IStake(stakeAddr).snatch(amount, advAddr, taxRate, new uint256[](0)); + _stakedPositionIds.push(posId); + } else if (_eq(op, "unstake")) { + uint256 posIndex = vm.parseJsonUint(line, ".positionId"); + require( + posIndex >= 1 && posIndex <= _stakedPositionIds.length, + "FitnessEvaluator: unstake positionId out of range" + ); + vm.prank(advAddr); + IStake(stakeAddr).exitPosition(_stakedPositionIds[posIndex - 1]); + } else if (_eq(op, "mine")) { + uint256 blocks = vm.parseJsonUint(line, ".blocks"); + vm.roll(block.number + blocks); + } else if (_eq(op, "mint_lp")) { + int24 tickLower = int24(vm.parseJsonInt(line, ".tickLower")); + int24 tickUpper = int24(vm.parseJsonInt(line, ".tickUpper")); + uint256 amount0 = 
vm.parseUint(vm.parseJsonString(line, ".amount0")); + uint256 amount1 = vm.parseUint(vm.parseJsonString(line, ".amount1")); + (address t0, address t1) = token0isWeth ? (WETH_ADDR, krkAddr) : (krkAddr, WETH_ADDR); + vm.prank(advAddr); + INonfungiblePositionManager(NPM_ADDR).mint( + INonfungiblePositionManager.MintParams({ + token0: t0, + token1: t1, + fee: POOL_FEE, + tickLower: tickLower, + tickUpper: tickUpper, + amount0Desired: amount0, + amount1Desired: amount1, + amount0Min: 0, + amount1Min: 0, + recipient: advAddr, + deadline: block.timestamp + 3600 + }) + ); + } else if (_eq(op, "burn_lp")) { + uint256 tokenId = vm.parseJsonUint(line, ".tokenId"); + (,,,,,, , uint128 liquidity,,,,) = INonfungiblePositionManager(NPM_ADDR).positions(tokenId); + if (liquidity == 0) return; + vm.startPrank(advAddr); + INonfungiblePositionManager(NPM_ADDR).decreaseLiquidity( + INonfungiblePositionManager.DecreaseLiquidityParams({ + tokenId: tokenId, + liquidity: liquidity, + amount0Min: 0, + amount1Min: 0, + deadline: block.timestamp + 3600 + }) + ); + INonfungiblePositionManager(NPM_ADDR).collect( + INonfungiblePositionManager.CollectParams({ + tokenId: tokenId, + recipient: advAddr, + amount0Max: type(uint128).max, + amount1Max: type(uint128).max + }) + ); + vm.stopPrank(); + } + // Unknown ops are silently ignored (mirrors AttackRunner behaviour). + } + + // ─── Score computation ──────────────────────────────────────────────────── + + /** + * @notice Compute lm_eth_total = free ETH + free WETH + sum(position ETH values). + * Mirrors AttackRunner._logSnapshot's lm_eth_total calculation. 
+ */ + function _computeLmEthTotal() internal view returns (uint256) { + (uint160 sqrtPriceX96,,,,,,) = pool.slot0(); + + uint256 lmEthFree = lmAddr.balance; + uint256 lmWethFree = IERC20(WETH_ADDR).balanceOf(lmAddr); + + (uint128 fLiq, int24 fLo, int24 fHi) = ILM(lmAddr).positions(0); // FLOOR + (uint128 aLiq, int24 aLo, int24 aHi) = ILM(lmAddr).positions(1); // ANCHOR + (uint128 dLiq, int24 dLo, int24 dHi) = ILM(lmAddr).positions(2); // DISCOVERY + + return lmEthFree + + lmWethFree + + _positionEthValue(sqrtPriceX96, fLo, fHi, fLiq) + + _positionEthValue(sqrtPriceX96, aLo, aHi, aLiq) + + _positionEthValue(sqrtPriceX96, dLo, dHi, dLiq); + } + + /** + * @notice ETH-equivalent value of a Uniswap V3 position at the current price. + * Copied verbatim from AttackRunner._positionEthValue. + */ + function _positionEthValue( + uint160 sqrtPriceX96, + int24 tickLower, + int24 tickUpper, + uint128 liquidity + ) + internal + view + returns (uint256) + { + if (liquidity == 0) return 0; + uint160 sqrtRatioAX96 = TickMath.getSqrtRatioAtTick(tickLower); + uint160 sqrtRatioBX96 = TickMath.getSqrtRatioAtTick(tickUpper); + (uint256 amount0, uint256 amount1) = + LiquidityAmounts.getAmountsForLiquidity(sqrtPriceX96, sqrtRatioAX96, sqrtRatioBX96, liquidity); + + uint256 ethAmount = token0isWeth ? amount0 : amount1; + uint256 krkAmount = token0isWeth ? 
amount1 : amount0; + + if (krkAmount == 0 || sqrtPriceX96 == 0) return ethAmount; + + uint256 krkInEth; + if (token0isWeth) { + // token0=WETH, token1=KRK: 1 KRK = 2^192 / sqrtP^2 WETH + krkInEth = FullMath.mulDiv(FullMath.mulDiv(krkAmount, 1 << 96, sqrtPriceX96), 1 << 96, sqrtPriceX96); + } else { + // token0=KRK, token1=WETH: 1 KRK = sqrtP^2 / 2^192 WETH + krkInEth = FullMath.mulDiv(FullMath.mulDiv(krkAmount, sqrtPriceX96, 1 << 96), sqrtPriceX96, 1 << 96); + } + + return ethAmount + krkInEth; + } + + // ─── Utilities ──────────────────────────────────────────────────────────── + + function _eq(string memory a, string memory b) internal pure returns (bool) { + return keccak256(bytes(a)) == keccak256(bytes(b)); + } + + function _endsWith(string memory str, string memory suffix) internal pure returns (bool) { + bytes memory bStr = bytes(str); + bytes memory bSuf = bytes(suffix); + if (bStr.length < bSuf.length) return false; + uint256 offset = bStr.length - bSuf.length; + for (uint256 i = 0; i < bSuf.length; i++) { + if (bStr[offset + i] != bSuf[i]) return false; + } + return true; + } + + function _uint2str(uint256 n) internal pure returns (string memory) { + if (n == 0) return "0"; + uint256 temp = n; + uint256 digits; + while (temp != 0) { + digits++; + temp /= 10; + } + bytes memory buffer = new bytes(digits); + while (n != 0) { + digits--; + buffer[digits] = bytes1(uint8(48 + (n % 10))); + n /= 10; + } + return string(buffer); + } +} diff --git a/tools/push3-evolution/evolve.sh b/tools/push3-evolution/evolve.sh index a502fc5..47d9f1a 100755 --- a/tools/push3-evolution/evolve.sh +++ b/tools/push3-evolution/evolve.sh @@ -44,8 +44,15 @@ export PATH="${HOME}/.foundry/bin:${PATH}" SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" FITNESS_SH="$SCRIPT_DIR/fitness.sh" +BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh" MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts" +# EVAL_MODE controls which fitness backend is used: +# anvil (default) — per-candidate Anvil+forge-script 
pipeline (fitness.sh) +# revm — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh) +# Requires BASE_RPC_URL env var. 10-100× faster at scale. +EVAL_MODE="${EVAL_MODE:-anvil}" + # ============================================================================= # Argument parsing # ============================================================================= @@ -171,6 +178,15 @@ done [ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI" [ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH" +if [ "$EVAL_MODE" = "revm" ]; then + [ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH" + [ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH" + [ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)" + command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)" +elif [ "$EVAL_MODE" != "anvil" ]; then + fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'" +fi + TSX_CMD="$(find_tsx_cmd)" || fail \ "No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH." @@ -194,6 +210,7 @@ log " Generations: $GENERATIONS" log " Mutation rate: $MUTATION_RATE" log " Output: $OUTPUT_DIR" log " TSX: $TSX_CMD" +log " Eval mode: $EVAL_MODE" log "========================================================" # ============================================================================= @@ -241,6 +258,29 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do SCORE_VALUES="" CAND_COUNT=0 + # In revm mode, batch-score all candidates in one forge test invocation before + # the per-candidate loop. Scores are written to a temp JSONL file that the + # loop reads with a fast Python lookup. 
+ BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl" + + if [ "$EVAL_MODE" = "revm" ]; then + declare -a _BATCH_FILES=() + for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do + [ -f "$_CF" ] && _BATCH_FILES+=("$_CF") + done + + if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then + BATCH_EC=0 + bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null \ + || BATCH_EC=$? + + if [ "$BATCH_EC" -eq 2 ]; then + fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution" + fi + log " revm batch scoring complete (exit $BATCH_EC)" + fi + fi + for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do [ -f "$CAND_FILE" ] || continue @@ -255,16 +295,37 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do SCORE=0 FITNESS_EC=0 - SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$? - # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately. - if [ "$FITNESS_EC" -eq 2 ]; then - fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution" + if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then + # Look up pre-computed score from batch-eval.sh output. + SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF' +import json, sys +cid = sys.argv[1] +with open(sys.argv[2]) as f: + for line in f: + try: + d = json.loads(line) + if d.get("candidate_id") == cid: + print(d["fitness"]) + sys.exit(0) + except (json.JSONDecodeError, KeyError): + pass +print(0) +PYEOF +) || SCORE=0 + else + # Anvil mode (or revm fallback): score candidate individually. + SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$? + + # Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately. + if [ "$FITNESS_EC" -eq 2 ]; then + fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution" + fi fi # Validate that score is a non-negative integer; treat any other output as invalid. - if [ "$FITNESS_EC" -ne 0 ] || ! 
[[ "$SCORE" =~ ^[0-9]+$ ]]; then - log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0" + if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then + log " $CID: invalid/missing score, using 0" SCORE=0 else log " $CID: fitness=$SCORE" diff --git a/tools/push3-evolution/revm-evaluator/batch-eval.sh b/tools/push3-evolution/revm-evaluator/batch-eval.sh new file mode 100755 index 0000000..0bec75b --- /dev/null +++ b/tools/push3-evolution/revm-evaluator/batch-eval.sh @@ -0,0 +1,226 @@ +#!/usr/bin/env bash +# ============================================================================= +# batch-eval.sh — revm-based batch fitness evaluator +# +# Replaces the per-candidate Anvil+forge-script pipeline with in-process EVM +# execution via Foundry's native revm backend (FitnessEvaluator.t.sol). +# +# Speedup: compiles each candidate once (unavoidable — different Solidity per +# candidate), then runs ALL attack sequences in a single in-process forge test +# with O(1) memory snapshot/revert instead of RPC calls per attack. +# +# Usage: +# ./tools/push3-evolution/revm-evaluator/batch-eval.sh \ +# [--output-dir /tmp/scores] \ +# candidate0.push3 candidate1.push3 ... +# +# Output (stdout): +# One JSON object per candidate: +# {"candidate_id":"gen0_c000","fitness":123456789} +# +# Exit codes: +# 0 Success. +# 1 Candidate-level error (transpile/compile failed for at least one candidate). +# 2 Infrastructure error (missing tool, BASE_RPC_URL not set, forge test failed). +# +# Environment: +# BASE_RPC_URL Required. Base network RPC endpoint for forking. +# ATTACKS_DIR Optional. Path to *.jsonl attack files. +# (default: /onchain/script/backtesting/attacks) +# OUTPUT_DIR Optional. Directory to copy scores.jsonl into (--output-dir overrides). +# ============================================================================= + +set -euo pipefail + +export PATH="${HOME}/.foundry/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" +ONCHAIN_DIR="$REPO_ROOT/onchain" +TRANSPILER_DIR="$REPO_ROOT/tools/push3-transpiler" +TRANSPILER_OUT="$ONCHAIN_DIR/src/OptimizerV3Push3.sol" +ARTIFACT_PATH="$ONCHAIN_DIR/out/OptimizerV3Push3.sol/OptimizerV3Push3.json" +DEFAULT_ATTACKS_DIR="$ONCHAIN_DIR/script/backtesting/attacks" + +# ============================================================================= +# Argument parsing +# ============================================================================= + +OUTPUT_DIR="${OUTPUT_DIR:-}" + +declare -a PUSH3_FILES=() + +while [[ $# -gt 0 ]]; do + case $1 in + --output-dir) OUTPUT_DIR="$2"; shift 2 ;; + --*) echo "Unknown option: $1" >&2; exit 2 ;; + *) PUSH3_FILES+=("$1"); shift ;; + esac +done + +if [ "${#PUSH3_FILES[@]}" -eq 0 ]; then + echo "Usage: $0 [--output-dir DIR] candidate1.push3 ..." >&2 + exit 2 +fi + +# ============================================================================= +# Environment checks +# ============================================================================= + +BASE_RPC_URL="${BASE_RPC_URL:-}" +if [ -z "$BASE_RPC_URL" ]; then + echo " [batch-eval] ERROR: BASE_RPC_URL env var required for Base network fork" >&2 + exit 2 +fi + +for _tool in forge node python3; do + command -v "$_tool" &>/dev/null || { echo " [batch-eval] ERROR: $_tool not found in PATH" >&2; exit 2; } +done + +# ============================================================================= +# Helpers +# ============================================================================= + +log() { echo " [batch-eval] $*" >&2; } +fail2() { echo " [batch-eval] ERROR: $*" >&2; exit 2; } + +# ============================================================================= +# Step 1 — Ensure transpiler dependencies are installed +# ============================================================================= + +if [ ! 
-d "$TRANSPILER_DIR/node_modules" ]; then + log "Installing transpiler dependencies…" + (cd "$TRANSPILER_DIR" && npm install --silent) || fail2 "npm install in push3-transpiler failed" +fi + +# ============================================================================= +# Step 2 — Transpile + compile each candidate, extract bytecodes into manifest +# ============================================================================= + +MANIFEST_DIR="$(mktemp -d)" +IDS_FILE="$MANIFEST_DIR/ids.txt" +BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt" + +: > "$IDS_FILE" +: > "$BYTECODES_FILE" + +COMPILED_COUNT=0 +FAILED_IDS="" + +for PUSH3_FILE in "${PUSH3_FILES[@]}"; do + PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")" + CANDIDATE_ID="$(basename "$PUSH3_FILE" .push3)" + + # Transpile Push3 → OptimizerV3Push3.sol + TRANSPILE_EC=0 + ( + cd "$TRANSPILER_DIR" + npx ts-node src/index.ts "$PUSH3_FILE" "$TRANSPILER_OUT" + ) >/dev/null 2>&1 || TRANSPILE_EC=$? + + if [ "$TRANSPILE_EC" -ne 0 ]; then + log "WARNING: transpile failed for $CANDIDATE_ID (exit $TRANSPILE_EC) — skipping" + FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + continue + fi + + # Compile (forge's incremental build skips unchanged files quickly) + FORGE_EC=0 + (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$? 
+ + if [ "$FORGE_EC" -ne 0 ]; then + log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping" + FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + continue + fi + + # Extract bytecode from artifact (add a leading 0x if it is missing) + BYTECODE_HEX="$(python3 - "$ARTIFACT_PATH" <<'PYEOF' +import json, sys +with open(sys.argv[1]) as f: + d = json.load(f) +bytecode = d["bytecode"]["object"] +# Ensure 0x prefix +if not bytecode.startswith("0x"): + bytecode = "0x" + bytecode +print(bytecode) +PYEOF +)" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; } + + if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then + log "WARNING: empty bytecode for $CANDIDATE_ID — skipping" + FAILED_IDS="$FAILED_IDS $CANDIDATE_ID" + continue + fi + + printf '%s\n' "$CANDIDATE_ID" >> "$IDS_FILE" + printf '%s\n' "$BYTECODE_HEX" >> "$BYTECODES_FILE" + COMPILED_COUNT=$((COMPILED_COUNT + 1)) + log "Compiled $CANDIDATE_ID" +done + +if [ "$COMPILED_COUNT" -eq 0 ]; then + fail2 "No candidates compiled successfully — aborting" +fi + +log "Compiled $COMPILED_COUNT / ${#PUSH3_FILES[@]} candidates" + +# ============================================================================= +# Step 3 — Run FitnessEvaluator.t.sol (in-process revm, all candidates at once) +# ============================================================================= + +ATTACKS_DIR="${ATTACKS_DIR:-$DEFAULT_ATTACKS_DIR}" + +log "Running FitnessEvaluator.t.sol (in-process revm, fork: $BASE_RPC_URL)…" + +FORGE_TEST_EC=0 +FORGE_OUTPUT="$( + cd "$ONCHAIN_DIR" + BASE_RPC_URL="$BASE_RPC_URL" \ + FITNESS_MANIFEST_DIR="$MANIFEST_DIR" \ + ATTACKS_DIR="$ATTACKS_DIR" \ + forge test \ + --match-contract FitnessEvaluator \ + --match-test testBatchEvaluate \ + -vv \ + --no-match-path "NOT_A_REAL_PATH" \ + 2>&1 +)" || FORGE_TEST_EC=$?
+ +if [ "$FORGE_TEST_EC" -ne 0 ]; then + # Surface forge output on failure for diagnosis + printf '%s\n' "$FORGE_OUTPUT" >&2 + fail2 "forge test failed (exit $FORGE_TEST_EC)" +fi + +# ============================================================================= +# Step 4 — Extract and emit score JSON lines +# +# forge test -vv wraps console.log output with leading spaces and a "Logs:" header. +# We grep for lines containing the score JSON pattern and strip the indentation. +# ============================================================================= + +SCORES_JSONL="$(printf '%s\n' "$FORGE_OUTPUT" | grep -E '"candidate_id"' | sed 's/^[[:space:]]*//' || true)" + +if [ -z "$SCORES_JSONL" ]; then + printf '%s\n' "$FORGE_OUTPUT" >&2 + fail2 "No score lines found in forge test output" +fi + +# Emit scores to stdout +printf '%s\n' "$SCORES_JSONL" + +# Optionally write to output directory +if [ -n "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" + printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl" + log "Scores written to $OUTPUT_DIR/scores.jsonl" +fi + +# Warn about any candidates that were skipped (compile failures) +if [ -n "$FAILED_IDS" ]; then + log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS" + exit 1 +fi + +log "Done — scored $COMPILED_COUNT candidates" From 9c4294790327372aaa9995c73e476863130bcdb6 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 12 Mar 2026 12:31:11 +0000 Subject: [PATCH 2/2] fix: address review findings in FitnessEvaluator (#604) - Wrap upgradeTo() in try/catch: malformed candidate bytecode no longer aborts the entire batch; emit {"fitness":0,"error":"upgrade_failed"} and continue to the next candidate - Bootstrap recenter: require() after 5 retry attempts so silent failure (all scores identically equal to free WETH only) is surfaced as a hard test failure rather than silently producing meaningless results - mint_lp: capture the NPM tokenId returned by mint() and push it to _mintedNpmTokenIds; burn_lp 
now uses a 1-based index into that array (same pattern as stake/unstake), making attack files fork-block-independent - Remove dead atkBaseSnap variable and its compiler-warning suppression - Remove orphaned vm.snapshot() after vm.revertTo() in the attack loop - Fix misleading comment on delete _stakedPositionIds Co-Authored-By: Claude Sonnet 4.6 --- onchain/test/FitnessEvaluator.t.sol | 70 ++++++++++++++++++----------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/onchain/test/FitnessEvaluator.t.sol b/onchain/test/FitnessEvaluator.t.sol index 379bf68..5eb2699 100644 --- a/onchain/test/FitnessEvaluator.t.sol +++ b/onchain/test/FitnessEvaluator.t.sol @@ -169,6 +169,11 @@ contract FitnessEvaluator is Test { /// vm.snapshot/revertTo reverts this array's storage between attacks. uint256[] internal _stakedPositionIds; + /// @dev NPM tokenIds returned by mint_lp ops (in insertion order). + /// burn_lp references positions by 1-based index into this array so that + /// attack files are fork-block-independent (tokenIds vary by fork tip). + uint256[] internal _mintedNpmTokenIds; + // ─── Entry point ───────────────────────────────────────────────────────── /** @@ -217,17 +222,24 @@ contract FitnessEvaluator is Test { baseSnap = vm.snapshot(); // Etch candidate optimizer bytecode and upgrade proxy. + // Wrapped in try/catch: a malformed candidate (compiler bug, bad transpiler output) + // would otherwise abort the entire batch. On failure, emit fitness=0 and continue; + // vm.revertTo(baseSnap) at the top of the next iteration cleans up state. bytes memory candidateBytecode = vm.parseBytes(bytecodeHex); vm.etch(IMPL_SLOT, candidateBytecode); - // Test contract is the UUPS admin (set during initialize in _deploy). 
- UUPSUpgradeable(optProxy).upgradeTo(IMPL_SLOT); + bool upgradeOk = true; + try UUPSUpgradeable(optProxy).upgradeTo(IMPL_SLOT) { } + catch { + upgradeOk = false; + } + if (!upgradeOk) { + console.log(string.concat('{"candidate_id":"', candidateId, '","fitness":0,"error":"upgrade_failed"}')); + continue; + } // Bootstrap: fund LM, set recenterAccess, initial recenter. _bootstrap(); - // Post-bootstrap snapshot used to reset state between attacks. - uint256 atkBaseSnap = vm.snapshot(); - // Score: sum lm_eth_total across all attack sequences. uint256 totalFitness = 0; Vm.DirEntry[] memory entries = vm.readDir(attacksDir); @@ -238,15 +250,10 @@ contract FitnessEvaluator is Test { uint256 score = _runAttack(entries[i].path); totalFitness += score; vm.revertTo(atkSnap); - atkSnap = vm.snapshot(); // refresh for next attack } // Emit score as a JSON line (parsed by batch-eval.sh). console.log(string.concat('{"candidate_id":"', candidateId, '","fitness":', _uint2str(totalFitness), "}")); - - // Restore atkBaseSnap snapshot variable (already consumed above) — not needed - // since we always revert to baseSnap at the top of the loop. - (atkBaseSnap); // suppress unused variable warning } // Close manifest files. @@ -340,18 +347,18 @@ contract FitnessEvaluator is Test { // e. Initial recenter: no ANCHOR position exists yet so amplitude check is skipped; // recenterAccess is set so TWAP stability check is also skipped. - vm.prank(recenterAddr); - try ILM(lmAddr).recenter() { } - catch { - // If recenter fails on first attempt (e.g. pool not ready), mine some blocks and retry. - for (uint256 _attempt = 0; _attempt < 4; _attempt++) { - vm.roll(block.number + 50); - vm.prank(recenterAddr); - try ILM(lmAddr).recenter() returns (bool) { - break; - } catch { } - } + // If all retries fail, revert with a clear message — silent failure would make every + // candidate score identically (all lm_eth_total = free WETH only, no positions). 
+ bool recentered = false; + for (uint256 _attempt = 0; _attempt < 5; _attempt++) { + if (_attempt > 0) vm.roll(block.number + 50); + vm.prank(recenterAddr); + try ILM(lmAddr).recenter() returns (bool) { + recentered = true; + break; + } catch { } } + require(recentered, "FitnessEvaluator: bootstrap recenter failed after 5 attempts"); } // ─── Attack execution ───────────────────────────────────────────────────── @@ -363,9 +370,11 @@ contract FitnessEvaluator is Test { function _runAttack(string memory attackFile) internal returns (uint256) { // Reset file read position so each call to _runAttack starts from line 1. vm.closeFile(attackFile); - // Clear any staked positions from a prior attack (snapshot revert handles on-chain state, - // but the dynamic array in test storage also needs resetting). + // vm.revertTo() reverts all EVM state including test contract storage, so these + // arrays are already empty after revert. Explicit delete is a defensive reset + // for the first attack (no preceding revert) and any future call-path changes. delete _stakedPositionIds; + delete _mintedNpmTokenIds; string memory line = vm.readLine(attackFile); while (bytes(line).length > 0) { @@ -439,7 +448,9 @@ contract FitnessEvaluator is Test { uint256 amount1 = vm.parseUint(vm.parseJsonString(line, ".amount1")); (address t0, address t1) = token0isWeth ? (WETH_ADDR, krkAddr) : (krkAddr, WETH_ADDR); vm.prank(advAddr); - INonfungiblePositionManager(NPM_ADDR).mint( + // Track the returned tokenId so burn_lp can reference it by 1-based index, + // making attack files fork-block-independent (NPM tokenIds depend on fork tip). 
+ (uint256 mintedTokenId,,,) = INonfungiblePositionManager(NPM_ADDR).mint( INonfungiblePositionManager.MintParams({ token0: t0, token1: t1, @@ -454,8 +465,17 @@ contract FitnessEvaluator is Test { deadline: block.timestamp + 3600 }) ); + _mintedNpmTokenIds.push(mintedTokenId); } else if (_eq(op, "burn_lp")) { - uint256 tokenId = vm.parseJsonUint(line, ".tokenId"); + // .tokenId in the attack file is a 1-based index into _mintedNpmTokenIds + // (positions created by mint_lp ops in this run), not a raw NPM tokenId. + // This mirrors the stake/unstake index pattern and avoids fork-block sensitivity. + uint256 tokenIndex = vm.parseJsonUint(line, ".tokenId"); + require( + tokenIndex >= 1 && tokenIndex <= _mintedNpmTokenIds.length, + "FitnessEvaluator: burn_lp tokenId out of range (must be 1-based index of a prior mint_lp op)" + ); + uint256 tokenId = _mintedNpmTokenIds[tokenIndex - 1]; (,,,,,, , uint128 liquidity,,,,) = INonfungiblePositionManager(NPM_ADDR).positions(tokenId); if (liquidity == 0) return; vm.startPrank(advAddr);