Merge pull request 'fix: feat: revm-based fitness evaluator for evolution at scale (#604)' (#613) from fix/issue-604 into master
This commit is contained in:
commit
685a0e488e
3 changed files with 890 additions and 6 deletions
597
onchain/test/FitnessEvaluator.t.sol
Normal file
597
onchain/test/FitnessEvaluator.t.sol
Normal file
|
|
@ -0,0 +1,597 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
pragma solidity ^0.8.19;
|
||||
|
||||
/**
|
||||
* @title FitnessEvaluator
|
||||
* @notice In-process (revm) batch fitness evaluator for Push3 evolution.
|
||||
*
|
||||
* Replaces the Anvil+forge-script pipeline with in-process EVM execution.
|
||||
* Uses Foundry's native revm backend: vm.snapshot/revertTo are memory operations
|
||||
* with no JSON-RPC overhead, giving 100-1000x speedup over per-candidate Anvil.
|
||||
*
|
||||
* Architecture:
|
||||
* batch-eval.sh compiles each candidate (Push3→Solidity→bytecode) and writes a
|
||||
* two-file manifest (ids.txt + bytecodes.txt). This test reads the manifest,
|
||||
* forks Base mainnet once, deploys the full KRAIKEN stack once, then for each
|
||||
* candidate:
|
||||
* 1. snapshot → etch candidate bytecode → UUPS upgrade proxy → bootstrap
|
||||
* 2. For each attack: snapshot → execute → accumulate lm_eth_total → revert
|
||||
* 3. Emit JSON score line
|
||||
* 4. Revert to pre-bootstrap snapshot
|
||||
*
|
||||
* Required env vars:
|
||||
* BASE_RPC_URL Base network RPC endpoint (for fork)
|
||||
* FITNESS_MANIFEST_DIR Directory containing ids.txt and bytecodes.txt
|
||||
*
|
||||
* Optional env vars:
|
||||
* ATTACKS_DIR Path to *.jsonl attack files (default: script/backtesting/attacks)
|
||||
*
|
||||
* Run:
|
||||
* BASE_RPC_URL=https://mainnet.base.org \
|
||||
* FITNESS_MANIFEST_DIR=/tmp/manifest \
|
||||
* forge test --match-contract FitnessEvaluator --match-test testBatchEvaluate -vv
|
||||
*/
|
||||
|
||||
import "forge-std/Test.sol";
|
||||
import { Kraiken } from "../src/Kraiken.sol";
|
||||
import { Stake } from "../src/Stake.sol";
|
||||
import { Optimizer } from "../src/Optimizer.sol";
|
||||
import { LiquidityManager } from "../src/LiquidityManager.sol";
|
||||
import { ERC1967Proxy } from "@openzeppelin/proxy/ERC1967/ERC1967Proxy.sol";
|
||||
import { UUPSUpgradeable } from "@openzeppelin/proxy/utils/UUPSUpgradeable.sol";
|
||||
import { IERC20 } from "@openzeppelin/token/ERC20/IERC20.sol";
|
||||
import { IUniswapV3Factory } from "@uniswap-v3-core/interfaces/IUniswapV3Factory.sol";
|
||||
import { IUniswapV3Pool } from "@uniswap-v3-core/interfaces/IUniswapV3Pool.sol";
|
||||
import { FullMath } from "@aperture/uni-v3-lib/FullMath.sol";
|
||||
import { LiquidityAmounts } from "@aperture/uni-v3-lib/LiquidityAmounts.sol";
|
||||
import { TickMath } from "@aperture/uni-v3-lib/TickMath.sol";
|
||||
import { UniswapHelpers } from "../src/helpers/UniswapHelpers.sol";
|
||||
import { IWETH9 } from "../src/interfaces/IWETH9.sol";
|
||||
|
||||
// ─── External interfaces (mirrors AttackRunner.s.sol) ─────────────────────────
|
||||
|
||||
/// @dev Minimal slice of the swap router ABI used by this test. Only the single-hop
///      exact-input entry point is declared; the router itself lives at SWAP_ROUTER.
interface ISwapRouter02 {
    /// @dev Argument bundle for {exactInputSingle}. Field order and types define the
    ///      ABI encoding and must not be rearranged.
    struct ExactInputSingleParams {
        address tokenIn; // token handed to the router
        address tokenOut; // token received from the router
        uint24 fee; // pool fee tier (this file always passes POOL_FEE)
        address recipient; // receiver of tokenOut
        uint256 amountIn; // exact input amount
        uint256 amountOutMinimum; // this file always passes 0 (no slippage guard)
        uint160 sqrtPriceLimitX96; // this file always passes 0 (no price limit)
    }

    /// @notice Swap an exact amount of `tokenIn` for `tokenOut` through a single pool.
    /// @dev The returned value is ignored by every caller in this file.
    function exactInputSingle(ExactInputSingleParams calldata params) external returns (uint256);
}
|
||||
|
||||
/// @dev Narrow view of the LiquidityManager used for scoring and for driving recenters.
interface ILM {
    /// @dev Declared for parity with the manager ABI; not called anywhere in this file.
    function getVWAP() external view returns (uint256);

    /// @notice Liquidity and tick range of one managed position.
    /// @param stage Position selector; this file queries 0 (FLOOR), 1 (ANCHOR) and 2 (DISCOVERY).
    function positions(uint8 stage)
        external
        view
        returns (uint128 liquidity, int24 tickLower, int24 tickUpper);

    /// @notice Ask the manager to reposition its liquidity.
    /// @dev Always invoked while pranking as the recenter caller, inside try/catch.
    function recenter() external returns (bool);
}
|
||||
|
||||
/// @dev Subset of the Stake contract ABI exercised by the "stake" and "unstake" attack ops.
interface IStake {
    /// @notice Open a staking position.
    /// @dev The "stake" op calls this with an empty `positionsToSnatch` array and records
    ///      the returned id so later "unstake" ops can refer to it by 1-based index.
    function snatch(
        uint256 assets,
        address receiver,
        uint32 taxRate,
        uint256[] calldata positionsToSnatch
    )
        external
        returns (uint256 positionId);

    /// @notice Close a position previously returned by {snatch}.
    function exitPosition(uint256 positionId) external;
}
|
||||
|
||||
/// @dev Subset of the Uniswap V3 NonfungiblePositionManager ABI used by the "mint_lp" and
///      "burn_lp" attack ops. Struct field order and the return tuple of {positions}
///      define the ABI encoding and must stay exactly as declared.
interface INonfungiblePositionManager {
    /// @dev Arguments for {mint}.
    struct MintParams {
        address token0;
        address token1;
        uint24 fee;
        int24 tickLower;
        int24 tickUpper;
        uint256 amount0Desired;
        uint256 amount1Desired;
        uint256 amount0Min;
        uint256 amount1Min;
        address recipient;
        uint256 deadline;
    }

    /// @dev Arguments for {decreaseLiquidity}.
    struct DecreaseLiquidityParams {
        uint256 tokenId;
        uint128 liquidity;
        uint256 amount0Min;
        uint256 amount1Min;
        uint256 deadline;
    }

    /// @dev Arguments for {collect}.
    struct CollectParams {
        uint256 tokenId;
        address recipient;
        uint128 amount0Max;
        uint128 amount1Max;
    }

    /// @notice Mint a new LP position NFT; only `tokenId` is consumed by this file.
    function mint(MintParams calldata params)
        external
        payable
        returns (uint256 tokenId, uint128 liquidity, uint256 amount0, uint256 amount1);

    /// @notice Read the stored data of a position NFT.
    /// @dev This file reads only the `liquidity` field (8th return value).
    function positions(uint256 tokenId)
        external
        view
        returns (
            uint96 nonce,
            address operator,
            address token0,
            address token1,
            uint24 fee,
            int24 tickLower,
            int24 tickUpper,
            uint128 liquidity,
            uint256 feeGrowthInside0LastX128,
            uint256 feeGrowthInside1LastX128,
            uint128 tokensOwed0,
            uint128 tokensOwed1
        );

    /// @notice Remove liquidity from a position; "burn_lp" removes all of it.
    function decreaseLiquidity(DecreaseLiquidityParams calldata params)
        external
        payable
        returns (uint256 amount0, uint256 amount1);

    /// @notice Withdraw owed tokens from a position to `recipient`.
    function collect(CollectParams calldata params) external payable returns (uint256 amount0, uint256 amount1);
}
|
||||
|
||||
// ─── Main test contract ────────────────────────────────────────────────────────
|
||||
|
||||
contract FitnessEvaluator is Test {
|
||||
using UniswapHelpers for IUniswapV3Pool;
|
||||
|
||||
// ─── Base network constants ───────────────────────────────────────────────
|
||||
|
||||
uint24 internal constant POOL_FEE = 10_000;
|
||||
address internal constant WETH_ADDR = 0x4200000000000000000000000000000000000006;
|
||||
address internal constant SWAP_ROUTER = 0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4;
|
||||
address internal constant NPM_ADDR = 0x27F971cb582BF9E50F397e4d29a5C7A34f11faA2;
|
||||
address internal constant V3_FACTORY = 0x4752ba5DBc23f44D87826276BF6Fd6b1C372aD24;
|
||||
address internal constant FEE_DEST = 0xf6a3eef9088A255c32b6aD2025f83E57291D9011;
|
||||
|
||||
/// @dev Fixed address used with vm.etch to inject candidate bytecode.
|
||||
/// Chosen to be deterministic and not collide with real Base addresses.
|
||||
address internal constant IMPL_SLOT = address(uint160(uint256(keccak256("fitness.impl.slot"))));
|
||||
|
||||
// ─── Anvil test accounts (deterministic mnemonic) ────────────────────────
|
||||
|
||||
/// @dev Account 8 — adversary (10 000 ETH in Anvil; funded via vm.deal here)
|
||||
uint256 internal constant ADV_PK = 0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97;
|
||||
/// @dev Account 2 — recenter caller (granted recenterAccess in bootstrap)
|
||||
uint256 internal constant RECENTER_PK = 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a;
|
||||
|
||||
// ─── Runtime state ────────────────────────────────────────────────────────
|
||||
|
||||
address internal lmAddr;
|
||||
address internal krkAddr;
|
||||
address internal stakeAddr;
|
||||
address internal optProxy;
|
||||
address internal advAddr;
|
||||
address internal recenterAddr;
|
||||
IUniswapV3Pool internal pool;
|
||||
bool internal token0isWeth;
|
||||
|
||||
/// @dev Mirrors AttackRunner._stakedPositionIds: position IDs returned by stake ops.
|
||||
/// vm.snapshot/revertTo reverts this array's storage between attacks.
|
||||
uint256[] internal _stakedPositionIds;
|
||||
|
||||
/// @dev NPM tokenIds returned by mint_lp ops (in insertion order).
|
||||
/// burn_lp references positions by 1-based index into this array so that
|
||||
/// attack files are fork-block-independent (tokenIds vary by fork tip).
|
||||
uint256[] internal _mintedNpmTokenIds;
|
||||
|
||||
// ─── Entry point ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * @notice Batch fitness evaluator: score all candidates in the manifest.
 *
 *         Reads FITNESS_MANIFEST_DIR/{ids.txt,bytecodes.txt} line-by-line (line N of
 *         each file describes the same candidate) and prints one JSON line per
 *         candidate to stdout:
 *           {"candidate_id":"gen0_c000","fitness":1234567890}
 *         Candidates that cannot be installed are reported with fitness 0 and an
 *         "error" field ("invalid_bytecode" or "upgrade_failed") instead of aborting
 *         the batch.
 *
 *         Skipped (with a pass) if BASE_RPC_URL is not set, so CI without a Base
 *         RPC key does not fail the test suite.
 *
 * @dev Reverts if FITNESS_MANIFEST_DIR is unset, if ATTACKS_DIR contains no *.jsonl
 *      file, or if bootstrap fails for a candidate. The bootstrap revert is
 *      deliberate: see _bootstrap.
 */
function testBatchEvaluate() public {
    string memory rpcUrl = vm.envOr("BASE_RPC_URL", string(""));
    vm.skip(bytes(rpcUrl).length == 0);

    string memory manifestDir = vm.envOr("FITNESS_MANIFEST_DIR", string(""));
    require(bytes(manifestDir).length > 0, "FITNESS_MANIFEST_DIR env var required");

    string memory attacksDir = vm.envOr("ATTACKS_DIR", string("script/backtesting/attacks"));

    // Fork Base mainnet so Uniswap V3, WETH, etc. exist at canonical addresses.
    vm.createSelectFork(rpcUrl);

    advAddr = vm.addr(ADV_PK);
    recenterAddr = vm.addr(RECENTER_PK);

    // Deploy the full KRAIKEN stack once on the fork.
    _deploy();

    // Snapshot after deployment (pre-bootstrap, pre-candidate-specific state).
    uint256 baseSnap = vm.snapshot();

    // Discover attack files once. The list lives in memory, which vm.revertTo does
    // not touch, so it stays valid for every candidate. Each attack runs from its
    // own snapshot and scores are summed, so listing order does not affect fitness.
    string[] memory attackFiles = _listAttackFiles(attacksDir);

    string memory idsFile = string.concat(manifestDir, "/ids.txt");
    string memory bytecodesFile = string.concat(manifestDir, "/bytecodes.txt");

    // Process candidates one at a time; an empty id line marks the end of the manifest.
    while (true) {
        string memory candidateId = vm.readLine(idsFile);
        string memory bytecodeHex = vm.readLine(bytecodesFile);
        if (bytes(candidateId).length == 0) break;

        // Revert to clean post-deploy state for each candidate, then re-arm the snapshot.
        vm.revertTo(baseSnap);
        baseSnap = vm.snapshot();

        // Install the candidate optimizer. A malformed candidate (compiler bug, bad
        // transpiler output) must not abort the entire batch: emit fitness=0 and move
        // on; vm.revertTo(baseSnap) at the top of the next iteration cleans up state.
        string memory installError = _installCandidate(bytecodeHex);
        if (bytes(installError).length > 0) {
            console.log(
                string.concat('{"candidate_id":"', candidateId, '","fitness":0,"error":"', installError, '"}')
            );
            continue;
        }

        // Bootstrap: fund LM, set recenterAccess, initial recenter.
        _bootstrap();

        // Emit score as a JSON line (parsed by batch-eval.sh).
        console.log(
            string.concat(
                '{"candidate_id":"', candidateId, '","fitness":', _uint2str(_scoreAttacks(attackFiles)), "}"
            )
        );
    }

    // Close manifest files.
    vm.closeFile(idsFile);
    vm.closeFile(bytecodesFile);
}

/**
 * @dev Collect the paths of all non-directory entries in `attacksDir` ending in ".jsonl".
 *      Reverts when none are found: with zero attacks every candidate would score 0
 *      and the results would be indistinguishable.
 */
function _listAttackFiles(string memory attacksDir) internal returns (string[] memory files) {
    Vm.DirEntry[] memory entries = vm.readDir(attacksDir);

    // First pass: count matches so the result array can be sized exactly.
    uint256 count = 0;
    for (uint256 i = 0; i < entries.length; i++) {
        if (!entries[i].isDir && _endsWith(entries[i].path, ".jsonl")) count++;
    }
    require(count > 0, "FitnessEvaluator: no *.jsonl attack files found in ATTACKS_DIR");

    // Second pass: copy the matching paths.
    files = new string[](count);
    uint256 next = 0;
    for (uint256 i = 0; i < entries.length; i++) {
        if (!entries[i].isDir && _endsWith(entries[i].path, ".jsonl")) {
            files[next++] = entries[i].path;
        }
    }
}

/**
 * @dev Etch the candidate bytecode at IMPL_SLOT and upgrade the optimizer proxy to it.
 * @param bytecodeHex Hex-encoded runtime bytecode as read from bytecodes.txt.
 * @return err Empty on success, otherwise a short machine-readable error tag.
 */
function _installCandidate(string memory bytecodeHex) internal returns (string memory err) {
    bytes memory candidateBytecode;
    // vm.parseBytes reverts on non-hex input; catch it so one bad line cannot kill the batch.
    try vm.parseBytes(bytecodeHex) returns (bytes memory parsed) {
        candidateBytecode = parsed;
    } catch {
        return "invalid_bytecode";
    }

    vm.etch(IMPL_SLOT, candidateBytecode);
    try UUPSUpgradeable(optProxy).upgradeTo(IMPL_SLOT) { }
    catch {
        return "upgrade_failed";
    }
    return "";
}

/**
 * @dev Sum lm_eth_total over all attack sequences. Each attack starts from the same
 *      post-bootstrap state: snapshot, run, revert.
 */
function _scoreAttacks(string[] memory attackFiles) internal returns (uint256 totalFitness) {
    for (uint256 i = 0; i < attackFiles.length; i++) {
        uint256 atkSnap = vm.snapshot();
        totalFitness += _runAttack(attackFiles[i]);
        vm.revertTo(atkSnap);
    }
}
|
||||
|
||||
// ─── Deployment ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * @notice Stand up the whole KRAIKEN system on the current fork.
 * @dev The original note says this mirrors DeployLocal.sol (not visible from here).
 *      Every contract is created by address(this), so the test contract is the
 *      deployer of the Optimizer proxy. Creation order is kept exactly as-is because
 *      deployed addresses (and therefore `token0isWeth`) depend on it.
 *
 *      Writes: krkAddr, token0isWeth, stakeAddr, pool, optProxy, lmAddr.
 */
function _deploy() internal {
    // Token first: its address decides the pool's token ordering.
    Kraiken krk = new Kraiken("Kraiken", "KRK");
    krkAddr = address(krk);
    token0isWeth = WETH_ADDR < krkAddr;

    // Staking pool, registered on the token.
    Stake stakingPool = new Stake(krkAddr, FEE_DEST);
    stakeAddr = address(stakingPool);
    krk.setStakingPool(stakeAddr);

    // Reuse the WETH/KRK pool if the factory already knows one, otherwise create it.
    IUniswapV3Factory v3Factory = IUniswapV3Factory(V3_FACTORY);
    address existingPool = v3Factory.getPool(WETH_ADDR, krkAddr, POOL_FEE);
    pool = IUniswapV3Pool(
        existingPool != address(0) ? existingPool : v3Factory.createPool(WETH_ADDR, krkAddr, POOL_FEE)
    );

    // A zero sqrt price means the pool has never been initialized.
    (uint160 currentSqrtPrice,,,,,,) = pool.slot0();
    if (currentSqrtPrice == 0) pool.initializePoolFor1Cent(token0isWeth);

    // Optimizer implementation behind an ERC1967 proxy, initialized in the constructor call.
    address optimizerImpl = address(new Optimizer());
    optProxy = address(
        new ERC1967Proxy(
            optimizerImpl, abi.encodeWithSignature("initialize(address,address)", krkAddr, stakeAddr)
        )
    );

    // Liquidity manager, then wire it to the fee destination and the token.
    LiquidityManager manager = new LiquidityManager(V3_FACTORY, WETH_ADDR, krkAddr, optProxy);
    lmAddr = address(manager);
    manager.setFeeDestination(FEE_DEST);
    krk.setLiquidityManager(lmAddr);
}
|
||||
|
||||
// ─── Bootstrap ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * @notice Put the LiquidityManager into its starting state for one candidate.
 * @dev The original note says the step order follows fitness.sh (not visible from here):
 *        1. FEE_DEST grants recenter access to recenterAddr.
 *        2. The adversary is dealt 10 000 ETH.
 *        3. The adversary wraps 1 000 ETH and transfers that WETH to the LM.
 *        4. The adversary wraps the remaining 9 000 ETH and gives unlimited approvals
 *           to the swap router, the position manager and the staking contract.
 *        5. recenter() is attempted up to five times, rolling 50 blocks forward before
 *           each retry.
 *      Reverts if no recenter attempt succeeds: a silent failure would leave the LM
 *      without positions and make every candidate score identically.
 */
function _bootstrap() internal {
    // 1. Recenter access is granted by the fee destination.
    vm.prank(FEE_DEST);
    LiquidityManager(payable(lmAddr)).setRecenterAccess(recenterAddr);

    // 2. Adversary funding.
    vm.deal(advAddr, 10_000 ether);

    IERC20 weth = IERC20(WETH_ADDR);
    IERC20 krk = IERC20(krkAddr);

    // 3 + 4. All adversary-side setup runs under a single prank scope.
    vm.startPrank(advAddr);
    IWETH9(WETH_ADDR).deposit{ value: 1_000 ether }();
    weth.transfer(lmAddr, 1_000 ether);
    IWETH9(WETH_ADDR).deposit{ value: 9_000 ether }();
    weth.approve(SWAP_ROUTER, type(uint256).max);
    weth.approve(NPM_ADDR, type(uint256).max);
    krk.approve(SWAP_ROUTER, type(uint256).max);
    krk.approve(stakeAddr, type(uint256).max);
    krk.approve(NPM_ADDR, type(uint256).max);
    vm.stopPrank();

    // 5. Initial recenter with retries. The first attempt runs at the current block;
    //    each later attempt first advances the chain by 50 blocks.
    bool recentered = false;
    uint256 attempt = 0;
    while (!recentered && attempt < 5) {
        if (attempt != 0) vm.roll(block.number + 50);
        vm.prank(recenterAddr);
        try ILM(lmAddr).recenter() returns (bool) {
            recentered = true;
        } catch { }
        attempt++;
    }
    require(recentered, "FitnessEvaluator: bootstrap recenter failed after 5 attempts");
}
|
||||
|
||||
// ─── Attack execution ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * @notice Replay one attack file op-by-op and report the resulting lm_eth_total.
 * @param attackFile Path to the *.jsonl attack file (one JSON op per line).
 * @return The value of _computeLmEthTotal() after the last op.
 * @dev Reading stops at the first empty line returned by vm.readLine, which is also
 *      what it returns at end of file.
 */
function _runAttack(string memory attackFile) internal returns (uint256) {
    // Closing the file rewinds vm.readLine, so every call starts at line 1.
    vm.closeFile(attackFile);

    // Start with no remembered positions. After a vm.revertTo these arrays are
    // already empty (test-contract storage is reverted too); the explicit reset
    // covers the first attack and any future call path without a preceding revert.
    delete _stakedPositionIds;
    delete _mintedNpmTokenIds;

    for (
        string memory opLine = vm.readLine(attackFile);
        bytes(opLine).length != 0;
        opLine = vm.readLine(attackFile)
    ) {
        _executeOp(opLine);
    }

    return _computeLmEthTotal();
}
|
||||
|
||||
/**
 * @notice Execute a single attack operation (the original note says this mirrors
 *         AttackRunner._execute, which is not visible from here).
 * @param line One JSON object from an attack file. Its ".op" field selects the action:
 *        buy / sell          swap WETH<->KRK as the adversary (".amount"; sell accepts "all")
 *        recenter            call LM.recenter() as the recenter caller; failures are ignored
 *        stake / unstake     open a staking position / exit one by 1-based index (".positionId")
 *        mine                advance block.number by ".blocks"
 *        mint_lp / burn_lp   mint an LP position / fully withdraw one by 1-based index (".tokenId")
 *        Unknown ops are silently ignored.
 * @dev Numeric fields that are narrowed (taxRateIndex to uint32, ticks to int24) are
 *      range-checked first, so an out-of-range value in an attack file reverts instead
 *      of being silently truncated into a different, valid-looking number.
 */
function _executeOp(string memory line) internal {
    string memory op = vm.parseJsonString(line, ".op");

    if (_eq(op, "buy")) {
        _swapExactIn(WETH_ADDR, krkAddr, vm.parseUint(vm.parseJsonString(line, ".amount")));
    } else if (_eq(op, "sell")) {
        string memory amtStr = vm.parseJsonString(line, ".amount");
        // "all" sells the adversary's entire KRK balance; selling nothing is a no-op.
        uint256 amount = _eq(amtStr, "all") ? IERC20(krkAddr).balanceOf(advAddr) : vm.parseUint(amtStr);
        if (amount == 0) return;
        _swapExactIn(krkAddr, WETH_ADDR, amount);
    } else if (_eq(op, "recenter")) {
        // Best effort: a recenter that reverts is simply skipped.
        vm.prank(recenterAddr);
        try ILM(lmAddr).recenter() { } catch { }
    } else if (_eq(op, "stake")) {
        uint256 amount = vm.parseUint(vm.parseJsonString(line, ".amount"));
        uint256 rawTaxRate = vm.parseJsonUint(line, ".taxRateIndex");
        require(rawTaxRate <= type(uint32).max, "FitnessEvaluator: stake taxRateIndex exceeds uint32");
        vm.prank(advAddr);
        uint256 posId = IStake(stakeAddr).snatch(amount, advAddr, uint32(rawTaxRate), new uint256[](0));
        _stakedPositionIds.push(posId);
    } else if (_eq(op, "unstake")) {
        // .positionId is a 1-based index into the positions opened by earlier "stake" ops.
        uint256 posIndex = vm.parseJsonUint(line, ".positionId");
        require(
            posIndex >= 1 && posIndex <= _stakedPositionIds.length,
            "FitnessEvaluator: unstake positionId out of range"
        );
        vm.prank(advAddr);
        IStake(stakeAddr).exitPosition(_stakedPositionIds[posIndex - 1]);
    } else if (_eq(op, "mine")) {
        uint256 blocks = vm.parseJsonUint(line, ".blocks");
        vm.roll(block.number + blocks);
    } else if (_eq(op, "mint_lp")) {
        _mintLp(line);
    } else if (_eq(op, "burn_lp")) {
        _burnLp(line);
    }
    // Unknown ops are silently ignored (mirrors AttackRunner behaviour).
}

/// @dev Single-hop exact-input swap on the POOL_FEE pool, sent by and paid out to the
///      adversary, with no slippage or price limit.
function _swapExactIn(address tokenIn, address tokenOut, uint256 amountIn) internal {
    vm.prank(advAddr);
    ISwapRouter02(SWAP_ROUTER).exactInputSingle(
        ISwapRouter02.ExactInputSingleParams({
            tokenIn: tokenIn,
            tokenOut: tokenOut,
            fee: POOL_FEE,
            recipient: advAddr,
            amountIn: amountIn,
            amountOutMinimum: 0,
            sqrtPriceLimitX96: 0
        })
    );
}

/// @dev Read a JSON integer and narrow it to int24, reverting if it does not fit.
function _jsonInt24(string memory json, string memory key) internal view returns (int24) {
    int256 raw = vm.parseJsonInt(json, key);
    require(raw >= type(int24).min && raw <= type(int24).max, "FitnessEvaluator: tick out of int24 range");
    return int24(raw);
}

/// @dev "mint_lp": mint an LP position for the adversary and remember its tokenId so
///      "burn_lp" can reference it by 1-based index, which keeps attack files
///      independent of the fork block (NPM tokenIds depend on the fork tip).
function _mintLp(string memory line) internal {
    int24 tickLower = _jsonInt24(line, ".tickLower");
    int24 tickUpper = _jsonInt24(line, ".tickUpper");
    uint256 amount0 = vm.parseUint(vm.parseJsonString(line, ".amount0"));
    uint256 amount1 = vm.parseUint(vm.parseJsonString(line, ".amount1"));
    (address t0, address t1) = token0isWeth ? (WETH_ADDR, krkAddr) : (krkAddr, WETH_ADDR);

    vm.prank(advAddr);
    (uint256 mintedTokenId,,,) = INonfungiblePositionManager(NPM_ADDR).mint(
        INonfungiblePositionManager.MintParams({
            token0: t0,
            token1: t1,
            fee: POOL_FEE,
            tickLower: tickLower,
            tickUpper: tickUpper,
            amount0Desired: amount0,
            amount1Desired: amount1,
            amount0Min: 0,
            amount1Min: 0,
            recipient: advAddr,
            deadline: block.timestamp + 3600
        })
    );
    _mintedNpmTokenIds.push(mintedTokenId);
}

/// @dev "burn_lp": remove all liquidity from a previously minted position and collect
///      the proceeds. `.tokenId` in the attack file is a 1-based index into
///      _mintedNpmTokenIds (positions created by mint_lp ops in this run), not a raw
///      NPM tokenId. A position that is already empty is left untouched.
function _burnLp(string memory line) internal {
    uint256 tokenIndex = vm.parseJsonUint(line, ".tokenId");
    require(
        tokenIndex >= 1 && tokenIndex <= _mintedNpmTokenIds.length,
        "FitnessEvaluator: burn_lp tokenId out of range (must be 1-based index of a prior mint_lp op)"
    );
    uint256 tokenId = _mintedNpmTokenIds[tokenIndex - 1];
    (,,,,,,, uint128 liquidity,,,,) = INonfungiblePositionManager(NPM_ADDR).positions(tokenId);
    if (liquidity == 0) return;

    vm.startPrank(advAddr);
    INonfungiblePositionManager(NPM_ADDR).decreaseLiquidity(
        INonfungiblePositionManager.DecreaseLiquidityParams({
            tokenId: tokenId,
            liquidity: liquidity,
            amount0Min: 0,
            amount1Min: 0,
            deadline: block.timestamp + 3600
        })
    );
    INonfungiblePositionManager(NPM_ADDR).collect(
        INonfungiblePositionManager.CollectParams({
            tokenId: tokenId,
            recipient: advAddr,
            amount0Max: type(uint128).max,
            amount1Max: type(uint128).max
        })
    );
    vm.stopPrank();
}
|
||||
|
||||
// ─── Score computation ────────────────────────────────────────────────────
|
||||
|
||||
/**
 * @notice Total ETH-denominated holdings of the LiquidityManager:
 *         native ETH balance + WETH balance + ETH value of its three positions.
 * @dev The original note says this mirrors AttackRunner._logSnapshot's lm_eth_total
 *      (not visible from here). Positions are valued at the pool's current sqrt price.
 */
function _computeLmEthTotal() internal view returns (uint256) {
    (uint160 sqrtPriceX96,,,,,,) = pool.slot0();

    // Liquid holdings first.
    uint256 total = lmAddr.balance + IERC20(WETH_ADDR).balanceOf(lmAddr);

    // Then each managed position: stage 0 = FLOOR, 1 = ANCHOR, 2 = DISCOVERY.
    for (uint8 stage = 0; stage < 3; stage++) {
        (uint128 liq, int24 lo, int24 hi) = ILM(lmAddr).positions(stage);
        total += _positionEthValue(sqrtPriceX96, lo, hi, liq);
    }

    return total;
}
|
||||
|
||||
/**
 * @notice Value of one Uniswap V3 position expressed in ETH at the given pool price.
 * @dev The token amounts backing `liquidity` are computed for the tick range, then the
 *      KRK side is converted to ETH using the spot price implied by `sqrtPriceX96`.
 * @param sqrtPriceX96 Current pool sqrt price (Q64.96).
 * @param tickLower    Lower tick of the position.
 * @param tickUpper    Upper tick of the position.
 * @param liquidity    Position liquidity; zero yields zero.
 * @return ETH amount held by the position plus the ETH equivalent of its KRK amount.
 */
function _positionEthValue(
    uint160 sqrtPriceX96,
    int24 tickLower,
    int24 tickUpper,
    uint128 liquidity
)
    internal
    view
    returns (uint256)
{
    if (liquidity == 0) return 0;

    (uint256 amount0, uint256 amount1) = LiquidityAmounts.getAmountsForLiquidity(
        sqrtPriceX96, TickMath.getSqrtRatioAtTick(tickLower), TickMath.getSqrtRatioAtTick(tickUpper), liquidity
    );

    // Map the pool's token0/token1 amounts onto ETH and KRK.
    uint256 ethAmount;
    uint256 krkAmount;
    if (token0isWeth) {
        ethAmount = amount0;
        krkAmount = amount1;
    } else {
        ethAmount = amount1;
        krkAmount = amount0;
    }

    // Nothing to convert, or no usable price: report the ETH side only.
    if (krkAmount == 0 || sqrtPriceX96 == 0) return ethAmount;

    uint256 q96 = 1 << 96;
    uint256 krkInEth = token0isWeth
        // token0=WETH, token1=KRK: 1 KRK = 2^192 / sqrtP^2 WETH
        ? FullMath.mulDiv(FullMath.mulDiv(krkAmount, q96, sqrtPriceX96), q96, sqrtPriceX96)
        // token0=KRK, token1=WETH: 1 KRK = sqrtP^2 / 2^192 WETH
        : FullMath.mulDiv(FullMath.mulDiv(krkAmount, sqrtPriceX96, q96), sqrtPriceX96, q96);

    return ethAmount + krkInEth;
}
|
||||
|
||||
// ─── Utilities ────────────────────────────────────────────────────────────
|
||||
|
||||
/// @dev String equality: true when `a` and `b` contain exactly the same bytes.
function _eq(string memory a, string memory b) internal pure returns (bool) {
    bytes memory left = bytes(a);
    bytes memory right = bytes(b);
    // Different lengths can never match; otherwise compare content hashes.
    if (left.length != right.length) return false;
    return keccak256(left) == keccak256(right);
}
|
||||
|
||||
/// @dev True when the bytes of `str` end with the bytes of `suffix`.
///      An empty suffix matches every string.
function _endsWith(string memory str, string memory suffix) internal pure returns (bool) {
    bytes memory haystack = bytes(str);
    bytes memory tail = bytes(suffix);
    uint256 tailLen = tail.length;
    if (tailLen > haystack.length) return false;

    // Walk both byte strings from their last byte towards the front.
    uint256 shift = haystack.length - tailLen;
    for (uint256 k = tailLen; k > 0; k--) {
        if (haystack[shift + k - 1] != tail[k - 1]) return false;
    }
    return true;
}
|
||||
|
||||
/// @dev Decimal ASCII representation of `n`, without leading zeros ("0" for zero).
function _uint2str(uint256 n) internal pure returns (string memory) {
    if (n == 0) return "0";

    // Count decimal digits.
    uint256 len = 0;
    for (uint256 probe = n; probe != 0; probe /= 10) {
        len++;
    }

    // Fill the buffer from the least significant digit backwards; 48 is ASCII '0'.
    bytes memory out = new bytes(len);
    for (uint256 pos = len; pos > 0; pos--) {
        out[pos - 1] = bytes1(uint8(48 + (n % 10)));
        n /= 10;
    }
    return string(out);
}
|
||||
}
|
||||
|
|
@ -44,8 +44,15 @@ export PATH="${HOME}/.foundry/bin:${PATH}"
|
|||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
|
||||
BATCH_EVAL_SH="$SCRIPT_DIR/revm-evaluator/batch-eval.sh"
|
||||
MUTATE_CLI="$SCRIPT_DIR/mutate-cli.ts"
|
||||
|
||||
# EVAL_MODE controls which fitness backend is used:
|
||||
# anvil (default) — per-candidate Anvil+forge-script pipeline (fitness.sh)
|
||||
# revm — in-process revm via FitnessEvaluator.t.sol (batch-eval.sh)
|
||||
# Requires BASE_RPC_URL env var. 10-100× faster at scale.
|
||||
EVAL_MODE="${EVAL_MODE:-anvil}"
|
||||
|
||||
# =============================================================================
|
||||
# Argument parsing
|
||||
# =============================================================================
|
||||
|
|
@ -171,6 +178,15 @@ done
|
|||
[ -f "$MUTATE_CLI" ] || fail "mutate-cli.ts not found at $MUTATE_CLI"
|
||||
[ -x "$FITNESS_SH" ] || chmod +x "$FITNESS_SH"
|
||||
|
||||
if [ "$EVAL_MODE" = "revm" ]; then
|
||||
[ -f "$BATCH_EVAL_SH" ] || fail "batch-eval.sh not found at $BATCH_EVAL_SH"
|
||||
[ -x "$BATCH_EVAL_SH" ] || chmod +x "$BATCH_EVAL_SH"
|
||||
[ -n "${BASE_RPC_URL:-}" ] || fail "EVAL_MODE=revm requires BASE_RPC_URL env var (Base network RPC)"
|
||||
command -v forge &>/dev/null || fail "forge not found in PATH (required for EVAL_MODE=revm)"
|
||||
elif [ "$EVAL_MODE" != "anvil" ]; then
|
||||
fail "Unknown EVAL_MODE '$EVAL_MODE' — must be 'anvil' or 'revm'"
|
||||
fi
|
||||
|
||||
TSX_CMD="$(find_tsx_cmd)" || fail \
|
||||
"No TypeScript runner found. Install tsx (npm install -g tsx) or ensure npx is in PATH."
|
||||
|
||||
|
|
@ -194,6 +210,7 @@ log " Generations: $GENERATIONS"
|
|||
log " Mutation rate: $MUTATION_RATE"
|
||||
log " Output: $OUTPUT_DIR"
|
||||
log " TSX: $TSX_CMD"
|
||||
log " Eval mode: $EVAL_MODE"
|
||||
log "========================================================"
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -241,6 +258,29 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
|
|||
SCORE_VALUES=""
|
||||
CAND_COUNT=0
|
||||
|
||||
# In revm mode, batch-score all candidates in one forge test invocation before
|
||||
# the per-candidate loop. Scores are written to a temp JSONL file that the
|
||||
# loop reads with a fast Python lookup.
|
||||
BATCH_SCORES_FILE="$WORK_DIR/batch_scores_gen_${gen}.jsonl"
|
||||
|
||||
if [ "$EVAL_MODE" = "revm" ]; then
|
||||
declare -a _BATCH_FILES=()
|
||||
for _CF in "$CURRENT_GEN_DIR"/candidate_*.push3; do
|
||||
[ -f "$_CF" ] && _BATCH_FILES+=("$_CF")
|
||||
done
|
||||
|
||||
if [ "${#_BATCH_FILES[@]}" -gt 0 ]; then
|
||||
BATCH_EC=0
|
||||
bash "$BATCH_EVAL_SH" "${_BATCH_FILES[@]}" > "$BATCH_SCORES_FILE" 2>/dev/null \
|
||||
|| BATCH_EC=$?
|
||||
|
||||
if [ "$BATCH_EC" -eq 2 ]; then
|
||||
fail "batch-eval.sh reported an infrastructure error (exit 2) — aborting evolution"
|
||||
fi
|
||||
log " revm batch scoring complete (exit $BATCH_EC)"
|
||||
fi
|
||||
fi
|
||||
|
||||
for CAND_FILE in "$CURRENT_GEN_DIR"/candidate_*.push3; do
|
||||
[ -f "$CAND_FILE" ] || continue
|
||||
|
||||
|
|
@ -255,16 +295,37 @@ for gen in $(seq 0 $((GENERATIONS - 1))); do
|
|||
|
||||
SCORE=0
|
||||
FITNESS_EC=0
|
||||
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
|
||||
|
||||
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
|
||||
if [ "$FITNESS_EC" -eq 2 ]; then
|
||||
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
|
||||
if [ "$EVAL_MODE" = "revm" ] && [ -f "$BATCH_SCORES_FILE" ]; then
|
||||
# Look up pre-computed score from batch-eval.sh output.
|
||||
SCORE=$(python3 - "$CID" "$BATCH_SCORES_FILE" <<'PYEOF'
|
||||
import json, sys
|
||||
cid = sys.argv[1]
|
||||
with open(sys.argv[2]) as f:
|
||||
for line in f:
|
||||
try:
|
||||
d = json.loads(line)
|
||||
if d.get("candidate_id") == cid:
|
||||
print(d["fitness"])
|
||||
sys.exit(0)
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
pass
|
||||
print(0)
|
||||
PYEOF
|
||||
) || SCORE=0
|
||||
else
|
||||
# Anvil mode (or revm fallback): score candidate individually.
|
||||
SCORE=$(bash "$FITNESS_SH" "$CAND_FILE" 2>/dev/null) || FITNESS_EC=$?
|
||||
|
||||
# Exit 2 = infrastructure error (Anvil down, missing tools): abort immediately.
|
||||
if [ "$FITNESS_EC" -eq 2 ]; then
|
||||
fail "fitness.sh reported an infrastructure error (exit 2) — aborting evolution"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Validate that score is a non-negative integer; treat any other output as invalid.
|
||||
if [ "$FITNESS_EC" -ne 0 ] || ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
|
||||
log " $CID: invalid candidate (fitness.sh exit $FITNESS_EC), score=0"
|
||||
if ! [[ "$SCORE" =~ ^[0-9]+$ ]]; then
|
||||
log " $CID: invalid/missing score, using 0"
|
||||
SCORE=0
|
||||
else
|
||||
log " $CID: fitness=$SCORE"
|
||||
|
|
|
|||
226
tools/push3-evolution/revm-evaluator/batch-eval.sh
Executable file
226
tools/push3-evolution/revm-evaluator/batch-eval.sh
Executable file
|
|
@ -0,0 +1,226 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# batch-eval.sh — revm-based batch fitness evaluator
|
||||
#
|
||||
# Replaces the per-candidate Anvil+forge-script pipeline with in-process EVM
|
||||
# execution via Foundry's native revm backend (FitnessEvaluator.t.sol).
|
||||
#
|
||||
# Speedup: compiles each candidate once (unavoidable — different Solidity per
|
||||
# candidate), then runs ALL attack sequences in a single in-process forge test
|
||||
# with O(1) memory snapshot/revert instead of RPC calls per attack.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/push3-evolution/revm-evaluator/batch-eval.sh \
|
||||
# [--output-dir /tmp/scores] \
|
||||
# candidate0.push3 candidate1.push3 ...
|
||||
#
|
||||
# Output (stdout):
|
||||
# One JSON object per candidate:
|
||||
# {"candidate_id":"gen0_c000","fitness":123456789}
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 Success.
|
||||
# 1 Candidate-level error (transpile/compile failed for at least one candidate).
|
||||
# 2 Infrastructure error (missing tool, BASE_RPC_URL not set, forge test failed).
|
||||
#
|
||||
# Environment:
|
||||
# BASE_RPC_URL Required. Base network RPC endpoint for forking.
|
||||
# ATTACKS_DIR Optional. Path to *.jsonl attack files.
|
||||
# (default: <repo>/onchain/script/backtesting/attacks)
|
||||
# OUTPUT_DIR Optional. Directory to copy scores.jsonl into (--output-dir overrides).
|
||||
# =============================================================================
|
||||
|
||||
# Stop on unhandled command failures, on reads of unset variables, and on
# failures anywhere inside a pipeline.
set -euo pipefail

# Make binaries under ~/.foundry/bin discoverable ahead of the rest of PATH.
export PATH="${HOME}/.foundry/bin:${PATH}"

# Absolute directory of this script, and the repository root three levels up.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Foundry project and the Push3 transpiler that feeds it.
ONCHAIN_DIR="${REPO_ROOT}/onchain"
TRANSPILER_DIR="${REPO_ROOT}/tools/push3-transpiler"

# Solidity file the transpiler writes for each candidate, and the build
# artifact the bytecode is later read from.
TRANSPILER_OUT="${ONCHAIN_DIR}/src/OptimizerV3Push3.sol"
ARTIFACT_PATH="${ONCHAIN_DIR}/out/OptimizerV3Push3.sol/OptimizerV3Push3.json"

# Attack files used when ATTACKS_DIR is not supplied by the caller.
DEFAULT_ATTACKS_DIR="${ONCHAIN_DIR}/script/backtesting/attacks"
|
||||
|
||||
# =============================================================================
|
||||
# Argument parsing
|
||||
# =============================================================================
|
||||
|
||||
# OUTPUT_DIR may be inherited from the environment; --output-dir overrides it.
OUTPUT_DIR="${OUTPUT_DIR:-}"

# Candidate .push3 files, in the order given on the command line.
declare -a PUSH3_FILES=()

while [[ $# -gt 0 ]]; do
  case "$1" in
    --output-dir)
      # With `set -u`, reading a missing "$2" would abort the script with a
      # generic "unbound variable" message and exit status 1, which this
      # script documents as a candidate-level error. Report it as a usage
      # error (exit 2) instead.
      if [[ $# -lt 2 ]]; then
        echo "Option --output-dir requires a directory argument" >&2
        echo "Usage: $0 [--output-dir DIR] candidate1.push3 ..." >&2
        exit 2
      fi
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --*)
      echo "Unknown option: $1" >&2
      exit 2
      ;;
    *)
      PUSH3_FILES+=("$1")
      shift
      ;;
  esac
done

# At least one candidate file is required.
if [ "${#PUSH3_FILES[@]}" -eq 0 ]; then
  echo "Usage: $0 [--output-dir DIR] candidate1.push3 ..." >&2
  exit 2
fi
|
||||
|
||||
# =============================================================================
|
||||
# Environment checks
|
||||
# =============================================================================
|
||||
|
||||
# The fork endpoint is mandatory; treat "unset" and "empty" the same way.
BASE_RPC_URL="${BASE_RPC_URL:-}"
if [ -z "$BASE_RPC_URL" ]; then
  echo " [batch-eval] ERROR: BASE_RPC_URL env var required for Base network fork" >&2
  exit 2
fi

# Every external tool the later steps invoke unconditionally. npx is checked
# here because the transpile step discards its output, so a missing npx would
# otherwise surface only as every candidate reporting "transpile failed".
for _tool in forge node npx python3; do
  if ! command -v "$_tool" &>/dev/null; then
    echo " [batch-eval] ERROR: $_tool not found in PATH" >&2
    exit 2
  fi
done
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
# log MESSAGE...
#   Write a tagged progress message to stderr, keeping stdout free for the
#   score JSON lines.
log() {
  printf ' [batch-eval] %s\n' "$*" >&2
}

# fail2 MESSAGE...
#   Write a tagged error message to stderr and terminate with exit status 2
#   (infrastructure error).
fail2() {
  printf ' [batch-eval] ERROR: %s\n' "$*" >&2
  exit 2
}
|
||||
|
||||
# =============================================================================
|
||||
# Step 1 — Ensure transpiler dependencies are installed
|
||||
# =============================================================================
|
||||
|
||||
# Install the transpiler's npm dependencies only when node_modules is absent.
# The install runs in a subshell so the caller's working directory is untouched.
if ! [ -d "$TRANSPILER_DIR/node_modules" ]; then
  log "Installing transpiler dependencies…"
  if ! (cd "$TRANSPILER_DIR" && npm install --silent); then
    fail2 "npm install in push3-transpiler failed"
  fi
fi
|
||||
|
||||
# =============================================================================
|
||||
# Step 2 — Transpile + compile each candidate, extract bytecodes into manifest
|
||||
# =============================================================================
|
||||
|
||||
# Manifest consumed by FitnessEvaluator.t.sol: line N of ids.txt names the
# candidate whose bytecode is on line N of bytecodes.txt.
MANIFEST_DIR="$(mktemp -d)"
IDS_FILE="$MANIFEST_DIR/ids.txt"
BYTECODES_FILE="$MANIFEST_DIR/bytecodes.txt"

: > "$IDS_FILE"
: > "$BYTECODES_FILE"

# Every candidate is transpiled over $TRANSPILER_OUT inside the Foundry
# project. Keep a copy of whatever was there so it can be put back: a candidate
# that transpiles but does not compile would otherwise stay in src/ and could
# break the project-wide compile that `forge test` performs afterwards.
TRANSPILER_OUT_BACKUP=""
if [ -f "$TRANSPILER_OUT" ]; then
  TRANSPILER_OUT_BACKUP="$(mktemp)"
  cp -p "$TRANSPILER_OUT" "$TRANSPILER_OUT_BACKUP" || fail2 "could not back up $TRANSPILER_OUT"
fi

# Put $TRANSPILER_OUT back to its pre-run state (or remove it if it did not
# exist before this script started).
restore_transpiler_out() {
  if [ -n "$TRANSPILER_OUT_BACKUP" ]; then
    cp -p "$TRANSPILER_OUT_BACKUP" "$TRANSPILER_OUT"
  else
    rm -f "$TRANSPILER_OUT"
  fi
}

# EXIT handler: restore the source tree and remove temporary files. Every step
# is best-effort so the handler never replaces the script's own exit status.
cleanup() {
  restore_transpiler_out || true
  rm -rf "$MANIFEST_DIR" || true
  if [ -n "$TRANSPILER_OUT_BACKUP" ]; then
    rm -f "$TRANSPILER_OUT_BACKUP" || true
  fi
}
trap cleanup EXIT

COMPILED_COUNT=0
FAILED_IDS=""

for PUSH3_FILE in "${PUSH3_FILES[@]}"; do
  CANDIDATE_ID="$(basename "$PUSH3_FILE" .push3)"

  # Report a missing input explicitly instead of letting it surface as an
  # opaque transpile failure.
  if [ ! -f "$PUSH3_FILE" ]; then
    log "WARNING: candidate file not found for $CANDIDATE_ID ($PUSH3_FILE) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Absolute path: the transpiler below runs from a different directory.
  PUSH3_FILE="$(cd "$(dirname "$PUSH3_FILE")" && pwd)/$(basename "$PUSH3_FILE")"

  # Transpile Push3 → OptimizerV3Push3.sol
  TRANSPILE_EC=0
  (
    cd "$TRANSPILER_DIR"
    npx ts-node src/index.ts "$PUSH3_FILE" "$TRANSPILER_OUT"
  ) >/dev/null 2>&1 || TRANSPILE_EC=$?

  if [ "$TRANSPILE_EC" -ne 0 ]; then
    log "WARNING: transpile failed for $CANDIDATE_ID (exit $TRANSPILE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Compile (forge's incremental build skips unchanged files quickly)
  FORGE_EC=0
  (cd "$ONCHAIN_DIR" && forge build --silent) >/dev/null 2>&1 || FORGE_EC=$?

  if [ "$FORGE_EC" -ne 0 ]; then
    log "WARNING: forge build failed for $CANDIDATE_ID (exit $FORGE_EC) — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Read the artifact's bytecode.object field and normalise it to carry a
  # 0x prefix.
  BYTECODE_HEX="$(python3 - "$ARTIFACT_PATH" <<'PYEOF'
import json, sys
with open(sys.argv[1]) as f:
    d = json.load(f)
bytecode = d["bytecode"]["object"]
# Ensure 0x prefix
if not bytecode.startswith("0x"):
    bytecode = "0x" + bytecode
print(bytecode)
PYEOF
  )" || { log "WARNING: failed to extract bytecode for $CANDIDATE_ID — skipping"; FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"; continue; }

  if [ -z "$BYTECODE_HEX" ] || [ "$BYTECODE_HEX" = "0x" ]; then
    log "WARNING: empty bytecode for $CANDIDATE_ID — skipping"
    FAILED_IDS="$FAILED_IDS $CANDIDATE_ID"
    continue
  fi

  # Append to both files together so their line numbers stay aligned.
  printf '%s\n' "$CANDIDATE_ID" >> "$IDS_FILE"
  printf '%s\n' "$BYTECODE_HEX" >> "$BYTECODES_FILE"
  COMPILED_COUNT=$((COMPILED_COUNT + 1))
  log "Compiled $CANDIDATE_ID"
done

# Bytecodes are captured in the manifest, so the source tree can be restored
# now, before anything else compiles the project.
restore_transpiler_out || fail2 "could not restore $TRANSPILER_OUT"

if [ "$COMPILED_COUNT" -eq 0 ]; then
  fail2 "No candidates compiled successfully — aborting"
fi

log "Compiled $COMPILED_COUNT / ${#PUSH3_FILES[@]} candidates"
|
||||
|
||||
# =============================================================================
|
||||
# Step 3 — Run FitnessEvaluator.t.sol (in-process revm, all candidates at once)
|
||||
# =============================================================================
|
||||
|
||||
# Caller-supplied attack directory wins; otherwise use the in-repo default.
ATTACKS_DIR="${ATTACKS_DIR:-$DEFAULT_ATTACKS_DIR}"

log "Running FitnessEvaluator.t.sol (in-process revm, fork: $BASE_RPC_URL)…"

# Arguments for the single forge invocation that evaluates every candidate.
forge_test_args=(
  test
  --match-contract FitnessEvaluator
  --match-test testBatchEvaluate
  -vv
  --no-match-path "NOT_A_REAL_PATH"
)

# Capture stdout and stderr of forge together; the exit status is recorded
# rather than allowed to abort the script, so the output can be shown first.
FORGE_TEST_EC=0
FORGE_OUTPUT="$(
  cd "$ONCHAIN_DIR"
  BASE_RPC_URL="$BASE_RPC_URL" \
  FITNESS_MANIFEST_DIR="$MANIFEST_DIR" \
  ATTACKS_DIR="$ATTACKS_DIR" \
    forge "${forge_test_args[@]}" 2>&1
)" || FORGE_TEST_EC=$?

[ "$FORGE_TEST_EC" -eq 0 ] || {
  # Surface forge output on failure for diagnosis
  printf '%s\n' "$FORGE_OUTPUT" >&2
  fail2 "forge test failed (exit $FORGE_TEST_EC)"
}
|
||||
|
||||
# =============================================================================
|
||||
# Step 4 — Extract and emit score JSON lines
|
||||
#
|
||||
# forge test -vv wraps console.log output with leading spaces and a "Logs:" header.
|
||||
# We grep for lines containing the score JSON pattern and strip the indentation.
|
||||
# =============================================================================
|
||||
|
||||
# Keep only the lines that carry a score object and drop the indentation that
# forge adds in front of console.log output. `|| true` covers the no-match case,
# where grep's non-zero status would otherwise trip `set -e` via pipefail.
SCORES_JSONL="$(printf '%s\n' "$FORGE_OUTPUT" | grep -E '"candidate_id"' | sed 's/^[[:space:]]*//' || true)"

if [ -z "$SCORES_JSONL" ]; then
  printf '%s\n' "$FORGE_OUTPUT" >&2
  fail2 "No score lines found in forge test output"
fi

# Cross-check the number of parsed score lines against the number of manifest
# entries, so a partially evaluated batch is visible in the log instead of
# being reported as fully scored.
SCORE_COUNT="$(printf '%s\n' "$SCORES_JSONL" | grep -c '"candidate_id"' || true)"
if [ "$SCORE_COUNT" -ne "$COMPILED_COUNT" ]; then
  log "WARNING: expected $COMPILED_COUNT score lines but found $SCORE_COUNT"
fi

# Emit scores to stdout
printf '%s\n' "$SCORES_JSONL"

# Optionally write to output directory
if [ -n "$OUTPUT_DIR" ]; then
  mkdir -p "$OUTPUT_DIR"
  printf '%s\n' "$SCORES_JSONL" > "$OUTPUT_DIR/scores.jsonl"
  log "Scores written to $OUTPUT_DIR/scores.jsonl"
fi

# Candidates dropped before evaluation (transpile, build, or bytecode
# extraction failed) turn an otherwise successful run into exit status 1.
# The scores above have already been emitted at this point.
if [ -n "$FAILED_IDS" ]; then
  log "WARNING: the following candidates were skipped (compile failed): $FAILED_IDS"
  exit 1
fi

log "Done — scored $SCORE_COUNT candidates"
|
||||
Loading…
Add table
Add a link
Reference in a new issue