diff --git a/formulas/run-red-team.toml b/formulas/run-red-team.toml new file mode 100644 index 0000000..875035a --- /dev/null +++ b/formulas/run-red-team.toml @@ -0,0 +1,260 @@ +# formulas/run-red-team.toml +# +# Adversarial red-team — spin up isolated stack, run adversarial agent against +# the active optimizer, commit evidence, export newly discovered attack vectors. +# +# Type: act. Produces evidence (floor held / broken) AND git artifacts +# (new attack vectors via PR to onchain/script/backtesting/attacks/). +# +# Depends on: #973 (evidence/red-team/ directory structure) +# #974 (promote-attacks.sh for attack vector export) + +[formula] +id = "run-red-team" +name = "Adversarial Red-Team" +description = "Spin up isolated stack, run adversarial agent against the active optimizer, commit evidence, export new attack vectors." +type = "act" +# "sense" → read-only, produces metrics only +# "act" → produces git artifacts (cf. run-evolution, run-red-team) +depends_on = [973, 974] + +# ── Inputs ───────────────────────────────────────────────────────────────────── + +[inputs.candidate_name] +type = "string" +required = false +default = "unknown" +description = "Human-readable label used in evidence records and attack filenames (passed as CANDIDATE_NAME)." + +[inputs.optimizer_profile] +type = "string" +required = false +default = "push3-default" +description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion) passed as OPTIMIZER_PROFILE." + +[inputs.attack_dir] +type = "string" +required = false +default = "onchain/script/backtesting/attacks" +description = """ +Directory containing existing .jsonl attack patterns for the structured +attack suite. Not yet forwarded to red-team.sh — see run-attack-suite step. 
+""" +status = "planned" # consumed only when run-attack-suite is implemented + +[inputs.claude_timeout] +type = "integer" +required = false +default = 7200 +description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_TIMEOUT env var)." + +# ── Execution ────────────────────────────────────────────────────────────────── +# +# red-team.sh owns the full lifecycle: +# bootstrap-light → fund LM → snapshot → adversarial agent → collect +# → promote-attacks (if floor broken) → deliver → teardown. +# +# CANDIDATE_NAME and OPTIMIZER_PROFILE label the evidence record and attack +# filenames; they do not select which optimizer is deployed — bootstrap-light +# always deploys via DeployLocal.sol. Per-candidate Push3 injection is planned +# but not yet wired (see notes.candidate_injection). + +[execution] +script = "scripts/harb-evaluator/red-team.sh" +invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh" + +# Exit codes propagated by red-team.sh: +# 0 floor held (LM total ETH did not decrease) +# 1 floor broken (adversary extracted ETH from LiquidityManager) +# 2 infra error (Anvil unreachable, bootstrap failed, missing dependency, etc.) + +# ── Steps ────────────────────────────────────────────────────────────────────── + +[[steps]] +id = "stack-up" +description = """ +Bootstrap an isolated Anvil fork with contracts deployed. +scripts/harb-evaluator/bootstrap-light.sh: + - Starts a fresh Anvil instance (or reuses one if already running). + - Deploys KRK, LM, Stake, and OptimizerProxy via DeployLocal.sol. + - Funds LM with 1 000 ETH (as WETH) and calls recenter() to deploy + liquidity into positions — establishing a realistic baseline. + - Verifies Anvil responds and all contract addresses are present in + onchain/deployments-local.json before proceeding. +Note: the deployed optimizer is always the default from DeployLocal.sol. 
+Per-candidate Push3 transpilation is not yet implemented here; see +notes.candidate_injection. +""" + +[[steps]] +id = "run-attack-suite" +description = """ +Run every existing .jsonl attack file in {attack_dir} through +onchain/script/backtesting/AttackRunner.s.sol. +For each file: + - Record LM total ETH before and after via forge script LmTotalEth.s.sol. + - Revert to the baseline Anvil snapshot between files so attacks are + independent. + - Emit one result entry: strategy name, abstract op pattern, + floor held / broken, delta in basis points. +This phase exhausts the known attack catalogue before the adversarial +agent is given a turn, seeding its memory with which strategies are +already understood. +""" +attack_source = "{attack_dir}/*.jsonl" +forge_script = "onchain/script/backtesting/AttackRunner.s.sol" +snapshot_mode = "revert-between-attacks" +status = "planned" # not yet implemented in red-team.sh; tracked for future PR + +[[steps]] +id = "run-adversarial-agent" +description = """ +Spawn the Claude adversarial agent (red-team-program.md prompt) with full +write access to cast / forge / python3 / jq. +Goal: make ethPerToken() decrease — i.e. extract ETH from LiquidityManager. +The agent: + 1. Iterates freely: snapshot → craft novel attack → execute → measure + → revert → repeat. + 2. Appends each attempted strategy to tmp/red-team-report.txt and + tmp/red-team-stream.jsonl. + 3. On any confirmed ETH decrease: exports the winning op sequence to + tmp/red-team-attacks.jsonl and continues searching. +Runs until CLAUDE_TIMEOUT expires or the agent signals completion. +""" +timeout_env = "CLAUDE_TIMEOUT" +memory_file = "tmp/red-team-memory.jsonl" # cross-run pattern learning +report_file = "tmp/red-team-report.txt" +stream_file = "tmp/red-team-stream.jsonl" + +[[steps]] +id = "collect" +description = """ +After the agent run, red-team.sh: + 1. Reads LM total ETH after (forge script LmTotalEth.s.sol). + 2. 
Extracts strategy findings from tmp/red-team-stream.jsonl and appends + them to tmp/red-team-memory.jsonl for cross-run learning. + 3. Exports the agent's cast send commands from the stream log to + tmp/red-team-attacks.jsonl via export-attacks.py. + 4. Replays the exported sequence through AttackRunner.s.sol, writing full + state snapshots to tmp/red-team-snapshots.jsonl (used for optimizer + training; non-fatal if replay produces no output). + 5. Computes floor_held / floor_broken and writes evidence/red-team/{date}.json + conforming to the schema in evidence/README.md ## Schema: red-team/. +""" +output = "evidence/red-team/{date}.json" +schema = "evidence/README.md" # see ## Schema: red-team/YYYY-MM-DD.json +side_output_file = "tmp/red-team-snapshots.jsonl" # AttackRunner state snapshots for optimizer training + +[[steps]] +id = "export-vectors" +description = """ +Only runs when the floor is broken (BROKE=true in red-team.sh). +If tmp/red-team-attacks.jsonl is non-empty, call promote-attacks.sh to open +a Codeberg PR with the newly discovered attack vectors. +promote-attacks.sh: + - Deduplicates by op-type fingerprint against existing files in + onchain/script/backtesting/attacks/. + - Auto-classifies the attack type (staking, il-crystallization, + floor-ratchet, fee-drain, lp-manipulation, floor-attack, …). + - Creates a git branch, commits the new .jsonl, and opens a Codeberg PR + targeting main, including the ETH extraction amount in the PR title and body. + - Exits 0 when no novel patterns remain after deduplication (non-fatal). +Skipped gracefully if CODEBERG_TOKEN and ~/.netrc are both absent. +Not called when the floor holds — novel-but-non-exploiting patterns are +not promoted. 
+""" +script = "scripts/harb-evaluator/promote-attacks.sh" +args = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile} --eth-extracted <eth_extracted> --eth-before <eth_before>" +# --eth-extracted and --eth-before are computed at runtime by red-team.sh (lm_eth_before − lm_eth_after) +# and passed directly to promote-attacks.sh — they are not formula inputs. + +[[steps]] +id = "stack-down" +description = """ +Tear down the Anvil instance started in stack-up. +red-team.sh registers cleanup() as a shell trap (EXIT / INT / TERM): + - Reverts to the baseline Anvil snapshot. + - Kills the Claude sub-process if still running. +Always runs — even on infra error — so port 8545 is not left occupied. +""" + +[[steps]] +id = "deliver" +description = """ +Commit evidence/red-team/{date}.json to main and post a summary comment +to the originating issue. +Comment includes: + - Verdict (floor_held / floor_broken). + - ETH extracted (formatted in ETH) and delta in basis points. + - Total attacks tried (agent-discovered count; structured suite count + added once run-attack-suite is implemented). + - Link to committed evidence file. + - If novel vectors were promoted: link to the attack-vector PR. +On floor_broken: also include the highest-yield attack strategy name and +its abstract op pattern. +""" + +# ── Products ─────────────────────────────────────────────────────────────────── + +[products.evidence_file] +path = "evidence/red-team/{date}.json" +delivery = "commit to main" +schema = "evidence/README.md" # see ## Schema: red-team/YYYY-MM-DD.json + +[products.attack_vectors] +path = "onchain/script/backtesting/attacks/{attack_type}-{candidate_name}.jsonl" +# {attack_type} is not a formula input — it is computed at runtime by +# promote-attacks.sh's classifier (staking, il-crystallization, floor-ratchet, …). 
+delivery = "PR to main" +script = "scripts/harb-evaluator/promote-attacks.sh" +note = "Only created when the floor is broken AND novel (deduplicated) attack vectors are discovered." + +[products.issue_comment] +delivery = "post to originating issue" +content = "verdict (floor_held/floor_broken), ETH extracted, attacks tried, link to evidence file; if vectors found: link to attack-vector PR" +on_failure = "include highest-yield attack name and op pattern; full agent transcript available in tmp/red-team-stream.jsonl" + +# ── Resources ────────────────────────────────────────────────────────────────── + +[resources] +profile = "heavy" +compute = "local — Anvil fork + revm, no Docker required" +rpc = "Anvil (bootstrap-light, default port 8545)" +agent = "Claude (claude CLI, CLAUDE_TIMEOUT seconds)" +concurrency = "exclusive — shares Anvil port 8545 with run-holdout and other heavy formulas" + +# ── Notes ────────────────────────────────────────────────────────────────────── + +[notes] +floor_metric = """ +The primary safety metric is LM total ETH: free ETH + free WETH + ETH locked +across all three Uniswap V3 positions (floor, anchor, discovery). +Measured via forge script LmTotalEth.s.sol using exact Uniswap V3 integer +math (LiquidityAmounts + TickMath). A decrease in total ETH = floor broken. +""" + +attack_dedup = """ +promote-attacks.sh fingerprints each candidate attack by its abstract op +sequence (e.g. wrap → buy → stake → recenter_multi → sell) and compares +against all existing files in onchain/script/backtesting/attacks/. +Only genuinely novel sequences are included in the PR — duplicate +rediscoveries are silently dropped and the step exits 0. +""" + +candidate_injection = """ +Push3 candidate injection is not yet implemented: bootstrap-light.sh always +deploys the default optimizer via DeployLocal.sol and does not read the +CANDIDATE env var. 
The candidate_name and optimizer_profile inputs are used +only for labelling (evidence records, attack filenames, PR titles); they do not +affect which optimizer is deployed. +Wiring CANDIDATE → push3-transpiler → forge compile → bootstrap-light is +tracked as a follow-up issue. +""" + +run_attack_suite_gap = """ +The run-attack-suite step (structured loop over attacks/*.jsonl via +AttackRunner.s.sol with snapshot revert between files) is not yet implemented +in red-team.sh. The current script runs only the adversarial Claude agent. +The step is documented here as a forward spec; implementation is tracked as a +follow-up issue. +"""