Merge pull request 'fix: Formula: run-red-team (adversarial attack + discovery) (#976)' (#998) from fix/issue-976 into master

2026-03-19 12:14:41 +01:00 · 2026-03-19 12:14:41 +01:00 · 166c9c14d1
commit 166c9c14d1
parent 5c4ceaf78d 152f6e0a40
1 changed files with 260 additions and 0 deletions
--- a/formulas/run-red-team.toml
+++ b/formulas/run-red-team.toml
@@ -0,0 +1,260 @@
+# formulas/run-red-team.toml
+#
+# Adversarial red-team — spin up isolated stack, run adversarial agent against
+# the active optimizer, commit evidence, export newly discovered attack vectors.
+#
+# Type: act.  Produces evidence (floor held / broken) AND git artifacts
+#              (new attack vectors via PR to onchain/script/backtesting/attacks/).
+#
+# Depends on: #973 (evidence/red-team/ directory structure)
+#             #974 (promote-attacks.sh for attack vector export)
+
+[formula]
+id          = "run-red-team"   # matches the file name formulas/run-red-team.toml
+name        = "Adversarial Red-Team"
+description = "Spin up isolated stack, run adversarial agent against the active optimizer, commit evidence, export new attack vectors."
+type        = "act"   # this formula commits evidence and opens attack-vector PRs
+# Type values: "sense" → read-only, produces metrics only;
+#              "act"   → produces git artifacts (cf. run-evolution, run-red-team).
+depends_on  = [973, 974]   # issue numbers: #973 evidence/red-team/ layout, #974 promote-attacks.sh
+
+# ── Inputs ─────────────────────────────────────────────────────────────────────
+
+[inputs.candidate_name]
+type        = "string"
+required    = false
+default     = "unknown"   # label only — does not select the deployed optimizer (see notes.candidate_injection)
+description = "Human-readable label used in evidence records and attack filenames (passed as CANDIDATE_NAME)."
+
+[inputs.optimizer_profile]
+type        = "string"
+required    = false
+default     = "push3-default"   # label only — does not select the deployed optimizer (see notes.candidate_injection)
+description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion) passed as OPTIMIZER_PROFILE."
+
+[inputs.attack_dir]
+type        = "string"
+required    = false
+default     = "onchain/script/backtesting/attacks"   # same directory promote-attacks.sh deduplicates against
+description = """
+Directory containing existing .jsonl attack patterns for the structured
+attack suite.  Not yet forwarded to red-team.sh — see run-attack-suite step.
+"""
+status      = "planned"   # not read by red-team.sh yet; consumed only once run-attack-suite is implemented
+
+[inputs.claude_timeout]
+type        = "integer"
+required    = false
+default     = 7200   # seconds (2 hours)
+description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_TIMEOUT env var)."
+
+# ── Execution ──────────────────────────────────────────────────────────────────
+#
+# red-team.sh owns the full lifecycle:
+# bootstrap-light → fund LM → snapshot → adversarial agent → collect
+# → promote-attacks (if floor broken) → deliver → teardown.
+#
+# CANDIDATE_NAME and OPTIMIZER_PROFILE only label the evidence record and attack
+# filenames; bootstrap-light always deploys via DeployLocal.sol, and per-candidate
+# Push3 injection is planned but not yet wired (see notes.candidate_injection).
+# NOTE(review): {placeholders} in invocation are unquoted — confirm the runner shell-escapes values.
+
+[execution]
+script     = "scripts/harb-evaluator/red-team.sh"   # the same script the invocation line runs via bash
+invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh"
+
+# Exit codes propagated by red-team.sh:
+#   0  floor held (LM total ETH did not decrease)
+#   1  floor broken (adversary extracted ETH from LiquidityManager) — the only case where export-vectors runs
+#   2  infra error (Anvil unreachable, bootstrap failed, missing dependency, etc.)
+
+# ── Steps ──────────────────────────────────────────────────────────────────────
+
+[[steps]]
+id          = "stack-up"   # implemented by scripts/harb-evaluator/bootstrap-light.sh
+description = """
+Bootstrap an isolated Anvil fork with contracts deployed.
+scripts/harb-evaluator/bootstrap-light.sh:
+  - Starts a fresh Anvil instance (or reuses one if already running).
+  - Deploys KRK, LM, Stake, and OptimizerProxy via DeployLocal.sol.
+  - Funds LM with 1 000 ETH (as WETH) and calls recenter() to deploy
+    liquidity into positions — establishing a realistic baseline.
+  - Verifies Anvil responds and all contract addresses are present in
+    onchain/deployments-local.json before proceeding.
+Note: the deployed optimizer is always the default from DeployLocal.sol.
+Per-candidate Push3 transpilation is not yet implemented here; see
+notes.candidate_injection.
+"""   # NOTE(review): if an already-running Anvil is reused, confirm stack-down does not tear down an instance this run did not start
+
+[[steps]]
+id          = "run-attack-suite"   # forward spec only — see notes.run_attack_suite_gap
+description = """
+Run every existing .jsonl attack file in {attack_dir} through
+onchain/script/backtesting/AttackRunner.s.sol.
+For each file:
+  - Record LM total ETH before and after via forge script LmTotalEth.s.sol.
+  - Revert to the baseline Anvil snapshot between files so attacks are
+    independent.
+  - Emit one result entry: strategy name, abstract op pattern,
+    floor held / broken, delta in basis points.
+This phase exhausts the known attack catalogue before the adversarial
+agent is given a turn, seeding its memory with which strategies are
+already understood.
+"""
+attack_source = "{attack_dir}/*.jsonl"   # {attack_dir} is inputs.attack_dir
+forge_script  = "onchain/script/backtesting/AttackRunner.s.sol"
+snapshot_mode = "revert-between-attacks"   # keeps attack files independent of each other
+status        = "planned"   # not yet implemented in red-team.sh; tracked for future PR
+
+[[steps]]
+id          = "run-adversarial-agent"   # the only attack phase red-team.sh currently runs (see notes.run_attack_suite_gap)
+description = """
+Spawn the Claude adversarial agent (red-team-program.md prompt) with full
+write access to cast / forge / python3 / jq.
+Goal: make ethPerToken() decrease — i.e. extract ETH from LiquidityManager.
+The agent:
+  1. Iterates freely: snapshot → craft novel attack → execute → measure
+     → revert → repeat.
+  2. Appends each attempted strategy to tmp/red-team-report.txt and
+     tmp/red-team-stream.jsonl.
+  3. On any confirmed ETH decrease: exports the winning op sequence to
+     tmp/red-team-attacks.jsonl and continues searching.
+Runs until CLAUDE_TIMEOUT expires or the agent signals completion.
+"""   # NOTE(review): the verdict is computed from LM total ETH (notes.floor_metric); confirm ethPerToken() is the intended goal wording
+timeout_env = "CLAUDE_TIMEOUT"   # populated from inputs.claude_timeout (seconds)
+memory_file = "tmp/red-team-memory.jsonl"   # cross-run pattern learning; appended to by the collect step
+report_file = "tmp/red-team-report.txt"   # agent appends each attempted strategy here
+stream_file = "tmp/red-team-stream.jsonl"   # agent's per-strategy stream log; mined by the collect step
+
+[[steps]]
+id          = "collect"   # post-run bookkeeping performed inside red-team.sh
+description = """
+After the agent run, red-team.sh:
+  1. Reads LM total ETH after (forge script LmTotalEth.s.sol).
+  2. Extracts strategy findings from tmp/red-team-stream.jsonl and appends
+     them to tmp/red-team-memory.jsonl for cross-run learning.
+  3. Exports the agent's cast send commands from the stream log to
+     tmp/red-team-attacks.jsonl via export-attacks.py.
+  4. Replays the exported sequence through AttackRunner.s.sol, writing full
+     state snapshots to tmp/red-team-snapshots.jsonl (used for optimizer
+     training; non-fatal if replay produces no output).
+  5. Computes floor_held / floor_broken and writes evidence/red-team/{date}.json
+     conforming to the schema in evidence/README.md ## Schema: red-team/.
+"""   # NOTE(review): run-adversarial-agent says the agent itself writes tmp/red-team-attacks.jsonl; confirm which writer is authoritative
+output            = "evidence/red-team/{date}.json"   # {date} presumably expands to YYYY-MM-DD (per the schema heading) — confirm
+schema            = "evidence/README.md"   # see ## Schema: red-team/YYYY-MM-DD.json
+side_output_file  = "tmp/red-team-snapshots.jsonl"   # AttackRunner state snapshots for optimizer training
+
+[[steps]]
+id          = "export-vectors"   # conditional: floor broken AND tmp/red-team-attacks.jsonl non-empty
+description = """
+Only runs when the floor is broken (BROKE=true in red-team.sh).
+If tmp/red-team-attacks.jsonl is non-empty, call promote-attacks.sh to open
+a Codeberg PR with the newly discovered attack vectors.
+promote-attacks.sh:
+  - Deduplicates by op-type fingerprint against existing files in
+    onchain/script/backtesting/attacks/.
+  - Auto-classifies the attack type (staking, il-crystallization,
+    floor-ratchet, fee-drain, lp-manipulation, floor-attack, …).
+  - Creates a git branch, commits the new .jsonl, and opens a Codeberg PR
+    targeting main, including the ETH extraction amount in the PR title and body.
+  - Exits 0 when no novel patterns remain after deduplication (non-fatal).
+Skipped gracefully if CODEBERG_TOKEN and ~/.netrc are both absent.
+Not called when the floor holds — novel-but-non-exploiting patterns are
+not promoted.
+"""
+script = "scripts/harb-evaluator/promote-attacks.sh"   # provided by #974 (see file header)
+args   = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile} --eth-extracted <delta_wei> --eth-before <lm_eth_before_wei>"
+# {curly} placeholders are formula inputs.  <angle> placeholders are computed at runtime by red-team.sh and
+# passed directly to promote-attacks.sh: delta = lm_eth_before − lm_eth_after, in wei — they are not formula inputs.
+
+[[steps]]
+id          = "stack-down"   # implemented by red-team.sh's cleanup() shell trap
+description = """
+Tear down the Anvil instance started in stack-up.
+red-team.sh registers cleanup() as a shell trap (EXIT / INT / TERM):
+  - Reverts to the baseline Anvil snapshot.
+  - Kills the Claude sub-process if still running.
+Always runs — even on infra error — so port 8545 is not left occupied.
+"""   # NOTE(review): listed before deliver, but the [execution] lifecycle comment orders deliver → teardown — confirm step order
+
+[[steps]]
+id          = "deliver"   # part of red-team.sh's lifecycle ("… → deliver → teardown", see [execution] comment)
+description = """
+Commit evidence/red-team/{date}.json to main and post a summary comment
+to the originating issue.
+Comment includes:
+  - Verdict (floor_held / floor_broken).
+  - ETH extracted (formatted in ETH) and delta in basis points.
+  - Total attacks tried (agent-discovered count; structured suite count
+    added once run-attack-suite is implemented).
+  - Link to committed evidence file.
+  - If novel vectors were promoted: link to the attack-vector PR.
+On floor_broken: also include the highest-yield attack strategy name and
+its abstract op pattern.
+"""
+
+# ── Products ───────────────────────────────────────────────────────────────────
+
+[products.evidence_file]
+path     = "evidence/red-team/{date}.json"   # written by the collect step
+delivery = "commit to main"   # NOTE(review): the merge that added this formula went into master — confirm "main" (here and in other delivery/description strings)
+schema   = "evidence/README.md"   # see ## Schema: red-team/YYYY-MM-DD.json
+
+[products.attack_vectors]
+path     = "onchain/script/backtesting/attacks/{attack_type}-{candidate_name}.jsonl"
+# {attack_type} is computed at runtime by promote-attacks.sh's classifier
+# (staking, il-crystallization, floor-ratchet, …); only {candidate_name} is a formula input.
+delivery = "PR to main"
+script   = "scripts/harb-evaluator/promote-attacks.sh"   # invoked by the export-vectors step
+note     = "Only created when the floor is broken AND novel (deduplicated) attack vectors are discovered."
+
+[products.issue_comment]
+delivery   = "post to originating issue"   # produced by the deliver step
+content    = "verdict (floor_held/floor_broken), ETH extracted, attacks tried, link to evidence file; if vectors found: link to attack-vector PR"
+on_failure = "include highest-yield attack name and op pattern; full agent transcript available in tmp/red-team-stream.jsonl"   # mirrors the deliver step's floor_broken clause; presumably not used for infra errors (exit 2) — confirm
+
+# ── Resources ──────────────────────────────────────────────────────────────────
+
+[resources]
+profile     = "heavy"
+compute     = "local — Anvil fork + revm, no Docker required"
+rpc         = "Anvil (bootstrap-light, default port 8545)"   # port is released by the stack-down step
+agent       = "Claude (claude CLI, CLAUDE_TIMEOUT seconds)"   # default 7200 s via inputs.claude_timeout
+concurrency = "exclusive — shares Anvil port 8545 with run-holdout and other heavy formulas"   # i.e. one port-8545 formula at a time
+
+# ── Notes ──────────────────────────────────────────────────────────────────────
+
+[notes]   # explanatory notes; each value is a multi-line string
+floor_metric = """
+The primary safety metric is LM total ETH: free ETH + free WETH + ETH locked
+across all three Uniswap V3 positions (floor, anchor, discovery).
+Measured via forge script LmTotalEth.s.sol using exact Uniswap V3 integer
+math (LiquidityAmounts + TickMath).  A decrease in total ETH = floor broken.
+"""   # red-team.sh exit code 1 corresponds to this "floor broken" condition
+
+attack_dedup = """
+promote-attacks.sh fingerprints each candidate attack by its abstract op
+sequence (e.g. wrap → buy → stake → recenter_multi → sell) and compares
+against all existing files in onchain/script/backtesting/attacks/.
+Only genuinely novel sequences are included in the PR — duplicate
+rediscoveries are silently dropped and the step exits 0.
+"""   # this dedup runs inside the export-vectors step
+
+candidate_injection = """
+Push3 candidate injection is not yet implemented: bootstrap-light.sh always
+deploys the default optimizer via DeployLocal.sol and does not read the
+CANDIDATE env var.  The candidate_name and optimizer_profile inputs are used
+only for labelling (evidence records, attack filenames, PR titles); they do not
+affect which optimizer is deployed.
+Wiring CANDIDATE → push3-transpiler → forge compile → bootstrap-light is
+tracked as a follow-up issue.
+"""   # NOTE(review): "CANDIDATE" above vs. CANDIDATE_NAME in [execution].invocation — confirm these are meant to be different variables
+
+run_attack_suite_gap = """
+The run-attack-suite step (structured loop over attacks/*.jsonl via
+AttackRunner.s.sol with snapshot revert between files) is not yet implemented
+in red-team.sh.  The current script runs only the adversarial Claude agent.
+The step is documented here as a forward spec; implementation is tracked as a
+follow-up issue.
+"""   # the run-attack-suite step and inputs.attack_dir carry status = "planned" for this reason