fix: Formula: run-red-team (adversarial attack + discovery) (#976)

Address review feedback:
- Remove candidate input (Push3 transpilation not wired; documented in notes.candidate_injection as planned follow-up)
- Mark run-attack-suite step as status="planned" with run_attack_suite_gap note
- Update execution.invocation to only pass env vars red-team.sh actually reads
- Fix export-vectors args to include --eth-extracted and --eth-before flags
- Clarify export-vectors only runs when floor_broken (BROKE=true)
- Document tmp/red-team-snapshots.jsonl (AttackRunner replay side output)
- Add comment that {attack_type} in products.attack_vectors.path is runtime-computed by promote-attacks.sh, not a formula input
- Fix schema comment notation (§ → ##)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 10:48:59 +00:00 · 2026-03-19 10:48:59 +00:00 · 152f6e0a40
commit 152f6e0a40
parent 3564c4ad25
1 changed files with 76 additions and 58 deletions
--- a/formulas/run-red-team.toml
+++ b/formulas/run-red-team.toml
@ -1,7 +1,7 @@
 # formulas/run-red-team.toml
 #
-# Adversarial red-team — spin up isolated stack, run attack suite and adversarial
+# Adversarial red-team — spin up isolated stack, run adversarial agent against
-# agent against a Push3 optimizer candidate, commit evidence, export new vectors.
+# the active optimizer, commit evidence, export newly discovered attack vectors.
 #
 # Type: act.  Produces evidence (floor held / broken) AND git artifacts
 #              (new attack vectors via PR to onchain/script/backtesting/attacks/).
@ -12,7 +12,7 @@
 [formula]
 id          = "run-red-team"
 name        = "Adversarial Red-Team"
-description = "Spin up isolated stack, run attack suite and adversarial agent against a Push3 candidate, commit evidence, export new attack vectors."
+description = "Spin up isolated stack, run adversarial agent against the active optimizer, commit evidence, export new attack vectors."
 type        = "act"
 # "sense"  → read-only, produces metrics only
 # "act"    → produces git artifacts (cf. run-evolution, run-red-team)
@ -20,33 +20,27 @@ depends_on  = [973, 974]
 # ── Inputs ─────────────────────────────────────────────────────────────────────
-[inputs.candidate]
-type        = "string"
-required    = true
-description = """
-Repo-relative path to the Push3 candidate to evaluate
-(e.g. tools/push3-evolution/seeds/optimizer_v3.push3).
-The candidate is transpiled, compiled, and deployed as the active optimizer
-before the attack suite runs.
-"""
 [inputs.candidate_name]
 type        = "string"
 required    = false
 default     = "unknown"
-description = "Human-readable label used in evidence records and attack filenames."
+description = "Human-readable label used in evidence records and attack filenames (passed as CANDIDATE_NAME)."
 [inputs.optimizer_profile]
 type        = "string"
 required    = false
 default     = "push3-default"
-description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion)."
+description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion) passed as OPTIMIZER_PROFILE."
 [inputs.attack_dir]
 type        = "string"
 required    = false
 default     = "onchain/script/backtesting/attacks"
-description = "Directory containing existing .jsonl attack patterns to run as the structured attack suite."
+description = """
+Directory containing existing .jsonl attack patterns for the structured
+attack suite.  Not yet forwarded to red-team.sh — see run-attack-suite step.
+"""
+status      = "planned"   # consumed only when run-attack-suite is implemented
 [inputs.claude_timeout]
 type        = "integer"
@ -57,19 +51,20 @@ description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_
 # ── Execution ──────────────────────────────────────────────────────────────────
 #
 # red-team.sh owns the full lifecycle:
-# bootstrap-light → fund LM → snapshot → attack suite → adversarial agent
+# bootstrap-light → fund LM → snapshot → adversarial agent → collect
-# → collect → promote-attacks → deliver → teardown.
+# → promote-attacks (if floor broken) → deliver → teardown.
 #
-# The orchestrator sets CANDIDATE / CANDIDATE_NAME / OPTIMIZER_PROFILE env vars
+# CANDIDATE_NAME and OPTIMIZER_PROFILE label the evidence record and attack
-# before invoking the script so the agent and evidence record name the candidate
+# filenames; they do not select which optimizer is deployed — bootstrap-light
-# correctly.
+# always deploys via DeployLocal.sol.  Per-candidate Push3 injection is planned
+# but not yet wired (see notes.candidate_injection).
 [execution]
 script     = "scripts/harb-evaluator/red-team.sh"
-invocation = "CANDIDATE={candidate} CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh"
+invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh"
 # Exit codes propagated by red-team.sh:
-#   0  floor held (no confirmed ETH extraction across all attacks + agent run)
+#   0  floor held (LM total ETH did not decrease)
 #   1  floor broken (adversary extracted ETH from LiquidityManager)
 #   2  infra error (Anvil unreachable, bootstrap failed, missing dependency, etc.)
@ -78,16 +73,17 @@ invocation = "CANDIDATE={candidate} CANDIDATE_NAME={candidate_name} OPTIMIZER_PR
 [[steps]]
 id          = "stack-up"
 description = """
-Bootstrap an isolated Anvil fork with the candidate optimizer deployed.
+Bootstrap an isolated Anvil fork with contracts deployed.
 scripts/harb-evaluator/bootstrap-light.sh:
-  - Transpiles {candidate} via push3-transpiler and compiles via forge.
+  - Starts a fresh Anvil instance (or reuses one if already running).
-    Aborts with exit 2 if transpilation or compilation fails.
+  - Deploys KRK, LM, Stake, and OptimizerProxy via DeployLocal.sol.
-  - Deploys KRK, LM, Stake, and OptimizerProxy with the compiled candidate
-    as the active optimizer implementation.
  - Funds LM with 1 000 ETH (as WETH) and calls recenter() to deploy
    liquidity into positions — establishing a realistic baseline.
  - Verifies Anvil responds and all contract addresses are present in
    onchain/deployments-local.json before proceeding.
+Note: the deployed optimizer is always the default from DeployLocal.sol.
+Per-candidate Push3 transpilation is not yet implemented here; see
+notes.candidate_injection.
 """
 [[steps]]
@ -96,19 +92,19 @@ description = """
 Run every existing .jsonl attack file in {attack_dir} through
 onchain/script/backtesting/AttackRunner.s.sol.
 For each file:
-  - Record LM total ETH before and after (forge script LmTotalEth.s.sol —
+  - Record LM total ETH before and after via forge script LmTotalEth.s.sol.
-    exact Uniswap V3 integer math, not a cast-call approximation).
  - Revert to the baseline Anvil snapshot between files so attacks are
    independent.
-  - Emit one result entry per file: strategy name, abstract op pattern,
+  - Emit one result entry: strategy name, abstract op pattern,
    floor held / broken, delta in basis points.
 This phase exhausts the known attack catalogue before the adversarial
-agent is given a turn, and seeds the agent's memory with which strategies
+agent is given a turn, seeding its memory with which strategies are
-are already understood.
+already understood.
 """
 attack_source = "{attack_dir}/*.jsonl"
 forge_script  = "onchain/script/backtesting/AttackRunner.s.sol"
 snapshot_mode = "revert-between-attacks"
+status        = "planned"   # not yet implemented in red-team.sh; tracked for future PR
 [[steps]]
 id          = "run-adversarial-agent"
@ -117,13 +113,12 @@ Spawn the Claude adversarial agent (red-team-program.md prompt) with full
 write access to cast / forge / python3 / jq.
 Goal: make ethPerToken() decrease — i.e. extract ETH from LiquidityManager.
 The agent:
-  1. Reads the attack-suite results to skip known-dead strategies.
+  1. Iterates freely: snapshot → craft novel attack → execute → measure
-  2. Iterates freely: snapshot → craft novel attack → execute → measure
     → revert → repeat.
-  3. Appends each attempted strategy to tmp/red-team-report.txt and
+  2. Appends each attempted strategy to tmp/red-team-report.txt and
     tmp/red-team-stream.jsonl.
-  4. On any confirmed ETH decrease: exports the winning op sequence to
+  3. On any confirmed ETH decrease: exports the winning op sequence to
-     tmp/red-team-attacks.jsonl and continues searching for additional vectors.
+     tmp/red-team-attacks.jsonl and continues searching.
 Runs until CLAUDE_TIMEOUT expires or the agent signals completion.
 """
 timeout_env = "CLAUDE_TIMEOUT"
@ -134,35 +129,44 @@ stream_file = "tmp/red-team-stream.jsonl"
 [[steps]]
 id          = "collect"
 description = """
-Aggregate attack-suite results and agent findings into the evidence record.
+After the agent run, red-team.sh:
-Reads tmp/red-team-report.txt + tmp/red-team-stream.jsonl and computes:
+  1. Reads LM total ETH after (forge script LmTotalEth.s.sol).
-  - floor_held: true if LM total ETH never decreased
+  2. Extracts strategy findings from tmp/red-team-stream.jsonl and appends
-  - eth_extracted: lm_eth_before − lm_eth_after (0 if floor held)
+     them to tmp/red-team-memory.jsonl for cross-run learning.
-  - per-attack entries: strategy, pattern, result (HELD/DECREASED/INCREASED),
+  3. Exports the agent's cast send commands from the stream log to
-    delta_bps, insight
+     tmp/red-team-attacks.jsonl via export-attacks.py.
-Writes evidence/red-team/{date}.json conforming to the schema in
+  4. Replays the exported sequence through AttackRunner.s.sol, writing full
-evidence/README.md §red-team.
+     state snapshots to tmp/red-team-snapshots.jsonl (used for optimizer
+     training; non-fatal if replay produces no output).
+  5. Computes floor_held / floor_broken and writes evidence/red-team/{date}.json
+     conforming to the schema in evidence/README.md ## Schema: red-team/.
 """
-output = "evidence/red-team/{date}.json"
+output            = "evidence/red-team/{date}.json"
-schema = "evidence/README.md"   # see §Schema: red-team/YYYY-MM-DD.json
+schema            = "evidence/README.md"   # see ## Schema: red-team/YYYY-MM-DD.json
+side_output_file  = "tmp/red-team-snapshots.jsonl"   # AttackRunner state snapshots for optimizer training
 [[steps]]
 id          = "export-vectors"
 description = """
+Only runs when the floor is broken (BROKE=true in red-team.sh).
 If tmp/red-team-attacks.jsonl is non-empty, call promote-attacks.sh to open
-a PR with the newly discovered attack vectors.
+a Codeberg PR with the newly discovered attack vectors.
 promote-attacks.sh:
  - Deduplicates by op-type fingerprint against existing files in
    onchain/script/backtesting/attacks/.
  - Auto-classifies the attack type (staking, il-crystallization,
    floor-ratchet, fee-drain, lp-manipulation, floor-attack, …).
  - Creates a git branch, commits the new .jsonl, and opens a Codeberg PR
-    targeting main.
+    targeting main, including the ETH extraction amount in the PR title and body.
  - Exits 0 when no novel patterns remain after deduplication (non-fatal).
 Skipped gracefully if CODEBERG_TOKEN and ~/.netrc are both absent.
+Not called when the floor holds — novel-but-non-exploiting patterns are
+not promoted.
 """
 script = "scripts/harb-evaluator/promote-attacks.sh"
-args   = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile}"
+args   = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile} --eth-extracted <delta_wei> --eth-before <lm_eth_before_wei>"
+# --eth-extracted and --eth-before are computed at runtime by red-team.sh (lm_eth_before − lm_eth_after)
+# and passed directly to promote-attacks.sh — they are not formula inputs.
 [[steps]]
 id          = "stack-down"
@ -182,7 +186,8 @@ to the originating issue.
 Comment includes:
  - Verdict (floor_held / floor_broken).
  - ETH extracted (formatted in ETH) and delta in basis points.
-  - Total attacks tried: structured suite count + agent-discovered count.
+  - Total attacks tried (agent-discovered count; structured suite count
+    added once run-attack-suite is implemented).
  - Link to committed evidence file.
  - If novel vectors were promoted: link to the attack-vector PR.
 On floor_broken: also include the highest-yield attack strategy name and
@ -194,13 +199,15 @@ its abstract op pattern.
 [products.evidence_file]
 path     = "evidence/red-team/{date}.json"
 delivery = "commit to main"
-schema   = "evidence/README.md"   # see §Schema: red-team/YYYY-MM-DD.json
+schema   = "evidence/README.md"   # see ## Schema: red-team/YYYY-MM-DD.json
 [products.attack_vectors]
 path     = "onchain/script/backtesting/attacks/{attack_type}-{candidate_name}.jsonl"
+# {attack_type} is not a formula input — it is computed at runtime by
+# promote-attacks.sh's classifier (staking, il-crystallization, floor-ratchet, …).
 delivery = "PR to main"
 script   = "scripts/harb-evaluator/promote-attacks.sh"
-note     = "Only created when novel (deduplicated) attack vectors are discovered; skipped otherwise."
+note     = "Only created when the floor is broken AND novel (deduplicated) attack vectors are discovered."
 [products.issue_comment]
 delivery   = "post to originating issue"
@ -235,8 +242,19 @@ rediscoveries are silently dropped and the step exits 0.
 """
 candidate_injection = """
-The candidate Push3 program must be transpiled and compiled before the stack
+Push3 candidate injection is not yet implemented: bootstrap-light.sh always
-boots.  Transpilation failures are reported as infra errors (exit 2) so the
+deploys the default optimizer via DeployLocal.sol and does not read the
-orchestrator can retry with a different candidate rather than wasting time on
+CANDIDATE env var.  The candidate_name and optimizer_profile inputs are used
-a broken program.
+only for labelling (evidence records, attack filenames, PR titles); they do not
+affect which optimizer is deployed.
+Wiring CANDIDATE → push3-transpiler → forge compile → bootstrap-light is
+tracked as a follow-up issue.
+"""
+run_attack_suite_gap = """
+The run-attack-suite step (structured loop over attacks/*.jsonl via
+AttackRunner.s.sol with snapshot revert between files) is not yet implemented
+in red-team.sh.  The current script runs only the adversarial Claude agent.
+The step is documented here as a forward spec; implementation is tracked as a
+follow-up issue.
+"""