fix: Formula: run-red-team (adversarial attack + discovery) (#976)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5c4ceaf78d
commit
3564c4ad25
1 changed files with 242 additions and 0 deletions
242
formulas/run-red-team.toml
Normal file
242
formulas/run-red-team.toml
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
# formulas/run-red-team.toml
|
||||
#
|
||||
# Adversarial red-team — spin up isolated stack, run attack suite and adversarial
|
||||
# agent against a Push3 optimizer candidate, commit evidence, export new vectors.
|
||||
#
|
||||
# Type: act. Produces evidence (floor held / broken) AND git artifacts
|
||||
# (new attack vectors via PR to onchain/script/backtesting/attacks/).
|
||||
#
|
||||
# Depends on: #973 (evidence/red-team/ directory structure)
|
||||
# #974 (promote-attacks.sh for attack vector export)
|
||||
|
||||
[formula]
|
||||
id = "run-red-team"
|
||||
name = "Adversarial Red-Team"
|
||||
description = "Spin up isolated stack, run attack suite and adversarial agent against a Push3 candidate, commit evidence, export new attack vectors."
|
||||
type = "act"
|
||||
# "sense" → read-only, produces metrics only
|
||||
# "act" → produces git artifacts (cf. run-evolution, run-red-team)
|
||||
depends_on = [973, 974]
|
||||
|
||||
# ── Inputs ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
[inputs.candidate]
|
||||
type = "string"
|
||||
required = true
|
||||
description = """
|
||||
Repo-relative path to the Push3 candidate to evaluate
|
||||
(e.g. tools/push3-evolution/seeds/optimizer_v3.push3).
|
||||
The candidate is transpiled, compiled, and deployed as the active optimizer
|
||||
before the attack suite runs.
|
||||
"""
|
||||
|
||||
[inputs.candidate_name]
|
||||
type = "string"
|
||||
required = false
|
||||
default = "unknown"
|
||||
description = "Human-readable label used in evidence records and attack filenames."
|
||||
|
||||
[inputs.optimizer_profile]
|
||||
type = "string"
|
||||
required = false
|
||||
default = "push3-default"
|
||||
description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion)."
|
||||
|
||||
[inputs.attack_dir]
|
||||
type = "string"
|
||||
required = false
|
||||
default = "onchain/script/backtesting/attacks"
|
||||
description = "Directory containing existing .jsonl attack patterns to run as the structured attack suite."
|
||||
|
||||
[inputs.claude_timeout]
|
||||
type = "integer"
|
||||
required = false
|
||||
default = 7200
|
||||
description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_TIMEOUT env var)."
|
||||
|
||||
# ── Execution ──────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# red-team.sh owns the full lifecycle:
|
||||
# bootstrap-light → fund LM → snapshot → attack suite → adversarial agent
|
||||
# → collect → promote-attacks → deliver → teardown.
|
||||
#
|
||||
# The orchestrator sets the CANDIDATE / CANDIDATE_NAME / OPTIMIZER_PROFILE /
|
||||
# CLAUDE_TIMEOUT env vars before invoking the script, so the agent and evidence
|
||||
# record name the candidate correctly and the agent run honours the timeout.
|
||||
|
||||
[execution]
|
||||
script = "scripts/harb-evaluator/red-team.sh"
|
||||
invocation = "CANDIDATE={candidate} CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh"
|
||||
|
||||
# Exit codes propagated by red-team.sh:
|
||||
# 0 floor held (no confirmed ETH extraction across all attacks + agent run)
|
||||
# 1 floor broken (adversary extracted ETH from LiquidityManager)
|
||||
# 2 infra error (Anvil unreachable, bootstrap failed, missing dependency, etc.)
|
||||
|
||||
# ── Steps ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
[[steps]]
|
||||
id = "stack-up"
|
||||
description = """
|
||||
Bootstrap an isolated Anvil fork with the candidate optimizer deployed.
|
||||
scripts/harb-evaluator/bootstrap-light.sh:
|
||||
- Transpiles {candidate} via push3-transpiler and compiles via forge.
|
||||
Aborts with exit 2 if transpilation or compilation fails.
|
||||
- Deploys KRK, LM, Stake, and OptimizerProxy with the compiled candidate
|
||||
as the active optimizer implementation.
|
||||
- Funds LM with 1 000 ETH (as WETH) and calls recenter() to deploy
|
||||
liquidity into positions — establishing a realistic baseline.
|
||||
- Verifies Anvil responds and all contract addresses are present in
|
||||
onchain/deployments-local.json before proceeding.
|
||||
"""
|
||||
|
||||
[[steps]]
|
||||
id = "run-attack-suite"
|
||||
description = """
|
||||
Run every existing .jsonl attack file in {attack_dir} through
|
||||
onchain/script/backtesting/AttackRunner.s.sol.
|
||||
For each file:
|
||||
- Record LM total ETH before and after (forge script LmTotalEth.s.sol —
|
||||
exact Uniswap V3 integer math, not a cast-call approximation).
|
||||
- Revert to the baseline Anvil snapshot between files so attacks are
|
||||
independent.
|
||||
- Emit one result entry per file: strategy name, abstract op pattern,
|
||||
floor held / broken, delta in basis points.
|
||||
This phase exhausts the known attack catalogue before the adversarial
|
||||
agent is given a turn, and seeds the agent's memory with which strategies
|
||||
are already understood.
|
||||
"""
|
||||
attack_source = "{attack_dir}/*.jsonl"
|
||||
forge_script = "onchain/script/backtesting/AttackRunner.s.sol"
|
||||
snapshot_mode = "revert-between-attacks"
|
||||
|
||||
[[steps]]
|
||||
id = "run-adversarial-agent"
|
||||
description = """
|
||||
Spawn the Claude adversarial agent (red-team-program.md prompt) with full
|
||||
write access to cast / forge / python3 / jq.
|
||||
Goal: make ethPerToken() decrease — i.e. extract ETH from LiquidityManager.
|
||||
The agent:
|
||||
1. Reads the attack-suite results to skip known-dead strategies.
|
||||
2. Iterates freely: snapshot → craft novel attack → execute → measure
|
||||
→ revert → repeat.
|
||||
3. Appends each attempted strategy to tmp/red-team-report.txt and
|
||||
tmp/red-team-stream.jsonl.
|
||||
4. On any confirmed ETH decrease: exports the winning op sequence to
|
||||
tmp/red-team-attacks.jsonl and continues searching for additional vectors.
|
||||
Runs until CLAUDE_TIMEOUT expires or the agent signals completion.
|
||||
"""
|
||||
timeout_env = "CLAUDE_TIMEOUT"
|
||||
memory_file = "tmp/red-team-memory.jsonl" # cross-run pattern learning
|
||||
report_file = "tmp/red-team-report.txt"
|
||||
stream_file = "tmp/red-team-stream.jsonl"
|
||||
|
||||
[[steps]]
|
||||
id = "collect"
|
||||
description = """
|
||||
Aggregate attack-suite results and agent findings into the evidence record.
|
||||
Reads tmp/red-team-report.txt + tmp/red-team-stream.jsonl and computes:
|
||||
- floor_held: true if LM total ETH never decreased
|
||||
- eth_extracted: lm_eth_before − lm_eth_after (0 if floor held)
|
||||
- per-attack entries: strategy, pattern, result (HELD/DECREASED/INCREASED),
|
||||
delta_bps, insight
|
||||
Writes evidence/red-team/{date}.json conforming to the schema in
|
||||
evidence/README.md (§Schema: red-team/YYYY-MM-DD.json).
|
||||
"""
|
||||
output = "evidence/red-team/{date}.json"
|
||||
schema = "evidence/README.md" # see §Schema: red-team/YYYY-MM-DD.json
|
||||
|
||||
[[steps]]
|
||||
id = "export-vectors"
|
||||
description = """
|
||||
If tmp/red-team-attacks.jsonl is non-empty, call promote-attacks.sh to open
|
||||
a PR with the newly discovered attack vectors.
|
||||
promote-attacks.sh:
|
||||
- Deduplicates by op-type fingerprint against existing files in
|
||||
onchain/script/backtesting/attacks/.
|
||||
- Auto-classifies the attack type (staking, il-crystallization,
|
||||
floor-ratchet, fee-drain, lp-manipulation, floor-attack, …).
|
||||
- Creates a git branch, commits the new .jsonl, and opens a Codeberg PR
|
||||
targeting main.
|
||||
- Exits 0 when no novel patterns remain after deduplication (non-fatal).
|
||||
Skipped gracefully if CODEBERG_TOKEN and ~/.netrc are both absent.
|
||||
"""
|
||||
script = "scripts/harb-evaluator/promote-attacks.sh"
|
||||
args = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile}"
|
||||
|
||||
[[steps]]
|
||||
id = "stack-down"
|
||||
description = """
|
||||
Tear down the Anvil instance started in stack-up.
|
||||
red-team.sh registers cleanup() as a shell trap (EXIT / INT / TERM):
|
||||
- Reverts to the baseline Anvil snapshot.
|
||||
- Kills the Claude sub-process if still running.
|
||||
Always runs — even on infra error — so port 8545 is not left occupied.
|
||||
"""
|
||||
|
||||
[[steps]]
|
||||
id = "deliver"
|
||||
description = """
|
||||
Commit evidence/red-team/{date}.json to main and post a summary comment
|
||||
to the originating issue.
|
||||
Comment includes:
|
||||
- Verdict (floor_held / floor_broken).
|
||||
- ETH extracted (formatted in ETH) and delta in basis points.
|
||||
- Total attacks tried: structured suite count + agent-discovered count.
|
||||
- Link to committed evidence file.
|
||||
- If novel vectors were promoted: link to the attack-vector PR.
|
||||
On floor_broken: also include the highest-yield attack strategy name and
|
||||
its abstract op pattern.
|
||||
"""
|
||||
|
||||
# ── Products ───────────────────────────────────────────────────────────────────
|
||||
|
||||
[products.evidence_file]
|
||||
path = "evidence/red-team/{date}.json"
|
||||
delivery = "commit to main"
|
||||
schema = "evidence/README.md" # see §Schema: red-team/YYYY-MM-DD.json
|
||||
|
||||
[products.attack_vectors]
|
||||
path = "onchain/script/backtesting/attacks/{attack_type}-{candidate_name}.jsonl"
|
||||
delivery = "PR to main"
|
||||
script = "scripts/harb-evaluator/promote-attacks.sh"
|
||||
note = "Only created when novel (deduplicated) attack vectors are discovered; skipped otherwise."
|
||||
|
||||
[products.issue_comment]
|
||||
delivery = "post to originating issue"
|
||||
content = "verdict (floor_held/floor_broken), ETH extracted, attacks tried, link to evidence file; if vectors found: link to attack-vector PR"
|
||||
on_failure = "include highest-yield attack name and op pattern; full agent transcript available in tmp/red-team-stream.jsonl"
|
||||
|
||||
# ── Resources ──────────────────────────────────────────────────────────────────
|
||||
|
||||
[resources]
|
||||
profile = "heavy"
|
||||
compute = "local — Anvil fork + revm, no Docker required"
|
||||
rpc = "Anvil (bootstrap-light, default port 8545)"
|
||||
agent = "Claude (claude CLI, CLAUDE_TIMEOUT seconds)"
|
||||
concurrency = "exclusive — shares Anvil port 8545 with run-holdout and other heavy formulas"
|
||||
|
||||
# ── Notes ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
[notes]
|
||||
floor_metric = """
|
||||
The primary safety metric is LM total ETH: free ETH + free WETH + ETH locked
|
||||
across all three Uniswap V3 positions (floor, anchor, discovery).
|
||||
Measured via forge script LmTotalEth.s.sol using exact Uniswap V3 integer
|
||||
math (LiquidityAmounts + TickMath). A decrease in total ETH = floor broken.
|
||||
"""
|
||||
|
||||
attack_dedup = """
|
||||
promote-attacks.sh fingerprints each candidate attack by its abstract op
|
||||
sequence (e.g. wrap → buy → stake → recenter_multi → sell) and compares
|
||||
against all existing files in onchain/script/backtesting/attacks/.
|
||||
Only genuinely novel sequences are included in the PR — duplicate
|
||||
rediscoveries are silently dropped and the step exits 0.
|
||||
"""
|
||||
|
||||
candidate_injection = """
|
||||
The candidate Push3 program must be transpiled and compiled before the stack
|
||||
boots. Transpilation or compilation failures are reported as infra errors (exit 2) so the
|
||||
orchestrator can retry with a different candidate rather than wasting time on
|
||||
a broken program.
|
||||
"""
|
||||
Loading…
Add table
Add a link
Reference in a new issue