# formulas/run-red-team.toml
#
# Adversarial red-team — spin up isolated stack, run adversarial agent against
# the active optimizer, commit evidence, export newly discovered attack vectors.
#
# Type: act. Produces evidence (floor held / broken) AND git artifacts
# (new attack vectors via PR to onchain/script/backtesting/attacks/).
#
# Depends on: #973 (evidence/red-team/ directory structure)
#             #974 (promote-attacks.sh for attack vector export)
|
|
|
|
|
|
|
|
|
|
|
|
[formula]
id = "run-red-team"
name = "Adversarial Red-Team"
description = "Spin up isolated stack, run adversarial agent against the active optimizer, commit evidence, export new attack vectors."
# Formula type:
#   "sense" → read-only, produces metrics only
#   "act"   → produces git artifacts (cf. run-evolution, run-red-team)
type = "act"
# Issue numbers this formula depends on: #973 (evidence/red-team/ directory
# structure) and #974 (promote-attacks.sh for attack vector export).
depends_on = [973, 974]
|
|
|
|
|
|
|
|
|
|
|
|
# ── Inputs ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
# Label only: per notes.candidate_injection this input is metadata (evidence
# records, attack filenames, PR titles); the deployed candidate is selected
# separately through the CANDIDATE env var.
[inputs.candidate_name]
type = "string"
required = false
default = "unknown"
description = "Human-readable label used in evidence records and attack filenames (passed as CANDIDATE_NAME)."
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
# Label only: per notes.candidate_injection this input is metadata and does not
# by itself change which optimizer is deployed.
[inputs.optimizer_profile]
type = "string"
required = false
default = "push3-default"
description = "Named optimizer profile / variant (e.g. push3-default, evo_run004_champion) passed as OPTIMIZER_PROFILE."
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
# Declared ahead of use: the [execution] invocation does not forward this value.
[inputs.attack_dir]
type = "string"
required = false
default = "onchain/script/backtesting/attacks"
description = """
Directory containing existing .jsonl attack patterns for the structured
attack suite. Not yet forwarded to red-team.sh — see run-attack-suite step.
"""
status = "planned" # consumed only when run-attack-suite is implemented
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
[inputs.claude_timeout]
type = "integer"
required = false
default = 7200 # seconds (2 hours)
description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_TIMEOUT env var)."
|
|
|
|
|
|
|
|
|
|
|
|
# ── Execution ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
#
# red-team.sh owns the full lifecycle:
#   bootstrap-light → fund LM → snapshot → adversarial agent → collect
#   → promote-attacks (if floor broken) → deliver → teardown.
#
# CANDIDATE_NAME and OPTIMIZER_PROFILE label the evidence record and attack
# filenames. To deploy a specific Push3 candidate, set the CANDIDATE env var
# (path to a .push3 file) — bootstrap-light.sh will transpile, recompile, and
# upgrade the Optimizer proxy to OptimizerV3 (see notes.candidate_injection).
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
[execution]
script = "scripts/harb-evaluator/red-team.sh"
# Maps the candidate_name, optimizer_profile and claude_timeout inputs onto the
# env vars read by red-team.sh. The attack_dir input is not forwarded.
# NOTE(review): the {placeholders} are substituted unquoted — if the runner
# hands this line to a shell, values containing whitespace or shell
# metacharacters would break the command; confirm how the runner escapes them.
invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh"

# Exit codes propagated by red-team.sh:
#   0   floor held    (LM total ETH did not decrease)
#   1   floor broken  (adversary extracted ETH from LiquidityManager)
#   2   infra error   (Anvil unreachable, bootstrap failed, missing dependency, etc.)
|
|
|
|
|
|
|
|
|
|
|
|
# ── Steps ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
# Step 1 — bring up the local chain and deploy the contracts under test.
[[steps]]
id = "stack-up"
description = """
Bootstrap an isolated Anvil fork with contracts deployed.
scripts/harb-evaluator/bootstrap-light.sh:
- Starts a fresh Anvil instance (or reuses one if already running).
- Deploys KRK, LM, Stake, and OptimizerProxy via DeployLocal.sol.
- Funds LM with 1 000 ETH (as WETH) and calls recenter() to deploy
liquidity into positions — establishing a realistic baseline.
- Verifies Anvil responds and all contract addresses are present in
onchain/deployments-local.json before proceeding.
When the CANDIDATE env var is set (path to a .push3 file), bootstrap-light.sh
transpiles the candidate and upgrades the Optimizer proxy to OptimizerV3.
See notes.candidate_injection for details.
"""
|
|
|
|
|
|
|
|
|
|
|
|
# Forward spec only: this step is marked "planned" and, per
# notes.run_attack_suite_gap, is not yet executed by red-team.sh.
[[steps]]
id = "run-attack-suite"
description = """
Run every existing .jsonl attack file in {attack_dir} through
onchain/script/backtesting/AttackRunner.s.sol.
For each file:
- Record LM total ETH before and after via forge script LmTotalEth.s.sol.
- Revert to the baseline Anvil snapshot between files so attacks are
independent.
- Emit one result entry: strategy name, abstract op pattern,
floor held / broken, delta in basis points.
This phase exhausts the known attack catalogue before the adversarial
agent is given a turn, seeding its memory with which strategies are
already understood.
"""
attack_source = "{attack_dir}/*.jsonl"
forge_script = "onchain/script/backtesting/AttackRunner.s.sol"
snapshot_mode = "revert-between-attacks"
status = "planned" # not yet implemented in red-team.sh; tracked for future PR
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
# The free-form agent phase, bounded by the CLAUDE_TIMEOUT env var.
# NOTE(review): the description names ethPerToken() as the agent's target,
# while notes.floor_metric and the exit-code comments define the verdict by
# LM total ETH — confirm which metric is current before relying on this text.
[[steps]]
id = "run-adversarial-agent"
description = """
Spawn the Claude adversarial agent (red-team-program.md prompt) with full
write access to cast / forge / python3 / jq.
Goal: make ethPerToken() decrease — i.e. extract ETH from LiquidityManager.
The agent:
1. Iterates freely: snapshot → craft novel attack → execute → measure
→ revert → repeat.
2. Appends each attempted strategy to tmp/red-team-report.txt and
tmp/red-team-stream.jsonl.
3. On any confirmed ETH decrease: exports the winning op sequence to
tmp/red-team-attacks.jsonl and continues searching.
Runs until CLAUDE_TIMEOUT expires or the agent signals completion.
"""
timeout_env = "CLAUDE_TIMEOUT"
memory_file = "tmp/red-team-memory.jsonl" # cross-run pattern learning
report_file = "tmp/red-team-report.txt"
stream_file = "tmp/red-team-stream.jsonl"
|
|
|
|
|
|
|
|
|
|
|
|
# Post-run bookkeeping: measure the result, persist learnings, write evidence.
[[steps]]
id = "collect"
description = """
After the agent run, red-team.sh:
1. Reads LM total ETH after (forge script LmTotalEth.s.sol).
2. Extracts strategy findings from tmp/red-team-stream.jsonl and appends
them to tmp/red-team-memory.jsonl for cross-run learning.
3. Exports the agent's cast send commands from the stream log to
tmp/red-team-attacks.jsonl via export-attacks.py.
4. Replays the exported sequence through AttackRunner.s.sol, writing full
state snapshots to tmp/red-team-snapshots.jsonl (used for optimizer
training; non-fatal if replay produces no output).
5. Computes floor_held / floor_broken and writes evidence/red-team/{date}.json
conforming to the schema in evidence/README.md ## Schema: red-team/.
"""
output = "evidence/red-team/{date}.json"
schema = "evidence/README.md" # see ## Schema: red-team/YYYY-MM-DD.json
side_output_file = "tmp/red-team-snapshots.jsonl" # AttackRunner state snapshots for optimizer training
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
# Conditional step: promotes attack vectors only after a floor break.
[[steps]]
id = "export-vectors"
description = """
Only runs when the floor is broken (BROKE=true in red-team.sh).
If tmp/red-team-attacks.jsonl is non-empty, call promote-attacks.sh to open
a Codeberg PR with the newly discovered attack vectors.
promote-attacks.sh:
- Deduplicates by op-type fingerprint against existing files in
onchain/script/backtesting/attacks/.
- Auto-classifies the attack type (staking, il-crystallization,
floor-ratchet, fee-drain, lp-manipulation, floor-attack, …).
- Creates a git branch, commits the new .jsonl, and opens a Codeberg PR
targeting main, including the ETH extraction amount in the PR title and body.
- Exits 0 when no novel patterns remain after deduplication (non-fatal).
Skipped gracefully if CODEBERG_TOKEN and ~/.netrc are both absent.
Not called when the floor holds — novel-but-non-exploiting patterns are
not promoted.
"""
script = "scripts/harb-evaluator/promote-attacks.sh"
# <delta_wei> and <lm_eth_before_wei> are runtime placeholders, not formula
# inputs: red-team.sh computes the extracted amount (lm_eth_before −
# lm_eth_after) and passes both values directly to promote-attacks.sh.
args = "--attacks tmp/red-team-attacks.jsonl --candidate {candidate_name} --profile {optimizer_profile} --eth-extracted <delta_wei> --eth-before <lm_eth_before_wei>"
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
# Teardown is implemented as a shell trap rather than an explicit call.
# NOTE(review): this step is listed before `deliver`, whereas the lifecycle
# comment in the Execution section ends with "deliver → teardown" — confirm
# whether array order here is meant to reflect execution order.
[[steps]]
id = "stack-down"
description = """
Tear down the Anvil instance started in stack-up.
red-team.sh registers cleanup() as a shell trap (EXIT / INT / TERM):
- Reverts to the baseline Anvil snapshot.
- Kills the Claude sub-process if still running.
Always runs — even on infra error — so port 8545 is not left occupied.
"""
|
|
|
|
|
|
|
|
|
|
|
|
# Publishes the run's results: evidence commit plus an issue comment.
[[steps]]
id = "deliver"
description = """
Commit evidence/red-team/{date}.json to main and post a summary comment
to the originating issue.
Comment includes:
- Verdict (floor_held / floor_broken).
- ETH extracted (formatted in ETH) and delta in basis points.
- Total attacks tried (agent-discovered count; structured suite count
added once run-attack-suite is implemented).
- Link to committed evidence file.
- If novel vectors were promoted: link to the attack-vector PR.
On floor_broken: also include the highest-yield attack strategy name and
its abstract op pattern.
"""
|
|
|
|
|
|
|
|
|
|
|
|
# ── Products ───────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
# The evidence record written by the collect step (same path and schema).
[products.evidence_file]
path = "evidence/red-team/{date}.json"
delivery = "commit to main"
schema = "evidence/README.md" # see ## Schema: red-team/YYYY-MM-DD.json
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
[products.attack_vectors]
# {attack_type} is not a formula input — it is computed at runtime by
# promote-attacks.sh's classifier (staking, il-crystallization, floor-ratchet, …).
path = "onchain/script/backtesting/attacks/{attack_type}-{candidate_name}.jsonl"
delivery = "PR to main"
script = "scripts/harb-evaluator/promote-attacks.sh"
note = "Only created when the floor is broken AND novel (deduplicated) attack vectors are discovered."
|
2026-03-19 10:16:38 +00:00
|
|
|
|
|
|
|
|
|
|
[products.issue_comment]
delivery = "post to originating issue"
content = "verdict (floor_held/floor_broken), ETH extracted, attacks tried, link to evidence file; if vectors found: link to attack-vector PR"
# NOTE(review): "failure" here appears to mean the floor_broken verdict — the
# deliver step lists the same extras under "On floor_broken" — confirm.
on_failure = "include highest-yield attack name and op pattern; full agent transcript available in tmp/red-team-stream.jsonl"
|
|
|
|
|
|
|
|
|
|
|
|
# ── Resources ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
# Descriptive resource requirements; all values are free-form strings.
[resources]
profile = "heavy"
compute = "local — Anvil fork + revm, no Docker required"
rpc = "Anvil (bootstrap-light, default port 8545)"
agent = "Claude (claude CLI, CLAUDE_TIMEOUT seconds)"
# Must not run alongside other formulas that bind the same Anvil port.
concurrency = "exclusive — shares Anvil port 8545 with run-holdout and other heavy formulas"
|
|
|
|
|
|
|
|
|
|
|
|
# ── Notes ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
# Background notes referenced from the step descriptions and comments above.
[notes]
# How the held / broken verdict is measured.
floor_metric = """
The primary safety metric is LM total ETH: free ETH + free WETH + ETH locked
across all three Uniswap V3 positions (floor, anchor, discovery).
Measured via forge script LmTotalEth.s.sol using exact Uniswap V3 integer
math (LiquidityAmounts + TickMath). A decrease in total ETH = floor broken.
"""

# Why a floor-breaking run may still produce no attack-vector PR.
attack_dedup = """
promote-attacks.sh fingerprints each candidate attack by its abstract op
sequence (e.g. wrap → buy → stake → recenter_multi → sell) and compares
against all existing files in onchain/script/backtesting/attacks/.
Only genuinely novel sequences are included in the PR — duplicate
rediscoveries are silently dropped and the step exits 0.
"""

# How a specific Push3 candidate is put under test (CANDIDATE env var).
candidate_injection = """
Push3 candidate injection is supported via the CANDIDATE env var in
bootstrap-light.sh. When CANDIDATE points to a .push3 file the script:
1. Invokes push3-transpiler to regenerate OptimizerV3Push3.sol.
2. Extracts the function body into OptimizerV3Push3Lib.sol (shared library).
3. Deploys contracts normally via DeployLocal.sol (Optimizer v1 behind UUPS proxy).
4. Deploys a fresh OptimizerV3 implementation and upgrades the proxy via upgradeTo().
The candidate_name and optimizer_profile inputs remain metadata-only (evidence
records, attack filenames, PR titles).
"""

# Known gap between this spec and the current red-team.sh.
run_attack_suite_gap = """
The run-attack-suite step (structured loop over attacks/*.jsonl via
AttackRunner.s.sol with snapshot revert between files) is not yet implemented
in red-team.sh. The current script runs only the adversarial Claude agent.
The step is documented here as a forward spec; implementation is tracked as a
follow-up issue.
"""
|