From 52ba6b2f383d0a4766189141a75cd9bc7bd8da78 Mon Sep 17 00:00:00 2001
From: johba
Date: Sun, 22 Mar 2026 15:30:46 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20run-attack-suite=20is=20spec-only=20?=
 =?UTF-8?q?=E2=80=94=20no=20implementation=20in=20red-team.sh=20(#1000)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement the attack catalogue loop (step 5a) in red-team.sh that was
previously a forward spec in the formula. The loop replays every *.jsonl
attack file through AttackRunner.s.sol with snapshot revert between files,
records LM total ETH before/after each attack, and injects results into the
adversarial agent prompt so it knows which strategies are already catalogued.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 formulas/run-red-team.toml                 | 16 ++---
 scripts/harb-evaluator/red-team-program.md |  2 +
 scripts/harb-evaluator/red-team.sh         | 92 +++++++++++++++++++++-
 3 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/formulas/run-red-team.toml b/formulas/run-red-team.toml
index c99b81e..8eaa2f8 100644
--- a/formulas/run-red-team.toml
+++ b/formulas/run-red-team.toml
@@ -38,9 +38,8 @@ required = false
 default = "onchain/script/backtesting/attacks"
 description = """
 Directory containing existing .jsonl attack patterns for the structured
-attack suite. Not yet forwarded to red-team.sh — see run-attack-suite step.
+attack suite. Forwarded to red-team.sh as ATTACK_DIR.
 """
-status = "planned" # consumed only when run-attack-suite is implemented
 
 [inputs.claude_timeout]
 type = "integer"
@@ -104,7 +103,6 @@ already understood.
 attack_source = "{attack_dir}/*.jsonl"
 forge_script = "onchain/script/backtesting/AttackRunner.s.sol"
 snapshot_mode = "revert-between-attacks"
-status = "planned" # not yet implemented in red-team.sh; tracked for future PR
 
 [[steps]]
 id = "run-adversarial-agent"
@@ -186,8 +184,7 @@ to the originating issue. Comment includes:
 
   - Verdict (floor_held / floor_broken).
   - ETH extracted (formatted in ETH) and delta in basis points.
-  - Total attacks tried (agent-discovered count; structured suite count
-    added once run-attack-suite is implemented).
+  - Total attacks tried (agent-discovered count + structured suite count).
   - Link to committed evidence file.
   - If novel vectors were promoted: link to the attack-vector PR.
 On floor_broken: also include the highest-yield attack strategy name and
@@ -253,9 +250,8 @@ records, attack filenames, PR titles).
 """
 
 run_attack_suite_gap = """
-The run-attack-suite step (structured loop over attacks/*.jsonl via
-AttackRunner.s.sol with snapshot revert between files) is not yet implemented
-in red-team.sh. The current script runs only the adversarial Claude agent.
-The step is documented here as a forward spec; implementation is tracked as a
-follow-up issue.
+The run-attack-suite step is implemented in red-team.sh (step 5a). It loops
+through every *.jsonl file in the attack directory, replays each through
+AttackRunner.s.sol, records LM total ETH before/after with snapshot revert
+between files, and injects results into the agent prompt.
 """
diff --git a/scripts/harb-evaluator/red-team-program.md b/scripts/harb-evaluator/red-team-program.md
index ba2ef20..4fc91b0 100644
--- a/scripts/harb-evaluator/red-team-program.md
+++ b/scripts/harb-evaluator/red-team-program.md
@@ -325,6 +325,8 @@ SNAP=$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://localh
 
 ---
 
+{{ATTACK_SUITE_RESULTS}}
+
 {{CROSS_CANDIDATE_SECTION}}
 
 {{MEMORY_SECTION}}
diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh
index 55097c5..673879a 100755
--- a/scripts/harb-evaluator/red-team.sh
+++ b/scripts/harb-evaluator/red-team.sh
@@ -454,6 +454,92 @@ log "Reading floor before agent run..."
 LM_ETH_BEFORE=$(compute_lm_total_eth)
 log " lm_eth_before = $LM_ETH_BEFORE wei"
 
+# ── 5a. Run attack catalogue (structured suite) ──────────────────────────────
+# Loop through every existing .jsonl attack file in the attacks directory,
+# replay each through AttackRunner.s.sol, record LM total ETH before/after,
+# and revert to the baseline snapshot between files so attacks are independent.
+ATTACK_DIR="${ATTACK_DIR:-$REPO_ROOT/onchain/script/backtesting/attacks}"
+# The formula default is a repo-relative path; anchor it at REPO_ROOT because
+# forge runs from $REPO_ROOT/onchain, where a relative ATTACK_FILE would not resolve.
+[[ "$ATTACK_DIR" == /* ]] || ATTACK_DIR="$REPO_ROOT/$ATTACK_DIR"
+ATTACK_SUITE_RESULTS=""
+ATTACK_SUITE_COUNT=0
+
+if [[ -d "$ATTACK_DIR" ]]; then
+  mapfile -t ATTACK_FILES < <(find "$ATTACK_DIR" -maxdepth 1 -name '*.jsonl' -type f | sort)
+  if [[ ${#ATTACK_FILES[@]} -gt 0 ]]; then
+    log "Running attack catalogue (${#ATTACK_FILES[@]} files in $ATTACK_DIR)..."
+    ATTACK_SUITE_RESULTS="## Attack Catalogue Results (pre-run structured suite)
+
+These attacks were replayed from the known catalogue before your session.
+Do NOT repeat these strategies. Focus on novel approaches instead.
+
+"
+    for attack_file in "${ATTACK_FILES[@]}"; do
+      attack_name=$(basename "$attack_file" .jsonl)
+      log " Running attack: $attack_name ..."
+
+      # Record LM ETH before this attack
+      suite_eth_before=$(compute_lm_total_eth)
+
+      # Run AttackRunner; capture its exit status without toggling errexit
+      suite_exit=0
+      suite_output=$(cd "$REPO_ROOT/onchain" && \
+        ATTACK_FILE="$attack_file" \
+        DEPLOYMENTS_FILE="deployments-local.json" \
+        SWAP_ROUTER="$SWAP_ROUTER" \
+        NPM_ADDR="$NPM" \
+        "$FORGE" script script/backtesting/AttackRunner.s.sol \
+        --rpc-url "$RPC_URL" --broadcast 2>&1) || suite_exit=$?
+
+      # Record LM ETH after this attack
+      if [[ $suite_exit -eq 0 ]]; then
+        suite_eth_after=$(compute_lm_total_eth)
+        # Wei totals may not fit bash's 64-bit integers (2^63 wei is ~9.2 ETH), so
+        # the maths stays in Python; values travel as argv, not as program text.
+        suite_summary=$(python3 -c '
+import sys
+b, a = int(sys.argv[1]), int(sys.argv[2])
+print(round((a - b) * 10000 / b) if b else 0, "FLOOR_BROKEN" if a < b else "FLOOR_HELD")
+' "$suite_eth_before" "$suite_eth_after")
+        suite_delta_bps=${suite_summary% *}
+        suite_verdict=${suite_summary#* }
+        log " $attack_name: $suite_verdict (${suite_delta_bps} bps)"
+        ATTACK_SUITE_RESULTS+="- **$attack_name**: $suite_verdict (delta: ${suite_delta_bps} bps, before: $suite_eth_before, after: $suite_eth_after)
+"
+      else
+        log " $attack_name: REPLAY_ERROR (exit $suite_exit)"
+        # Keep the tail of the forge output so a failed replay can be diagnosed
+        log " forge output (last 20 lines): $(printf '%s\n' "$suite_output" | tail -n 20)"
+        ATTACK_SUITE_RESULTS+="- **$attack_name**: REPLAY_ERROR (forge exit $suite_exit)
+"
+      fi
+
+      ATTACK_SUITE_COUNT=$((ATTACK_SUITE_COUNT + 1))
+
+      # Revert to baseline snapshot so next attack starts from clean state.
+      # A failed revert leaves the chain dirty for every later measurement
+      # (including the agent run), so abort instead of carrying on silently.
+      revert_result=$("$CAST" rpc anvil_revert "$SNAP" --rpc-url "$RPC_URL" 2>/dev/null) || revert_result="false"
+      if [[ "$revert_result" == *false* ]]; then
+        log "ERROR: anvil_revert to snapshot $SNAP failed after $attack_name — aborting"
+        exit 1
+      fi
+      # Re-take snapshot (anvil_revert is one-shot)
+      SNAP=$("$CAST" rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
+      if [[ -z "$SNAP" ]]; then
+        log "ERROR: could not re-take baseline snapshot after $attack_name — aborting"
+        exit 1
+      fi
+    done
+    log "Attack catalogue complete: $ATTACK_SUITE_COUNT files processed"
+  else
+    log "No .jsonl files found in $ATTACK_DIR — skipping attack catalogue"
+  fi
+else
+  log "Attack directory not found ($ATTACK_DIR) — skipping attack catalogue"
+fi
+
 # ── 6. Build agent prompt ──────────────────────────────────────────────────────
 
 # ── 6a. Read Solidity source files (reflect the current candidate after inject) ─
@@ -642,6 +728,7 @@ PROMPT=${PROMPT//\{\{SOL_VWAP\}\}/$SOL_VWAP}
 PROMPT=${PROMPT//\{\{SOL_PRICE_ORACLE\}\}/$SOL_PRICE_ORACLE}
 PROMPT=${PROMPT//\{\{SOL_KRAIKEN\}\}/$SOL_KRAIKEN}
 PROMPT=${PROMPT//\{\{SOL_STAKE\}\}/$SOL_STAKE}
+PROMPT=${PROMPT//\{\{ATTACK_SUITE_RESULTS\}\}/$ATTACK_SUITE_RESULTS}
 PROMPT=${PROMPT//\{\{CROSS_CANDIDATE_SECTION\}\}/$CROSS_CANDIDATE_SECTION}
 PROMPT=${PROMPT//\{\{MEMORY_SECTION\}\}/$MEMORY_SECTION}
 
@@ -773,7 +860,8 @@ fi
 
 python3 - "$EVIDENCE_FILE" "$REPO_ROOT/tmp/red-team-memory.jsonl" \
   "$EVIDENCE_DATE" "$CANDIDATE_NAME" "$CANDIDATE_COMMIT" "$OPTIMIZER_PROFILE" \
-  "$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" <<'PYEOF'
+  "$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" \
+  "$ATTACK_SUITE_COUNT" <<'PYEOF'
 import json, sys, os
 
 evidence_file = sys.argv[1]
@@ -787,6 +875,7 @@ lm_eth_after = int(sys.argv[8]) if sys.argv[8].isdigit() else 0
 eth_extracted = int(sys.argv[9]) if sys.argv[9].isdigit() else 0
 floor_held = sys.argv[10].lower() == "true"
 verdict = sys.argv[11]
+attack_suite_count = int(sys.argv[12]) if len(sys.argv) > 12 and sys.argv[12].isdigit() else 0
 
 # Build attacks list from memory entries for this candidate
 attacks = []
@@ -821,6 +910,7 @@ evidence = {
     "floor_held": floor_held,
     "verdict": verdict,
     "attacks": attacks,
+    "attack_suite_count": attack_suite_count,
 }
 with open(evidence_file, "w") as f:
     json.dump(evidence, f, indent=2)