fix: run-attack-suite is spec-only — no implementation in red-team.sh (#1000)
Implement the attack catalogue loop (step 5a) in red-team.sh, which previously existed only as a forward spec in the formula. The loop replays every *.jsonl attack file through AttackRunner.s.sol, reverting to a snapshot between files; records LM total ETH before and after each attack; and injects the results into the adversarial agent prompt so it knows which strategies are already catalogued.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
311b8192f6
commit
52ba6b2f38
3 changed files with 86 additions and 11 deletions
|
|
@ -325,6 +325,8 @@ SNAP=$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://localh
|
|||
|
||||
---
|
||||
|
||||
{{ATTACK_SUITE_RESULTS}}
|
||||
|
||||
{{CROSS_CANDIDATE_SECTION}}
|
||||
|
||||
{{MEMORY_SECTION}}
|
||||
|
|
|
|||
|
|
@ -454,6 +454,79 @@ log "Reading floor before agent run..."
|
|||
LM_ETH_BEFORE=$(compute_lm_total_eth)
|
||||
log " lm_eth_before = $LM_ETH_BEFORE wei"
|
||||
|
||||
# ── 5a. Run attack catalogue (structured suite) ──────────────────────────────
|
||||
# Loop through every existing .jsonl attack file in the attacks directory,
|
||||
# replay each through AttackRunner.s.sol, record LM total ETH before/after,
|
||||
# and revert to the baseline snapshot between files so attacks are independent.
|
||||
ATTACK_DIR="${ATTACK_DIR:-$REPO_ROOT/onchain/script/backtesting/attacks}"
|
||||
ATTACK_SUITE_RESULTS=""
|
||||
ATTACK_SUITE_COUNT=0
|
||||
|
||||
if [[ -d "$ATTACK_DIR" ]]; then
|
||||
mapfile -t ATTACK_FILES < <(find "$ATTACK_DIR" -maxdepth 1 -name '*.jsonl' -type f | sort)
|
||||
if [[ ${#ATTACK_FILES[@]} -gt 0 ]]; then
|
||||
log "Running attack catalogue (${#ATTACK_FILES[@]} files in $ATTACK_DIR)..."
|
||||
ATTACK_SUITE_RESULTS="## Attack Catalogue Results (pre-run structured suite)
|
||||
|
||||
These attacks were replayed from the known catalogue before your session.
|
||||
Do NOT repeat these strategies. Focus on novel approaches instead.
|
||||
|
||||
"
|
||||
for attack_file in "${ATTACK_FILES[@]}"; do
|
||||
attack_name=$(basename "$attack_file" .jsonl)
|
||||
log " Running attack: $attack_name ..."
|
||||
|
||||
# Record LM ETH before this attack
|
||||
suite_eth_before=$(compute_lm_total_eth)
|
||||
|
||||
# Run AttackRunner
|
||||
set +e
|
||||
suite_output=$(cd "$REPO_ROOT/onchain" && \
|
||||
ATTACK_FILE="$attack_file" \
|
||||
DEPLOYMENTS_FILE="deployments-local.json" \
|
||||
SWAP_ROUTER="$SWAP_ROUTER" \
|
||||
NPM_ADDR="$NPM" \
|
||||
"$FORGE" script script/backtesting/AttackRunner.s.sol \
|
||||
--rpc-url "$RPC_URL" --broadcast 2>&1)
|
||||
suite_exit=$?
|
||||
set -e
|
||||
|
||||
# Record LM ETH after this attack
|
||||
if [[ $suite_exit -eq 0 ]]; then
|
||||
suite_eth_after=$(compute_lm_total_eth)
|
||||
suite_delta_bps=$(python3 -c "
|
||||
b=int('$suite_eth_before'); a=int('$suite_eth_after')
|
||||
print(round((a - b) * 10000 / b) if b else 0)
|
||||
")
|
||||
# The floor counts as broken when LM total ETH decreased across the attack.
# Values are handed to Python as argv instead of being spliced into its source,
# so a malformed value can only fail int() and can never be executed as code.
# NOTE(review): python3 is presumably used because wei amounts can exceed
# bash's native integer range — confirm.
if python3 -c 'import sys; sys.exit(0 if int(sys.argv[1]) < int(sys.argv[2]) else 1)' "$suite_eth_after" "$suite_eth_before"; then
|
||||
suite_verdict="FLOOR_BROKEN"
|
||||
else
|
||||
suite_verdict="FLOOR_HELD"
|
||||
fi
|
||||
log " $attack_name: $suite_verdict (${suite_delta_bps} bps)"
|
||||
ATTACK_SUITE_RESULTS+="- **$attack_name**: $suite_verdict (delta: ${suite_delta_bps} bps, before: $suite_eth_before, after: $suite_eth_after)
|
||||
"
|
||||
else
|
||||
log " $attack_name: REPLAY_ERROR (exit $suite_exit)"
# Surface the tail of the captured forge output so a failed replay can be
# diagnosed from the log; skipped when forge printed nothing.
if [[ -n "$suite_output" ]]; then
  while IFS= read -r suite_line; do
    log "     forge: $suite_line"
  done < <(printf '%s\n' "$suite_output" | tail -n 20)
fi
|
||||
ATTACK_SUITE_RESULTS+="- **$attack_name**: REPLAY_ERROR (forge exit $suite_exit)
|
||||
"
|
||||
fi
|
||||
|
||||
ATTACK_SUITE_COUNT=$((ATTACK_SUITE_COUNT + 1))
|
||||
|
||||
# Revert to baseline snapshot so next attack starts from clean state
|
||||
# Revert to the baseline snapshot. This stays best-effort (never aborts the
# run), but a failure is now logged: a missed revert leaves this attack's state
# changes in place, so the next attack would not start from the baseline.
# NOTE(review): assumes `cast rpc` prints the JSON result `true` when the
# revert succeeded — confirm against the installed Foundry version.
suite_revert_result=$("$CAST" rpc anvil_revert "$SNAP" --rpc-url "$RPC_URL" 2>/dev/null) || suite_revert_result=""
if [[ "$suite_revert_result" != "true" ]]; then
  log " WARNING: anvil_revert to snapshot $SNAP failed after $attack_name; later results may start from a dirty state"
fi
|
||||
# Re-take snapshot (anvil_revert is one-shot)
|
||||
SNAP=$("$CAST" rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
|
||||
done
|
||||
log "Attack catalogue complete: $ATTACK_SUITE_COUNT files processed"
|
||||
else
|
||||
log "No .jsonl files found in $ATTACK_DIR — skipping attack catalogue"
|
||||
fi
|
||||
else
|
||||
log "Attack directory not found ($ATTACK_DIR) — skipping attack catalogue"
|
||||
fi
|
||||
|
||||
# ── 6. Build agent prompt ──────────────────────────────────────────────────────
|
||||
|
||||
# ── 6a. Read Solidity source files (reflect the current candidate after inject) ─
|
||||
|
|
@ -642,6 +715,7 @@ PROMPT=${PROMPT//\{\{SOL_VWAP\}\}/$SOL_VWAP}
|
|||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{SOL_PRICE_ORACLE\}\}/"$SOL_PRICE_ORACLE"}
|
||||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{SOL_KRAIKEN\}\}/"$SOL_KRAIKEN"}
|
||||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{SOL_STAKE\}\}/"$SOL_STAKE"}
|
||||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{ATTACK_SUITE_RESULTS\}\}/"$ATTACK_SUITE_RESULTS"}
|
||||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{CROSS_CANDIDATE_SECTION\}\}/"$CROSS_CANDIDATE_SECTION"}
|
||||
# Replacement is quoted so bash >= 5.2 (patsub_replacement) does not expand '&' in the value to the matched placeholder.
PROMPT=${PROMPT//\{\{MEMORY_SECTION\}\}/"$MEMORY_SECTION"}
|
||||
|
||||
|
|
@ -773,7 +847,8 @@ fi
|
|||
|
||||
python3 - "$EVIDENCE_FILE" "$REPO_ROOT/tmp/red-team-memory.jsonl" \
|
||||
"$EVIDENCE_DATE" "$CANDIDATE_NAME" "$CANDIDATE_COMMIT" "$OPTIMIZER_PROFILE" \
|
||||
"$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" <<'PYEOF'
|
||||
"$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" \
|
||||
"$ATTACK_SUITE_COUNT" <<'PYEOF'
|
||||
import json, sys, os
|
||||
|
||||
evidence_file = sys.argv[1]
|
||||
|
|
@ -787,6 +862,7 @@ lm_eth_after = int(sys.argv[8]) if sys.argv[8].isdigit() else 0
|
|||
eth_extracted = int(sys.argv[9]) if sys.argv[9].isdigit() else 0
|
||||
floor_held = sys.argv[10].lower() == "true"
|
||||
verdict = sys.argv[11]
|
||||
# Optional 12th argument: the attack-suite count. Falls back to 0 when the argument is absent or is not made up solely of digits.
attack_suite_count = int(sys.argv[12]) if len(sys.argv) > 12 and sys.argv[12].isdigit() else 0
|
||||
|
||||
# Build attacks list from memory entries for this candidate
|
||||
attacks = []
|
||||
|
|
@ -821,6 +897,7 @@ evidence = {
|
|||
"floor_held": floor_held,
|
||||
"verdict": verdict,
|
||||
"attacks": attacks,
|
||||
"attack_suite_count": attack_suite_count,
|
||||
}
|
||||
with open(evidence_file, "w") as f:
|
||||
json.dump(evidence, f, indent=2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue