Merge pull request 'fix: run-attack-suite is spec-only — no implementation in red-team.sh (#1000)' (#1119) from fix/issue-1000 into master

This commit is contained in:
johba 2026-03-22 17:26:11 +01:00
commit 7396bd371f
3 changed files with 87 additions and 12 deletions

View file

@ -38,9 +38,8 @@ required = false
default = "onchain/script/backtesting/attacks" default = "onchain/script/backtesting/attacks"
description = """ description = """
Directory containing existing .jsonl attack patterns for the structured Directory containing existing .jsonl attack patterns for the structured
attack suite. Not yet forwarded to red-team.sh see run-attack-suite step. attack suite. Forwarded to red-team.sh as ATTACK_DIR.
""" """
status = "planned" # consumed only when run-attack-suite is implemented
[inputs.claude_timeout] [inputs.claude_timeout]
type = "integer" type = "integer"
@ -61,7 +60,7 @@ description = "Timeout in seconds for the adversarial agent run (maps to CLAUDE_
[execution] [execution]
script = "scripts/harb-evaluator/red-team.sh" script = "scripts/harb-evaluator/red-team.sh"
invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} bash scripts/harb-evaluator/red-team.sh" invocation = "CANDIDATE_NAME={candidate_name} OPTIMIZER_PROFILE={optimizer_profile} CLAUDE_TIMEOUT={claude_timeout} ATTACK_DIR={attack_dir} bash scripts/harb-evaluator/red-team.sh"
# Exit codes propagated by red-team.sh: # Exit codes propagated by red-team.sh:
# 0 floor held (LM total ETH did not decrease) # 0 floor held (LM total ETH did not decrease)
@ -104,7 +103,6 @@ already understood.
attack_source = "{attack_dir}/*.jsonl" attack_source = "{attack_dir}/*.jsonl"
forge_script = "onchain/script/backtesting/AttackRunner.s.sol" forge_script = "onchain/script/backtesting/AttackRunner.s.sol"
snapshot_mode = "revert-between-attacks" snapshot_mode = "revert-between-attacks"
status = "planned" # not yet implemented in red-team.sh; tracked for future PR
[[steps]] [[steps]]
id = "run-adversarial-agent" id = "run-adversarial-agent"
@ -186,8 +184,7 @@ to the originating issue.
Comment includes: Comment includes:
- Verdict (floor_held / floor_broken). - Verdict (floor_held / floor_broken).
- ETH extracted (formatted in ETH) and delta in basis points. - ETH extracted (formatted in ETH) and delta in basis points.
- Total attacks tried (agent-discovered count; structured suite count - Total attacks tried (agent-discovered count + structured suite count).
added once run-attack-suite is implemented).
- Link to committed evidence file. - Link to committed evidence file.
- If novel vectors were promoted: link to the attack-vector PR. - If novel vectors were promoted: link to the attack-vector PR.
On floor_broken: also include the highest-yield attack strategy name and On floor_broken: also include the highest-yield attack strategy name and
@ -253,9 +250,8 @@ records, attack filenames, PR titles).
""" """
run_attack_suite_gap = """ run_attack_suite_gap = """
The run-attack-suite step (structured loop over attacks/*.jsonl via The run-attack-suite step is implemented in red-team.sh (step 5a). It loops
AttackRunner.s.sol with snapshot revert between files) is not yet implemented through every *.jsonl file in the attack directory, replays each through
in red-team.sh. The current script runs only the adversarial Claude agent. AttackRunner.s.sol, records LM total ETH before/after with snapshot revert
The step is documented here as a forward spec; implementation is tracked as a between files, and injects results into the agent prompt.
follow-up issue.
""" """

View file

@ -325,6 +325,8 @@ SNAP=$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://localh
--- ---
{{ATTACK_SUITE_RESULTS}}
{{CROSS_CANDIDATE_SECTION}} {{CROSS_CANDIDATE_SECTION}}
{{MEMORY_SECTION}} {{MEMORY_SECTION}}

View file

@ -454,6 +454,79 @@ log "Reading floor before agent run..."
LM_ETH_BEFORE=$(compute_lm_total_eth) LM_ETH_BEFORE=$(compute_lm_total_eth)
log " lm_eth_before = $LM_ETH_BEFORE wei" log " lm_eth_before = $LM_ETH_BEFORE wei"
# ── 5a. Run attack catalogue (structured suite) ──────────────────────────────
# Replay every existing .jsonl attack file in ATTACK_DIR through
# AttackRunner.s.sol, record LM total ETH before/after each replay, and revert
# to the baseline snapshot between files so attacks are independent.
#
# Reads:   ATTACK_DIR (optional), REPO_ROOT, FORGE, CAST, RPC_URL, SWAP_ROUTER,
#          NPM, SNAP, plus the log and compute_lm_total_eth helpers.
# Writes:  ATTACK_DIR (made absolute when it can be resolved),
#          ATTACK_SUITE_RESULTS (markdown section for the agent prompt, empty
#          when nothing ran), ATTACK_SUITE_COUNT (files processed),
#          SNAP (re-taken after every revert).

# Print an absolute form of the attack directory given as $1.
# forge is started from "$REPO_ROOT/onchain", so a relative ATTACK_FILE would
# no longer point at the file that find located from the caller's directory.
# A relative path is resolved against the current directory first (the
# directory the existence check has always used), then against REPO_ROOT.
# A path that resolves nowhere is printed unchanged so the caller can report it.
_attack_suite_resolve_dir() {
  local dir=$1
  if [[ "$dir" != /* ]]; then
    if [[ -d "$dir" ]]; then
      # CDPATH is cleared so cd can neither search elsewhere nor echo the path.
      dir=$(CDPATH='' cd -- "$dir" && pwd)
    elif [[ -d "${REPO_ROOT:-}/$dir" ]]; then
      dir="${REPO_ROOT:-}/$dir"
    fi
  fi
  printf '%s\n' "$dir"
}

# Print the change from $1 (before, wei) to $2 (after, wei) in basis points;
# prints 0 when the starting balance is 0. python3 is used because wei amounts
# can exceed the signed 64-bit range of shell arithmetic. Values are passed as
# arguments rather than spliced into the Python source.
_attack_suite_delta_bps() {
  python3 -c 'import sys
b = int(sys.argv[1]); a = int(sys.argv[2])
print(round((a - b) * 10000 / b) if b else 0)' "$1" "$2"
}

# Succeed (status 0) when the balance after ($2) is strictly below the balance
# before ($1), i.e. the replayed attack extracted ETH.
_attack_suite_floor_broken() {
  python3 -c 'import sys
sys.exit(0 if int(sys.argv[2]) < int(sys.argv[1]) else 1)' "$1" "$2"
}

ATTACK_DIR="${ATTACK_DIR:-$REPO_ROOT/onchain/script/backtesting/attacks}"
ATTACK_DIR=$(_attack_suite_resolve_dir "$ATTACK_DIR")
ATTACK_SUITE_RESULTS=""
ATTACK_SUITE_COUNT=0

if [[ -d "$ATTACK_DIR" ]]; then
  mapfile -t ATTACK_FILES < <(find "$ATTACK_DIR" -maxdepth 1 -name '*.jsonl' -type f | sort)
  if [[ ${#ATTACK_FILES[@]} -gt 0 ]]; then
    log "Running attack catalogue (${#ATTACK_FILES[@]} files in $ATTACK_DIR)..."
    ATTACK_SUITE_RESULTS="## Attack Catalogue Results (pre-run structured suite)
These attacks were replayed from the known catalogue before your session.
Do NOT repeat these strategies. Focus on novel approaches instead.
"
    for attack_file in "${ATTACK_FILES[@]}"; do
      attack_name=$(basename "$attack_file" .jsonl)
      log " Running attack: $attack_name ..."

      # Record LM ETH before this attack
      suite_eth_before=$(compute_lm_total_eth)

      # Run AttackRunner. errexit is suspended so that a failing replay is
      # recorded as REPLAY_ERROR instead of aborting the whole run.
      set +e
      suite_output=$(cd "$REPO_ROOT/onchain" && \
        ATTACK_FILE="$attack_file" \
        DEPLOYMENTS_FILE="deployments-local.json" \
        SWAP_ROUTER="$SWAP_ROUTER" \
        NPM_ADDR="$NPM" \
        "$FORGE" script script/backtesting/AttackRunner.s.sol \
        --rpc-url "$RPC_URL" --broadcast 2>&1)
      suite_exit=$?
      set -e

      # Record LM ETH after this attack
      if [[ $suite_exit -eq 0 ]]; then
        suite_eth_after=$(compute_lm_total_eth)
        suite_delta_bps=$(_attack_suite_delta_bps "$suite_eth_before" "$suite_eth_after")
        if _attack_suite_floor_broken "$suite_eth_before" "$suite_eth_after"; then
          suite_verdict="FLOOR_BROKEN"
        else
          suite_verdict="FLOOR_HELD"
        fi
        log " $attack_name: $suite_verdict (${suite_delta_bps} bps)"
        ATTACK_SUITE_RESULTS+="- **$attack_name**: $suite_verdict (delta: ${suite_delta_bps} bps, before: $suite_eth_before, after: $suite_eth_after)
"
      else
        log " $attack_name: REPLAY_ERROR (exit $suite_exit)"
        # Surface the tail of forge's output so the failure can be diagnosed.
        while IFS= read -r suite_output_line; do
          log "   | $suite_output_line"
        done < <(tail -n 5 <<<"$suite_output")
        ATTACK_SUITE_RESULTS+="- **$attack_name**: REPLAY_ERROR (forge exit $suite_exit)
"
      fi
      ATTACK_SUITE_COUNT=$((ATTACK_SUITE_COUNT + 1))

      # Revert to baseline snapshot so next attack starts from clean state.
      # Still best-effort (the run continues), but a failed revert is now
      # logged: everything that follows would otherwise silently run against
      # state left behind by this attack.
      if ! suite_revert_result=$("$CAST" rpc anvil_revert "$SNAP" --rpc-url "$RPC_URL" 2>&1) \
        || [[ "$suite_revert_result" == *false* ]]; then
        log " WARNING: anvil_revert to snapshot $SNAP failed after $attack_name; later results may reflect polluted state"
      fi

      # Re-take snapshot (anvil_revert is one-shot)
      SNAP=$("$CAST" rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"')
    done
    log "Attack catalogue complete: $ATTACK_SUITE_COUNT files processed"
  else
    log "No .jsonl files found in $ATTACK_DIR — skipping attack catalogue"
  fi
else
  log "Attack directory not found ($ATTACK_DIR) — skipping attack catalogue"
fi
# ── 6. Build agent prompt ────────────────────────────────────────────────────── # ── 6. Build agent prompt ──────────────────────────────────────────────────────
# ── 6a. Read Solidity source files (reflect the current candidate after inject) ─ # ── 6a. Read Solidity source files (reflect the current candidate after inject) ─
@ -642,6 +715,7 @@ PROMPT=${PROMPT//\{\{SOL_VWAP\}\}/$SOL_VWAP}
PROMPT=${PROMPT//\{\{SOL_PRICE_ORACLE\}\}/$SOL_PRICE_ORACLE} PROMPT=${PROMPT//\{\{SOL_PRICE_ORACLE\}\}/$SOL_PRICE_ORACLE}
PROMPT=${PROMPT//\{\{SOL_KRAIKEN\}\}/$SOL_KRAIKEN} PROMPT=${PROMPT//\{\{SOL_KRAIKEN\}\}/$SOL_KRAIKEN}
PROMPT=${PROMPT//\{\{SOL_STAKE\}\}/$SOL_STAKE} PROMPT=${PROMPT//\{\{SOL_STAKE\}\}/$SOL_STAKE}
PROMPT=${PROMPT//\{\{ATTACK_SUITE_RESULTS\}\}/$ATTACK_SUITE_RESULTS}
PROMPT=${PROMPT//\{\{CROSS_CANDIDATE_SECTION\}\}/$CROSS_CANDIDATE_SECTION} PROMPT=${PROMPT//\{\{CROSS_CANDIDATE_SECTION\}\}/$CROSS_CANDIDATE_SECTION}
PROMPT=${PROMPT//\{\{MEMORY_SECTION\}\}/$MEMORY_SECTION} PROMPT=${PROMPT//\{\{MEMORY_SECTION\}\}/$MEMORY_SECTION}
@ -773,7 +847,8 @@ fi
python3 - "$EVIDENCE_FILE" "$REPO_ROOT/tmp/red-team-memory.jsonl" \ python3 - "$EVIDENCE_FILE" "$REPO_ROOT/tmp/red-team-memory.jsonl" \
"$EVIDENCE_DATE" "$CANDIDATE_NAME" "$CANDIDATE_COMMIT" "$OPTIMIZER_PROFILE" \ "$EVIDENCE_DATE" "$CANDIDATE_NAME" "$CANDIDATE_COMMIT" "$OPTIMIZER_PROFILE" \
"$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" <<'PYEOF' "$LM_ETH_BEFORE" "$LM_ETH_AFTER" "$_eth_extracted" "$_floor_held" "$_verdict" \
"$ATTACK_SUITE_COUNT" <<'PYEOF'
import json, sys, os import json, sys, os
evidence_file = sys.argv[1] evidence_file = sys.argv[1]
@ -787,6 +862,7 @@ lm_eth_after = int(sys.argv[8]) if sys.argv[8].isdigit() else 0
eth_extracted = int(sys.argv[9]) if sys.argv[9].isdigit() else 0 eth_extracted = int(sys.argv[9]) if sys.argv[9].isdigit() else 0
floor_held = sys.argv[10].lower() == "true" floor_held = sys.argv[10].lower() == "true"
verdict = sys.argv[11] verdict = sys.argv[11]
attack_suite_count = int(sys.argv[12]) if len(sys.argv) > 12 and sys.argv[12].isdigit() else 0
# Build attacks list from memory entries for this candidate # Build attacks list from memory entries for this candidate
attacks = [] attacks = []
@ -821,6 +897,7 @@ evidence = {
"floor_held": floor_held, "floor_held": floor_held,
"verdict": verdict, "verdict": verdict,
"attacks": attacks, "attacks": attacks,
"attack_suite_count": attack_suite_count,
} }
with open(evidence_file, "w") as f: with open(evidence_file, "w") as f:
json.dump(evidence, f, indent=2) json.dump(evidence, f, indent=2)