harb/scripts/harb-evaluator/red-team.sh
openhands 816b211c2b fix: address review findings in red-team memory (#528)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-09 10:00:56 +00:00

662 lines
25 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# red-team.sh — Adversarial floor-attack agent runner.
#
# Spawns a Claude sub-agent with tools and a goal: make ethPerToken() decrease.
# The agent iterates freely — snapshot → strategy → check floor → revert → repeat.
#
# Usage: red-team.sh
#
# Exit codes:
# 0 floor held (no confirmed decrease)
# 1 floor broken (agent found a strategy that decreased ethPerToken)
# 2 infra error (stack not running, missing dependency, etc.)
#
# Environment overrides:
# CLAUDE_TIMEOUT seconds for the agent run (default: 7200)
# RPC_URL Anvil RPC endpoint (default: http://localhost:8545)
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
# Absolute path to the Foundry `cast` binary (fixed; not read from the environment).
CAST=/home/debian/.foundry/bin/cast
# Both of the following honour an override from the environment.
RPC_URL="${RPC_URL:-http://localhost:8545}"
CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
# Repository root = two directories above the directory containing this script.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
# Run artifacts are all written under <repo>/tmp:
#   REPORT      — readable agent text plus the runner summary
#   STREAM_LOG  — raw output of the claude CLI (stream-json, stderr merged in)
#   MEMORY_FILE — JSONL of strategy findings carried across runs
REPORT_DIR="$REPO_ROOT/tmp"
REPORT="$REPORT_DIR/red-team-report.txt"
STREAM_LOG="$REPORT_DIR/red-team-stream.jsonl"
MEMORY_FILE="$REPORT_DIR/red-team-memory.jsonl"
# JSON file the contract addresses (.contracts.*) are read from.
DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json"
# ── Anvil accounts ─────────────────────────────────────────────────────────────
# Both private keys below are handed to the agent verbatim inside the prompt.
# Account 8 — adversary (10k ETH, 0 KRK)
ADV_PK=0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97
# Account 2 — recenter caller (granted recenterAccess by bootstrap)
RECENTER_PK=0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a
# ── Infrastructure constants ───────────────────────────────────────────────────
# WETH token, SwapRouter02, Uniswap V3 factory and NonfungiblePositionManager
# addresses, as labelled in the prompt's address table.
WETH=0x4200000000000000000000000000000000000006
SWAP_ROUTER=0x94cC0AaC535CCDB3C01d6787D6413C739ae12bc4
V3_FACTORY=0x4752ba5DBc23f44D87826276BF6Fd6b1C372aD24
NPM=0x27F971cb582BF9E50F397e4d29a5C7A34f11faA2
# Fee tier passed to getPool() and to the swap/mint calls (the 1% WETH/KRK pool).
POOL_FEE=10000
# ── Logging helpers ────────────────────────────────────────────────────────────
# log <words...> — write one "[red-team]"-tagged line to stdout.
log() {
  printf '%s\n' "[red-team] $*"
}
# die <words...> — write one tagged error line to stderr, then terminate the
# script with exit code 2 (the "infra error" code from the header).
die() {
  printf '%s\n' "[red-team] ERROR: $*" >&2
  exit 2
}
# ── Prerequisites ──────────────────────────────────────────────────────────────
# Every failure in this setup phase is routed through die() so the script exits
# with code 2 (infra error). A bare `set -e` abort would exit with the failing
# command's own status — usually 1, which this script reserves for "floor broken".
command -v "$CAST" &>/dev/null || die "cast not found at $CAST"
command -v claude &>/dev/null || die "claude CLI not found (install: npm i -g @anthropic-ai/claude-code)"
command -v python3 &>/dev/null || die "python3 not found"
command -v jq &>/dev/null || die "jq not found"
# ── 1. Verify stack is running ─────────────────────────────────────────────────
log "Verifying Anvil is accessible at $RPC_URL ..."
"$CAST" chain-id --rpc-url "$RPC_URL" >/dev/null 2>&1 \
  || die "Anvil not accessible at $RPC_URL — run: ./scripts/dev.sh start"
# ── 2. Read contract addresses ─────────────────────────────────────────────────
[[ -f "$DEPLOYMENTS" ]] || die "deployments-local.json not found at $DEPLOYMENTS (bootstrap not complete)"
KRK=$(jq -r '.contracts.Kraiken' "$DEPLOYMENTS") \
  || die "Failed to parse $DEPLOYMENTS"
STAKE=$(jq -r '.contracts.Stake' "$DEPLOYMENTS") \
  || die "Failed to parse $DEPLOYMENTS"
LM=$(jq -r '.contracts.LiquidityManager' "$DEPLOYMENTS") \
  || die "Failed to parse $DEPLOYMENTS"
OPT=$(jq -r '.contracts.OptimizerProxy' "$DEPLOYMENTS") \
  || die "Failed to parse $DEPLOYMENTS"
# jq -r prints the literal string "null" for a missing key, so reject that too.
for var in KRK STAKE LM OPT; do
  val="${!var}"
  [[ -n "$val" && "$val" != "null" ]] \
    || die "$var address missing from deployments-local.json — was bootstrap successful?"
done
log " KRK: $KRK"
log " STAKE: $STAKE"
log " LM: $LM"
log " OPT: $OPT"
# Derive Anvil account addresses from their private keys
ADV_ADDR=$("$CAST" wallet address --private-key "$ADV_PK") \
  || die "Failed to derive the adversary address from ADV_PK"
RECENTER_ADDR=$("$CAST" wallet address --private-key "$RECENTER_PK") \
  || die "Failed to derive the recenter address from RECENTER_PK"
log " Adversary: $ADV_ADDR (account 8)"
log " Recenter: $RECENTER_ADDR (account 2)"
# Get Uniswap V3 Pool address
POOL=$("$CAST" call "$V3_FACTORY" "getPool(address,address,uint24)(address)" \
  "$WETH" "$KRK" "$POOL_FEE" --rpc-url "$RPC_URL") \
  || die "Failed to read getPool() from the V3 factory at $V3_FACTORY"
POOL=$(echo "$POOL" | tr -d '[:space:]')
# An all-zero result means the factory knows no pool for this pair and fee tier.
[[ -n "$POOL" && "${POOL,,}" != "0x0000000000000000000000000000000000000000" ]] \
  || die "No WETH/KRK pool found for fee tier $POOL_FEE"
log " Pool: $POOL"
# ── 3. Grant recenterAccess to account 2 ──────────────────────────────────────
# Done BEFORE the snapshot so every revert restores account 2's access.
# LM.recenterAccess is a single address slot — replace it with account 2.
# Only the feeDestination is authorised to call setRecenterAccess().
log "Granting recenterAccess to account 2 ($RECENTER_ADDR) ..."
FEE_DEST=$("$CAST" call "$LM" "feeDestination()(address)" --rpc-url "$RPC_URL") \
  || die "Failed to read feeDestination() from LM"
FEE_DEST=$(echo "$FEE_DEST" | tr -d '[:space:]')
"$CAST" rpc --rpc-url "$RPC_URL" anvil_impersonateAccount "$FEE_DEST" \
  || die "anvil_impersonateAccount $FEE_DEST failed"
"$CAST" send --rpc-url "$RPC_URL" --from "$FEE_DEST" --unlocked \
  "$LM" "setRecenterAccess(address)" "$RECENTER_ADDR" >/dev/null 2>&1 \
  || die "setRecenterAccess($RECENTER_ADDR) failed — check that feeDestination is correct"
"$CAST" rpc --rpc-url "$RPC_URL" anvil_stopImpersonatingAccount "$FEE_DEST" \
  || die "anvil_stopImpersonatingAccount $FEE_DEST failed"
log " recenterAccess granted"
# ── 4. Take Anvil snapshot (clean baseline, includes recenterAccess grant) ─────
log "Taking Anvil snapshot..."
# The RPC result is a JSON string; strip the surrounding double quotes.
SNAP=$("$CAST" rpc anvil_snapshot --rpc-url "$RPC_URL" | tr -d '"') \
  || die "anvil_snapshot failed"
[[ -n "$SNAP" ]] || die "anvil_snapshot returned an empty snapshot id"
log " Snapshot ID: $SNAP"
# Revert to the baseline snapshot on exit so subsequent runs start clean.
#
# cleanup — EXIT handler. Preserves the exit status that was in effect when it
# was entered, reverts Anvil to $SNAP (best-effort; a failed revert is ignored),
# then exits with the preserved status.
#
# Globals read: SNAP, CAST, RPC_URL.
cleanup() {
  local rc=$?
  # Disarm all handlers first: the `exit` below must not re-enter cleanup, and a
  # second anvil_revert would target a snapshot the first revert already consumed.
  trap - EXIT INT TERM
  if [[ -n "${SNAP:-}" ]]; then
    "$CAST" rpc anvil_revert "$SNAP" --rpc-url "$RPC_URL" >/dev/null 2>&1 || true
  fi
  exit "$rc"
}
# Only EXIT runs cleanup. Signals are converted into an explicit non-zero exit
# (128 + signal number) so an interrupted run can never finish with status 0
# ("floor held") just because the last completed command happened to succeed.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# ── Helper: compute ethPerToken (mirrors floor.ts getEthPerToken) ──────────────
# ethPerToken = (lm_native_eth + lm_weth) * 1e18 / adjusted_outstanding_supply
# adjusted_supply = outstandingSupply() - KRK_at_feeDestination - KRK_at_stakingPool

# _cast_scalar <cast args...>
#   Runs "$CAST" with the given arguments followed by --rpc-url "$RPC_URL" and
#   prints the first whitespace-delimited token of its output.
#   Returns non-zero if cast fails or prints nothing.
_cast_scalar() {
  local raw first
  raw=$("$CAST" "$@" --rpc-url "$RPC_URL" 2>/dev/null) || return 1
  # Keep only the first token. For a bare value this equals stripping whitespace;
  # it also stops anything printed after the value from being glued onto it.
  # NOTE(review): some cast releases appear to append a bracketed hint such as
  # "[1e18]" after uint256 results — confirm against the installed version.
  read -r first _ <<<"$raw" || true
  [[ -n "${first:-}" ]] || return 1
  printf '%s\n' "$first"
}

# compute_eth_per_token
#   Prints the current floor (wei per token, integer division) on stdout.
#   Prints 0 when the adjusted supply is zero or negative.
#   Any failed or non-numeric chain read aborts via die() (exit 2) instead of
#   being treated as 0: a silently-zeroed operand would skew the floor and could
#   flip the final verdict in either direction.
#   Globals read: CAST, RPC_URL, LM, WETH, KRK, STAKE.
compute_eth_per_token() {
  local lm_eth lm_weth supply fee_dest stake_bal v
  local fee_bal=0
  local zero="0x0000000000000000000000000000000000000000"

  lm_eth=$(_cast_scalar balance "$LM") \
    || die "compute_eth_per_token: cannot read native ETH balance of LM"
  lm_weth=$(_cast_scalar call "$WETH" "balanceOf(address)(uint256)" "$LM") \
    || die "compute_eth_per_token: cannot read WETH balance of LM"
  supply=$(_cast_scalar call "$KRK" "outstandingSupply()(uint256)") \
    || die "compute_eth_per_token: cannot read KRK outstandingSupply()"
  # Fee destination: read from contract (set at deploy time, may differ per fork)
  fee_dest=$(_cast_scalar call "$LM" "feeDestination()(address)") \
    || die "compute_eth_per_token: cannot read feeDestination() from LM"
  # A zero fee destination holds nothing worth subtracting; skip the lookup.
  if [[ "${fee_dest,,}" != "$zero" ]]; then
    fee_bal=$(_cast_scalar call "$KRK" "balanceOf(address)(uint256)" "$fee_dest") \
      || die "compute_eth_per_token: cannot read KRK balance of feeDestination"
  fi
  # Staking pool: use the deployed Stake address (mirrors peripheryContracts()[1])
  stake_bal=$(_cast_scalar call "$KRK" "balanceOf(address)(uint256)" "$STAKE") \
    || die "compute_eth_per_token: cannot read KRK balance of Stake"

  for v in "$lm_eth" "$lm_weth" "$supply" "$fee_bal" "$stake_bal"; do
    [[ "$v" =~ ^[0-9]+$ ]] \
      || die "compute_eth_per_token: non-numeric value returned by cast: '$v'"
  done

  # Python is used for arbitrary-precision integers: (eth + weth) * 1e18 does not
  # fit in 64-bit shell arithmetic. Operands travel as argv, not as source text.
  python3 - "$lm_eth" "$lm_weth" "$supply" "$fee_bal" "$stake_bal" <<'PYEOF'
import sys
e, w, s, f, k = (int(a) for a in sys.argv[1:6])
adj = s - f - k
print(0 if adj <= 0 else (e + w) * 10**18 // adj)
PYEOF
}
# ── Helper: extract strategy findings from stream-json and append to memory ────
# extract_memory <stream_file>
#   Parses the assistant text blocks of a claude stream-json log, derives one
#   record per "Strategy N: <name>" header and appends the records to
#   $MEMORY_FILE (JSONL). When the file grows beyond 50 entries it is rewritten
#   with every DECREASED entry plus the 10 most recent, in their original order.
#
#   Best-effort by design: always returns 0. A parsing or I/O failure is logged
#   as a warning so that it cannot abort the script under `set -e` with status 1,
#   which callers of this script interpret as "floor broken".
#
#   Globals read: MEMORY_FILE, FLOOR_BEFORE.
extract_memory() {
  local stream_file="$1"
  local memory_file="$MEMORY_FILE"
  local rc=0
  python3 - "$stream_file" "$memory_file" "${FLOOR_BEFORE:-}" <<'PYEOF' || rc=$?
import json
import re
import sys
from datetime import datetime, timezone

stream_file, memory_file = sys.argv[1], sys.argv[2]
try:
    floor_before = int(sys.argv[3])
except (ValueError, IndexError):
    print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr)
    sys.exit(0)


def load_jsonl(path):
    """Return the JSON objects in `path`; blank/malformed lines and a missing file yield nothing."""
    rows = []
    try:
        with open(path) as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    obj = json.loads(raw)
                except ValueError:
                    continue
                if isinstance(obj, dict):
                    rows.append(obj)
    except FileNotFoundError:
        pass
    return rows


# Collect every assistant text block, in stream order.
texts = []
for obj in load_jsonl(stream_file):
    if obj.get("type") != "assistant":
        continue
    message = obj.get("message")
    content = message.get("content") if isinstance(message, dict) else None
    if not isinstance(content, list):
        continue
    for block in content:
        if isinstance(block, dict) and block.get("type") == "text" and isinstance(block.get("text"), str):
            texts.append(block["text"])

# Matches "## Strategy 1: name" and "STRATEGY 1: name"
STRATEGY_RE = re.compile(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)")
FLOOR_RE = re.compile(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", re.IGNORECASE)
# Ordered from most to least specific; the first qualifying pattern wins.
INSIGHT_PATTERNS = [
    r"[Kk]ey [Ii]nsight:\s*(.+)",
    r"[Ii]nsight:\s*(.+)",
    r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)",
]
STEP_WORDS = ["wrap", "buy", "sell", "stake", "recenter", "mint", "approve"]

# Parse strategies from agent text
strategies = []
current = None
for text in texts:
    header = STRATEGY_RE.search(text)
    if header:
        if current:
            strategies.append(current)
        current = {
            "strategy": header.group(1).strip(),
            "steps": "",
            "floor_after": None,
            "insight": "",
        }
    if not current:
        continue
    # Floor readings: the last one in a block is the most recent value.
    readings = FLOOR_RE.findall(text)
    if readings:
        current["floor_after"] = int(readings[-1])
    for pattern in INSIGHT_PATTERNS:
        found = re.search(pattern, text)
        if found and len(found.group(1)) > 20:
            current["insight"] = found.group(1).strip()[:300]
            # Stop here so a generic "learned that ..." sentence cannot
            # overwrite an explicit "Key Insight:" from the same block.
            break
    # Short blocks that mention an action verb are treated as step summaries.
    if len(text) < 200 and any(word in text.lower() for word in STEP_WORDS):
        current["steps"] += text.strip() + "; "
if current:
    strategies.append(current)

# Run number = highest recorded run + 1. Counting lines would be wrong: a run
# writes one line per strategy, and trimming removes lines.
existing = load_jsonl(memory_file)
run_num = max((e["run"] for e in existing if isinstance(e.get("run"), int)), default=0) + 1

ts = datetime.now(timezone.utc).isoformat()
new_entries = []
for s in strategies:
    fa = s["floor_after"] if s["floor_after"] is not None else floor_before
    delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0
    if fa < floor_before:
        result = "DECREASED"
    elif fa > floor_before:
        result = "INCREASED"
    else:
        result = "HELD"
    new_entries.append({
        "run": run_num,
        "ts": ts,
        "strategy": s["strategy"][:100],
        "steps": s["steps"][:300].rstrip("; "),
        "floor_before": floor_before,
        "floor_after": fa,
        "delta_bps": delta_bps,
        "result": result,
        "insight": s["insight"][:300],
    })

# Write to memory file
if new_entries:
    with open(memory_file, "a") as f:
        for entry in new_entries:
            f.write(json.dumps(entry) + "\n")
    for entry in new_entries:
        print(f" Recorded: {entry['strategy']} → {entry['result']} ({entry['delta_bps']:+d} bps)")
else:
    print(" No strategies detected in stream output")

# Trim memory file: keep 10 most recent + all DECREASED entries once it exceeds 50
all_entries = existing + new_entries
if len(all_entries) > 50:
    recent_from = len(all_entries) - 10
    kept = [
        e for i, e in enumerate(all_entries)
        if i >= recent_from or e.get("result") == "DECREASED"
    ]
    with open(memory_file, "w") as f:
        for e in kept:
            f.write(json.dumps(e) + "\n")
    print(f" Trimmed memory to {len(kept)} entries")
PYEOF
  if ((rc != 0)); then
    log "WARNING: extract_memory failed (python exit $rc) — findings from this run were not fully recorded"
  fi
  return 0
}
# ── 5. Read floor_before ───────────────────────────────────────────────────────
log "Reading floor before agent run..."
# Route failures through die(): a bare `set -e` abort here would exit with
# status 1, which this script reserves for "floor broken".
FLOOR_BEFORE=$(compute_eth_per_token) \
  || die "Failed to compute floor_before"
[[ "$FLOOR_BEFORE" =~ ^[0-9]+$ ]] \
  || die "floor_before is not a non-negative integer: '$FLOOR_BEFORE'"
if [[ "$FLOOR_BEFORE" == "0" ]]; then
  # A zero baseline cannot decrease, so the run can only ever report FLOOR HELD.
  log "WARNING: floor_before is 0 — a decrease cannot be detected from this baseline"
fi
log " floor_before = $FLOOR_BEFORE wei/token"
# ── 6. Build agent prompt ──────────────────────────────────────────────────────
# Build Previous Findings section from memory file.
# The section is optional context: corrupt lines are skipped and a failure to
# render it only produces a warning and an empty section.
MEMORY_SECTION=""
if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then
  MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF'
import json, sys
entries = []
with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except ValueError:
            continue  # skip a corrupt line instead of losing the whole section
        if isinstance(entry, dict):
            entries.append(entry)
if not entries:
    sys.exit(0)
print('## Previous Findings (from earlier runs)')
print()
print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.')
print('Try NEW combinations not yet attempted. Combine tools creatively.')
print()
for e in entries:
    r = e.get('result', '?')
    emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
    print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}")
    print(f"Steps: {e.get('steps','?')}")
    print(f"Delta: {e.get('delta_bps',0)} bps")
    if e.get('insight'):
        print(f"**Insight:** {e['insight']}")
    print()
PYEOF
  ) || {
    log "WARNING: could not render previous findings from $MEMORY_FILE — continuing without them"
    MEMORY_SECTION=""
  }
fi
# The prompt is an UNQUOTED heredoc: ${...} is expanded by the shell, while \$
# and \` are emitted literally for the agent to run. The RPC endpoint and cast
# path come from ${RPC_URL} and ${CAST}, so the agent operates on the same node
# and binary the runner measures with (with the defaults the text is unchanged).
PROMPT=$(cat <<PROMPT_EOF
# Red-team mission: break the KRAIKEN protocol floor
## Goal
Make ethPerToken **decrease** compared to its current value.
Current ethPerToken: ${FLOOR_BEFORE} wei/token
ethPerToken measures: (LiquidityManager native ETH + WETH) × 1e18 ÷ adjusted_outstanding_supply
It is the floor price — the minimum ETH per KRK token the protocol will pay.
---
## Contract addresses (local Anvil)
| Contract | Address |
|--------------------|---------|
| Kraiken (KRK) | ${KRK} |
| Stake | ${STAKE} |
| LiquidityManager | ${LM} |
| OptimizerProxy | ${OPT} |
| Pool (WETH/KRK 1%) | ${POOL} |
| NonfungiblePosManager (NPM) | ${NPM} |
| WETH | ${WETH} |
| SwapRouter02 | ${SWAP_ROUTER} |
RPC: ${RPC_URL}
CAST binary: ${CAST}
---
## Your accounts
### Adversary — Anvil account 8 (your main account)
- Address: ${ADV_ADDR}
- Private key: ${ADV_PK}
- Balance: 10 000 ETH, 0 KRK
### Recenter caller — Anvil account 2
- Address: ${RECENTER_ADDR}
- Private key: ${RECENTER_PK}
- Has recenterAccess on LiquidityManager
---
## Protocol mechanics
### ethPerToken (the floor)
\`\`\`
ethPerToken = (LM_native_ETH + LM_WETH) * 1e18 / adjusted_supply
adjusted_supply = KRK.outstandingSupply() - KRK_at_feeDestination - KRK_at_Stake
\`\`\`
To DECREASE the floor you must either:
- Reduce LM's ETH/WETH holdings, OR
- Increase the adjusted outstanding supply of KRK
### Three LM positions
The LiquidityManager maintains three Uniswap V3 positions:
1. **ANCHOR** — straddles the current price; provides two-sided liquidity
2. **DISCOVERY** — above current price; captures upside momentum
3. **FLOOR** — a floor bid: ETH in, KRK out. Backing the floor price.
### recenter()
Calling \`LiquidityManager.recenter()\` removes all three positions, mints or burns KRK
to rebalance, then re-deploys positions at the current price. It:
- Can mint NEW KRK (increasing supply → decreasing floor)
- Can burn KRK (decreasing supply → increasing floor)
- Moves ETH between positions
Only recenterAccess account can call it.
### Staking
\`Stake.snatch(assets, receiver, taxRateIndex, positionsToSnatch)\`
- taxRateIndex: 0-29 (index into the 30-element TAX_RATES array — not a raw percentage)
- KRK staked is held by the Stake contract (excluded from adjusted_supply)
- KRK in Stake does NOT count against the floor denominator
### outstandingSupply() vs totalSupply()
\`KRK.outstandingSupply() = totalSupply() - balanceOf(liquidityManager)\`
LM-held KRK (in pool positions) is excluded from outstandingSupply.
The floor formula then additionally subtracts KRK at Stake and feeDestination to get adjusted_supply.
---
## Cast command patterns
### Check the floor (run after each strategy)
\`\`\`bash
LM_ETH=\$(${CAST} balance ${LM} --rpc-url ${RPC_URL})
LM_WETH=\$(${CAST} call ${WETH} "balanceOf(address)(uint256)" ${LM} --rpc-url ${RPC_URL})
SUPPLY=\$(${CAST} call ${KRK} "outstandingSupply()(uint256)" --rpc-url ${RPC_URL})
FEE_DEST=\$(${CAST} call ${LM} "feeDestination()(address)" --rpc-url ${RPC_URL})
FEE_BAL=\$(${CAST} call ${KRK} "balanceOf(address)(uint256)" \$FEE_DEST --rpc-url ${RPC_URL})
STAKE_BAL=\$(${CAST} call ${KRK} "balanceOf(address)(uint256)" ${STAKE} --rpc-url ${RPC_URL})
python3 -c "e=\$LM_ETH; w=\$LM_WETH; s=\$SUPPLY; f=\$FEE_BAL; k=\$STAKE_BAL; adj=s-f-k; print('ethPerToken:', (e+w)*10**18//adj if adj>0 else 0, 'wei/token')"
\`\`\`
### Wrap ETH to WETH
\`\`\`bash
${CAST} send ${WETH} "deposit()" --value 100ether \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Approve token spend
\`\`\`bash
${CAST} send <TOKEN> "approve(address,uint256)" <SPENDER> \
115792089237316195423570985008687907853269984665640564039457584007913129639935 \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Buy KRK (WETH → KRK via SwapRouter)
\`\`\`bash
# Must wrap ETH and approve WETH first
${CAST} send ${SWAP_ROUTER} \
"exactInputSingle((address,address,uint24,address,uint256,uint256,uint160))" \
"(${WETH},${KRK},${POOL_FEE},${ADV_ADDR},<WETH_AMOUNT>,0,0)" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Sell KRK (KRK → WETH via SwapRouter)
\`\`\`bash
# Must approve KRK first
${CAST} send ${SWAP_ROUTER} \
"exactInputSingle((address,address,uint24,address,uint256,uint256,uint160))" \
"(${KRK},${WETH},${POOL_FEE},${ADV_ADDR},<KRK_AMOUNT>,0,0)" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Stake KRK (snatch with no snatching)
\`\`\`bash
# Approve KRK to Stake first
${CAST} send ${STAKE} \
"snatch(uint256,address,uint32,uint256[])" \
<KRK_AMOUNT> ${ADV_ADDR} 0 "[]" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Unstake KRK
\`\`\`bash
${CAST} send ${STAKE} \
"exitPosition(uint256)" <POSITION_ID> \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Trigger recenter (account 2 only)
\`\`\`bash
${CAST} send ${LM} "recenter()" \
--private-key ${RECENTER_PK} --rpc-url ${RPC_URL}
\`\`\`
### Read KRK balance
\`\`\`bash
${CAST} call ${KRK} "balanceOf(address)(uint256)" ${ADV_ADDR} \
--rpc-url ${RPC_URL}
\`\`\`
### Read ETH balance
\`\`\`bash
${CAST} balance ${ADV_ADDR} --rpc-url ${RPC_URL}
\`\`\`
### Add LP position via NPM (mint)
\`\`\`bash
# Must approve both tokens to NPM first. tickLower/tickUpper must be multiples of 200 (pool tickSpacing).
${CAST} send ${NPM} \
"mint((address,address,uint24,int24,int24,uint256,uint256,uint256,uint256,address,uint256))" \
"(${WETH},${KRK},${POOL_FEE},<TICK_LOWER>,<TICK_UPPER>,<AMOUNT0>,<AMOUNT1>,0,0,${ADV_ADDR},<DEADLINE>)" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Remove LP position via NPM (decreaseLiquidity then collect)
\`\`\`bash
${CAST} send ${NPM} \
"decreaseLiquidity((uint256,uint128,uint256,uint256,uint256))" \
"(<TOKEN_ID>,<LIQUIDITY>,0,0,<DEADLINE>)" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
${CAST} send ${NPM} \
"collect((uint256,address,uint128,uint128))" \
"(<TOKEN_ID>,${ADV_ADDR},340282366920938463463374607431768211455,340282366920938463463374607431768211455)" \
--private-key ${ADV_PK} --rpc-url ${RPC_URL}
\`\`\`
### Mine a block
\`\`\`bash
${CAST} rpc evm_mine --rpc-url ${RPC_URL}
\`\`\`
### Snapshot and revert (for resetting between strategies)
\`\`\`bash
# Take snapshot (returns ID — save it):
SNAP=\$(${CAST} rpc anvil_snapshot --rpc-url ${RPC_URL} | tr -d '"')
# Revert to snapshot (one-shot — take a new snapshot immediately after):
${CAST} rpc anvil_revert \$SNAP --rpc-url ${RPC_URL}
\`\`\`
---
## Rules
1. You have 10 000 ETH. Start by wrapping some if you need WETH for swaps.
2. Your goal is to make ethPerToken DECREASE vs the starting value (${FLOOR_BEFORE} wei/token).
3. Try at least 3 distinct strategies. After each attempt:
a. Run the floor-check command above to measure ethPerToken.
b. If the floor DECREASED — report this as a SUCCESS and describe the exact steps.
c. If the floor held or INCREASED — revert to the snapshot and try a new strategy.
Remember: \`anvil_revert\` is one-shot. Take a new snapshot immediately after reverting.
4. You may chain multiple actions in one strategy (e.g. large buy → recenter → large sell).
5. Be methodical. Report every strategy tried even if it failed.
6. If Previous Findings are provided, DO NOT repeat those strategies. Use their insights to design new approaches.
7. Prioritize untried COMBINATIONS: staking + LP, staking + recenter timing, LP + multi-step swaps, etc.
---
${MEMORY_SECTION}
## Final report format
After trying all strategies, output a clearly structured report:
\`\`\`
=== RED-TEAM REPORT ===
floor_before: <value> wei/token
STRATEGY 1: <name>
Steps: <what you did>
floor_after: <value>
Result: INCREASED / HELD / DECREASED
STRATEGY 2: ...
...
=== CONCLUSION ===
Floor broken: YES / NO
Winning strategy: <describe if YES, else "None">
floor_before: ${FLOOR_BEFORE} wei/token
floor_after: <final value>
\`\`\`
PROMPT_EOF
)
# ── 7. Create output directory and run the agent ───────────────────────────────
# From here on, only the two deliberate `exit` statements at the bottom may end
# the script with 0 or 1. Every other failure goes through die() (exit 2) or is
# downgraded to a warning, so an infrastructure problem can never be reported as
# "floor held" or "floor broken".
mkdir -p "$REPORT_DIR" || die "Cannot create report directory $REPORT_DIR"
log "Spawning Claude red-team agent (timeout: ${CLAUDE_TIMEOUT}s)..."
log " Report will be written to: $REPORT"
set +e
# Note: --verbose is required by the claude CLI when --output-format stream-json is used;
# omitting it causes the CLI to exit with an error, producing an empty stream log.
timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
  --verbose --output-format stream-json \
  "$PROMPT" >"$STREAM_LOG" 2>&1
AGENT_EXIT=$?
set -e
if [[ $AGENT_EXIT -ne 0 ]]; then
  log "WARNING: claude exited with code $AGENT_EXIT — see $STREAM_LOG for details"
fi
# Extract readable text from stream-json for the report
python3 - "$STREAM_LOG" >"$REPORT" <<'PYEOF' || die "Failed to extract agent text from $STREAM_LOG"
import json, sys
with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except ValueError:
            continue  # stderr is merged into the stream log, so non-JSON lines are expected
        if not isinstance(obj, dict) or obj.get("type") != "assistant":
            continue
        message = obj.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue
        for block in content:
            if isinstance(block, dict) and block.get("type") == "text":
                print(block.get("text") or "", end="")
PYEOF
# If the agent crashed and produced no readable output, treat as an infra error
# rather than silently reporting FLOOR HELD (a false pass).
if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then
  die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG"
fi
# ── 8. Read floor_after ────────────────────────────────────────────────────────
log "Reading floor after agent run..."
FLOOR_AFTER=$(compute_eth_per_token) \
  || die "Failed to compute floor_after"
# An empty or malformed reading must not reach the comparison below: it would
# make the comparison itself fail, which is indistinguishable from "not lower".
[[ "$FLOOR_AFTER" =~ ^[0-9]+$ ]] \
  || die "floor_after is not a non-negative integer: '$FLOOR_AFTER'"
[[ "$FLOOR_BEFORE" =~ ^[0-9]+$ ]] \
  || die "floor_before is not a non-negative integer: '$FLOOR_BEFORE'"
# ── 8a. Extract and persist strategy findings ──────────────────────────────────
log "Extracting strategy findings from agent output..."
# Best-effort bookkeeping: a failure here must not change the verdict.
extract_memory "$STREAM_LOG" \
  || log "WARNING: could not persist strategy findings (verdict unaffected)"
log " floor_after = $FLOOR_AFTER wei/token"
# ── 9. Summarise results ───────────────────────────────────────────────────────
log ""
log "=== RED-TEAM SUMMARY ==="
log ""
log " floor_before : $FLOOR_BEFORE wei/token"
log " floor_after : $FLOOR_AFTER wei/token"
log ""
# DELTA = floor_before - floor_after, computed with arbitrary precision; the
# operands are passed as argv rather than spliced into Python source.
DELTA=$(python3 -c 'import sys; print(int(sys.argv[1]) - int(sys.argv[2]))' \
  "$FLOOR_BEFORE" "$FLOOR_AFTER") \
  || die "Failed to compare floor_before and floor_after"
BROKE=false
# Strictly positive DELTA means the floor went down.
if [[ "$DELTA" =~ ^[0-9]+$ && "$DELTA" != "0" ]]; then
  BROKE=true
fi
if [[ "$BROKE" == "true" ]]; then
  log " RESULT: FLOOR BROKEN ❌"
  log " Decrease: $DELTA wei/token"
  log ""
  log " See $REPORT for the winning strategy."
  log ""
  # Append a machine-readable summary to the report
  cat >>"$REPORT" <<SUMMARY_EOF
=== RUNNER SUMMARY ===
floor_before : $FLOOR_BEFORE
floor_after : $FLOOR_AFTER
delta : -$DELTA
verdict : FLOOR_BROKEN
SUMMARY_EOF
  exit 1
else
  log " RESULT: FLOOR HELD ✅"
  log ""
  log " See $REPORT for strategies attempted."
  log ""
  cat >>"$REPORT" <<SUMMARY_EOF
=== RUNNER SUMMARY ===
floor_before : $FLOOR_BEFORE
floor_after : $FLOOR_AFTER
delta : 0 (or increase)
verdict : FLOOR_HELD
SUMMARY_EOF
  exit 0
fi