harb/scripts/harb-evaluator/red-team-sweep.sh
openhands e7c60edeb6 fix: feat: red-team memory should track candidate + abstract learnings (#820)
- Add CANDIDATE_NAME and OPTIMIZER_PROFILE env vars to red-team.sh
  (defaults to "unknown" for standalone runs)
- Update extract_memory Python: new fields candidate, optimizer_profile,
  pattern (abstract op sequence via make_pattern()), and improved insight
  extraction that also captures WHY explanations (because/since/due to)
- Update MEMORY_SECTION Python: entries now grouped by candidate;
  universal patterns (DECREASED across multiple candidates) surfaced first
- Update prompt: add "Current Attack Target" table with candidate/profile,
  optimizer parameter explanations (CI/AW/AS/DD behavioral impact),
  Rule 9 requiring pattern+insight per strategy, updated report format
  with Pattern/Insight fields and universal-pattern conclusion field
- Update red-team-sweep.sh: after inject, parse OptimizerV3Push3.sol for
  r40/r39/r38/r37 constants to build OPTIMIZER_PROFILE string; pass
  CANDIDATE_NAME and OPTIMIZER_PROFILE as env vars to red-team.sh

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 15:23:43 +00:00

129 lines
4.9 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# red-team-sweep.sh — Red-team every kindergarten seed, one candidate at a time.
#
# Per seed: inject the candidate into OptimizerV3.sol, run red-team.sh against
# it, restore the original contract, then continue with the next seed.
#
# Usage: bash red-team-sweep.sh [timeout_per_candidate]
#   timeout_per_candidate  per-candidate budget in seconds (default: 3600)
set -euo pipefail

# Per-candidate time budget, taken from the first CLI argument.
TIMEOUT_PER="${1:-3600}"

# Directory containing this script, and the repository root two levels up.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"

# Inputs: seed programs, the injector script, and the contract it rewrites.
SEEDS_DIR="${REPO_ROOT}/tools/push3-evolution/seeds"
INJECT="${REPO_ROOT}/tools/push3-transpiler/inject.sh"
OPT_SOL="${REPO_ROOT}/onchain/src/OptimizerV3.sol"

# Outputs: archived attack files and the resume/progress record.
ATTACKS_OUT="${REPO_ROOT}/onchain/script/backtesting/attacks"
PROGRESS_FILE="/tmp/red-team-sweep-progress.json"
# log MESSAGE... — print MESSAGE to stdout, prefixed with "[sweep HH:MM:SS]"
# where the time is the current UTC wall clock.
log() {
  printf '[sweep %s] %s\n' "$(date -u +%H:%M:%S)" "$*"
}

# die MESSAGE... — emit MESSAGE through log with a "FATAL: " prefix on stderr,
# then terminate the script with exit status 1.
die() {
  log "FATAL: $*" 1>&2
  exit 1
}
# ── Preflight: fail fast, before any long-running work has started ──
[[ -f "$INJECT" ]] || die "inject.sh not found at $INJECT"
# TIMEOUT_PER is handed to `timeout` and used in $(( TIMEOUT_PER + 120 )).
# Reject anything that is not a plain positive decimal: a leading zero would be
# read as octal by shell arithmetic, and a word would be treated as a variable.
[[ "$TIMEOUT_PER" =~ ^[1-9][0-9]*$ ]] \
  || die "timeout_per_candidate must be a positive integer (got '$TIMEOUT_PER')"
# jq persists the progress file; timeout bounds each red-team run. Without the
# latter the run's failure is swallowed and seeds would be recorded as done
# without ever having been attacked.
for required_tool in jq timeout; do
  command -v "$required_tool" >/dev/null 2>&1 \
    || die "required tool '$required_tool' not found in PATH"
done
# Resume support: read the seed names recorded under .completed in the progress
# file into the `completed` array. jq errors are silenced and ignored, so an
# unreadable file simply contributes no entries.
completed=()
if [[ -f "$PROGRESS_FILE" ]]; then
  while IFS= read -r finished_seed; do
    completed+=("$finished_seed")
  done < <(jq -r '.completed[]' "$PROGRESS_FILE" 2>/dev/null || true)
fi
# is_done SEED_NAME — return 0 when SEED_NAME is an exact match for an entry of
# the global `completed` array, 1 otherwise.
is_done() {
  # Keep the loop variable local so the function does not clobber a caller's
  # variable of the same name.
  local entry
  # The ${arr[@]+...} wrapper lets an empty array expand to nothing without
  # tripping `set -u` on bash releases older than 4.4.
  for entry in "${completed[@]+"${completed[@]}"}"; do
    [[ "$entry" == "$1" ]] && return 0
  done
  return 1
}
# Gather the named seeds into `seeds`; files whose name starts with
# run<digits>_gen are pool entries and are left out of the sweep.
seeds=()
for seed_path in "$SEEDS_DIR"/*.push3; do
  # An unmatched glob stays literal; the -f test drops it (and anything that
  # is not a regular file).
  if [[ ! -f "$seed_path" ]]; then
    continue
  fi
  if [[ "${seed_path##*/}" =~ ^run[0-9]+_gen ]]; then
    continue
  fi
  seeds+=("$seed_path")
done
log "Found ${#seeds[@]} seeds. Timeout: ${TIMEOUT_PER}s each"
(( ${#seeds[@]} > 0 )) || die "No seeds found in $SEEDS_DIR"
# ── Smoke test: pick a random seed, inject + compile ──
# Exercises the inject → forge build path on one randomly chosen seed so a
# broken toolchain is detected before the sweep itself begins.
SMOKE_IDX=$(( RANDOM % ${#seeds[@]} ))
SMOKE_SEED="${seeds[$SMOKE_IDX]}"
SMOKE_NAME=$(basename "$SMOKE_SEED" .push3)
log "Smoke test: $SMOKE_NAME"

# A leftover backup means an earlier sweep died before its EXIT trap could run,
# so $OPT_SOL may still contain an injected candidate. Copying that over the
# backup would silently discard the pristine contract; stop and let the
# operator decide instead. (The trap is not armed yet, so nothing is touched.)
[[ ! -e "${OPT_SOL}.sweep-backup" ]] \
  || die "Stale backup ${OPT_SOL}.sweep-backup found (interrupted sweep?); restore or remove it, then re-run"

# Order matters: create the backup first, arm the restore trap second. A trap
# armed earlier could copy a missing or partial backup over the contract.
cp "$OPT_SOL" "${OPT_SOL}.sweep-backup"
trap 'cp "${OPT_SOL}.sweep-backup" "$OPT_SOL" 2>/dev/null; rm -f "${OPT_SOL}.sweep-backup"' EXIT

bash "$INJECT" "$SMOKE_SEED" "$OPT_SOL" || die "Smoke test inject failed for $SMOKE_NAME"

# cd and build are checked separately so that a missing directory is not
# reported as a compile failure. The working directory intentionally stays at
# $REPO_ROOT/onchain afterwards, as before.
cd "$REPO_ROOT/onchain" || die "Cannot cd to $REPO_ROOT/onchain"
forge build --silent 2>&1 || die "Smoke test compile failed for $SMOKE_NAME"

# Undo the smoke-test injection; the backup itself is kept for the sweep.
cp "${OPT_SOL}.sweep-backup" "$OPT_SOL"
log "Smoke test passed ✓"
# ── Main loop ──
# For every seed not already recorded as completed: inject it, derive its
# optimizer profile, run red-team.sh under a timeout, archive any attacks,
# persist progress, and tear the docker stack down.
TRANSPILER_OUT="$REPO_ROOT/onchain/src/OptimizerV3Push3.sol"
attacks_file="$REPO_ROOT/tmp/red-team-attacks.jsonl"
for seed_file in "${seeds[@]}"; do
  seed_name=$(basename "$seed_file" .push3)
  if is_done "$seed_name"; then
    log "SKIP $seed_name (done)"
    continue
  fi
  log "=== RED-TEAM: $seed_name ==="

  # Run every candidate from the same working directory. Without this the
  # first iteration inherits whatever directory earlier steps left behind,
  # while later iterations start from $REPO_ROOT (see the teardown step).
  cd "$REPO_ROOT" || die "Cannot cd to $REPO_ROOT"

  # 1. Inject candidate into OptimizerV3.sol (always starting from the backup)
  cp "${OPT_SOL}.sweep-backup" "$OPT_SOL"
  if ! bash "$INJECT" "$seed_file" "$OPT_SOL"; then
    # Not recorded as completed, so a later sweep will retry this seed.
    log "SKIP $seed_name — inject failed"
    continue
  fi
  log "Injected into OptimizerV3.sol"

  # 1b. Extract optimizer profile from transpiler output (CI/AW/AS/DD constants)
  # NOTE(review): this assumes inject.sh regenerates $TRANSPILER_OUT for every
  # seed — confirm, otherwise the profile describes a previous candidate.
  # The profile is informational only, so any failure degrades to "unknown"
  # instead of aborting the sweep.
  OPTIMIZER_PROFILE=$(python3 - "$TRANSPILER_OUT" <<'PYEOF'
import re, sys

try:
    with open(sys.argv[1]) as f:
        sol = f.read()
    # A register may be assigned in several places; keep each distinct literal.
    ci_vals = set(re.findall(r'\br40\s*=\s*uint256\((\d+)\)', sol))
    aw_vals = set(re.findall(r'\br38\s*=\s*uint256\((\d+)\)', sol))
    as_vals = set(re.findall(r'\br39\s*=\s*uint256\((\d+)\)', sol))
    dd_vals = set(re.findall(r'\br37\s*=\s*uint256\((\d+)\)', sol))

    def fmt_pct(vals):
        # Literals are treated as fractions of 1e18 and shown as whole percents.
        pcts = sorted(set(round(int(v) * 100 / 1e18) for v in vals))
        return '/'.join(str(p) + '%' for p in pcts) if pcts else '?'

    def fmt_int(vals):
        ints = sorted(set(int(v) for v in vals))
        return '/'.join(str(v) for v in ints) if ints else '?'

    profile = f"CI={fmt_pct(ci_vals)}, AW={fmt_int(aw_vals)}, AS={fmt_pct(as_vals)}, DD={fmt_pct(dd_vals)}"
    # More than one distinct value for any constant is labelled "adaptive".
    if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1:
        profile += ", adaptive"
    print(profile)
except Exception as e:
    print(f"unknown (parse error: {e})", file=sys.stderr)
    print("unknown")
PYEOF
  ) || OPTIMIZER_PROFILE="unknown"
  [[ -n "$OPTIMIZER_PROFILE" ]] || OPTIMIZER_PROFILE="unknown"
  log "Optimizer profile: $OPTIMIZER_PROFILE"

  # 2. Clear stale attack file from previous candidate
  rm -f "$attacks_file"

  # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack)
  # The outer timeout gets 120s of slack on top of the budget passed down as
  # CLAUDE_TIMEOUT. A failure or timeout is logged but does not stop the sweep,
  # and the seed is still recorded as completed below.
  log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..."
  rt_status=0
  CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \
    timeout "$((TIMEOUT_PER + 120))" \
    bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" || rt_status=$?
  if (( rt_status != 0 )); then
    log "WARN red-team.sh pipeline exited with status $rt_status for $seed_name"
  fi

  # 4. Collect attacks
  # -s plus `grep -c ''` also counts a final record that lacks a trailing
  # newline, which `wc -l` would report as zero lines.
  if [[ -s "$attacks_file" ]]; then
    ATTACK_COUNT=$(grep -c '' "$attacks_file" || true)
    # Create the archive directory on demand so a fresh checkout cannot abort
    # the sweep at the copy step.
    mkdir -p "$ATTACKS_OUT"
    cp "$attacks_file" "$ATTACKS_OUT/sweep-${seed_name}.jsonl"
    log "Saved $ATTACK_COUNT attack(s)"
  fi

  # 5. Save progress
  # Write to a sibling temp file and rename it into place, so a failed jq
  # cannot leave a truncated progress file that would erase the resume state.
  completed+=("$seed_name")
  progress_tmp="${PROGRESS_FILE}.tmp.$$"
  if ! jq -n --argjson arr "$(printf '%s\n' "${completed[@]}" | jq -R . | jq -s .)" \
    '{completed: $arr, last_updated: now | todate}' > "$progress_tmp"; then
    rm -f "$progress_tmp"
    die "Could not write progress file $PROGRESS_FILE"
  fi
  mv -f "$progress_tmp" "$PROGRESS_FILE"
  log "DONE $seed_name"

  # 6. Teardown (best effort: errors from docker are deliberately ignored)
  cd "$REPO_ROOT" && docker compose down -v 2>/dev/null || true
  sleep 5
done
# Put the original contract back from the sweep backup, then report how many
# entries are in the completed list relative to the number of seeds found.
cp "${OPT_SOL}.sweep-backup" "$OPT_SOL"
sweep_summary="${#completed[@]} / ${#seeds[@]}"
log "=== SWEEP COMPLETE: ${sweep_summary} ==="