From 3c6be7d86fc02cc6c99558fb5e85bc6683fe92b3 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 15 Mar 2026 20:20:13 +0000 Subject: [PATCH 1/2] fix: feat: structured sweep-results.tsv for red-team sweep (#818) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 92 +++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index f59023b..1febe3a 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -12,6 +12,7 @@ ATTACKS_OUT="$REPO_ROOT/onchain/script/backtesting/attacks" PROGRESS_FILE="/tmp/red-team-sweep-progress.json" MEMORY_FILE="$REPO_ROOT/tmp/red-team-memory.jsonl" CROSS_PATTERNS_FILE="/tmp/red-team-cross-patterns.jsonl" +SWEEP_TSV="/tmp/sweep-results.tsv" OPT_SOL="$REPO_ROOT/onchain/src/OptimizerV3.sol" TIMEOUT_PER="${1:-3600}" @@ -53,6 +54,10 @@ cd "$REPO_ROOT/onchain" && forge build --silent 2>&1 || die "Smoke test compile cp "${OPT_SOL}.sweep-backup" "$OPT_SOL" log "Smoke test passed ✓" +# Write TSV header once (file persists across restarts; header only if new) +[[ ! -f "$SWEEP_TSV" ]] && \ + printf 'candidate\teth_before\teth_after\tpct_extracted\tstrategies_tried\tbest_attack\tstatus\n' > "$SWEEP_TSV" + # ── Main loop ── for seed_file in "${seeds[@]}"; do seed_name=$(basename "$seed_file" .push3) @@ -106,9 +111,12 @@ PYEOF # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack) log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..." + set +e CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \ timeout "$((TIMEOUT_PER + 120))" \ - bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" || true + bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" + RED_TEAM_EXIT="${PIPESTATUS[0]}" + set -e # 4. Collect attacks if [[ -f "$REPO_ROOT/tmp/red-team-attacks.jsonl" ]]; then @@ -119,6 +127,88 @@ PYEOF fi fi + # 4c. Write one TSV row to sweep-results.tsv + if [[ "$RED_TEAM_EXIT" -eq 0 ]]; then + _sweep_status="safe" + elif [[ "$RED_TEAM_EXIT" -eq 1 ]]; then + _sweep_status="broken" + elif [[ "$RED_TEAM_EXIT" -eq 124 ]]; then + _sweep_status="timeout" + else + _sweep_status="crashed" + fi + set +e + python3 - "/tmp/red-team-${seed_name}.log" "$MEMORY_FILE" "$seed_name" "$_sweep_status" "$SWEEP_TSV" <<'PYEOF' +import re, sys, json, os + +log_file = sys.argv[1] +mem_file = sys.argv[2] +candidate = sys.argv[3] +status = sys.argv[4] +tsv_file = sys.argv[5] + +# Parse eth_before and eth_after from the candidate log +eth_before = "" +eth_after = "" +try: + with open(log_file) as f: + for line in f: + m = re.search(r'lm_eth_before\s*[=:]\s*(\d+)', line) + if m: + eth_before = m.group(1) + m = re.search(r'lm_eth_after\s*[=:]\s*(\d+)', line) + if m: + eth_after = m.group(1) +except Exception as e: + print(f" tsv: could not read log: {e}", file=sys.stderr) + +# Parse strategies from the memory file (populated by extract_memory inside red-team.sh) +strategies_tried = 0 +best_attack = "none" +try: + if os.path.isfile(mem_file) and os.path.getsize(mem_file) > 0: + with open(mem_file) as f: + entries = [json.loads(l) for l in f if l.strip()] + cand_entries = [e for e in entries if e.get("candidate") == candidate] + strategies_tried = len(set(e.get("strategy", "") for e in cand_entries)) + best_delta = 0 + for e in cand_entries: + if e.get("result") == "DECREASED" and e.get("delta_bps", 0) < best_delta: + best_delta = e["delta_bps"] + raw = e.get("strategy", "unknown") + best_attack = re.sub(r"\s+", "_", raw.strip()).lower()[:50] +except Exception as e: + print(f" tsv: could not read memory: {e}", file=sys.stderr) + +# Compute pct_extracted +pct_extracted = "0.00" +try: + if eth_before and eth_after: + before = int(eth_before) + after = int(eth_after) + if before > 0: + extracted = max(0, before - after) + pct_extracted = f"{extracted * 100 / before:.2f}" +except Exception: + pass + +# Sanitise fields: strip tabs so the row is always valid TSV +def clean(s): + return str(s).replace("\t", " ") + +row = "\t".join([ + clean(candidate), clean(eth_before), clean(eth_after), + clean(pct_extracted), clean(strategies_tried), + clean(best_attack), clean(status), +]) +with open(tsv_file, "a") as f: + f.write(row + "\n") +print(f" tsv: {status} | {pct_extracted}% extracted | {strategies_tried} strategies | best={best_attack}") +PYEOF + _py_exit=$? + set -e + [[ $_py_exit -ne 0 ]] && log "WARNING: TSV row write failed (exit $_py_exit) — continuing" + # 4b. Extract abstract patterns into cross-candidate file, then clear raw memory if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then set +e From ae3eb14833a7ad03ed343951ab1748106068e838 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 15 Mar 2026 20:48:33 +0000 Subject: [PATCH 2/2] fix: address review findings for sweep-results.tsv (#818) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 27 +++++++++++++----------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index 1febe3a..7e31a12 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -127,7 +127,8 @@ PYEOF fi fi - # 4c. Write one TSV row to sweep-results.tsv + # 4b. Write one TSV row to sweep-results.tsv + # NOTE: intentionally runs before 4c (memory clear) so strategy data is still available. if [[ "$RED_TEAM_EXIT" -eq 0 ]]; then _sweep_status="safe" elif [[ "$RED_TEAM_EXIT" -eq 1 ]]; then @@ -147,17 +148,17 @@ candidate = sys.argv[3] status = sys.argv[4] tsv_file = sys.argv[5] -# Parse eth_before and eth_after from the candidate log +# Parse eth_before (first occurrence = baseline) and eth_after (last occurrence = final state) eth_before = "" eth_after = "" try: with open(log_file) as f: for line in f: m = re.search(r'lm_eth_before\s*[=:]\s*(\d+)', line) - if m: + if m and not eth_before: # first occurrence wins eth_before = m.group(1) m = re.search(r'lm_eth_after\s*[=:]\s*(\d+)', line) - if m: + if m: # last occurrence wins eth_after = m.group(1) except Exception as e: print(f" tsv: could not read log: {e}", file=sys.stderr) @@ -170,7 +171,7 @@ try: with open(mem_file) as f: entries = [json.loads(l) for l in f if l.strip()] cand_entries = [e for e in entries if e.get("candidate") == candidate] - strategies_tried = len(set(e.get("strategy", "") for e in cand_entries)) + strategies_tried = len(set(e["strategy"] for e in cand_entries if e.get("strategy"))) best_delta = 0 for e in cand_entries: if e.get("result") == "DECREASED" and e.get("delta_bps", 0) < best_delta: @@ -180,17 +181,19 @@ try: except Exception as e: print(f" tsv: could not read memory: {e}", file=sys.stderr) -# Compute pct_extracted -pct_extracted = "0.00" -try: - if eth_before and eth_after: +# Compute pct_extracted; use sentinel when ETH values are absent (crash/early-timeout) +if not eth_before and not eth_after: + pct_extracted = "" +else: + pct_extracted = "0.00" + try: before = int(eth_before) after = int(eth_after) if before > 0: extracted = max(0, before - after) pct_extracted = f"{extracted * 100 / before:.2f}" -except Exception: - pass + except Exception: + pass # Sanitise fields: strip tabs so the row is always valid TSV def clean(s): @@ -209,7 +212,7 @@ PYEOF set -e [[ $_py_exit -ne 0 ]] && log "WARNING: TSV row write failed (exit $_py_exit) — continuing" - # 4b. Extract abstract patterns into cross-candidate file, then clear raw memory + # 4c. Extract abstract patterns into cross-candidate file, then clear raw memory if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then set +e _extract_out=$(python3 - "$MEMORY_FILE" "$CROSS_PATTERNS_FILE" <<'PYEOF'