From c1db4cb93e3d367bf0e56416a0fef944885936da Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 9 Mar 2026 09:23:37 +0000 Subject: [PATCH] fix: Red-team memory: persistent cross-run learning for adversarial agent (#528) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team.sh | 194 ++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 2 deletions(-) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index 082f220..fba7f54 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -23,6 +23,8 @@ CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}" REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" REPORT_DIR="$REPO_ROOT/tmp" REPORT="$REPORT_DIR/red-team-report.txt" +STREAM_LOG="$REPORT_DIR/red-team-stream.jsonl" +MEMORY_FILE="$REPORT_DIR/red-team-memory.jsonl" DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json" # ── Anvil accounts ───────────────────────────────────────────────────────────── @@ -154,12 +156,173 @@ print(0 if adj <= 0 else (e + w) * 10**18 // adj) PYEOF } +# ── Helper: extract strategy findings from stream-json and append to memory ──── +extract_memory() { + local stream_file="$1" + local run_num memory_file="$MEMORY_FILE" + + # Determine run number (rough: count existing entries) + if [[ -f "$memory_file" ]]; then + run_num=$(wc -l < "$memory_file") + run_num=$((run_num / 3 + 1)) + else + run_num=1 + fi + + python3 - "$stream_file" "$memory_file" "$run_num" "$FLOOR_BEFORE" <<'PYEOF' +import json, sys, re +from datetime import datetime, timezone + +stream_file = sys.argv[1] +memory_file = sys.argv[2] +run_num = int(sys.argv[3]) +floor_before = int(sys.argv[4]) + +texts = [] +with open(stream_file) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + if obj.get("type") == "assistant": + for block in obj.get("message", {}).get("content", []): + if block.get("type") == "text": + texts.append(block["text"]) + except: + pass + +# Parse strategies from agent text +strategies = [] +current = None +for text in texts: + # Detect strategy headers + strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text) + if strat_match: + if current: + strategies.append(current) + current = { + "strategy": strat_match.group(1).strip(), + "steps": "", + "floor_after": None, + "insight": "" + } + + if current: + # Capture floor readings + floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE) + if floor_match: + current["floor_after"] = int(floor_match.group(1)) + + # Capture insights + for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]: + insight_match = re.search(pattern, text) + if insight_match and len(insight_match.group(1)) > 20: + current["insight"] = insight_match.group(1).strip()[:300] + + # Capture step summaries + if any(word in text.lower() for word in ["wrap", "buy", "sell", "stake", "recenter", "mint", "approve"]): + if len(text) < 200: + current["steps"] += text.strip() + "; " + +if current: + strategies.append(current) + +# Write to memory file +ts = datetime.now(timezone.utc).isoformat() +with open(memory_file, "a") as f: + for s in strategies: + fa = s.get("floor_after") or floor_before + delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0 + if fa < floor_before: + result = "DECREASED" + elif fa > floor_before: + result = "INCREASED" + else: + result = "HELD" + + entry = { + "run": run_num, + "ts": ts, + "strategy": s["strategy"][:100], + "steps": s["steps"][:300].rstrip("; "), + "floor_before": floor_before, + "floor_after": fa, + "delta_bps": delta_bps, + "result": result, + "insight": s["insight"][:300] + } + f.write(json.dumps(entry) + "\n") + print(f" Recorded: {entry['strategy']} → {result} ({delta_bps:+d} bps)") + +if not strategies: + print(" No strategies detected in stream output") + +# Trim memory file: keep 10 most recent + all DECREASED entries (cap at 50) +with open(memory_file) as f: + all_entries = [json.loads(l) for l in f if l.strip()] + +if len(all_entries) > 50: + decreased = [e for e in all_entries if e.get("result") == "DECREASED"] + recent = all_entries[-10:] + kept = {id(e): e for e in decreased + recent}.values() + # Preserve insertion order: filter all_entries keeping only kept ids + kept_set = set(id(e) for e in kept) + # Rebuild from original list preserving order + trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:] + # Deduplicate preserving order + seen = set() + deduped = [] + for e in trimmed: + key = (e.get("run"), e.get("ts"), e.get("strategy")) + if key not in seen: + seen.add(key) + deduped.append(e) + with open(memory_file, "w") as f: + for e in deduped: + f.write(json.dumps(e) + "\n") + print(f" Trimmed memory to {len(deduped)} entries") +PYEOF +} + # ── 5. Read floor_before ─────────────────────────────────────────────────────── log "Reading floor before agent run..." FLOOR_BEFORE=$(compute_eth_per_token) log " floor_before = $FLOOR_BEFORE wei/token" # ── 6. Build agent prompt ────────────────────────────────────────────────────── + +# Build Previous Findings section from memory file +MEMORY_SECTION="" +if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then + MEMORY_SECTION=$(python3 -c " +import json, sys +entries = [] +with open('$MEMORY_FILE') as f: + for line in f: + line = line.strip() + if line: + entries.append(json.loads(line)) +if not entries: + sys.exit(0) +print('## Previous Findings (from earlier runs)') +print() +print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.') +print('Try NEW combinations not yet attempted. Combine tools creatively.') +print() +for e in entries: + r = e.get('result','?') + emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' + print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\") + print(f\"Steps: {e.get('steps','?')}\") + print(f\"Delta: {e.get('delta_bps',0)} bps\") + if e.get('insight'): + print(f\"**Insight:** {e['insight']}\") + print() +") +fi + PROMPT=$(cat <"$REPORT" 2>&1 + --verbose --output-format stream-json \ + "$PROMPT" >"$STREAM_LOG" 2>&1 AGENT_EXIT=$? set -e if [[ $AGENT_EXIT -ne 0 ]]; then - log "WARNING: claude exited with code $AGENT_EXIT — see $REPORT for details" + log "WARNING: claude exited with code $AGENT_EXIT — see $STREAM_LOG for details" fi +# Extract readable text from stream-json for the report +python3 - "$STREAM_LOG" >"$REPORT" <<'PYEOF' +import json, sys +with open(sys.argv[1]) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + if obj.get("type") == "assistant": + for block in obj.get("message", {}).get("content", []): + if block.get("type") == "text": + print(block["text"], end="") + except: + pass +PYEOF + # ── 8. Read floor_after ──────────────────────────────────────────────────────── log "Reading floor after agent run..." FLOOR_AFTER=$(compute_eth_per_token) + +# ── 8a. Extract and persist strategy findings ────────────────────────────────── +log "Extracting strategy findings from agent output..." +extract_memory "$STREAM_LOG" log " floor_after = $FLOOR_AFTER wei/token" # ── 9. Summarise results ───────────────────────────────────────────────────────