From c1db4cb93e3d367bf0e56416a0fef944885936da Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 9 Mar 2026 09:23:37 +0000 Subject: [PATCH 1/2] fix: Red-team memory: persistent cross-run learning for adversarial agent (#528) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team.sh | 194 ++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 2 deletions(-) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index 082f220..fba7f54 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -23,6 +23,8 @@ CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}" REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" REPORT_DIR="$REPO_ROOT/tmp" REPORT="$REPORT_DIR/red-team-report.txt" +STREAM_LOG="$REPORT_DIR/red-team-stream.jsonl" +MEMORY_FILE="$REPORT_DIR/red-team-memory.jsonl" DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json" # ── Anvil accounts ───────────────────────────────────────────────────────────── @@ -154,12 +156,173 @@ print(0 if adj <= 0 else (e + w) * 10**18 // adj) PYEOF } +# ── Helper: extract strategy findings from stream-json and append to memory ──── +extract_memory() { + local stream_file="$1" + local run_num memory_file="$MEMORY_FILE" + + # Determine run number (rough: count existing entries) + if [[ -f "$memory_file" ]]; then + run_num=$(wc -l < "$memory_file") + run_num=$((run_num / 3 + 1)) + else + run_num=1 + fi + + python3 - "$stream_file" "$memory_file" "$run_num" "$FLOOR_BEFORE" <<'PYEOF' +import json, sys, re +from datetime import datetime, timezone + +stream_file = sys.argv[1] +memory_file = sys.argv[2] +run_num = int(sys.argv[3]) +floor_before = int(sys.argv[4]) + +texts = [] +with open(stream_file) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + if obj.get("type") == "assistant": + for block in obj.get("message", {}).get("content", []): + if block.get("type") == "text": + texts.append(block["text"]) + except: + pass + +# Parse strategies from agent text +strategies = [] +current = None +for text in texts: + # Detect strategy headers + strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text) + if strat_match: + if current: + strategies.append(current) + current = { + "strategy": strat_match.group(1).strip(), + "steps": "", + "floor_after": None, + "insight": "" + } + + if current: + # Capture floor readings + floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE) + if floor_match: + current["floor_after"] = int(floor_match.group(1)) + + # Capture insights + for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]: + insight_match = re.search(pattern, text) + if insight_match and len(insight_match.group(1)) > 20: + current["insight"] = insight_match.group(1).strip()[:300] + + # Capture step summaries + if any(word in text.lower() for word in ["wrap", "buy", "sell", "stake", "recenter", "mint", "approve"]): + if len(text) < 200: + current["steps"] += text.strip() + "; " + +if current: + strategies.append(current) + +# Write to memory file +ts = datetime.now(timezone.utc).isoformat() +with open(memory_file, "a") as f: + for s in strategies: + fa = s.get("floor_after") or floor_before + delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0 + if fa < floor_before: + result = "DECREASED" + elif fa > floor_before: + result = "INCREASED" + else: + result = "HELD" + + entry = { + "run": run_num, + "ts": ts, + "strategy": s["strategy"][:100], + "steps": s["steps"][:300].rstrip("; "), + "floor_before": floor_before, + "floor_after": fa, + "delta_bps": delta_bps, + "result": result, + "insight": s["insight"][:300] + } + f.write(json.dumps(entry) + "\n") + print(f" Recorded: {entry['strategy']} → {result} ({delta_bps:+d} bps)") + +if not strategies: + print(" No strategies detected in stream output") + +# Trim memory file: keep 10 most recent + all DECREASED entries (cap at 50) +with open(memory_file) as f: + all_entries = [json.loads(l) for l in f if l.strip()] + +if len(all_entries) > 50: + decreased = [e for e in all_entries if e.get("result") == "DECREASED"] + recent = all_entries[-10:] + kept = {id(e): e for e in decreased + recent}.values() + # Preserve insertion order: filter all_entries keeping only kept ids + kept_set = set(id(e) for e in kept) + # Rebuild from original list preserving order + trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:] + # Deduplicate preserving order + seen = set() + deduped = [] + for e in trimmed: + key = (e.get("run"), e.get("ts"), e.get("strategy")) + if key not in seen: + seen.add(key) + deduped.append(e) + with open(memory_file, "w") as f: + for e in deduped: + f.write(json.dumps(e) + "\n") + print(f" Trimmed memory to {len(deduped)} entries") +PYEOF +} + # ── 5. Read floor_before ─────────────────────────────────────────────────────── log "Reading floor before agent run..." FLOOR_BEFORE=$(compute_eth_per_token) log " floor_before = $FLOOR_BEFORE wei/token" # ── 6. Build agent prompt ────────────────────────────────────────────────────── + +# Build Previous Findings section from memory file +MEMORY_SECTION="" +if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then + MEMORY_SECTION=$(python3 -c " +import json, sys +entries = [] +with open('$MEMORY_FILE') as f: + for line in f: + line = line.strip() + if line: + entries.append(json.loads(line)) +if not entries: + sys.exit(0) +print('## Previous Findings (from earlier runs)') +print() +print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.') +print('Try NEW combinations not yet attempted. Combine tools creatively.') +print() +for e in entries: + r = e.get('result','?') + emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' + print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\") + print(f\"Steps: {e.get('steps','?')}\") + print(f\"Delta: {e.get('delta_bps',0)} bps\") + if e.get('insight'): + print(f\"**Insight:** {e['insight']}\") + print() +") +fi + PROMPT=$(cat <"$REPORT" 2>&1 + --verbose --output-format stream-json \ + "$PROMPT" >"$STREAM_LOG" 2>&1 AGENT_EXIT=$? set -e if [[ $AGENT_EXIT -ne 0 ]]; then - log "WARNING: claude exited with code $AGENT_EXIT — see $REPORT for details" + log "WARNING: claude exited with code $AGENT_EXIT — see $STREAM_LOG for details" fi +# Extract readable text from stream-json for the report +python3 - "$STREAM_LOG" >"$REPORT" <<'PYEOF' +import json, sys +with open(sys.argv[1]) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + if obj.get("type") == "assistant": + for block in obj.get("message", {}).get("content", []): + if block.get("type") == "text": + print(block["text"], end="") + except: + pass +PYEOF + # ── 8. Read floor_after ──────────────────────────────────────────────────────── log "Reading floor after agent run..." FLOOR_AFTER=$(compute_eth_per_token) + +# ── 8a. Extract and persist strategy findings ────────────────────────────────── +log "Extracting strategy findings from agent output..." +extract_memory "$STREAM_LOG" log " floor_after = $FLOOR_AFTER wei/token" # ── 9. Summarise results ─────────────────────────────────────────────────────── From 816b211c2b3dab4d1a83bb0a92c4bf6d4681cfda Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 9 Mar 2026 10:00:56 +0000 Subject: [PATCH 2/2] fix: address review findings in red-team memory (#528) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team.sh | 59 +++++++++++++++++------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index fba7f54..bcc4b04 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -161,10 +161,10 @@ extract_memory() { local stream_file="$1" local run_num memory_file="$MEMORY_FILE" - # Determine run number (rough: count existing entries) + # Determine run number: one entry per line in JSONL, so next run = line_count + 1 if [[ -f "$memory_file" ]]; then run_num=$(wc -l < "$memory_file") - run_num=$((run_num / 3 + 1)) + run_num=$((run_num + 1)) else run_num=1 fi @@ -176,7 +176,11 @@ from datetime import datetime, timezone stream_file = sys.argv[1] memory_file = sys.argv[2] run_num = int(sys.argv[3]) -floor_before = int(sys.argv[4]) +try: + floor_before = int(sys.argv[4]) +except (ValueError, IndexError): + print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr) + sys.exit(0) texts = [] with open(stream_file) as f: @@ -197,8 +201,8 @@ with open(stream_file) as f: strategies = [] current = None for text in texts: - # Detect strategy headers - strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text) + # Detect strategy headers: matches "## Strategy 1: name" and "STRATEGY 1: name" + strat_match = re.search(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)", text) if strat_match: if current: strategies.append(current) @@ -210,10 +214,10 @@ for text in texts: } if current: - # Capture floor readings - floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE) - if floor_match: - current["floor_after"] = int(floor_match.group(1)) + # Capture floor readings — take the last match in the block (most recent value) + floor_matches = list(re.finditer(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE)) + if floor_matches: + current["floor_after"] = int(floor_matches[-1].group(1)) # Capture insights for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]: @@ -233,8 +237,8 @@ if current: ts = datetime.now(timezone.utc).isoformat() with open(memory_file, "a") as f: for s in strategies: - fa = s.get("floor_after") or floor_before - delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0 + fa = s["floor_after"] if s.get("floor_after") is not None else floor_before + delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0 if fa < floor_before: result = "DECREASED" elif fa > floor_before: @@ -264,14 +268,8 @@ with open(memory_file) as f: all_entries = [json.loads(l) for l in f if l.strip()] if len(all_entries) > 50: - decreased = [e for e in all_entries if e.get("result") == "DECREASED"] - recent = all_entries[-10:] - kept = {id(e): e for e in decreased + recent}.values() - # Preserve insertion order: filter all_entries keeping only kept ids - kept_set = set(id(e) for e in kept) - # Rebuild from original list preserving order + # Keep all DECREASED entries + 10 most recent; deduplicate preserving order trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:] - # Deduplicate preserving order seen = set() deduped = [] for e in trimmed: @@ -296,10 +294,10 @@ log " floor_before = $FLOOR_BEFORE wei/token" # Build Previous Findings section from memory file MEMORY_SECTION="" if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then - MEMORY_SECTION=$(python3 -c " + MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF' import json, sys entries = [] -with open('$MEMORY_FILE') as f: +with open(sys.argv[1]) as f: for line in f: line = line.strip() if line: @@ -312,15 +310,16 @@ print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights. print('Try NEW combinations not yet attempted. Combine tools creatively.') print() for e in entries: - r = e.get('result','?') + r = e.get('result', '?') emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' - print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\") - print(f\"Steps: {e.get('steps','?')}\") - print(f\"Delta: {e.get('delta_bps',0)} bps\") + print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}") + print(f"Steps: {e.get('steps','?')}") + print(f"Delta: {e.get('delta_bps',0)} bps") if e.get('insight'): - print(f\"**Insight:** {e['insight']}\") + print(f"**Insight:** {e['insight']}") print() -") +PYEOF +) fi PROMPT=$(cat <"$STREAM_LOG" 2>&1 @@ -599,6 +600,12 @@ with open(sys.argv[1]) as f: pass PYEOF +# If the agent crashed and produced no readable output, treat as an infra error +# rather than silently reporting FLOOR HELD (a false pass). +if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then + die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG" +fi + # ── 8. Read floor_after ──────────────────────────────────────────────────────── log "Reading floor after agent run..." FLOOR_AFTER=$(compute_eth_per_token)