From e7c60edeb6148d751a9b0ea628f8035ce209bd5a Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 15 Mar 2026 15:23:43 +0000 Subject: [PATCH 1/2] fix: feat: red-team memory should track candidate + abstract learnings (#820) - Add CANDIDATE_NAME and OPTIMIZER_PROFILE env vars to red-team.sh (defaults to "unknown" for standalone runs) - Update extract_memory Python: new fields candidate, optimizer_profile, pattern (abstract op sequence via make_pattern()), and improved insight extraction that also captures WHY explanations (because/since/due to) - Update MEMORY_SECTION Python: entries now grouped by candidate; universal patterns (DECREASED across multiple candidates) surfaced first - Update prompt: add "Current Attack Target" table with candidate/profile, optimizer parameter explanations (CI/AW/AS/DD behavioral impact), Rule 9 requiring pattern+insight per strategy, updated report format with Pattern/Insight fields and universal-pattern conclusion field - Update red-team-sweep.sh: after inject, parse OptimizerV3Push3.sol for r40/r39/r38/r37 constants to build OPTIMIZER_PROFILE string; pass CANDIDATE_NAME and OPTIMIZER_PROFILE as env vars to red-team.sh Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 35 ++++++- scripts/harb-evaluator/red-team.sh | 126 ++++++++++++++++++++--- 2 files changed, 147 insertions(+), 14 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index 6459875..c0b3247 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -63,12 +63,45 @@ for seed_file in "${seeds[@]}"; do fi log "Injected into OptimizerV3.sol" + # 1b. Extract optimizer profile from transpiler output (CI/AW/AS/DD constants) + TRANSPILER_OUT="$REPO_ROOT/onchain/src/OptimizerV3Push3.sol" + OPTIMIZER_PROFILE=$(python3 - "$TRANSPILER_OUT" <<'PYEOF' +import re, sys +try: + with open(sys.argv[1]) as f: + sol = f.read() + ci_vals = set(re.findall(r'\br40\s*=\s*uint256\((\d+)\)', sol)) + aw_vals = set(re.findall(r'\br38\s*=\s*uint256\((\d+)\)', sol)) + as_vals = set(re.findall(r'\br39\s*=\s*uint256\((\d+)\)', sol)) + dd_vals = set(re.findall(r'\br37\s*=\s*uint256\((\d+)\)', sol)) + + def fmt_pct(vals): + pcts = sorted(set(round(int(v) * 100 / 1e18) for v in vals)) + return '/'.join(str(p) + '%' for p in pcts) if pcts else '?' + + def fmt_int(vals): + ints = sorted(set(int(v) for v in vals)) + return '/'.join(str(v) for v in ints) if ints else '?' + + profile = f"CI={fmt_pct(ci_vals)}, AW={fmt_int(aw_vals)}, AS={fmt_pct(as_vals)}, DD={fmt_pct(dd_vals)}" + if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1: + profile += ", adaptive" + print(profile) +except Exception as e: + import sys as _sys + print(f"unknown (parse error: {e})", file=_sys.stderr) + print("unknown") +PYEOF + ) + log "Optimizer profile: $OPTIMIZER_PROFILE" + # 2. Clear stale attack file from previous candidate rm -f "$REPO_ROOT/tmp/red-team-attacks.jsonl" # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack) log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..." - CLAUDE_TIMEOUT="$TIMEOUT_PER" timeout "$((TIMEOUT_PER + 120))" \ + CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \ + timeout "$((TIMEOUT_PER + 120))" \ bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" || true # 4. Collect attacks diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index f04cbb7..a017d7a 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -30,6 +30,10 @@ ATTACK_EXPORT="$REPORT_DIR/red-team-attacks.jsonl" ATTACK_SNAPSHOTS="$REPORT_DIR/red-team-snapshots.jsonl" DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json" +# ── Candidate metadata (set by red-team-sweep.sh; defaults to unknown for standalone runs) ─ +CANDIDATE_NAME="${CANDIDATE_NAME:-unknown}" +OPTIMIZER_PROFILE="${OPTIMIZER_PROFILE:-unknown}" + # ── Anvil accounts ───────────────────────────────────────────────────────────── # Account 8 — adversary (10k ETH, 0 KRK) ADV_PK=0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97 @@ -193,7 +197,7 @@ extract_memory() { run_num=1 fi - python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" <<'PYEOF' + python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" <<'PYEOF' import json, sys, re from datetime import datetime, timezone @@ -205,6 +209,35 @@ try: except (ValueError, IndexError): print(" extract_memory: invalid lm_eth_before value, skipping", file=sys.stderr) sys.exit(0) +candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" +optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" + +def make_pattern(strategy_name, steps_text): + """Extract abstract op sequence: buy → stake → recenter → sell.""" + text = (strategy_name + " " + steps_text).lower() + ops = [] + if "wrap" in text: + ops.append("wrap") + if "buy" in text: + ops.append("buy") + stake_pos = text.find("stake") + unstake_pos = text.find("unstake") + if stake_pos >= 0 and (unstake_pos < 0 or stake_pos < unstake_pos): + ops.append("stake_all" if "all" in text[max(0, stake_pos-10):stake_pos+20] else "stake") + recenters = len(re.findall(r"\brecenter\b", text)) + if recenters == 1: + ops.append("recenter") + elif recenters > 1: + ops.append("recenter_multi") + if unstake_pos >= 0: + ops.append("unstake") + if "sell" in text: + ops.append("sell") + if "add_lp" in text or ("mint" in text and ("lp" in text or "liquidity" in text)): + ops.append("add_lp") + if "remove_lp" in text or "decreaseliquidity" in text: + ops.append("remove_lp") + return " → ".join(ops) if ops else strategy_name[:60] texts = [] with open(stream_file) as f: @@ -243,11 +276,18 @@ for text in texts: if floor_matches: current["lm_eth_after"] = int(floor_matches[-1].group(1)) - # Capture insights - for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]: - insight_match = re.search(pattern, text) + # Capture insights — prefer explicit labels, then WHY explanations + for ins_pat in [ + r"[Kk]ey [Ii]nsight:\s*(.+)", + r"[Ii]nsight:\s*(.+)", + r"[Ww][Hh][Yy][^:]*:\s*(.{30,})", + r"(?:because|since|due to)\s+(.{30,})", + r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)" + ]: + insight_match = re.search(ins_pat, text) if insight_match and len(insight_match.group(1)) > 20: current["insight"] = insight_match.group(1).strip()[:300] + break # Capture step summaries if any(word in text.lower() for word in ["wrap", "buy", "sell", "stake", "recenter", "mint", "approve"]): @@ -270,10 +310,14 @@ with open(memory_file, "a") as f: else: result = "HELD" + pattern = make_pattern(s["strategy"], s["steps"]) entry = { "run": run_num, "ts": ts, + "candidate": candidate, + "optimizer_profile": optimizer_profile, "strategy": s["strategy"][:100], + "pattern": pattern[:150], "steps": s["steps"][:300].rstrip("; "), "lm_eth_before": lm_eth_before, "lm_eth_after": fa, @@ -282,7 +326,7 @@ with open(memory_file, "a") as f: "insight": s["insight"][:300] } f.write(json.dumps(entry) + "\n") - print(f" Recorded: {entry['strategy']} → {result} ({delta_bps:+d} bps)") + print(f" Recorded: {entry['strategy']} [{entry['candidate']}] → {result} ({delta_bps:+d} bps)") if not strategies: print(" No strategies detected in stream output") @@ -329,6 +373,7 @@ MEMORY_SECTION="" if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF' import json, sys +from collections import defaultdict entries = [] with open(sys.argv[1]) as f: for line in f: @@ -340,17 +385,47 @@ if not entries: print('## Previous Findings (from earlier runs)') print() print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.') +print('Distinguish optimizer-specific vulnerabilities from universal patterns.') print('Try NEW combinations not yet attempted. Combine tools creatively.') print() -for e in entries: - r = e.get('result', '?') - emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' - print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}") - print(f"Steps: {e.get('steps','?')}") - print(f"Delta: {e.get('delta_bps',0)} bps") - if e.get('insight'): - print(f"**Insight:** {e['insight']}") + +# Cross-candidate: patterns that DECREASED in multiple distinct candidates +decreased = [e for e in entries if e.get('result') == 'DECREASED'] +cross = defaultdict(set) +for e in decreased: + key = e.get('pattern') or e.get('strategy', '') + cross[key].add(e.get('candidate', 'unknown')) +universal = [(p, cands) for p, cands in cross.items() if len(cands) > 1] +if universal: + print('### Universal Patterns (succeeded across multiple candidates)') + for pat, cands in universal: + print(f"- **{pat}** — worked on: {', '.join(sorted(cands))}") print() + +# Group remaining entries by candidate +by_candidate = defaultdict(list) +for e in entries: + by_candidate[e.get('candidate', 'unknown')].append(e) + +for cand, cand_entries in sorted(by_candidate.items()): + prof = next((e.get('optimizer_profile', '') for e in cand_entries + if e.get('optimizer_profile', '') not in ('', 'unknown')), '') + print(f"### Candidate: {cand}") + if prof: + print(f"Profile: {prof}") + print() + for e in cand_entries: + r = e.get('result', '?') + emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' + pat = e.get('pattern', '') + print(f"#### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}") + if pat: + print(f"Pattern: `{pat}`") + print(f"Steps: {e.get('steps','?')}") + print(f"Delta: {e.get('delta_bps',0)} bps") + if e.get('insight'): + print(f"**Insight:** {e['insight']}") + print() PYEOF ) fi @@ -371,6 +446,21 @@ The metric is simple: if LM total ETH goes down, you win. --- +## Current Attack Target + +| Field | Value | +|-------|-------| +| Candidate | ${CANDIDATE_NAME} | +| Optimizer Profile | ${OPTIMIZER_PROFILE} | + +Use the optimizer profile to reason about this candidate's behavior: +- **CI** (concentration index %): higher → optimizer recenters more aggressively → more KRK minting opportunities +- **AW** (anchorWidth ticks): wider → liquidity spread over larger price range → less ETH per tick +- **AS** (anchorShare %): higher → more ETH locked in anchor position → different rebalancing behavior +- **DD** (discoveryDepth %): higher → more ETH in discovery position (above-price) → price-sensitive exposure + +--- + ## Contract addresses (local Anvil) | Contract | Address | @@ -649,6 +739,11 @@ SNAP=\$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://local 6. If Previous Findings are provided, DO NOT repeat those strategies. Use their insights to design new approaches. 7. Prioritize untried COMBINATIONS: staking + LP, staking + recenter timing, LP + multi-step swaps, etc. 8. Start executing immediately. No lengthy planning — act, measure, iterate. +9. For EVERY strategy attempted, record: + - **Pattern**: abstract op sequence (e.g., "buy → stake_all → recenter_multi → unstake → sell") + - **Insight**: WHY this worked or failed, referencing the optimizer profile (${OPTIMIZER_PROFILE}). + For HELD/INCREASED: which mechanism defended the floor? How did CI/AW/AS/DD cause it? + For DECREASED: which parameter combination created the vulnerability? Is it universal or optimizer-specific? --- @@ -661,12 +756,16 @@ After trying all strategies, output a clearly structured report: \`\`\` === RED-TEAM REPORT === +Candidate: ${CANDIDATE_NAME} +Optimizer Profile: ${OPTIMIZER_PROFILE} lm_eth_before: wei (total: free + positions) STRATEGY 1: + Pattern: Steps: lm_eth_after: wei Result: ETH_EXTRACTED / ETH_SAFE / ETH_GAINED + Insight: STRATEGY 2: ... ... @@ -674,6 +773,7 @@ STRATEGY 2: ... === CONCLUSION === ETH extracted: YES / NO Winning strategy: +Universal pattern: lm_eth_before: ${LM_ETH_BEFORE} wei lm_eth_after: wei \`\`\` From 7950608179ede8688a99dac62e0c4f516d811503 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 15 Mar 2026 15:54:01 +0000 Subject: [PATCH 2/2] fix: address review findings for red-team memory tracking (#820) - make_pattern: replace text.find('stake')/find('unstake') with re.search(r'\bstake\b')/re.search(r'\bunstake\b') so 'stake' is never found as a substring of 'unstake' (bug #1) - make_pattern: track first-occurrence position of each op and sort by position before building the sequence string, preserving actual execution order instead of a hardcoded canonical order (bug #2) - insight capture: track insight_pri on the current dict; only overwrite stored insight when new match has strictly higher priority (lower index), preventing a late 'because...' clause from silently replacing an earlier 'Key Insight:' capture (warning #3) - run_num: compute max(run)+1 from JSON entries instead of wc -l so run numbers stay monotonically increasing after memory trim (info #4) - red-team-sweep.sh: also set adaptive flag when any r37-r40 register has a variable-form assignment (r40 = uint256(someVar)), catching candidates where only one branch uses constants (warning #5) - red-team-sweep.sh: remove unnecessary 'import sys as _sys' in except block; sys is already in scope (nit #6) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 7 +- scripts/harb-evaluator/red-team.sh | 90 ++++++++++++++++-------- 2 files changed, 65 insertions(+), 32 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index c0b3247..339b18f 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -84,12 +84,13 @@ try: return '/'.join(str(v) for v in ints) if ints else '?' profile = f"CI={fmt_pct(ci_vals)}, AW={fmt_int(aw_vals)}, AS={fmt_pct(as_vals)}, DD={fmt_pct(dd_vals)}" - if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1: + # Adaptive: multiple constant branches, OR any register assigned from a variable + has_var_assign = bool(re.search(r'\br(?:37|38|39|40)\s*=\s*uint256\s*\(\s*[a-zA-Z_]\w*\s*\)', sol)) + if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1 or has_var_assign: profile += ", adaptive" print(profile) except Exception as e: - import sys as _sys - print(f"unknown (parse error: {e})", file=_sys.stderr) + print(f"unknown (parse error: {e})", file=sys.stderr) print("unknown") PYEOF ) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index a017d7a..ff581bf 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -189,10 +189,15 @@ extract_memory() { local stream_file="$1" local run_num memory_file="$MEMORY_FILE" - # Determine run number: one entry per line in JSONL, so next run = line_count + 1 + # Determine run number: use max run in file + 1 so it stays monotonic after trim if [[ -f "$memory_file" ]]; then - run_num=$(wc -l < "$memory_file") - run_num=$((run_num + 1)) + run_num=$(python3 - "$memory_file" <<'EOF' +import json, sys +entries = [json.loads(l) for l in open(sys.argv[1]) if l.strip()] +print(max((e.get('run', 0) for e in entries), default=0) + 1) +EOF +) + [[ "$run_num" =~ ^[0-9]+$ ]] || run_num=1 else run_num=1 fi @@ -213,30 +218,53 @@ candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" def make_pattern(strategy_name, steps_text): - """Extract abstract op sequence: buy → stake → recenter → sell.""" + """Extract abstract op sequence preserving execution order.""" text = (strategy_name + " " + steps_text).lower() + op_positions = [] + + for kw, label in [("wrap", "wrap"), ("buy", "buy"), ("sell", "sell")]: + m = re.search(r'\b' + kw + r'\b', text) + if m: + op_positions.append((m.start(), label)) + + # Use word boundaries so 'stake' never matches inside 'unstake' + m_stake = re.search(r'\bstake\b', text) + if m_stake: + ctx = text[max(0, m_stake.start() - 10):m_stake.start() + 20] + op_positions.append((m_stake.start(), "stake_all" if "all" in ctx else "stake")) + + m_unstake = re.search(r'\bunstake\b', text) + if m_unstake: + op_positions.append((m_unstake.start(), "unstake")) + + recenter_matches = list(re.finditer(r'\brecenter\b', text)) + if recenter_matches: + label = "recenter" if len(recenter_matches) == 1 else "recenter_multi" + op_positions.append((recenter_matches[0].start(), label)) + + # add_lp: keyword or mint + LP context + m = re.search(r'\badd_lp\b', text) + if m: + op_positions.append((m.start(), "add_lp")) + elif re.search(r'\bmint\b', text) and ("lp" in text or "liquidity" in text): + m = re.search(r'\bmint\b', text) + op_positions.append((m.start(), "add_lp")) + + # remove_lp: keyword or decreaseliquidity + for pat in [r'\bremove_lp\b', r'\bdecreaseliquidity\b']: + m = re.search(pat, text) + if m: + op_positions.append((m.start(), "remove_lp")) + break + + # Sort by first occurrence position to reflect actual execution order + op_positions.sort(key=lambda x: x[0]) + seen = set() ops = [] - if "wrap" in text: - ops.append("wrap") - if "buy" in text: - ops.append("buy") - stake_pos = text.find("stake") - unstake_pos = text.find("unstake") - if stake_pos >= 0 and (unstake_pos < 0 or stake_pos < unstake_pos): - ops.append("stake_all" if "all" in text[max(0, stake_pos-10):stake_pos+20] else "stake") - recenters = len(re.findall(r"\brecenter\b", text)) - if recenters == 1: - ops.append("recenter") - elif recenters > 1: - ops.append("recenter_multi") - if unstake_pos >= 0: - ops.append("unstake") - if "sell" in text: - ops.append("sell") - if "add_lp" in text or ("mint" in text and ("lp" in text or "liquidity" in text)): - ops.append("add_lp") - if "remove_lp" in text or "decreaseliquidity" in text: - ops.append("remove_lp") + for _, label in op_positions: + if label not in seen: + seen.add(label) + ops.append(label) return " → ".join(ops) if ops else strategy_name[:60] texts = [] @@ -267,7 +295,8 @@ for text in texts: "strategy": strat_match.group(1).strip(), "steps": "", "lm_eth_after": None, - "insight": "" + "insight": "", + "insight_pri": 999 # tracks priority of stored insight; lower index wins } if current: @@ -276,17 +305,20 @@ for text in texts: if floor_matches: current["lm_eth_after"] = int(floor_matches[-1].group(1)) - # Capture insights — prefer explicit labels, then WHY explanations - for ins_pat in [ + # Capture insights — prefer explicit labels; only overwrite if new match is higher priority + for pri, ins_pat in enumerate([ r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"[Ww][Hh][Yy][^:]*:\s*(.{30,})", r"(?:because|since|due to)\s+(.{30,})", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)" - ]: + ]): + if pri >= current["insight_pri"]: + break # already have a higher-priority insight stored insight_match = re.search(ins_pat, text) if insight_match and len(insight_match.group(1)) > 20: current["insight"] = insight_match.group(1).strip()[:300] + current["insight_pri"] = pri break # Capture step summaries