From 7950608179ede8688a99dac62e0c4f516d811503 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 15 Mar 2026 15:54:01 +0000 Subject: [PATCH] fix: address review findings for red-team memory tracking (#820) - make_pattern: replace text.find('stake')/find('unstake') with re.search(r'\bstake\b')/re.search(r'\bunstake\b') so 'stake' is never found as a substring of 'unstake' (bug #1) - make_pattern: track first-occurrence position of each op and sort by position before building the sequence string, preserving actual execution order instead of a hardcoded canonical order (bug #2) - insight capture: track insight_pri on the current dict; only overwrite stored insight when new match has strictly higher priority (lower index), preventing a late 'because...' clause from silently replacing an earlier 'Key Insight:' capture (warning #3) - run_num: compute max(run)+1 from JSON entries instead of wc -l so run numbers stay monotonically increasing after memory trim (info #4) - red-team-sweep.sh: also set adaptive flag when any r37-r40 register has a variable-form assignment (r40 = uint256(someVar)), catching candidates where only one branch uses constants (warning #5) - red-team-sweep.sh: remove unnecessary 'import sys as _sys' in except block; sys is already in scope (nit #6) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 7 +- scripts/harb-evaluator/red-team.sh | 90 ++++++++++++++++-------- 2 files changed, 65 insertions(+), 32 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index c0b3247..339b18f 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -84,12 +84,13 @@ try: return '/'.join(str(v) for v in ints) if ints else '?' profile = f"CI={fmt_pct(ci_vals)}, AW={fmt_int(aw_vals)}, AS={fmt_pct(as_vals)}, DD={fmt_pct(dd_vals)}" - if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1: + # Adaptive: multiple constant branches, OR any register assigned from a variable + has_var_assign = bool(re.search(r'\br(?:37|38|39|40)\s*=\s*uint256\s*\(\s*[a-zA-Z_]\w*\s*\)', sol)) + if len(ci_vals) > 1 or len(aw_vals) > 1 or len(as_vals) > 1 or len(dd_vals) > 1 or has_var_assign: profile += ", adaptive" print(profile) except Exception as e: - import sys as _sys - print(f"unknown (parse error: {e})", file=_sys.stderr) + print(f"unknown (parse error: {e})", file=sys.stderr) print("unknown") PYEOF ) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index a017d7a..ff581bf 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -189,10 +189,15 @@ extract_memory() { local stream_file="$1" local run_num memory_file="$MEMORY_FILE" - # Determine run number: one entry per line in JSONL, so next run = line_count + 1 + # Determine run number: use max run in file + 1 so it stays monotonic after trim if [[ -f "$memory_file" ]]; then - run_num=$(wc -l < "$memory_file") - run_num=$((run_num + 1)) + run_num=$(python3 - "$memory_file" <<'EOF' +import json, sys +entries = [json.loads(l) for l in open(sys.argv[1]) if l.strip()] +print(max((e.get('run', 0) for e in entries), default=0) + 1) +EOF +) + [[ "$run_num" =~ ^[0-9]+$ ]] || run_num=1 else run_num=1 fi @@ -213,30 +218,53 @@ candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" def make_pattern(strategy_name, steps_text): - """Extract abstract op sequence: buy → stake → recenter → sell.""" + """Extract abstract op sequence preserving execution order.""" text = (strategy_name + " " + steps_text).lower() + op_positions = [] + + for kw, label in [("wrap", "wrap"), ("buy", "buy"), ("sell", "sell")]: + m = re.search(r'\b' + kw + r'\b', text) + if m: + op_positions.append((m.start(), label)) + + # Use word boundaries so 'stake' never matches inside 'unstake' + m_stake = re.search(r'\bstake\b', text) + if m_stake: + ctx = text[max(0, m_stake.start() - 10):m_stake.start() + 20] + op_positions.append((m_stake.start(), "stake_all" if "all" in ctx else "stake")) + + m_unstake = re.search(r'\bunstake\b', text) + if m_unstake: + op_positions.append((m_unstake.start(), "unstake")) + + recenter_matches = list(re.finditer(r'\brecenter\b', text)) + if recenter_matches: + label = "recenter" if len(recenter_matches) == 1 else "recenter_multi" + op_positions.append((recenter_matches[0].start(), label)) + + # add_lp: keyword or mint + LP context + m = re.search(r'\badd_lp\b', text) + if m: + op_positions.append((m.start(), "add_lp")) + elif re.search(r'\bmint\b', text) and ("lp" in text or "liquidity" in text): + m = re.search(r'\bmint\b', text) + op_positions.append((m.start(), "add_lp")) + + # remove_lp: keyword or decreaseliquidity + for pat in [r'\bremove_lp\b', r'\bdecreaseliquidity\b']: + m = re.search(pat, text) + if m: + op_positions.append((m.start(), "remove_lp")) + break + + # Sort by first occurrence position to reflect actual execution order + op_positions.sort(key=lambda x: x[0]) + seen = set() ops = [] - if "wrap" in text: - ops.append("wrap") - if "buy" in text: - ops.append("buy") - stake_pos = text.find("stake") - unstake_pos = text.find("unstake") - if stake_pos >= 0 and (unstake_pos < 0 or stake_pos < unstake_pos): - ops.append("stake_all" if "all" in text[max(0, stake_pos-10):stake_pos+20] else "stake") - recenters = len(re.findall(r"\brecenter\b", text)) - if recenters == 1: - ops.append("recenter") - elif recenters > 1: - ops.append("recenter_multi") - if unstake_pos >= 0: - ops.append("unstake") - if "sell" in text: - ops.append("sell") - if "add_lp" in text or ("mint" in text and ("lp" in text or "liquidity" in text)): - ops.append("add_lp") - if "remove_lp" in text or "decreaseliquidity" in text: - ops.append("remove_lp") + for _, label in op_positions: + if label not in seen: + seen.add(label) + ops.append(label) return " → ".join(ops) if ops else strategy_name[:60] texts = [] @@ -267,7 +295,8 @@ for text in texts: "strategy": strat_match.group(1).strip(), "steps": "", "lm_eth_after": None, - "insight": "" + "insight": "", + "insight_pri": 999 # tracks priority of stored insight; lower index wins } if current: @@ -276,17 +305,20 @@ for text in texts: if floor_matches: current["lm_eth_after"] = int(floor_matches[-1].group(1)) - # Capture insights — prefer explicit labels, then WHY explanations - for ins_pat in [ + # Capture insights — prefer explicit labels; only overwrite if new match is higher priority + for pri, ins_pat in enumerate([ r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"[Ww][Hh][Yy][^:]*:\s*(.{30,})", r"(?:because|since|due to)\s+(.{30,})", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)" - ]: + ]): + if pri >= current["insight_pri"]: + break # already have a higher-priority insight stored insight_match = re.search(ins_pat, text) if insight_match and len(insight_match.group(1)) > 20: current["insight"] = insight_match.group(1).strip()[:300] + current["insight_pri"] = pri break # Capture step summaries