From d1f75a790c8f685c266de14c2573af01fe51b58a Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Mar 2026 17:30:47 +0000 Subject: [PATCH 1/2] fix: MEMORY_FILE trim may discard DECREASED entries before 4c extraction (#875) Before trimming MEMORY_FILE to 50 entries, export any entries that would be dropped (non-DECREASED entries outside the last 10) directly to CROSS_PATTERNS_FILE. This ensures no entries are permanently lost before red-team-sweep.sh step 4c reads the memory file. Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team.sh | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index c2ed250..34b6658 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -201,7 +201,7 @@ EOF run_num=1 fi - python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" <<'PYEOF' + python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" "$CROSS_PATTERNS_FILE" <<'PYEOF' import json, sys, re from datetime import datetime, timezone @@ -215,6 +215,7 @@ except (ValueError, IndexError): sys.exit(0) candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" +cross_file = sys.argv[7] if len(sys.argv) > 7 else None def make_pattern(strategy_name, steps_text): """Extract abstract op sequence preserving execution order.""" @@ -380,6 +381,36 @@ if len(all_entries) > 50: if key not in seen: seen.add(key) deduped.append(e) + + # Export entries that would be dropped to cross-patterns before discarding them + if cross_file: + kept_keys = {(e.get("run"), e.get("ts"), e.get("strategy")) for e in deduped} + dropped = [e for e in all_entries if (e.get("run"), e.get("ts"), e.get("strategy")) not in kept_keys] + if dropped: + existing_cross_keys = set() + try: + with open(cross_file) as cf: + for line in cf: + line = line.strip() + if line: + try: + ce = json.loads(line) + existing_cross_keys.add((ce.get("pattern", ""), ce.get("candidate", ""), ce.get("result", ""))) + except Exception: + pass + except FileNotFoundError: + pass + exported = 0 + with open(cross_file, "a") as cf: + for e in dropped: + key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) + if key not in existing_cross_keys: + existing_cross_keys.add(key) + cf.write(json.dumps(e) + "\n") + exported += 1 + if exported: + print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + with open(memory_file, "w") as f: for e in deduped: f.write(json.dumps(e) + "\n") From 8b3fd340ac09b8eea0c603da2bb9a91eea764677 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Mar 2026 17:58:41 +0000 Subject: [PATCH 2/2] fix: MEMORY_FILE trim may discard DECREASED entries before 4c extraction (#875) Address AI reviewer feedback on d1f75a7: - Wrap cross_file append in try/except so a write failure never prevents the memory trim-write from running (bug fix) - Stamp sweep_id on pre-trim exported entries using the SWEEP_ID env var; pass SWEEP_ID from red-team-sweep.sh so entries are attributable to a sweep run (data-consistency fix) - Add inline comment explaining the 3-tuple dedup key (run, ts, strategy) and its relationship to step-4c's identity check (clarity nit) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 2 +- scripts/harb-evaluator/red-team.sh | 29 +++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index 3b23826..a4550b5 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -116,7 +116,7 @@ PYEOF # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack) log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..." set +e - CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \ + CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" SWEEP_ID="$SWEEP_ID" \ timeout "$((TIMEOUT_PER + 120))" \ bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" RED_TEAM_EXIT="${PIPESTATUS[0]}" diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index 34b6658..daedecd 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -202,7 +202,7 @@ EOF fi python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" "$CROSS_PATTERNS_FILE" <<'PYEOF' -import json, sys, re +import json, os, sys, re from datetime import datetime, timezone stream_file = sys.argv[1] @@ -216,6 +216,7 @@ except (ValueError, IndexError): candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" cross_file = sys.argv[7] if len(sys.argv) > 7 else None +sweep_id = os.environ.get("SWEEP_ID", "unknown") def make_pattern(strategy_name, steps_text): """Extract abstract op sequence preserving execution order.""" @@ -377,6 +378,8 @@ if len(all_entries) > 50: seen = set() deduped = [] for e in trimmed: + # 3-tuple key: run+ts uniquely identifies the extract_memory call; strategy + # distinguishes entries within the same call. Matches step-4c's identity check. key = (e.get("run"), e.get("ts"), e.get("strategy")) if key not in seen: seen.add(key) @@ -400,16 +403,20 @@ if len(all_entries) > 50: pass except FileNotFoundError: pass - exported = 0 - with open(cross_file, "a") as cf: - for e in dropped: - key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) - if key not in existing_cross_keys: - existing_cross_keys.add(key) - cf.write(json.dumps(e) + "\n") - exported += 1 - if exported: - print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + try: + exported = 0 + with open(cross_file, "a") as cf: + for e in dropped: + key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) + if key not in existing_cross_keys: + existing_cross_keys.add(key) + e.setdefault("sweep_id", sweep_id) + cf.write(json.dumps(e) + "\n") + exported += 1 + if exported: + print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + except Exception as ex: + print(f" WARNING: pre-trim export failed: {ex}", file=sys.stderr) with open(memory_file, "w") as f: for e in deduped: