From 8b3fd340ac09b8eea0c603da2bb9a91eea764677 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Mar 2026 17:58:41 +0000 Subject: [PATCH] fix: MEMORY_FILE trim may discard DECREASED entries before 4c extraction (#875) Address AI reviewer feedback on d1f75a7: - Wrap cross_file append in try/except so a write failure never prevents the memory trim-write from running (bug fix) - Stamp sweep_id on pre-trim exported entries using the SWEEP_ID env var; pass SWEEP_ID from red-team-sweep.sh so entries are attributable to a sweep run (data-consistency fix) - Add inline comment explaining the 3-tuple dedup key (run, ts, strategy) and its relationship to step-4c's identity check (clarity nit) Co-Authored-By: Claude Sonnet 4.6 --- scripts/harb-evaluator/red-team-sweep.sh | 2 +- scripts/harb-evaluator/red-team.sh | 29 +++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index 3b23826..a4550b5 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -116,7 +116,7 @@ PYEOF # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack) log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..." set +e - CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \ + CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" SWEEP_ID="$SWEEP_ID" \ timeout "$((TIMEOUT_PER + 120))" \ bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" RED_TEAM_EXIT="${PIPESTATUS[0]}" diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index 34b6658..daedecd 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -202,7 +202,7 @@ EOF fi python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" "$CROSS_PATTERNS_FILE" <<'PYEOF' -import json, sys, re +import json, os, sys, re from datetime import datetime, timezone stream_file = sys.argv[1] @@ -216,6 +216,7 @@ except (ValueError, IndexError): candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" cross_file = sys.argv[7] if len(sys.argv) > 7 else None +sweep_id = os.environ.get("SWEEP_ID", "unknown") def make_pattern(strategy_name, steps_text): """Extract abstract op sequence preserving execution order.""" @@ -377,6 +378,8 @@ if len(all_entries) > 50: seen = set() deduped = [] for e in trimmed: + # 3-tuple key: run+ts uniquely identifies the extract_memory call; strategy + # distinguishes entries within the same call. Matches step-4c's identity check. key = (e.get("run"), e.get("ts"), e.get("strategy")) if key not in seen: seen.add(key) @@ -400,16 +403,20 @@ if len(all_entries) > 50: pass except FileNotFoundError: pass - exported = 0 - with open(cross_file, "a") as cf: - for e in dropped: - key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) - if key not in existing_cross_keys: - existing_cross_keys.add(key) - cf.write(json.dumps(e) + "\n") - exported += 1 - if exported: - print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + try: + exported = 0 + with open(cross_file, "a") as cf: + for e in dropped: + key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) + if key not in existing_cross_keys: + existing_cross_keys.add(key) + e.setdefault("sweep_id", sweep_id) + cf.write(json.dumps(e) + "\n") + exported += 1 + if exported: + print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + except Exception as ex: + print(f" WARNING: pre-trim export failed: {ex}", file=sys.stderr) with open(memory_file, "w") as f: for e in deduped: