diff --git a/scripts/harb-evaluator/red-team-sweep.sh b/scripts/harb-evaluator/red-team-sweep.sh index a41b9e2..d52ec7e 100755 --- a/scripts/harb-evaluator/red-team-sweep.sh +++ b/scripts/harb-evaluator/red-team-sweep.sh @@ -116,7 +116,7 @@ PYEOF # 3. Run red-team.sh (handles bootstrap + compile + deploy + attack) log "Running red-team.sh (timeout: ${TIMEOUT_PER}s)..." set +e - CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" \ + CLAUDE_TIMEOUT="$TIMEOUT_PER" CANDIDATE_NAME="$seed_name" OPTIMIZER_PROFILE="$OPTIMIZER_PROFILE" SWEEP_ID="$SWEEP_ID" \ timeout "$((TIMEOUT_PER + 120))" \ bash "$SCRIPT_DIR/red-team.sh" 2>&1 | tee "/tmp/red-team-${seed_name}.log" RED_TEAM_EXIT="${PIPESTATUS[0]}" diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index c2ed250..daedecd 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -201,8 +201,8 @@ EOF run_num=1 fi - python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" <<'PYEOF' -import json, sys, re + python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" "$CROSS_PATTERNS_FILE" <<'PYEOF' +import json, os, sys, re from datetime import datetime, timezone stream_file = sys.argv[1] @@ -215,6 +215,8 @@ except (ValueError, IndexError): sys.exit(0) candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" +cross_file = sys.argv[7] if len(sys.argv) > 7 else None +sweep_id = os.environ.get("SWEEP_ID", "unknown") def make_pattern(strategy_name, steps_text): """Extract abstract op sequence preserving execution order.""" @@ -376,10 +378,46 @@ if len(all_entries) > 50: seen = set() deduped = [] for e in trimmed: + # 3-tuple key: run+ts uniquely identifies the extract_memory call; strategy + # distinguishes entries within the same call. Matches step-4c's identity check. key = (e.get("run"), e.get("ts"), e.get("strategy")) if key not in seen: seen.add(key) deduped.append(e) + + # Export entries that would be dropped to cross-patterns before discarding them + if cross_file: + kept_keys = {(e.get("run"), e.get("ts"), e.get("strategy")) for e in deduped} + dropped = [e for e in all_entries if (e.get("run"), e.get("ts"), e.get("strategy")) not in kept_keys] + if dropped: + existing_cross_keys = set() + try: + with open(cross_file) as cf: + for line in cf: + line = line.strip() + if line: + try: + ce = json.loads(line) + existing_cross_keys.add((ce.get("pattern", ""), ce.get("candidate", ""), ce.get("result", ""))) + except Exception: + pass + except FileNotFoundError: + pass + try: + exported = 0 + with open(cross_file, "a") as cf: + for e in dropped: + key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) + if key not in existing_cross_keys: + existing_cross_keys.add(key) + e.setdefault("sweep_id", sweep_id) + cf.write(json.dumps(e) + "\n") + exported += 1 + if exported: + print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + except Exception as ex: + print(f" WARNING: pre-trim export failed: {ex}", file=sys.stderr) + with open(memory_file, "w") as f: for e in deduped: f.write(json.dumps(e) + "\n")