diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index c2ed250..34b6658 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -201,7 +201,7 @@ EOF run_num=1 fi - python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" <<'PYEOF' + python3 - "$stream_file" "$memory_file" "$run_num" "$LM_ETH_BEFORE" "$CANDIDATE_NAME" "$OPTIMIZER_PROFILE" "$CROSS_PATTERNS_FILE" <<'PYEOF' import json, sys, re from datetime import datetime, timezone @@ -215,6 +215,7 @@ except (ValueError, IndexError): sys.exit(0) candidate = sys.argv[5] if len(sys.argv) > 5 else "unknown" optimizer_profile = sys.argv[6] if len(sys.argv) > 6 else "unknown" +cross_file = sys.argv[7] if len(sys.argv) > 7 else None def make_pattern(strategy_name, steps_text): """Extract abstract op sequence preserving execution order.""" @@ -380,6 +381,36 @@ if len(all_entries) > 50: if key not in seen: seen.add(key) deduped.append(e) + + # Export entries that would be dropped to cross-patterns before discarding them + if cross_file: + kept_keys = {(e.get("run"), e.get("ts"), e.get("strategy")) for e in deduped} + dropped = [e for e in all_entries if (e.get("run"), e.get("ts"), e.get("strategy")) not in kept_keys] + if dropped: + existing_cross_keys = set() + try: + with open(cross_file) as cf: + for line in cf: + line = line.strip() + if line: + try: + ce = json.loads(line) + existing_cross_keys.add((ce.get("pattern", ""), ce.get("candidate", ""), ce.get("result", ""))) + except Exception: + pass + except FileNotFoundError: + pass + exported = 0 + with open(cross_file, "a") as cf: + for e in dropped: + key = (e.get("pattern", ""), e.get("candidate", ""), e.get("result", "")) + if key not in existing_cross_keys: + existing_cross_keys.add(key) + cf.write(json.dumps(e) + "\n") + exported += 1 + if exported: + print(f" Pre-trim export: {exported} dropped entr{'y' if exported == 1 else 'ies'} saved to cross-patterns") + with open(memory_file, "w") as f: for e in deduped: f.write(json.dumps(e) + "\n")