diff --git a/scripts/harb-evaluator/red-team.sh b/scripts/harb-evaluator/red-team.sh index fba7f54..bcc4b04 100755 --- a/scripts/harb-evaluator/red-team.sh +++ b/scripts/harb-evaluator/red-team.sh @@ -161,10 +161,10 @@ extract_memory() { local stream_file="$1" local run_num memory_file="$MEMORY_FILE" - # Determine run number (rough: count existing entries) + # Determine run number: one entry per line in JSONL, so next run = line_count + 1 if [[ -f "$memory_file" ]]; then run_num=$(wc -l < "$memory_file") - run_num=$((run_num / 3 + 1)) + run_num=$((run_num + 1)) else run_num=1 fi @@ -176,7 +176,11 @@ from datetime import datetime, timezone stream_file = sys.argv[1] memory_file = sys.argv[2] run_num = int(sys.argv[3]) -floor_before = int(sys.argv[4]) +try: + floor_before = int(sys.argv[4]) +except (ValueError, IndexError): + print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr) + sys.exit(0) texts = [] with open(stream_file) as f: @@ -197,8 +201,8 @@ with open(stream_file) as f: strategies = [] current = None for text in texts: - # Detect strategy headers - strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text) + # Detect strategy headers: matches "## Strategy 1: name" and "STRATEGY 1: name" + strat_match = re.search(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)", text) if strat_match: if current: strategies.append(current) @@ -210,10 +214,10 @@ for text in texts: } if current: - # Capture floor readings - floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE) - if floor_match: - current["floor_after"] = int(floor_match.group(1)) + # Capture floor readings — take the last match in the block (most recent value) + floor_matches = list(re.finditer(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE)) + if floor_matches: + current["floor_after"] = int(floor_matches[-1].group(1)) # Capture insights for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]: @@ -233,8 +237,8 @@ if current: ts = datetime.now(timezone.utc).isoformat() with open(memory_file, "a") as f: for s in strategies: - fa = s.get("floor_after") or floor_before - delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0 + fa = s["floor_after"] if s.get("floor_after") is not None else floor_before + delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0 if fa < floor_before: result = "DECREASED" elif fa > floor_before: @@ -264,14 +268,8 @@ with open(memory_file) as f: all_entries = [json.loads(l) for l in f if l.strip()] if len(all_entries) > 50: - decreased = [e for e in all_entries if e.get("result") == "DECREASED"] - recent = all_entries[-10:] - kept = {id(e): e for e in decreased + recent}.values() - # Preserve insertion order: filter all_entries keeping only kept ids - kept_set = set(id(e) for e in kept) - # Rebuild from original list preserving order + # Keep all DECREASED entries + 10 most recent; deduplicate preserving order trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:] - # Deduplicate preserving order seen = set() deduped = [] for e in trimmed: @@ -296,10 +294,10 @@ log " floor_before = $FLOOR_BEFORE wei/token" # Build Previous Findings section from memory file MEMORY_SECTION="" if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then - MEMORY_SECTION=$(python3 -c " + MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF' import json, sys entries = [] -with open('$MEMORY_FILE') as f: +with open(sys.argv[1]) as f: for line in f: line = line.strip() if line: @@ -312,15 +310,16 @@ print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights. print('Try NEW combinations not yet attempted. Combine tools creatively.') print() for e in entries: - r = e.get('result','?') + r = e.get('result', '?') emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️' - print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\") - print(f\"Steps: {e.get('steps','?')}\") - print(f\"Delta: {e.get('delta_bps',0)} bps\") + print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}") + print(f"Steps: {e.get('steps','?')}") + print(f"Delta: {e.get('delta_bps',0)} bps") if e.get('insight'): - print(f\"**Insight:** {e['insight']}\") + print(f"**Insight:** {e['insight']}") print() -") +PYEOF +) fi PROMPT=$(cat <"$STREAM_LOG" 2>&1 @@ -599,6 +600,12 @@ with open(sys.argv[1]) as f: pass PYEOF +# If the agent crashed and produced no readable output, treat as an infra error +# rather than silently reporting FLOOR HELD (a false pass). +if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then + die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG" +fi + # ── 8. Read floor_after ──────────────────────────────────────────────────────── log "Reading floor after agent run..." FLOOR_AFTER=$(compute_eth_per_token)