fix: address review findings in red-team memory (#528)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-09 10:00:56 +00:00 · 2026-03-09 10:00:56 +00:00 · 816b211c2b
commit 816b211c2b
parent c1db4cb93e
1 changed file with 33 additions and 26 deletions
--- a/scripts/harb-evaluator/red-team.sh
+++ b/scripts/harb-evaluator/red-team.sh
@ -161,10 +161,10 @@ extract_memory() {
  local stream_file="$1"
  local run_num memory_file="$MEMORY_FILE"

-  # Determine run number (rough: count existing entries)
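+  # Determine run number: the memory file is JSONL (one entry per line), so next run = line_count + 1. NOTE(review): this counts entries, not runs; numbering can skip if a run appends several entries — confirm this is acceptable.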
  if [[ -f "$memory_file" ]]; then
    run_num=$(wc -l < "$memory_file")
-    run_num=$((run_num / 3 + 1))
+    run_num=$((run_num + 1))
  else
    run_num=1
  fi
@ -176,7 +176,11 @@ from datetime import datetime, timezone
 stream_file = sys.argv[1]
 memory_file = sys.argv[2]
 run_num = int(sys.argv[3])
-floor_before = int(sys.argv[4])
+try:
+    floor_before = int(sys.argv[4])
+except (ValueError, IndexError):
+    print("  extract_memory: invalid floor_before value, skipping", file=sys.stderr)
+    sys.exit(0)

 texts = []
 with open(stream_file) as f:
@ -197,8 +201,8 @@ with open(stream_file) as f:
 strategies = []
 current = None
 for text in texts:
-    # Detect strategy headers
-    strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text)
+    # Detect strategy headers: optional "##" prefix, any letter case, e.g. "## Strategy 1: name" or "STRATEGY 1: name"
+    strat_match = re.search(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)", text)
    if strat_match:
        if current:
            strategies.append(current)
@ -210,10 +214,10 @@ for text in texts:
        }

    if current:
-        # Capture floor readings
-        floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE)
-        if floor_match:
-            current["floor_after"] = int(floor_match.group(1))
+        # Capture floor readings — take the last match in the block (most recent value)
+        floor_matches = list(re.finditer(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE))
+        if floor_matches:
+            current["floor_after"] = int(floor_matches[-1].group(1))

        # Capture insights
        for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]:
@ -233,8 +237,8 @@ if current:
 ts = datetime.now(timezone.utc).isoformat()
 with open(memory_file, "a") as f:
    for s in strategies:
-        fa = s.get("floor_after") or floor_before
-        delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0
+        fa = s["floor_after"] if s.get("floor_after") is not None else floor_before
+        delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0
        if fa < floor_before:
            result = "DECREASED"
        elif fa > floor_before:
@ -264,14 +268,8 @@ with open(memory_file) as f:
    all_entries = [json.loads(l) for l in f if l.strip()]

 if len(all_entries) > 50:
-    decreased = [e for e in all_entries if e.get("result") == "DECREASED"]
-    recent = all_entries[-10:]
-    kept = {id(e): e for e in decreased + recent}.values()
-    # Preserve insertion order: filter all_entries keeping only kept ids
-    kept_set = set(id(e) for e in kept)
-    # Rebuild from original list preserving order
+    # Keep all DECREASED entries + 10 most recent; deduplicate preserving order
    trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:]
-    # Deduplicate preserving order
    seen = set()
    deduped = []
    for e in trimmed:
@ -296,10 +294,10 @@ log "  floor_before = $FLOOR_BEFORE wei/token"
 # Build Previous Findings section from memory file
 MEMORY_SECTION=""
 if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then
-  MEMORY_SECTION=$(python3 -c "
+  MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF'
 import json, sys
 entries = []
-with open('$MEMORY_FILE') as f:
+with open(sys.argv[1]) as f:
    for line in f:
        line = line.strip()
        if line:
@ -312,15 +310,16 @@ print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.
 print('Try NEW combinations not yet attempted. Combine tools creatively.')
 print()
 for e in entries:
-    r = e.get('result','?')
+    r = e.get('result', '?')
    emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
-    print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\")
-    print(f\"Steps: {e.get('steps','?')}\")
-    print(f\"Delta: {e.get('delta_bps',0)} bps\")
+    print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}")
+    print(f"Steps: {e.get('steps','?')}")
+    print(f"Delta: {e.get('delta_bps',0)} bps")
    if e.get('insight'):
-        print(f\"**Insight:** {e['insight']}\")
+        print(f"**Insight:** {e['insight']}")
    print()
-")
+PYEOF
+)
 fi

 PROMPT=$(cat <<PROMPT_EOF
@ -571,6 +570,8 @@ log "Spawning Claude red-team agent (timeout: ${CLAUDE_TIMEOUT}s)..."
 log "  Report will be written to: $REPORT"

 set +e
+# Note: --verbose is required by the claude CLI when --output-format stream-json is used;
+# omitting it causes the CLI to exit with an error, producing an empty stream log.
 timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
  --verbose --output-format stream-json \
  "$PROMPT" >"$STREAM_LOG" 2>&1
@ -599,6 +600,12 @@ with open(sys.argv[1]) as f:
            pass
 PYEOF

+# If the agent crashed and produced no readable output, treat as an infra error
+# rather than silently reporting FLOOR HELD (a false pass).
+if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then
+  die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG"
+fi
+
 # ── 8. Read floor_after ────────────────────────────────────────────────────────
 log "Reading floor after agent run..."
 FLOOR_AFTER=$(compute_eth_per_token)