fix: address review findings in red-team memory (#528)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-09 10:00:56 +00:00
parent c1db4cb93e
commit 816b211c2b

View file

@@ -161,10 +161,10 @@ extract_memory() {
local stream_file="$1"
local run_num memory_file="$MEMORY_FILE"
# Determine run number (rough: count existing entries)
# Determine run number: one entry per line in JSONL, so next run = line_count + 1
if [[ -f "$memory_file" ]]; then
run_num=$(wc -l < "$memory_file")
run_num=$((run_num / 3 + 1))
run_num=$((run_num + 1))
else
run_num=1
fi
@@ -176,7 +176,11 @@ from datetime import datetime, timezone
stream_file = sys.argv[1]
memory_file = sys.argv[2]
run_num = int(sys.argv[3])
floor_before = int(sys.argv[4])
try:
floor_before = int(sys.argv[4])
except (ValueError, IndexError):
print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr)
sys.exit(0)
texts = []
with open(stream_file) as f:
@@ -197,8 +201,8 @@ with open(stream_file) as f:
strategies = []
current = None
for text in texts:
# Detect strategy headers
strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text)
# Detect strategy headers: matches "## Strategy 1: name" and "STRATEGY 1: name"
strat_match = re.search(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)", text)
if strat_match:
if current:
strategies.append(current)
@@ -210,10 +214,10 @@ for text in texts:
}
if current:
# Capture floor readings
floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE)
if floor_match:
current["floor_after"] = int(floor_match.group(1))
# Capture floor readings — take the last match in the block (most recent value)
floor_matches = list(re.finditer(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE))
if floor_matches:
current["floor_after"] = int(floor_matches[-1].group(1))
# Capture insights
for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]:
@@ -233,8 +237,8 @@ if current:
ts = datetime.now(timezone.utc).isoformat()
with open(memory_file, "a") as f:
for s in strategies:
fa = s.get("floor_after") or floor_before
delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0
fa = s["floor_after"] if s.get("floor_after") is not None else floor_before
delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0
if fa < floor_before:
result = "DECREASED"
elif fa > floor_before:
@@ -264,14 +268,8 @@ with open(memory_file) as f:
all_entries = [json.loads(l) for l in f if l.strip()]
if len(all_entries) > 50:
decreased = [e for e in all_entries if e.get("result") == "DECREASED"]
recent = all_entries[-10:]
kept = {id(e): e for e in decreased + recent}.values()
# Preserve insertion order: filter all_entries keeping only kept ids
kept_set = set(id(e) for e in kept)
# Rebuild from original list preserving order
# Keep all DECREASED entries + 10 most recent; deduplicate preserving order
trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:]
# Deduplicate preserving order
seen = set()
deduped = []
for e in trimmed:
@@ -296,10 +294,10 @@ log " floor_before = $FLOOR_BEFORE wei/token"
# Build Previous Findings section from memory file
MEMORY_SECTION=""
if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then
MEMORY_SECTION=$(python3 -c "
MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF'
import json, sys
entries = []
with open('$MEMORY_FILE') as f:
with open(sys.argv[1]) as f:
for line in f:
line = line.strip()
if line:
@@ -312,15 +310,16 @@ print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.
print('Try NEW combinations not yet attempted. Combine tools creatively.')
print()
for e in entries:
r = e.get('result','?')
r = e.get('result', '?')
emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\")
print(f\"Steps: {e.get('steps','?')}\")
print(f\"Delta: {e.get('delta_bps',0)} bps\")
print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}")
print(f"Steps: {e.get('steps','?')}")
print(f"Delta: {e.get('delta_bps',0)} bps")
if e.get('insight'):
print(f\"**Insight:** {e['insight']}\")
print(f"**Insight:** {e['insight']}")
print()
")
PYEOF
)
fi
PROMPT=$(cat <<PROMPT_EOF
@@ -571,6 +570,8 @@ log "Spawning Claude red-team agent (timeout: ${CLAUDE_TIMEOUT}s)..."
log " Report will be written to: $REPORT"
set +e
# Note: --verbose is required by the claude CLI when --output-format stream-json is used;
# omitting it causes the CLI to exit with an error, producing an empty stream log.
timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
--verbose --output-format stream-json \
"$PROMPT" >"$STREAM_LOG" 2>&1
@@ -599,6 +600,12 @@ with open(sys.argv[1]) as f:
pass
PYEOF
# If the agent crashed and produced no readable output, treat as an infra error
# rather than silently reporting FLOOR HELD (a false pass).
if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then
die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG"
fi
# ── 8. Read floor_after ────────────────────────────────────────────────────────
log "Reading floor after agent run..."
FLOOR_AFTER=$(compute_eth_per_token)