fix: address review findings in red-team memory (#528)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c1db4cb93e
commit
816b211c2b
1 changed file with 33 additions and 26 deletions
|
|
@@ -161,10 +161,10 @@ extract_memory() {
|
|||
local stream_file="$1"
|
||||
local run_num memory_file="$MEMORY_FILE"
|
||||
|
||||
# Determine run number (rough: count existing entries)
|
||||
# Determine run number: one entry per line in JSONL, so next run = line_count + 1
|
||||
if [[ -f "$memory_file" ]]; then
|
||||
run_num=$(wc -l < "$memory_file")
|
||||
run_num=$((run_num / 3 + 1))
|
||||
run_num=$((run_num + 1))
|
||||
else
|
||||
run_num=1
|
||||
fi
|
||||
|
|
@@ -176,7 +176,11 @@ from datetime import datetime, timezone
|
|||
stream_file = sys.argv[1]
|
||||
memory_file = sys.argv[2]
|
||||
run_num = int(sys.argv[3])
|
||||
floor_before = int(sys.argv[4])
|
||||
try:
|
||||
floor_before = int(sys.argv[4])
|
||||
except (ValueError, IndexError):
|
||||
print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr)
|
||||
sys.exit(0)
|
||||
|
||||
texts = []
|
||||
with open(stream_file) as f:
|
||||
|
|
@@ -197,8 +201,8 @@ with open(stream_file) as f:
|
|||
strategies = []
|
||||
current = None
|
||||
for text in texts:
|
||||
# Detect strategy headers
|
||||
strat_match = re.search(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)", text)
|
||||
# Detect strategy headers: matches "## Strategy 1: name" and "STRATEGY 1: name"
|
||||
strat_match = re.search(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)", text)
|
||||
if strat_match:
|
||||
if current:
|
||||
strategies.append(current)
|
||||
|
|
@@ -210,10 +214,10 @@ for text in texts:
|
|||
}
|
||||
|
||||
if current:
|
||||
# Capture floor readings
|
||||
floor_match = re.search(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE)
|
||||
if floor_match:
|
||||
current["floor_after"] = int(floor_match.group(1))
|
||||
# Capture floor readings — take the last match in the block (most recent value)
|
||||
floor_matches = list(re.finditer(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", text, re.IGNORECASE))
|
||||
if floor_matches:
|
||||
current["floor_after"] = int(floor_matches[-1].group(1))
|
||||
|
||||
# Capture insights
|
||||
for pattern in [r"[Kk]ey [Ii]nsight:\s*(.+)", r"[Ii]nsight:\s*(.+)", r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)"]:
|
||||
|
|
@@ -233,8 +237,8 @@ if current:
|
|||
ts = datetime.now(timezone.utc).isoformat()
|
||||
with open(memory_file, "a") as f:
|
||||
for s in strategies:
|
||||
fa = s.get("floor_after") or floor_before
|
||||
delta_bps = (fa - floor_before) * 10000 // floor_before if floor_before else 0
|
||||
fa = s["floor_after"] if s.get("floor_after") is not None else floor_before
|
||||
delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0
|
||||
if fa < floor_before:
|
||||
result = "DECREASED"
|
||||
elif fa > floor_before:
|
||||
|
|
@@ -264,14 +268,8 @@ with open(memory_file) as f:
|
|||
all_entries = [json.loads(l) for l in f if l.strip()]
|
||||
|
||||
if len(all_entries) > 50:
|
||||
decreased = [e for e in all_entries if e.get("result") == "DECREASED"]
|
||||
recent = all_entries[-10:]
|
||||
kept = {id(e): e for e in decreased + recent}.values()
|
||||
# Preserve insertion order: filter all_entries keeping only kept ids
|
||||
kept_set = set(id(e) for e in kept)
|
||||
# Rebuild from original list preserving order
|
||||
# Keep all DECREASED entries + 10 most recent; deduplicate preserving order
|
||||
trimmed = [e for e in all_entries if e.get("result") == "DECREASED"] + all_entries[-10:]
|
||||
# Deduplicate preserving order
|
||||
seen = set()
|
||||
deduped = []
|
||||
for e in trimmed:
|
||||
|
|
@@ -296,10 +294,10 @@ log " floor_before = $FLOOR_BEFORE wei/token"
|
|||
# Build Previous Findings section from memory file
|
||||
MEMORY_SECTION=""
|
||||
if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then
|
||||
MEMORY_SECTION=$(python3 -c "
|
||||
MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF'
|
||||
import json, sys
|
||||
entries = []
|
||||
with open('$MEMORY_FILE') as f:
|
||||
with open(sys.argv[1]) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
|
|
@@ -312,15 +310,16 @@ print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.
|
|||
print('Try NEW combinations not yet attempted. Combine tools creatively.')
|
||||
print()
|
||||
for e in entries:
|
||||
r = e.get('result','?')
|
||||
r = e.get('result', '?')
|
||||
emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
|
||||
print(f\"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}\")
|
||||
print(f\"Steps: {e.get('steps','?')}\")
|
||||
print(f\"Delta: {e.get('delta_bps',0)} bps\")
|
||||
print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}")
|
||||
print(f"Steps: {e.get('steps','?')}")
|
||||
print(f"Delta: {e.get('delta_bps',0)} bps")
|
||||
if e.get('insight'):
|
||||
print(f\"**Insight:** {e['insight']}\")
|
||||
print(f"**Insight:** {e['insight']}")
|
||||
print()
|
||||
")
|
||||
PYEOF
|
||||
)
|
||||
fi
|
||||
|
||||
PROMPT=$(cat <<PROMPT_EOF
|
||||
|
|
@@ -571,6 +570,8 @@ log "Spawning Claude red-team agent (timeout: ${CLAUDE_TIMEOUT}s)..."
|
|||
log " Report will be written to: $REPORT"
|
||||
|
||||
set +e
|
||||
# Note: --verbose is required by the claude CLI when --output-format stream-json is used;
|
||||
# omitting it causes the CLI to exit with an error, producing an empty stream log.
|
||||
timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
|
||||
--verbose --output-format stream-json \
|
||||
"$PROMPT" >"$STREAM_LOG" 2>&1
|
||||
|
|
@@ -599,6 +600,12 @@ with open(sys.argv[1]) as f:
|
|||
pass
|
||||
PYEOF
|
||||
|
||||
# If the agent crashed and produced no readable output, treat as an infra error
|
||||
# rather than silently reporting FLOOR HELD (a false pass).
|
||||
if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then
|
||||
die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG"
|
||||
fi
|
||||
|
||||
# ── 8. Read floor_after ────────────────────────────────────────────────────────
|
||||
log "Reading floor after agent run..."
|
||||
FLOOR_AFTER=$(compute_eth_per_token)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue