fix: Red-team memory: persistent cross-run learning for adversarial agent (#528)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
28568dbcfd
commit
c1db4cb93e
1 changed files with 192 additions and 2 deletions
|
|
@ -23,6 +23,8 @@ CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
|
|||
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
||||
REPORT_DIR="$REPO_ROOT/tmp"
|
||||
REPORT="$REPORT_DIR/red-team-report.txt"
|
||||
STREAM_LOG="$REPORT_DIR/red-team-stream.jsonl"
|
||||
MEMORY_FILE="$REPORT_DIR/red-team-memory.jsonl"
|
||||
DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json"
|
||||
|
||||
# ── Anvil accounts ─────────────────────────────────────────────────────────────
|
||||
|
|
@ -154,12 +156,173 @@ print(0 if adj <= 0 else (e + w) * 10**18 // adj)
|
|||
PYEOF
|
||||
}
|
||||
|
||||
# ── Helper: extract strategy findings from stream-json and append to memory ────
|
||||
# Usage: extract_memory <stream-json-file>
#
# Parses the agent's stream-json log, finds every "## Strategy N: ..." section
# the agent reported, and appends one JSON line per strategy to $MEMORY_FILE.
#
# Globals read:  MEMORY_FILE (JSONL memory path), FLOOR_BEFORE (integer wei/token)
# Output:        progress lines on stdout
# Returns:       python3's exit status; non-zero if the stream file cannot be
#                opened or FLOOR_BEFORE is not an integer.
extract_memory() {
  local stream_file="$1"
  local memory_file="$MEMORY_FILE"

  # The heredoc delimiter is quoted, so the shell expands nothing inside the
  # Python source; all inputs travel through argv.
  python3 - "$stream_file" "$memory_file" "$FLOOR_BEFORE" <<'PYEOF'
import json
import os
import re
import sys
from datetime import datetime, timezone

MAX_ENTRIES = 50   # hard cap on the memory file after a trim
KEEP_RECENT = 10   # the newest entries always survive a trim
STEP_WORDS = ("wrap", "buy", "sell", "stake", "recenter", "mint", "approve")
STRATEGY_RE = re.compile(r"##\s*Strategy\s*\d+[^:]*:\s*(.+)")
FLOOR_RE = re.compile(
    r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", re.IGNORECASE
)
# Ordered most specific first; the first pattern with a usable match wins.
INSIGHT_PATTERNS = (
    r"[Kk]ey [Ii]nsight:\s*(.+)",
    r"[Ii]nsight:\s*(.+)",
    r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)",
)


def load_jsonl(path, missing_ok=False):
    """Return the JSON objects stored one per line in *path*.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped, so stray stderr output in the stream log or one
    corrupt memory line cannot abort the run.
    """
    try:
        handle = open(path, encoding="utf-8", errors="replace")
    except FileNotFoundError:
        if missing_ok:
            return []
        raise
    records = []
    with handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                record = json.loads(raw)
            except ValueError:
                continue
            if isinstance(record, dict):
                records.append(record)
    return records


def assistant_texts(records):
    """Collect the text blocks of every "assistant" event, in stream order."""
    texts = []
    for record in records:
        if record.get("type") != "assistant":
            continue
        message = record.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue
        for block in content:
            if (
                isinstance(block, dict)
                and block.get("type") == "text"
                and isinstance(block.get("text"), str)
            ):
                texts.append(block["text"])
    return texts


def parse_strategies(texts):
    """Group agent text into strategies keyed by their "## Strategy" header.

    NOTE: only the first header inside a single text block opens a new
    strategy; everything up to the next block carrying a header is attributed
    to it.
    """
    strategies = []
    current = None
    for text in texts:
        header = STRATEGY_RE.search(text)
        if header:
            if current:
                strategies.append(current)
            current = {
                "strategy": header.group(1).strip(),
                "steps": "",
                "floor_after": None,
                "insight": "",
            }
        if not current:
            continue

        # The latest floor reading seen for this strategy wins.
        floor = FLOOR_RE.search(text)
        if floor:
            current["floor_after"] = int(floor.group(1))

        for pattern in INSIGHT_PATTERNS:
            found = re.search(pattern, text)
            if found and len(found.group(1)) > 20:
                current["insight"] = found.group(1).strip()[:300]
                break  # do not let a vaguer pattern overwrite this match

        # Short messages that mention an on-chain action are kept as steps.
        lowered = text.lower()
        if len(text) < 200 and any(word in lowered for word in STEP_WORDS):
            current["steps"] += text.strip() + "; "
    if current:
        strategies.append(current)
    return strategies


def classify(floor_after, floor_before):
    """Label the floor movement relative to the pre-run baseline."""
    if floor_after < floor_before:
        return "DECREASED"
    if floor_after > floor_before:
        return "INCREASED"
    return "HELD"


def trim(memory_file, entries):
    """Rewrite the memory file once it exceeds MAX_ENTRIES.

    Keeps every DECREASED entry plus the KEEP_RECENT newest ones in their
    original order, then drops the oldest survivors if that still exceeds the
    cap.
    """
    if len(entries) <= MAX_ENTRIES:
        return
    recent_start = len(entries) - KEEP_RECENT
    kept = [
        entry
        for index, entry in enumerate(entries)
        if index >= recent_start or entry.get("result") == "DECREASED"
    ]
    kept = kept[-MAX_ENTRIES:]
    # Write a sibling temp file and swap it in so an interrupted run cannot
    # leave a half-written memory file behind.
    tmp_path = memory_file + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as out:
        out.writelines(json.dumps(entry) + "\n" for entry in kept)
    os.replace(tmp_path, memory_file)
    print(f" Trimmed memory to {len(kept)} entries")


def main():
    stream_file, memory_file, floor_arg = sys.argv[1:4]
    try:
        floor_before = int(floor_arg)
    except ValueError:
        sys.exit(f"extract_memory: FLOOR_BEFORE is not an integer: {floor_arg!r}")

    strategies = parse_strategies(assistant_texts(load_jsonl(stream_file)))

    existing = load_jsonl(memory_file, missing_ok=True)
    # Derive the run number from stored entries; a line count stops being
    # meaningful after the first trim.
    run_num = 1 + max(
        (e["run"] for e in existing if isinstance(e.get("run"), int)),
        default=0,
    )

    ts = datetime.now(timezone.utc).isoformat()
    fresh = []
    with open(memory_file, "a", encoding="utf-8") as out:
        for s in strategies:
            # No reading captured for this strategy: assume the floor held.
            fa = floor_before if s["floor_after"] is None else s["floor_after"]
            delta_bps = (
                (fa - floor_before) * 10000 // floor_before if floor_before else 0
            )
            result = classify(fa, floor_before)
            entry = {
                "run": run_num,
                "ts": ts,
                "strategy": s["strategy"][:100],
                "steps": s["steps"][:300].rstrip("; "),
                "floor_before": floor_before,
                "floor_after": fa,
                "delta_bps": delta_bps,
                "result": result,
                "insight": s["insight"][:300],
            }
            out.write(json.dumps(entry) + "\n")
            fresh.append(entry)
            print(f" Recorded: {entry['strategy']} → {result} ({delta_bps:+d} bps)")

    if not strategies:
        print(" No strategies detected in stream output")

    trim(memory_file, existing + fresh)


main()
PYEOF
}
|
||||
|
||||
# ── 5. Read floor_before ───────────────────────────────────────────────────────
# Baseline floor reading taken before the agent runs; extract_memory later
# compares each strategy's floor against this value.
log "Reading floor before agent run..."
FLOOR_BEFORE="$(compute_eth_per_token)"
log " floor_before = $FLOOR_BEFORE wei/token"
|
||||
|
||||
# ── 6. Build agent prompt ──────────────────────────────────────────────────────
|
||||
|
||||
# Build Previous Findings section from memory file.
# MEMORY_SECTION stays empty when there is no memory yet; otherwise it holds a
# markdown digest of every stored entry, which is spliced into the agent prompt.
MEMORY_SECTION=""
if [[ -f "$MEMORY_FILE" && -s "$MEMORY_FILE" ]]; then
  # The path is passed through argv and the heredoc delimiter is quoted, so
  # nothing is interpolated into the Python source (a quote or backslash in the
  # path can no longer break or alter the program).
  MEMORY_SECTION=$(python3 - "$MEMORY_FILE" <<'PYEOF'
import json
import sys

entries = []
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except ValueError:
            # One corrupt line must not discard the rest of the memory.
            continue
        if isinstance(entry, dict):
            entries.append(entry)

if not entries:
    sys.exit(0)

print('## Previous Findings (from earlier runs)')
print()
print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.')
print('Try NEW combinations not yet attempted. Combine tools creatively.')
print()
for e in entries:
    r = e.get('result', '?')
    emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
    print(f"### Run {e.get('run', '?')}: {e.get('strategy', '?')} {emoji} {r}")
    print(f"Steps: {e.get('steps', '?')}")
    print(f"Delta: {e.get('delta_bps', 0)} bps")
    if e.get('insight'):
        print(f"**Insight:** {e['insight']}")
    print()
PYEOF
  ) || {
    # Memory is an optional aid: fall back to a prompt without it rather than
    # letting a failed substitution stop the run.
    log "WARNING: could not read $MEMORY_FILE — continuing without previous findings"
    MEMORY_SECTION=""
  }
fi
|
||||
|
||||
PROMPT=$(cat <<PROMPT_EOF
|
||||
# Red-team mission: break the KRAIKEN protocol floor
|
||||
|
||||
|
|
@ -368,9 +531,13 @@ SNAP=\$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://local
|
|||
Remember: \`anvil_revert\` is one-shot. Take a new snapshot immediately after reverting.
|
||||
4. You may chain multiple actions in one strategy (e.g. large buy → recenter → large sell).
|
||||
5. Be methodical. Report every strategy tried even if it failed.
|
||||
6. If Previous Findings are provided, DO NOT repeat those strategies. Use their insights to design new approaches.
|
||||
7. Prioritize untried COMBINATIONS: staking + LP, staking + recenter timing, LP + multi-step swaps, etc.
|
||||
|
||||
---
|
||||
|
||||
${MEMORY_SECTION}
|
||||
|
||||
## Final report format
|
||||
|
||||
After trying all strategies, output a clearly structured report:
|
||||
|
|
@ -405,17 +572,40 @@ log " Report will be written to: $REPORT"
|
|||
|
||||
# Run the agent with errexit suspended so a non-zero exit (timeout(1) reports
# 124 when the time limit is hit) is recorded in AGENT_EXIT instead of aborting
# the script. stdout and stderr both go to the stream log, so that file can
# contain non-JSON lines alongside the stream-json events.
set +e
timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
  --verbose --output-format stream-json \
  "$PROMPT" >"$STREAM_LOG" 2>&1
AGENT_EXIT=$?
set -e

if [[ $AGENT_EXIT -ne 0 ]]; then
  log "WARNING: claude exited with code $AGENT_EXIT — see $STREAM_LOG for details"
fi
|
||||
|
||||
# Extract readable text from stream-json for the report.
# Writes the text blocks of every "assistant" event, back to back and in stream
# order, to $REPORT. Lines that are not JSON, or events that do not have the
# expected shape, are skipped.
python3 - "$STREAM_LOG" >"$REPORT" <<'PYEOF'
import json
import sys

# errors="replace": the log also captures stderr, so it may hold bytes that are
# not valid UTF-8; those must not abort the extraction.
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
            if obj.get("type") == "assistant":
                for block in obj.get("message", {}).get("content", []):
                    if block.get("type") == "text":
                        print(block["text"], end="")
        except (ValueError, AttributeError, KeyError, TypeError):
            # Malformed or unexpectedly shaped event: skip it, but let
            # KeyboardInterrupt/SystemExit and real I/O errors propagate.
            continue
PYEOF
|
||||
|
||||
# ── 8. Read floor_after ────────────────────────────────────────────────────────
log "Reading floor after agent run..."
FLOOR_AFTER=$(compute_eth_per_token)
log " floor_after = $FLOOR_AFTER wei/token"

# ── 8a. Extract and persist strategy findings ──────────────────────────────────
# Persisting findings is auxiliary: if it fails, warn and carry on so the
# results summary below is still produced.
log "Extracting strategy findings from agent output..."
extract_memory "$STREAM_LOG" \
  || log "WARNING: could not update $MEMORY_FILE — continuing without new findings"
|
||||
|
||||
# ── 9. Summarise results ───────────────────────────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue