Merge pull request 'fix: Red-team memory: persistent cross-run learning for adversarial agent (#528)' (#529) from fix/issue-528 into master

This commit is contained in:
johba 2026-03-09 11:33:52 +01:00
commit 13d40222b6

View file

@ -23,6 +23,8 @@ CLAUDE_TIMEOUT="${CLAUDE_TIMEOUT:-7200}"
# Repository root, resolved as two directories above the directory of $0.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
# Directory that holds the files below.
REPORT_DIR="$REPO_ROOT/tmp"
# Plain-text agent output, extracted from the stream log after the run.
REPORT="$REPORT_DIR/red-team-report.txt"
# Raw output of the claude CLI (stdout and stderr) in stream-json format.
STREAM_LOG="$REPORT_DIR/red-team-stream.jsonl"
# JSONL file of strategy findings; appended to by extract_memory and read back
# when the agent prompt is built.
MEMORY_FILE="$REPORT_DIR/red-team-memory.jsonl"
# NOTE(review): presumably the local deployment addresses consumed elsewhere in
# the script; it is not referenced in the section visible here — confirm.
DEPLOYMENTS="$REPO_ROOT/onchain/deployments-local.json"
# ── Anvil accounts ─────────────────────────────────────────────────────────────
@ -154,12 +156,172 @@ print(0 if adj <= 0 else (e + w) * 10**18 // adj)
PYEOF
}
# ── Helper: extract strategy findings from stream-json and append to memory ────
#
# Parses the agent's stream-json log, detects "Strategy N: <name>" headers in
# the assistant text and appends one JSONL entry per detected strategy to the
# memory file. When the file grows past 50 entries it is trimmed to the 10 most
# recent entries plus every entry whose result is DECREASED.
#
# Globals:   MEMORY_FILE (read; the file is appended to and may be rewritten)
#            FLOOR_BEFORE (read; baseline used for delta_bps / result)
# Arguments: $1 - path to the stream-json log
# Outputs:   progress lines on stdout, diagnostics on stderr
# Returns:   exit status of the embedded Python script; 0 (nothing recorded)
#            when FLOOR_BEFORE is not an integer
extract_memory() {
  local stream_file="$1"
  local memory_file="$MEMORY_FILE"

  # PYTHONIOENCODING keeps the non-ASCII progress output from raising
  # UnicodeEncodeError when the script runs under a C/POSIX locale.
  PYTHONIOENCODING=utf-8 python3 - "$stream_file" "$memory_file" "${FLOOR_BEFORE:-}" <<'PYEOF'
import json, os, re, sys
from datetime import datetime, timezone

stream_file, memory_file = sys.argv[1], sys.argv[2]
try:
    floor_before = int(sys.argv[3])
except (ValueError, IndexError):
    print(" extract_memory: invalid floor_before value, skipping", file=sys.stderr)
    sys.exit(0)


def load_entries(path):
    """Return the JSON objects stored in the JSONL file; skip unparsable lines."""
    entries = []
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    item = json.loads(raw)
                except ValueError:
                    continue
                if isinstance(item, dict):
                    entries.append(item)
    except FileNotFoundError:
        pass
    return entries


def assistant_texts(path):
    """Return the text blocks of all assistant messages in the stream log."""
    texts = []
    with open(path, encoding="utf-8", errors="replace") as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                continue
            try:
                obj = json.loads(raw)
            except ValueError:
                # The CLI's stderr is merged into the log, so non-JSON lines are expected.
                continue
            if not isinstance(obj, dict) or obj.get("type") != "assistant":
                continue
            message = obj.get("message")
            content = message.get("content") if isinstance(message, dict) else None
            if not isinstance(content, list):
                continue
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    if isinstance(block.get("text"), str):
                        texts.append(block["text"])
    return texts


# Next run number = highest recorded run + 1. Counting lines is not reliable:
# a run writes one line per strategy and trimming removes lines.
run_num = 1 + max(
    (e["run"] for e in load_entries(memory_file) if isinstance(e.get("run"), int)),
    default=0,
)

# Matches "## Strategy 1: name" and "STRATEGY 1: name"
STRATEGY_RE = re.compile(r"(?:##\s*)?[Ss][Tt][Rr][Aa][Tt][Ee][Gg][Yy]\s*\d+[^:]*:\s*(.+)")
FLOOR_RE = re.compile(r"(?:floor|ethPerToken)[^\d]*?(\d{4,})\s*(?:wei)?", re.IGNORECASE)
# Ordered from most to least specific; the first usable match wins.
INSIGHT_PATTERNS = [
    r"[Kk]ey [Ii]nsight:\s*(.+)",
    r"[Ii]nsight:\s*(.+)",
    r"(?:discovered|learned|realized)\s+(?:that\s+)?(.+)",
]
STEP_WORDS = ["wrap", "buy", "sell", "stake", "recenter", "mint", "approve"]

# Parse strategies from agent text
strategies = []
current = None
for text in assistant_texts(stream_file):
    header = STRATEGY_RE.search(text)
    if header:
        if current:
            strategies.append(current)
        current = {
            "strategy": header.group(1).strip(),
            "steps": "",
            "floor_after": None,
            "insight": "",
        }
    if not current:
        continue
    # Floor readings: the last match in the block is the most recent value.
    floor_matches = FLOOR_RE.findall(text)
    if floor_matches:
        current["floor_after"] = int(floor_matches[-1])
    # Insights: stop at the first pattern that yields a meaningful sentence so
    # an explicit "Key insight:" is not overridden by a generic "learned ..." match.
    for pattern in INSIGHT_PATTERNS:
        found = re.search(pattern, text)
        if found and len(found.group(1)) > 20:
            current["insight"] = found.group(1).strip()[:300]
            break
    # Step summaries: short texts that mention a protocol action.
    lowered = text.lower()
    if len(text) < 200 and any(word in lowered for word in STEP_WORDS):
        current["steps"] += text.strip() + "; "
if current:
    strategies.append(current)

# Write to memory file
ts = datetime.now(timezone.utc).isoformat()
with open(memory_file, "a", encoding="utf-8") as f:
    for s in strategies:
        # Without a floor reading the strategy is recorded as unchanged (HELD).
        fa = s["floor_after"] if s["floor_after"] is not None else floor_before
        delta_bps = round((fa - floor_before) * 10000 / floor_before) if floor_before else 0
        if fa < floor_before:
            result = "DECREASED"
        elif fa > floor_before:
            result = "INCREASED"
        else:
            result = "HELD"
        entry = {
            "run": run_num,
            "ts": ts,
            "strategy": s["strategy"][:100],
            "steps": s["steps"][:300].rstrip("; "),
            "floor_before": floor_before,
            "floor_after": fa,
            "delta_bps": delta_bps,
            "result": result,
            "insight": s["insight"][:300],
        }
        f.write(json.dumps(entry) + "\n")
        print(f" Recorded: {entry['strategy']} → {result} ({delta_bps:+d} bps)")
if not strategies:
    print(" No strategies detected in stream output")

# Trim memory file: once it exceeds 50 entries keep the 10 most recent plus all
# DECREASED entries, in their original (chronological) order, without duplicates.
all_entries = load_entries(memory_file)
if len(all_entries) > 50:
    recent_start = len(all_entries) - 10
    seen = set()
    kept = []
    for idx, e in enumerate(all_entries):
        if idx < recent_start and e.get("result") != "DECREASED":
            continue
        key = (e.get("run"), e.get("ts"), e.get("strategy"))
        if key not in seen:
            seen.add(key)
            kept.append(e)
    # Write to a sibling file and rename so an interrupted rewrite cannot
    # truncate the accumulated memory.
    tmp_path = memory_file + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        for e in kept:
            f.write(json.dumps(e) + "\n")
    os.replace(tmp_path, memory_file)
    print(f" Trimmed memory to {len(kept)} entries")
PYEOF
}
# ── 5. Read floor_before ───────────────────────────────────────────────────────
# Baseline captured before the agent runs. extract_memory later hands
# $FLOOR_BEFORE to its parser as the reference for each strategy's delta.
log "Reading floor before agent run..."
FLOOR_BEFORE=$(compute_eth_per_token)
log " floor_before = $FLOOR_BEFORE wei/token"
# ── 6. Build agent prompt ──────────────────────────────────────────────────────

# Render a JSONL memory file as a Markdown "Previous Findings" section.
# Arguments: $1 - path to the JSONL memory file
# Outputs:   the Markdown section on stdout; nothing when the file is missing,
#            empty, or contains no valid entries
# Returns:   exit status of the embedded Python script
build_memory_section() {
  local memory_file="$1"
  [[ -s "$memory_file" ]] || return 0
  # PYTHONIOENCODING: the section contains emoji, which would raise
  # UnicodeEncodeError under a C/POSIX locale.
  PYTHONIOENCODING=utf-8 python3 - "$memory_file" <<'PYEOF'
import json, sys

entries = []
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except ValueError:
            # A single corrupt line must not discard the rest of the memory.
            continue
        if isinstance(entry, dict):
            entries.append(entry)
if not entries:
    sys.exit(0)
print('## Previous Findings (from earlier runs)')
print()
print('DO NOT repeat strategies marked HELD or INCREASED. Build on the insights.')
print('Try NEW combinations not yet attempted. Combine tools creatively.')
print()
for e in entries:
    r = e.get('result', '?')
    emoji = '❌' if r == 'DECREASED' else '⬆️' if r == 'INCREASED' else '➡️'
    print(f"### Run {e.get('run','?')}: {e.get('strategy','?')} {emoji} {r}")
    print(f"Steps: {e.get('steps','?')}")
    print(f"Delta: {e.get('delta_bps',0)} bps")
    if e.get('insight'):
        print(f"**Insight:** {e['insight']}")
    print()
PYEOF
}

# Build Previous Findings section from memory file. The memory is an
# optimisation, so a rendering failure falls back to an empty section instead of
# aborting the run.
MEMORY_SECTION=""
if [[ -f "${MEMORY_FILE:-}" && -s "${MEMORY_FILE:-}" ]]; then
  MEMORY_SECTION=$(build_memory_section "$MEMORY_FILE") || {
    printf 'WARNING: could not render %s; continuing without previous findings\n' \
      "$MEMORY_FILE" >&2
    MEMORY_SECTION=""
  }
fi
PROMPT=$(cat <<PROMPT_EOF
# Red-team mission: break the KRAIKEN protocol floor
@ -368,9 +530,13 @@ SNAP=\$(/home/debian/.foundry/bin/cast rpc anvil_snapshot --rpc-url http://local
Remember: \`anvil_revert\` is one-shot. Take a new snapshot immediately after reverting.
4. You may chain multiple actions in one strategy (e.g. large buy → recenter → large sell).
5. Be methodical. Report every strategy tried even if it failed.
6. If Previous Findings are provided, DO NOT repeat those strategies. Use their insights to design new approaches.
7. Prioritize untried COMBINATIONS: staking + LP, staking + recenter timing, LP + multi-step swaps, etc.
---
${MEMORY_SECTION}
## Final report format
After trying all strategies, output a clearly structured report:
@ -404,18 +570,49 @@ log "Spawning Claude red-team agent (timeout: ${CLAUDE_TIMEOUT}s)..."
log " Report will be written to: $REPORT"

# errexit is switched off around the agent call so that a non-zero exit status
# (including the one `timeout` reports when the time limit is hit) is captured
# in AGENT_EXIT instead of aborting the script.
set +e
# Note: --verbose is required by the claude CLI when --output-format stream-json is used;
# omitting it causes the CLI to exit with an error, producing an empty stream log.
# stdout and stderr both go to the stream log; the readable report is derived
# from that log afterwards.
timeout "$CLAUDE_TIMEOUT" claude -p --dangerously-skip-permissions \
  --verbose --output-format stream-json \
  "$PROMPT" >"$STREAM_LOG" 2>&1
AGENT_EXIT=$?
set -e

if [[ $AGENT_EXIT -ne 0 ]]; then
  log "WARNING: claude exited with code $AGENT_EXIT — see $STREAM_LOG for details"
fi
# Print the concatenated text blocks of all assistant messages in a stream-json
# log. Lines that are not JSON, or not assistant messages, are ignored.
# Arguments: $1 - path to the stream-json log
# Outputs:   the extracted text on stdout (no separators are added)
# Returns:   exit status of the embedded Python script
extract_report_text() {
  PYTHONIOENCODING=utf-8 python3 - "$1" <<'PYEOF'
import json, sys

# errors="replace": a stray invalid byte in the log must not abort extraction.
with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except ValueError:
            # stderr of the CLI is merged into the log, so non-JSON lines are expected.
            continue
        if not isinstance(obj, dict) or obj.get("type") != "assistant":
            continue
        message = obj.get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, list):
            continue
        for block in content:
            if isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text")
                if isinstance(text, str):
                    print(text, end="")
PYEOF
}

# Extract readable text from stream-json for the report
extract_report_text "$STREAM_LOG" >"$REPORT"

# If the agent crashed and produced no readable output, treat as an infra error
# rather than silently reporting FLOOR HELD (a false pass).
if [[ $AGENT_EXIT -ne 0 && ! -s "$REPORT" ]]; then
  die "claude agent failed (exit $AGENT_EXIT) with no readable output — see $STREAM_LOG"
fi
# ── 8. Read floor_after ────────────────────────────────────────────────────────
log "Reading floor after agent run..."
FLOOR_AFTER=$(compute_eth_per_token)
log " floor_after = $FLOOR_AFTER wei/token"

# ── 8a. Extract and persist strategy findings ──────────────────────────────────
# Persisting findings is best-effort: errexit is active again at this point, so
# an unguarded failure here would abort the script before the results summary.
log "Extracting strategy findings from agent output..."
extract_memory "$STREAM_LOG" \
  || log "WARNING: extract_memory failed — continuing without updating $MEMORY_FILE"
# ── 9. Summarise results ───────────────────────────────────────────────────────