harb/scripts/harb-evaluator/promote-attacks.sh
openhands 5fa08f1a53 fix: address promote-attacks review feedback (#974)
- cleanup_worktree: add `git branch -D $BRANCH` to prevent stale local
  branch refs accumulating on push failure (bug fix)
- .netrc parser: replace fragile line-count awk with field-iteration
  approach that handles both multi-line and single-line .netrc formats
- ETH formatting: pass values as argv to python3 instead of interpolating
  into the code string, removing the injection surface
- mktemp -u: generate path without pre-creating directory; git worktree
  add creates it, avoiding the "already exists" error on some git versions
- mkdir -p guard before cp to attacks destination directory
- sed portability: `s/-\+/-/g` → `s/--*/-/g` (POSIX-compliant)
- red-team.sh: capture PIPESTATUS[0] from promote-attacks pipe and emit
  a distinct warning log line when promotion fails

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-19 07:48:24 +00:00

327 lines
11 KiB
Bash
Executable file

#!/usr/bin/env bash
# promote-attacks.sh — Promote red-team attack vectors to onchain/script/backtesting/attacks/ via PR.
#
# After a red-team run that extracted ETH, this script:
# 1. Reads the discovered attack JSONL
# 2. Deduplicates against existing files (by op-type fingerprint)
# 3. Classifies the attack type from the op sequence
# 4. Creates a git branch, commits the file, pushes, and opens a Codeberg PR
#
# Usage:
# promote-attacks.sh [OPTIONS]
#
# Options:
# --attacks FILE Path to attack JSONL (default: <repo>/tmp/red-team-attacks.jsonl)
# --candidate NAME Optimizer candidate name (default: $CANDIDATE_NAME or "unknown")
# --profile PROFILE Optimizer profile string (default: $OPTIMIZER_PROFILE or "unknown")
# --eth-extracted DELTA ETH extracted in wei (default: 0)
# --eth-before AMOUNT LM ETH before attack in wei (default: 0)
#
# Env (all optional):
# CODEBERG_TOKEN Codeberg API token. If absent, ~/.netrc is tried.
# If neither is present, PR creation is skipped with exit 0.
# CANDIDATE_NAME Fallback for --candidate
# OPTIMIZER_PROFILE Fallback for --profile
#
# Exit codes:
# 0 PR created, or gracefully skipped (no novel attacks, no token, etc.)
# 1 Hard failure (git or API error)
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Repository root, resolved as two directory levels above this script's directory.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
# Directory of already-promoted attack files; scanned for duplicates and for
# filename collisions before a new file is proposed.
ATTACKS_DIR="$REPO_ROOT/onchain/script/backtesting/attacks"
# Codeberg repository slug and API base URL used when opening the pull request.
CODEBERG_REPO="johba/harb"
CODEBERG_API="https://codeberg.org/api/v1"
# log MESSAGE... — write an informational line, tagged with the script name, to stdout.
log() {
  printf '%s\n' "[promote-attacks] $*"
}
# warn MESSAGE... — write a tagged warning line to stderr; does not exit.
warn() {
  printf '%s\n' "[promote-attacks] WARNING: $*" >&2
}
# die MESSAGE... — write a tagged error line to stderr and terminate with status 1.
die() {
  printf '%s\n' "[promote-attacks] ERROR: $*" >&2
  exit 1
}
# ── Parse arguments ──────────────────────────────────────────────────────────
# Defaults; each may be overridden by the matching command-line option.
ATTACKS_FILE="$REPO_ROOT/tmp/red-team-attacks.jsonl"
CANDIDATE="${CANDIDATE_NAME:-unknown}"
PROFILE="${OPTIMIZER_PROFILE:-unknown}"
ETH_EXTRACTED="0"
ETH_BEFORE="0"

# parse_args ARGS...
#   Consumes "--option VALUE" pairs and stores each VALUE in the corresponding
#   global (ATTACKS_FILE, CANDIDATE, PROFILE, ETH_EXTRACTED, ETH_BEFORE).
#   Calls die (exit 1) on an unknown option, or on a known option that is the
#   last word on the command line and therefore has no value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Validate first: without this check a trailing option would expand "$2"
    # under `set -u` and abort with a bare "unbound variable" message.
    case "$1" in
      --attacks | --candidate | --profile | --eth-extracted | --eth-before)
        [[ $# -ge 2 ]] || die "Option $1 requires a value"
        ;;
      *)
        die "Unknown argument: $1"
        ;;
    esac
    case "$1" in
      --attacks) ATTACKS_FILE="$2" ;;
      --candidate) CANDIDATE="$2" ;;
      --profile) PROFILE="$2" ;;
      --eth-extracted) ETH_EXTRACTED="$2" ;;
      --eth-before) ETH_BEFORE="$2" ;;
    esac
    shift 2
  done
}
parse_args "$@"
# ── Guard: file must exist and be non-empty ──────────────────────────────────
# A missing or zero-length attack file is not an error: there is simply nothing
# to promote, so the script ends successfully.
if ! [[ -f "$ATTACKS_FILE" ]]; then
  log "Attack file not found: $ATTACKS_FILE — nothing to promote"
  exit 0
elif ! [[ -s "$ATTACKS_FILE" ]]; then
  log "Attack file is empty — nothing to promote"
  exit 0
fi

# Line count of the attack file, reported as the number of ops.
OP_COUNT=$(wc -l <"$ATTACKS_FILE")

# Summary of the run parameters.
log "Processing $OP_COUNT ops from $ATTACKS_FILE"
log " candidate : $CANDIDATE"
log " profile : $PROFILE"
log " extracted : $ETH_EXTRACTED wei"
# ── Resolve Codeberg API token ───────────────────────────────────────────────
# Preference order: $CODEBERG_TOKEN, then the codeberg.org password in ~/.netrc.
API_TOKEN="${CODEBERG_TOKEN:-}"
netrc_path="${HOME:-/home/debian}/.netrc"
if [[ -z "$API_TOKEN" && -f "$netrc_path" ]]; then
  # Walk every whitespace-separated field: remember the most recent "machine"
  # value and print the field that follows "password" once that machine is
  # codeberg.org. Any awk failure is ignored and leaves the token empty.
  API_TOKEN=$(
    awk '
      {
        for (i = 1; i <= NF; i++) {
          if ($i == "machine") m = $(i + 1)
          if ($i == "password" && m == "codeberg.org") { print $(i + 1); exit }
        }
      }
    ' "$netrc_path" 2>/dev/null || true
  )
fi

# Without a token no PR can be opened; this is a graceful skip, not a failure.
[[ -n "$API_TOKEN" ]] || {
  warn "No Codeberg token found (set CODEBERG_TOKEN or configure ~/.netrc) — skipping PR"
  exit 0
}
# ── Classify attack type and deduplicate ─────────────────────────────────────
# classify_attacks ATTACKS_FILE ATTACKS_DIR
#   Runs the embedded Python classifier and prints exactly one line to stdout:
#     EMPTY             no usable ops in ATTACKS_FILE
#     DUPLICATE:<name>  op-type sequence equals that of ATTACKS_DIR/<name>
#     NOVEL:<type>      new sequence, classified as <type>
#   Lines that are blank, not valid JSON, or not JSON objects are skipped
#   (non-object values such as numbers or arrays used to crash the classifier).
#   Existing files that cannot be read are skipped with a note on stderr.
#   Returns python3's exit status.
classify_attacks() {
  python3 - "$1" "$2" <<'PYEOF'
import json
import sys
from pathlib import Path


def load_ops(path):
    """Return the JSON objects of a JSONL file, one per non-blank line.

    Lines that fail to parse, and parsed values that are not objects, are
    ignored so that every returned element supports ``.get``.
    """
    ops = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                op = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(op, dict):
                ops.append(op)
    return ops


def fingerprint(ops):
    """Ordered tuple of op types — used for deduplication (ignores amounts)."""
    return tuple(op.get("op", "") for op in ops)


def classify(ops):
    """Classify attack type from the operation sequence."""
    types = [op.get("op", "") for op in ops]
    has_stake = "stake" in types
    has_unstake = "unstake" in types
    has_mint_lp = "mint_lp" in types
    has_burn_lp = "burn_lp" in types
    has_loop = "buy_recenter_loop" in types
    buys = types.count("buy")
    sells = types.count("sell")
    recenters = types.count("recenter")

    if has_stake and has_unstake:
        return "staking"
    if has_mint_lp or has_burn_lp:
        return "lp-manipulation"
    # Compound loop op — il-crystallization with large count
    if has_loop:
        return "il-crystallization"
    # Oscillation: many buys and sells with frequent switching
    if buys >= 3 and sells >= 3:
        non_rc = [t for t in types if t not in ("recenter", "mine")]
        alternations = sum(
            1 for i in range(len(non_rc) - 1) if non_rc[i] != non_rc[i + 1]
        )
        if alternations >= 4:
            return "fee-drain-oscillation"
    # IL crystallisation: multiple buys then one final sell
    if buys >= 3 and sells == 1:
        return "il-crystallization"
    # Floor ratchet: many recenters triggered by buys
    if recenters >= 5 and buys >= 2:
        return "floor-ratchet"
    return "floor-attack"


attacks_file = sys.argv[1]
attacks_dir = sys.argv[2]

new_ops = load_ops(attacks_file)
if not new_ops:
    print("EMPTY")
    sys.exit(0)

new_fp = fingerprint(new_ops)

# Deduplication: compare op-type fingerprint against every existing file
existing_dir = Path(attacks_dir)
if existing_dir.is_dir():
    for existing_file in sorted(existing_dir.glob("*.jsonl")):
        try:
            existing_ops = load_ops(existing_file)
        except Exception as exc:
            # Best effort: one unreadable file must not block promotion, but
            # say so on stderr (stdout carries only the result line).
            print(f"[promote-attacks] skipping {existing_file.name}: {exc}",
                  file=sys.stderr)
            continue
        if fingerprint(existing_ops) == new_fp:
            print(f"DUPLICATE:{existing_file.name}")
            sys.exit(0)

print(f"NOVEL:{classify(new_ops)}")
PYEOF
}
CLASSIFY_OUT=$(classify_attacks "$ATTACKS_FILE" "$ATTACKS_DIR")
log "Classifier: $CLASSIFY_OUT"
# Dispatch on the classifier's single result line. Only a NOVEL result lets the
# script continue; every other outcome ends the run with status 0.
if [[ "$CLASSIFY_OUT" == "EMPTY" ]]; then
  log "No ops found in attack file — skipping"
  exit 0
elif [[ "$CLASSIFY_OUT" == DUPLICATE:* ]]; then
  log "Op sequence matches existing file: ${CLASSIFY_OUT#DUPLICATE:} — skipping"
  exit 0
elif [[ "$CLASSIFY_OUT" == NOVEL:* ]]; then
  # Everything after the "NOVEL:" prefix is the attack type label.
  ATTACK_TYPE="${CLASSIFY_OUT#NOVEL:}"
else
  warn "Unexpected classifier output: $CLASSIFY_OUT"
  exit 0
fi
log "Novel attack type: $ATTACK_TYPE"
# ── Determine destination filename ───────────────────────────────────────────
# slugify TEXT
#   Prints a slug of TEXT: lowercase, only [a-z0-9-], runs of hyphens collapsed,
#   no leading or trailing hyphen, at most 30 characters.
#   The trailing hyphen is stripped AFTER truncation, because cutting at 30
#   characters can itself leave a hyphen at the end.
#   Prints "unknown" when nothing usable remains (e.g. TEXT is all punctuation),
#   so file and branch names never contain an empty component.
slugify() {
  local slug
  slug=$(printf '%s' "$1" \
    | tr '[:upper:]' '[:lower:]' \
    | sed 's/[^a-z0-9-]/-/g;s/--*/-/g;s/^-//' \
    | cut -c1-30 \
    | sed 's/-$//')
  printf '%s\n' "${slug:-unknown}"
}
CANDIDATE_SLUG=$(slugify "$CANDIDATE")
# File stem: "<attack type>-<candidate slug>".
BASE_NAME="${ATTACK_TYPE}-${CANDIDATE_SLUG}"

# If that name is already taken in ATTACKS_DIR, try -v2, -v3, ... until a free
# name is found.
SUFFIX=""
V=2
until [[ ! -f "$ATTACKS_DIR/${BASE_NAME}${SUFFIX}.jsonl" ]]; do
  SUFFIX="-v${V}"
  V=$((V + 1))
done
BASE_NAME+="$SUFFIX"

# Path of the new file relative to the repository root.
DEST_RELPATH="onchain/script/backtesting/attacks/${BASE_NAME}.jsonl"
log "Destination: $DEST_RELPATH"
# ── Format ETH values for human-readable output ──────────────────────────────
# wei_to_eth WEI
#   Prints WEI divided by 1e18 with four decimals. The value is handed to
#   python3 as an argument, never spliced into the code string. If python3
#   fails (for example WEI is not an integer), prints "<WEI> wei" instead.
wei_to_eth() {
  python3 -c 'import sys; print(f"{int(sys.argv[1]) / 1e18:.4f}")' "$1" 2>/dev/null \
    || echo "$1 wei"
}
ETH_X=$(wei_to_eth "$ETH_EXTRACTED")
ETH_B=$(wei_to_eth "$ETH_BEFORE")
# ── Git: create branch + commit in a temporary worktree ──────────────────────
DATE_TAG=$(date -u +%Y%m%d-%H%M%S)
BRANCH="red-team/${ATTACK_TYPE}-${CANDIDATE_SLUG}-${DATE_TAG}"

# Reserve a private directory with `mktemp -d` and place the worktree at a
# not-yet-existing path inside it. git worktree add still creates the final
# directory itself, but the path can no longer be claimed by another process
# between name generation and use, as it could with `mktemp -u`.
TMPWT_PARENT=$(mktemp -d) || die "mktemp -d failed"
TMPWT="$TMPWT_PARENT/worktree"

# cleanup_worktree — EXIT trap handler.
#   Removes the temporary worktree, its parent directory and the local branch,
#   then exits with the status the script was already exiting with.
#   Every step is best-effort so cleanup never masks the original status.
cleanup_worktree() {
  local rc=$?
  cd "$REPO_ROOT" 2>/dev/null || true
  git worktree remove --force "$TMPWT" 2>/dev/null || true
  # Delete the directory BEFORE pruning: if `worktree remove` failed, prune only
  # drops the registration once the directory is gone, and `branch -D` refuses
  # to delete a branch that a registered worktree still has checked out.
  rm -rf -- "$TMPWT_PARENT" 2>/dev/null || true
  git worktree prune --quiet 2>/dev/null || true
  git -C "$REPO_ROOT" branch -D "$BRANCH" 2>/dev/null || true
  exit "$rc"
}
trap cleanup_worktree EXIT
log "Fetching origin/master ..."
# A failed fetch is tolerated: the branch is then cut from whatever
# origin/master the local repository already has.
git -C "$REPO_ROOT" fetch origin master --quiet 2>/dev/null \
  || warn "git fetch failed — using local origin/master state"

log "Creating worktree branch: $BRANCH ..."
git -C "$REPO_ROOT" worktree add -b "$BRANCH" "$TMPWT" "origin/master" --quiet

# Copy attack file into the isolated worktree
mkdir -p "$(dirname "$TMPWT/$DEST_RELPATH")"
cp -- "$ATTACKS_FILE" "$TMPWT/$DEST_RELPATH"

cd "$TMPWT"
git add -- "$DEST_RELPATH"

# Subject and body are passed as two separate -m paragraphs, which git joins
# with a blank line. One contiguous block would be read as a single paragraph,
# so the whole text would become the commit subject.
COMMIT_SUBJECT="red-team: add ${ATTACK_TYPE} attack vector (${CANDIDATE})"
COMMIT_BODY=$(cat <<EOF
Attack type : ${ATTACK_TYPE}
Optimizer : ${CANDIDATE}
Profile : ${PROFILE}
ETH extracted : ${ETH_X} ETH
LM ETH before : ${ETH_B} ETH
Source file : ${DEST_RELPATH}
EOF
)
git commit --quiet -m "$COMMIT_SUBJECT" -m "$COMMIT_BODY"

log "Pushing branch: $BRANCH ..."
git push origin "$BRANCH" --quiet
# Leave the worktree directory before later steps run.
cd "$REPO_ROOT"
# ── Codeberg: create PR ──────────────────────────────────────────────────────
# Title of the pull request.
printf -v PR_TITLE 'red-team: %s attack via %s (%s ETH extracted)' \
  "$ATTACK_TYPE" "$CANDIDATE" "$ETH_X"

# Markdown description of the pull request. `read -d ''` slurps the whole
# here-document; it reports non-zero at end of input, hence the `|| true`.
read -r -d '' PR_BODY <<EOF || true
## Red-team Attack Discovery
| Field | Value |
|-------|-------|
| Attack type | \`${ATTACK_TYPE}\` |
| Optimizer tested | \`${CANDIDATE}\` |
| Optimizer profile | \`${PROFILE}\` |
| ETH extracted | **${ETH_X} ETH** |
| LM ETH before | ${ETH_B} ETH |
| Attack file | \`${DEST_RELPATH}\` |
## What this means
This attack vector successfully extracted ETH from the LiquidityManager when tested
against the \`${CANDIDATE}\` optimizer. Adding it to the attack suite raises the fitness
bar for evolution — future optimizer candidates must survive this attack to pass.
## Review checklist
- [ ] Attack operations are valid (no malformed ops)
- [ ] Attack type classification (\`${ATTACK_TYPE}\`) is accurate
- [ ] Not a duplicate of an existing attack (deduplication checked by promote-attacks.sh)
- [ ] Appropriate to add as a permanent regression test
---
🤖 Auto-generated by \`scripts/harb-evaluator/promote-attacks.sh\`
EOF
log "Creating Codeberg PR ..."
# Build the request payload with jq so titles and bodies are JSON-escaped.
PR_JSON=$(jq -n \
  --arg title "$PR_TITLE" \
  --arg body "$PR_BODY" \
  --arg head "$BRANCH" \
  --arg base "master" \
  '{title: $title, body: $body, head: $head, base: $base}')

# -sS: no progress meter, but transport errors are still printed to stderr.
# -w appends the HTTP status on its own final line. Unlike --fail, this keeps
# the API's error body, so a rejected request can be diagnosed. stderr is not
# merged into the capture, so it cannot corrupt the JSON.
PR_RAW=$(curl -sS \
  -H "Authorization: token ${API_TOKEN}" \
  -H "Content-Type: application/json" \
  -w '\n%{http_code}' \
  "${CODEBERG_API}/repos/${CODEBERG_REPO}/pulls" \
  -d "$PR_JSON") || die "curl failed when creating PR"

# Last line is the status code; everything before it is the response body.
HTTP_STATUS="${PR_RAW##*$'\n'}"
PR_RESPONSE="${PR_RAW%$'\n'*}"

if [[ ! "$HTTP_STATUS" =~ ^2[0-9][0-9]$ ]]; then
  warn "PR creation failed with HTTP ${HTTP_STATUS}:"
  # Substring expansion rather than `| head -c`: under pipefail a SIGPIPE in
  # the producer would abort the script before die is reached.
  printf '%s\n' "${PR_RESPONSE:0:400}" >&2
  die "PR creation failed"
fi

# A body that jq cannot parse simply leaves these empty; handled below.
PR_NUMBER=$(printf '%s' "$PR_RESPONSE" | jq -r '.number // empty' 2>/dev/null || true)
PR_URL=$(printf '%s' "$PR_RESPONSE" | jq -r '.html_url // empty' 2>/dev/null || true)

if [[ -n "$PR_NUMBER" && "$PR_NUMBER" != "null" ]]; then
  log "PR #${PR_NUMBER} created: ${PR_URL}"
else
  warn "PR creation returned unexpected response:"
  printf '%s\n' "${PR_RESPONSE:0:400}" >&2
  die "PR creation failed"
fi