harb/scripts/harb-evaluator/promote-attacks.sh

#!/usr/bin/env bash
# promote-attacks.sh — Promote red-team attack vectors to onchain/script/backtesting/attacks/ via PR.
#
# After a red-team run that extracted ETH, this script:
#   1. Reads the discovered attack JSONL
#   2. Deduplicates against existing files (by op-type fingerprint)
#   3. Classifies the attack type from the op sequence
#   4. Creates a git branch, commits the file, pushes, and opens a Codeberg PR
#
# Usage:
#   promote-attacks.sh [OPTIONS]
#
# Options:
#   --attacks FILE        Path to attack JSONL (default: <repo>/tmp/red-team-attacks.jsonl)
#   --candidate NAME      Optimizer candidate name (default: $CANDIDATE_NAME or "unknown")
#   --profile PROFILE     Optimizer profile string (default: $OPTIMIZER_PROFILE or "unknown")
#   --eth-extracted DELTA ETH extracted in wei (default: 0)
#   --eth-before AMOUNT   LM ETH before attack in wei (default: 0)
#
# Env (all optional):
#   CODEBERG_TOKEN      Codeberg API token. If absent, ~/.netrc is tried.
#                       If neither is present, PR creation is skipped with exit 0.
#   CANDIDATE_NAME      Fallback for --candidate
#   OPTIMIZER_PROFILE   Fallback for --profile
#
# Exit codes:
#   0   PR created, or gracefully skipped (no novel attacks, no token, etc.)
#   1   Hard failure (unknown argument, git or API error)

# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory two levels above the one holding this script.
REPO_ROOT=$(cd "$(dirname "$0")/../.." && pwd)
# Directory that holds the promoted attack files.
ATTACKS_DIR="${REPO_ROOT}/onchain/script/backtesting/attacks"
# Codeberg API base URL and the <owner>/<repo> slug used for the PR request.
CODEBERG_API='https://codeberg.org/api/v1'
CODEBERG_REPO='johba/harb'

# Logging helpers. Every message is prefixed with "[promote-attacks]".

# log MSG... — informational message on stdout.
log() {
  printf '%s\n' "[promote-attacks] $*"
}

# warn MSG... — warning on stderr; execution continues.
warn() {
  printf '%s\n' "[promote-attacks] WARNING: $*" >&2
}

# die MSG... — error on stderr, then terminate the shell with status 1.
die() {
  printf '%s\n' "[promote-attacks] ERROR: $*" >&2
  exit 1
}

# ── Parse arguments ──────────────────────────────────────────────────────────
# Defaults. CANDIDATE and PROFILE fall back to $CANDIDATE_NAME and
# $OPTIMIZER_PROFILE, then to "unknown"; the ETH amounts default to 0 wei.
ATTACKS_FILE="$REPO_ROOT/tmp/red-team-attacks.jsonl"
CANDIDATE="${CANDIDATE_NAME:-unknown}"
PROFILE="${OPTIMIZER_PROFILE:-unknown}"
ETH_EXTRACTED="0"
ETH_BEFORE="0"

# parse_args ARG...
#   Consumes "--option VALUE" pairs and stores each VALUE in the matching
#   global (ATTACKS_FILE, CANDIDATE, PROFILE, ETH_EXTRACTED, ETH_BEFORE).
#   Calls die (exit status 1) on an unknown option or on an option that is
#   missing its value. The missing-value check is explicit so that the user
#   sees a clear message instead of the shell's "unbound variable" abort
#   that `set -u` would otherwise raise on "$2".
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --attacks|--candidate|--profile|--eth-extracted|--eth-before)
        [[ $# -ge 2 ]] || die "Option $1 requires a value"
        ;;
      *)
        die "Unknown argument: $1"
        ;;
    esac
    case "$1" in
      --attacks)       ATTACKS_FILE="$2"  ;;
      --candidate)     CANDIDATE="$2"     ;;
      --profile)       PROFILE="$2"       ;;
      --eth-extracted) ETH_EXTRACTED="$2" ;;
      --eth-before)    ETH_BEFORE="$2"    ;;
    esac
    shift 2
  done
}

parse_args "$@"

# ── Guard: file must exist and be non-empty ──────────────────────────────────
# Either condition means there is nothing to promote; both exit with status 0.
[[ -f "$ATTACKS_FILE" ]] || {
  log "Attack file not found: $ATTACKS_FILE — nothing to promote"
  exit 0
}
[[ -s "$ATTACKS_FILE" ]] || {
  log "Attack file is empty — nothing to promote"
  exit 0
}

# Line count of the attack file as reported by wc; used for the log line only.
OP_COUNT="$(wc -l < "$ATTACKS_FILE")"
log "Processing $OP_COUNT ops from $ATTACKS_FILE"
log "  candidate : $CANDIDATE"
log "  profile   : $PROFILE"
log "  extracted : $ETH_EXTRACTED wei"

# ── Resolve Codeberg API token ───────────────────────────────────────────────
# Precedence: $CODEBERG_TOKEN first, then the .netrc file under $HOME
# (or under /home/debian when HOME is unset or empty).
API_TOKEN="${CODEBERG_TOKEN:-}"
NETRC_FILE="${HOME:-/home/debian}/.netrc"

if [[ -z "$API_TOKEN" && -f "$NETRC_FILE" ]]; then
  # The awk program remembers the word following each "machine" keyword and
  # prints the word following the first "password" keyword seen while the
  # remembered machine is codeberg.org. Any awk failure yields an empty token.
  API_TOKEN=$(awk '{for(i=1;i<=NF;i++){if($i=="machine")m=$(i+1); if($i=="password"&&m=="codeberg.org"){print $(i+1);exit}}}' \
    "$NETRC_FILE" 2>/dev/null || true)
fi

# Without a token no PR can be opened; this counts as a graceful skip.
[[ -n "$API_TOKEN" ]] || {
  warn "No Codeberg token found (set CODEBERG_TOKEN or configure ~/.netrc) — skipping PR"
  exit 0
}

# ── Classify attack type and deduplicate ─────────────────────────────────────
# classify_attack_file FILE DIR
#   Runs the embedded Python classifier on the attack JSONL at FILE and
#   compares it against every *.jsonl file in DIR. Prints exactly one line
#   on stdout:
#     EMPTY              FILE contains no usable ops
#     DUPLICATE:<name>   FILE's ordered op-type sequence equals DIR/<name>'s
#     NOVEL:<type>       otherwise; <type> is the classified attack type
#   Warnings about unreadable existing files go to stderr.
#   Returns python3's exit status.
classify_attack_file() {
  python3 - "$1" "$2" <<'PYEOF'
import json
import sys
from pathlib import Path


def load_ops(path):
    """Parse a JSONL file into a list of op objects (dicts).

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects (numbers, strings, arrays, ...) are skipped; only objects can
    carry an "op" key, and letting other values through would make the
    fingerprint/classify steps fail on ``.get``.
    """
    ops = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                op = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(op, dict):
                ops.append(op)
    return ops


def fingerprint(ops):
    """Ordered tuple of op types — used for deduplication (ignores amounts)."""
    return tuple(op.get("op", "") for op in ops)


def classify(ops):
    """Classify attack type from the operation sequence.

    Rules are checked in order; the first match wins and "floor-attack" is
    the fallback when none applies.
    """
    types = [op.get("op", "") for op in ops]
    has_stake   = "stake"             in types
    has_unstake = "unstake"           in types
    has_mint_lp = "mint_lp"           in types
    has_burn_lp = "burn_lp"           in types
    has_loop    = "buy_recenter_loop" in types
    buys      = types.count("buy")
    sells     = types.count("sell")
    recenters = types.count("recenter")

    if has_stake and has_unstake:
        return "staking"
    if has_mint_lp or has_burn_lp:
        return "lp-manipulation"
    # Compound loop op — il-crystallization with large count
    if has_loop:
        return "il-crystallization"
    # Oscillation: many buys and sells with frequent switching
    if buys >= 3 and sells >= 3:
        # Count op-type changes between neighbours, ignoring recenter/mine.
        non_rc = [t for t in types if t not in ("recenter", "mine")]
        alternations = sum(
            1 for i in range(len(non_rc) - 1) if non_rc[i] != non_rc[i + 1]
        )
        if alternations >= 4:
            return "fee-drain-oscillation"
    # IL crystallisation: multiple buys then one final sell
    if buys >= 3 and sells == 1:
        return "il-crystallization"
    # Floor ratchet: many recenters triggered by buys
    if recenters >= 5 and buys >= 2:
        return "floor-ratchet"
    return "floor-attack"


attacks_file = sys.argv[1]
attacks_dir  = sys.argv[2]

new_ops = load_ops(attacks_file)
if not new_ops:
    print("EMPTY")
    sys.exit(0)

new_fp = fingerprint(new_ops)

# Deduplication: compare op-type fingerprint against every existing file
existing_dir = Path(attacks_dir)
if existing_dir.is_dir():
    for existing_file in sorted(existing_dir.glob("*.jsonl")):
        try:
            existing_ops = load_ops(existing_file)
        except Exception as exc:
            # Best effort: one unreadable file must not block promotion, but
            # say so on stderr (stdout carries the verdict line only).
            print(
                f"warning: skipping unreadable {existing_file.name}: {exc}",
                file=sys.stderr,
            )
            continue
        if fingerprint(existing_ops) == new_fp:
            print(f"DUPLICATE:{existing_file.name}")
            sys.exit(0)

print(f"NOVEL:{classify(new_ops)}")
PYEOF
}

CLASSIFY_OUT=$(classify_attack_file "$ATTACKS_FILE" "$ATTACKS_DIR")

log "Classifier: $CLASSIFY_OUT"

# Act on the classifier verdict. Only NOVEL:<type> lets the script continue;
# every other verdict ends the run with status 0.
if [[ "$CLASSIFY_OUT" == "EMPTY" ]]; then
  log "No ops found in attack file — skipping"
  exit 0
elif [[ "$CLASSIFY_OUT" == DUPLICATE:* ]]; then
  log "Op sequence matches existing file: ${CLASSIFY_OUT#DUPLICATE:} — skipping"
  exit 0
elif [[ "$CLASSIFY_OUT" == NOVEL:* ]]; then
  # Everything after the "NOVEL:" prefix is the attack type.
  ATTACK_TYPE="${CLASSIFY_OUT#NOVEL:}"
else
  warn "Unexpected classifier output: $CLASSIFY_OUT"
  exit 0
fi

log "Novel attack type: $ATTACK_TYPE"

# ── Determine destination filename ───────────────────────────────────────────
# slugify TEXT
#   Prints TEXT as a slug: lowercase, only [a-z0-9-], runs of hyphens
#   collapsed, no leading or trailing hyphen, at most 30 characters.
#   The trailing hyphen is stripped AFTER truncation, because cutting at 30
#   characters can itself leave a hyphen at the end. LC_ALL=C pins the
#   character classes to plain ASCII regardless of the caller's locale.
slugify() {
  printf '%s' "$1" \
    | LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C sed 's/[^a-z0-9-]/-/g;s/--*/-/g;s/^-//' \
    | cut -c1-30 \
    | sed 's/-$//'
}

CANDIDATE_SLUG=$(slugify "$CANDIDATE")

BASE_NAME="${ATTACK_TYPE}-${CANDIDATE_SLUG}"

# Avoid collisions with existing files by appending -v2, -v3, ...
SUFFIX=""
V=2
while [[ -f "$ATTACKS_DIR/${BASE_NAME}${SUFFIX}.jsonl" ]]; do
  SUFFIX="-v${V}"
  # Plain assignment rather than (( V++ )): an arithmetic command returns a
  # non-zero status whenever its value is 0, which is a trap under `set -e`.
  V=$(( V + 1 ))
done
BASE_NAME="${BASE_NAME}${SUFFIX}"
# Path of the new attack file relative to the repository root.
DEST_RELPATH="onchain/script/backtesting/attacks/${BASE_NAME}.jsonl"

log "Destination: $DEST_RELPATH"

# ── Format ETH values for human-readable output ──────────────────────────────
# wei_to_eth WEI — print WEI divided by 1e18 with four decimals. When the
# python conversion fails, print "<WEI> wei" instead.
wei_to_eth() {
  python3 -c 'import sys; print(f"{int(sys.argv[1]) / 1e18:.4f}")' "$1" 2>/dev/null \
    || echo "$1 wei"
}

ETH_X=$(wei_to_eth "$ETH_EXTRACTED")
ETH_B=$(wei_to_eth "$ETH_BEFORE")

# ── Git: create branch + commit in a temporary worktree ──────────────────────
# The UTC timestamp is part of the branch name so that each run gets its own.
DATE_TAG=$(date -u +%Y%m%d-%H%M%S)
BRANCH="red-team/${ATTACK_TYPE}-${CANDIDATE_SLUG}-${DATE_TAG}"
# Reserve a private directory with mktemp -d and place the worktree at a
# not-yet-existing path inside it. Unlike `mktemp -u`, this leaves no window
# in which another process could claim the path, and git worktree add still
# gets to create the worktree directory itself.
TMPWT_PARENT=$(mktemp -d) || die "mktemp failed"
TMPWT="$TMPWT_PARENT/worktree"

# cleanup_worktree — EXIT trap handler.
#   Best-effort removal of the temporary worktree, the local branch and the
#   reserved temp directory; each step ignores its own failure. The exit
#   status that was current when the handler started is preserved.
cleanup_worktree() {
  local rc=$?
  cd "$REPO_ROOT" 2>/dev/null || true
  git worktree remove --force "$TMPWT" 2>/dev/null || true
  git worktree prune --quiet 2>/dev/null || true
  git -C "$REPO_ROOT" branch -D "$BRANCH" 2>/dev/null || true
  # :? refuses to run rm -rf with an empty path.
  rm -rf -- "${TMPWT_PARENT:?}" 2>/dev/null || true
  exit "$rc"
}
trap cleanup_worktree EXIT

# Refresh origin/master. A failed fetch is not fatal: a warning is printed
# and the run continues with whatever origin/master the local repo has.
log "Fetching origin/master ..."
git -C "$REPO_ROOT" fetch origin master --quiet 2>/dev/null \
  || warn "git fetch failed — using local origin/master state"

# Create the new branch $BRANCH starting at origin/master and check it out
# into the temporary worktree at $TMPWT.
log "Creating worktree branch: $BRANCH ..."
git -C "$REPO_ROOT" worktree add -b "$BRANCH" "$TMPWT" "origin/master" --quiet

# Copy attack file into the isolated worktree
# (the destination directory is created first if it is missing).
mkdir -p "$(dirname "$TMPWT/$DEST_RELPATH")"
cp "$ATTACKS_FILE" "$TMPWT/$DEST_RELPATH"

# The git commands below run inside the worktree, so they act on $BRANCH.
# The heredoc delimiter is unquoted, so the ${...} references in the commit
# message are expanded.
cd "$TMPWT"
git add "$DEST_RELPATH"
git commit --quiet -m "$(cat <<EOF
red-team: add ${ATTACK_TYPE} attack vector (${CANDIDATE})

Attack type   : ${ATTACK_TYPE}
Optimizer     : ${CANDIDATE}
Profile       : ${PROFILE}
ETH extracted : ${ETH_X} ETH
LM ETH before : ${ETH_B} ETH
Source file   : ${DEST_RELPATH}
EOF
)"

# Publish the branch to the "origin" remote.
log "Pushing branch: $BRANCH ..."
git push origin "$BRANCH" --quiet

# Leave the worktree directory again before continuing.
cd "$REPO_ROOT"

# ── Codeberg: create PR ──────────────────────────────────────────────────────
# Title of the pull request: attack type, candidate and formatted ETH amount.
PR_TITLE="red-team: ${ATTACK_TYPE} attack via ${CANDIDATE} (${ETH_X} ETH extracted)"

# Markdown body of the pull request. The heredoc delimiter is unquoted, so
# the ${...} references are expanded; backticks are written as \` so that
# they stay literal instead of starting a command substitution.
PR_BODY=$(cat <<EOF
## Red-team Attack Discovery

| Field | Value |
|-------|-------|
| Attack type | \`${ATTACK_TYPE}\` |
| Optimizer tested | \`${CANDIDATE}\` |
| Optimizer profile | \`${PROFILE}\` |
| ETH extracted | **${ETH_X} ETH** |
| LM ETH before | ${ETH_B} ETH |
| Attack file | \`${DEST_RELPATH}\` |

## What this means

This attack vector successfully extracted ETH from the LiquidityManager when tested
against the \`${CANDIDATE}\` optimizer. Adding it to the attack suite raises the fitness
bar for evolution — future optimizer candidates must survive this attack to pass.

## Review checklist

- [ ] Attack operations are valid (no malformed ops)
- [ ] Attack type classification (\`${ATTACK_TYPE}\`) is accurate
- [ ] Not a duplicate of an existing attack (deduplication checked by promote-attacks.sh)
- [ ] Appropriate to add as a permanent regression test

---
🤖 Auto-generated by \`scripts/harb-evaluator/promote-attacks.sh\`
EOF
)

log "Creating Codeberg PR ..."
# Build the request payload with jq so that title and body are JSON-escaped.
PR_JSON=$(jq -n \
  --arg title "$PR_TITLE" \
  --arg body  "$PR_BODY" \
  --arg head  "$BRANCH" \
  --arg base  "master" \
  '{title: $title, body: $body, head: $head, base: $base}')

# POST the payload (-d implies POST). Deliberately no --fail: with it curl
# drops the response body on an HTTP error, which hides the API's error
# message. Instead -w appends the HTTP status code on a final line of its own.
# -S keeps transport errors visible on stderr; stderr is not merged into the
# captured output, so the body stays parseable JSON.
PR_RESPONSE=$(curl -sS \
  -w '\n%{http_code}' \
  -H "Authorization: token ${API_TOKEN}" \
  -H "Content-Type: application/json" \
  "${CODEBERG_API}/repos/${CODEBERG_REPO}/pulls" \
  -d "$PR_JSON") || die "curl failed when creating PR"

# Split the captured output: last line is the status code, the rest the body.
HTTP_STATUS=${PR_RESPONSE##*$'\n'}
PR_RESPONSE=${PR_RESPONSE%$'\n'*}

if [[ "$HTTP_STATUS" != 2[0-9][0-9] ]]; then
  warn "Codeberg API returned HTTP ${HTTP_STATUS}:"
  # Bounded excerpt via substring expansion; a `| head -c` pipeline could
  # fail with SIGPIPE under pipefail and abort before die runs.
  printf '%s\n' "${PR_RESPONSE:0:400}" >&2
  die "PR creation failed"
fi

# Extract the PR number and URL; a body that jq cannot parse yields empty values.
PR_NUMBER=$(printf '%s' "$PR_RESPONSE" | jq -r '.number // empty' 2>/dev/null || true)
PR_URL=$(printf '%s' "$PR_RESPONSE"    | jq -r '.html_url // empty' 2>/dev/null || true)

if [[ -n "$PR_NUMBER" && "$PR_NUMBER" != "null" ]]; then
  log "PR #${PR_NUMBER} created: ${PR_URL}"
else
  warn "PR creation returned unexpected response:"
  printf '%s\n' "${PR_RESPONSE:0:400}" >&2
  die "PR creation failed"
fi