harb/.claude/hooks/supervisor/on-error.sh

41 lines
1.3 KiB
Bash
Raw Normal View History

#!/bin/bash
# Claude Code PostToolUseFailure hook — fires when a tool call fails.
#
# Option D: bash pre-filter for known patterns, LLM triage for the rest.
#
# - API 500 → always triage (transient, likely needs nudge)
# - API 429 → log + wait (rate limit, will resolve)
# - Other tool errors → triage (might be important)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SUPERVISOR_DIR="${SCRIPT_DIR%/hooks}"
INPUT=$(cat)
TOOL=$(echo "$INPUT" | jq -r '.tool_name // "unknown"')
ERROR=$(echo "$INPUT" | jq -r '.error // .tool_error // "unknown"' | head -c 500)
SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // "unknown"')
CWD=$(echo "$INPUT" | jq -r '.cwd // "unknown"')
# Bash pre-filter
if echo "$ERROR" | grep -qi "429\|rate.limit"; then
# Rate limited — log it, don't triage. It'll resolve.
echo "[$(date -u +%FT%TZ)] RATE_LIMITED | $TOOL | $CWD" >> "${CCS_LOG_FILE:-/tmp/ccs-triage.log}"
exit 0
fi
if echo "$ERROR" | grep -qi "500\|internal.server.error\|api_error"; then
# API 500 — high chance agent is stuck on this. Triage.
"$SUPERVISOR_DIR/triage.sh" "error:api_500" "$CWD" \
"Tool: $TOOL | Error: $ERROR | Session: $SESSION_ID" &
exit 0
fi
# Other errors — could be anything. Let LLM decide.
"$SUPERVISOR_DIR/triage.sh" "error:$TOOL" "$CWD" \
"Tool: $TOOL | Error: $ERROR | Session: $SESSION_ID" &
exit 0