diff --git a/STATE.md b/STATE.md
index 8ed2b90..2b001dd 100644
--- a/STATE.md
+++ b/STATE.md
@@ -38,3 +38,4 @@
 - [2026-03-15] add evolution run 8 champion to seed pool (#781)
 - [2026-03-15] fix FitnessEvaluator.t.sol broken on Base mainnet fork (#780)
 - [2026-03-15] No generic flag dispatch: only `token_value_inflation` is ever zero-rated (#723)
+- [2026-03-15] `llm`-origin entries in manifest have null fitness and no evaluation path (#724): evaluate-seeds.sh scores null-fitness seeds and writes results back to manifest.jsonl
diff --git a/tools/push3-evolution/evaluate-seeds.sh b/tools/push3-evolution/evaluate-seeds.sh
new file mode 100755
index 0000000..feeeb2a
--- /dev/null
+++ b/tools/push3-evolution/evaluate-seeds.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+# =============================================================================
+# evaluate-seeds.sh — Score null-fitness manifest entries via fitness.sh
+#
+# Reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh
+# against the corresponding seed file, and writes the result back into
+# manifest.jsonl (atomic temp-file rename).
+#
+# Usage:
+#   ./tools/push3-evolution/evaluate-seeds.sh [--dry-run]
+#
+# Options:
+#   --dry-run   Print which seeds would be evaluated without running fitness.sh
+#
+# Environment:
+#   ANVIL_FORK_URL   Passed through to fitness.sh when Anvil is not already
+#                    running. Must point to a Base RPC endpoint.
+#
+# Exit codes:
+#   0   All null-fitness entries evaluated (or nothing to do).
+#   1   One or more evaluations failed (partial results may have been written).
+#   2   Infrastructure error (missing tool, manifest not found, etc.).
+# =============================================================================
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SEEDS_DIR="$SCRIPT_DIR/seeds"
+MANIFEST="$SEEDS_DIR/manifest.jsonl"
+FITNESS_SH="$SCRIPT_DIR/fitness.sh"
+
+DRY_RUN=false
+
+# =============================================================================
+# Argument parsing
+# =============================================================================
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --dry-run) DRY_RUN=true; shift ;;
+    *) echo "Usage: $0 [--dry-run]" >&2; exit 2 ;;
+  esac
+done
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+# log MSG...  — print a prefixed progress message on stderr.
+log() { echo " [evaluate-seeds] $*" >&2; }
+# fail MSG... — print a prefixed error on stderr and exit 2 (infrastructure error).
+fail() { echo " [evaluate-seeds] ERROR: $*" >&2; exit 2; }
+
+# =============================================================================
+# Pre-flight checks
+# =============================================================================
+
+[ "${BASH_VERSINFO[0]}" -ge 4 ] || fail "bash 4+ required (associative arrays)"
+[ -f "$MANIFEST" ] || fail "manifest.jsonl not found at $MANIFEST"
+[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
+command -v python3 &>/dev/null || fail "python3 not found in PATH"
+
+# =============================================================================
+# Find null-fitness entries
+# =============================================================================
+
+# Collect the `file` field of every entry whose fitness is null, one per line.
+# A scan failure is an infrastructure error (exit 2), not an evaluation failure.
+NULL_ENTRIES="$(python3 - "$MANIFEST" <<'PYEOF'
+import json, sys
+manifest_path = sys.argv[1]
+with open(manifest_path) as f:
+    for lineno, line in enumerate(f, 1):
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            d = json.loads(line)
+        except json.JSONDecodeError as exc:
+            # Still best effort, but report the line instead of hiding it.
+            print(f" [evaluate-seeds] WARNING: {manifest_path}:{lineno}: skipping malformed JSON line ({exc})", file=sys.stderr)
+            continue
+        if d.get('fitness') is None:
+            print(d.get('file', ''))
+PYEOF
+)" || fail "could not scan $MANIFEST for null-fitness entries"
+
+if [ -z "$NULL_ENTRIES" ]; then
+  log "No null-fitness entries in manifest — nothing to do."
+  exit 0
+fi
+
+NULL_COUNT=$(printf '%s\n' "$NULL_ENTRIES" | grep -c '.')
+log "Found $NULL_COUNT null-fitness entry/entries: $(printf '%s\n' "$NULL_ENTRIES" | tr '\n' ' ')"
+
+# Dry run: list the pending seeds on stdout and stop before any evaluation.
+if $DRY_RUN; then
+  echo "Dry run — would evaluate:"
+  printf '%s\n' "$NULL_ENTRIES" | while IFS= read -r fname; do
+    echo " $fname"
+  done
+  exit 0
+fi
+
+# =============================================================================
+# Evaluate each null-fitness seed and collect results
+# =============================================================================
+
+FAILURES=0
+
+# scores: associative array file -> score (bash 4+)
+declare -A SCORES=()
+
+while IFS= read -r FNAME; do
+  [ -z "$FNAME" ] && continue
+  SEED_FILE="$SEEDS_DIR/$FNAME"
+  if [ ! -f "$SEED_FILE" ]; then
+    log "WARNING: seed file not found: $SEED_FILE — skipping"
+    FAILURES=$((FAILURES + 1))
+    continue
+  fi
+
+  log "Evaluating $FNAME …"
+  SCORE=""
+  FITNESS_EC=0
+  # Seed names arrive on this loop's stdin (here-string below), so fitness.sh
+  # gets /dev/null instead; otherwise anything it runs that reads stdin would
+  # swallow the remaining names. `|| FITNESS_EC=$?` records a failure without
+  # tripping `set -e`.
+  SCORE=$("$FITNESS_SH" "$SEED_FILE" </dev/null) || FITNESS_EC=$?
+
+  if [ "$FITNESS_EC" -ne 0 ] || [ -z "$SCORE" ]; then
+    log "WARNING: fitness.sh failed for $FNAME (exit $FITNESS_EC) — skipping"
+    FAILURES=$((FAILURES + 1))
+    continue
+  fi
+
+  log " $FNAME → fitness=$SCORE"
+  SCORES["$FNAME"]="$SCORE"
+done <<< "$NULL_ENTRIES"
+
+if [ "${#SCORES[@]}" -eq 0 ]; then
+  log "No seeds were successfully evaluated."
+  exit 1
+fi
+
+# =============================================================================
+# Write results back to manifest.jsonl (atomic temp-file rename)
+# =============================================================================
+
+MANIFEST_TMP="$(mktemp "${MANIFEST}.XXXXXX")"
+trap 'rm -f "$MANIFEST_TMP"' EXIT
+
+python3 - "$MANIFEST" "$MANIFEST_TMP" <