fix: llm-origin entries in manifest have null fitness and no evaluation path (#724)

- Add evaluate-seeds.sh: standalone script that reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh against each seed file, and atomically writes results back to manifest.jsonl. Supports --dry-run to preview without evaluating.
- Add comment to --diverse-seeds sampling in evolve.sh documenting that null-fitness seeds are included with effective_fitness=0 and that evaluate-seeds.sh should be run to score them.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 03:08:29 +00:00 · 2026-03-15 03:08:29 +00:00 · cb6e6708b6
commit cb6e6708b6
parent 93b16f3b1f
3 changed files with 193 additions and 0 deletions
--- a/tools/push3-evolution/evaluate-seeds.sh
+++ b/tools/push3-evolution/evaluate-seeds.sh
@ -0,0 +1,187 @@
+#!/usr/bin/env bash
+# =============================================================================
+# evaluate-seeds.sh — Score null-fitness manifest entries via fitness.sh
+#
+# Reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh
+# against the corresponding seed file, and writes the result back into
+# manifest.jsonl (atomic temp-file rename).
+#
+# Usage:
+#   ./tools/push3-evolution/evaluate-seeds.sh [--dry-run]
+#
+# Options:
+#   --dry-run   Print which seeds would be evaluated without running fitness.sh
+#
+# Environment:
+#   ANVIL_FORK_URL   Passed through to fitness.sh when Anvil is not already
+#                    running.  Must point to a Base RPC endpoint.
+#
+# Exit codes:
+#   0  All null-fitness entries evaluated (or nothing to do).
+#   1  One or more evaluations failed (partial results may have been written).
+#   2  Infrastructure error (missing tool, manifest not found, etc.).
+# =============================================================================
+
+# Strict mode: abort on a failing command, on use of an unset variable, and
+# when any stage of a pipeline fails.
+set -o errexit -o nounset -o pipefail
+
+# All paths are derived from the absolute directory containing this script.
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+SEEDS_DIR="${SCRIPT_DIR}/seeds"
+MANIFEST="${SEEDS_DIR}/manifest.jsonl"
+FITNESS_SH="${SCRIPT_DIR}/fitness.sh"
+
+# Switched to true by --dry-run: list the seeds instead of scoring them.
+DRY_RUN=false
+
+# =============================================================================
+# Argument parsing
+# =============================================================================
+
+# Consume the arguments one at a time. --dry-run is the only recognised flag;
+# anything else prints the usage line to stderr and exits with status 2.
+until [ $# -eq 0 ]; do
+    if [[ "$1" == "--dry-run" ]]; then
+        DRY_RUN=true
+        shift
+    else
+        echo "Usage: $0 [--dry-run]" >&2
+        exit 2
+    fi
+done
+
+# =============================================================================
+# Helpers
+# =============================================================================
+
+log()   { echo "  [evaluate-seeds]  $*" >&2; }
+fail()  { echo "  [evaluate-seeds]  ERROR: $*" >&2; exit 2; }
+
+# =============================================================================
+# Pre-flight checks
+# =============================================================================
+
+[ -f "$MANIFEST" ]   || fail "manifest.jsonl not found at $MANIFEST"
+[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
+command -v python3 &>/dev/null || fail "python3 not found in PATH"
+
+# =============================================================================
+# Find null-fitness entries
+# =============================================================================
+
+NULL_ENTRIES="$(python3 - "$MANIFEST" <<'PYEOF'
+import json, sys
+manifest_path = sys.argv[1]
+with open(manifest_path) as f:
+    for line in f:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            d = json.loads(line)
+            if d.get('fitness') is None:
+                print(d.get('file', ''))
+        except json.JSONDecodeError:
+            pass
+PYEOF
+)"
+
+if [ -z "$NULL_ENTRIES" ]; then
+    log "No null-fitness entries in manifest — nothing to do."
+    exit 0
+fi
+
+NULL_COUNT=$(printf '%s\n' "$NULL_ENTRIES" | grep -c '.')
+log "Found $NULL_COUNT null-fitness entry/entries: $(printf '%s\n' "$NULL_ENTRIES" | tr '\n' ' ')"
+
+if $DRY_RUN; then
+    echo "Dry run — would evaluate:"
+    printf '%s\n' "$NULL_ENTRIES" | while IFS= read -r fname; do
+        echo "  $fname"
+    done
+    exit 0
+fi
+
+# =============================================================================
+# Evaluate each null-fitness seed and collect results
+# =============================================================================
+
+# Number of seeds that could not be scored (missing file, fitness.sh failure,
+# or unusable output).
+FAILURES=0
+
+# scores: associative array  file -> score (bash 4+)
+declare -A SCORES=()
+
+# Only plain integers are accepted as scores: the write-back step converts the
+# value with int(), so anything else would abort it and discard every result.
+SCORE_RE='^[+-]?[0-9]+$'
+
+# The list is fed on fd 3 rather than stdin so that fitness.sh (or any child
+# it spawns) cannot swallow the remaining file names by reading from stdin.
+while IFS= read -r -u 3 FNAME; do
+    [ -z "$FNAME" ] && continue
+    # A file listed more than once in the manifest only needs one evaluation.
+    [ -n "${SCORES[$FNAME]+set}" ] && continue
+
+    SEED_FILE="$SEEDS_DIR/$FNAME"
+    if [ ! -f "$SEED_FILE" ]; then
+        log "WARNING: seed file not found: $SEED_FILE — skipping"
+        FAILURES=$((FAILURES + 1))
+        continue
+    fi
+
+    log "Evaluating $FNAME …"
+    SCORE=""
+    FITNESS_EC=0
+    # Capture the exit status explicitly so that set -e does not abort the run.
+    SCORE=$("$FITNESS_SH" "$SEED_FILE") || FITNESS_EC=$?
+
+    if [ "$FITNESS_EC" -ne 0 ] || [ -z "$SCORE" ]; then
+        log "WARNING: fitness.sh failed for $FNAME (exit $FITNESS_EC) — skipping"
+        FAILURES=$((FAILURES + 1))
+        continue
+    fi
+
+    if ! [[ "$SCORE" =~ $SCORE_RE ]]; then
+        log "WARNING: fitness.sh returned a non-integer score for $FNAME: '$SCORE' — skipping"
+        FAILURES=$((FAILURES + 1))
+        continue
+    fi
+
+    log "  $FNAME → fitness=$SCORE"
+    SCORES["$FNAME"]="$SCORE"
+done 3<<< "$NULL_ENTRIES"
+
+if [ "${#SCORES[@]}" -eq 0 ]; then
+    log "No seeds were successfully evaluated."
+    exit 1
+fi
+
+# =============================================================================
+# Write results back to manifest.jsonl (atomic temp-file rename)
+# =============================================================================
+
+MANIFEST_TMP="$(mktemp "${MANIFEST}.XXXXXX")"
+trap 'rm -f "$MANIFEST_TMP"' EXIT
+
+python3 - "$MANIFEST" "$MANIFEST_TMP" <<PYEOF
+import json, sys, os
+
+manifest_path = sys.argv[1]
+tmp_path      = sys.argv[2]
+
+# scores injected from shell (format: "file1\tscore1\nfile2\tscore2\n…")
+scores_raw = """$(for k in "${!SCORES[@]}"; do printf '%s\t%s\n' "$k" "${SCORES[$k]}"; done)"""
+
+scores = {}
+for line in scores_raw.strip().splitlines():
+    if '\t' in line:
+        fname, score = line.split('\t', 1)
+        scores[fname.strip()] = int(score.strip())
+
+lines_out = []
+with open(manifest_path) as f:
+    for line in f:
+        stripped = line.rstrip('\n')
+        if not stripped:
+            continue
+        try:
+            d = json.loads(stripped)
+            fname = d.get('file', '')
+            if fname in scores and d.get('fitness') is None:
+                d['fitness'] = scores[fname]
+            lines_out.append(json.dumps(d, separators=(',', ':')))
+        except json.JSONDecodeError:
+            lines_out.append(stripped)
+
+with open(tmp_path, 'w') as f:
+    for line in lines_out:
+        f.write(line + '\n')
+PYEOF
+
+mv "$MANIFEST_TMP" "$MANIFEST"
+trap - EXIT
+
+EVALUATED="${#SCORES[@]}"
+log "Done. Evaluated $EVALUATED seed(s); $FAILURES failure(s)."
+log "Results written to $MANIFEST"
+
+if [ "$FAILURES" -gt 0 ]; then
+    exit 1
+fi
+exit 0