fix: llm-origin entries in manifest have null fitness and no evaluation path (#724)
- Add evaluate-seeds.sh: a standalone script that reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh against each seed file, and atomically writes the results back to manifest.jsonl. Supports --dry-run to preview without evaluating.
- Add a comment to the --diverse-seeds sampling in evolve.sh documenting that null-fitness seeds are included with effective_fitness=0 and that evaluate-seeds.sh should be run to score them.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
93b16f3b1f
commit
cb6e6708b6
3 changed files with 193 additions and 0 deletions
187
tools/push3-evolution/evaluate-seeds.sh
Executable file
187
tools/push3-evolution/evaluate-seeds.sh
Executable file
|
|
@ -0,0 +1,187 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# evaluate-seeds.sh — Score null-fitness manifest entries via fitness.sh
|
||||
#
|
||||
# Reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh
|
||||
# against the corresponding seed file, and writes the result back into
|
||||
# manifest.jsonl (atomic temp-file rename).
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/push3-evolution/evaluate-seeds.sh [--dry-run]
|
||||
#
|
||||
# Options:
|
||||
# --dry-run Print which seeds would be evaluated without running fitness.sh
|
||||
#
|
||||
# Environment:
|
||||
# ANVIL_FORK_URL Passed through to fitness.sh when Anvil is not already
|
||||
# running. Must point to a Base RPC endpoint.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 All null-fitness entries evaluated (or nothing to do).
|
||||
# 1 One or more evaluations failed (partial results may have been written).
|
||||
# 2 Infrastructure error (missing tool, manifest not found, etc.).
|
||||
# =============================================================================
|
||||
|
||||
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# All paths are resolved relative to the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
FITNESS_SH="${SCRIPT_DIR}/fitness.sh"
SEEDS_DIR="${SCRIPT_DIR}/seeds"
MANIFEST="${SEEDS_DIR}/manifest.jsonl"

# Becomes true when --dry-run is given on the command line.
DRY_RUN=false
|
||||
|
||||
# =============================================================================
|
||||
# Argument parsing
|
||||
# =============================================================================
|
||||
|
||||
# Consume the command line one argument at a time. --dry-run is the only
# recognised option; anything else prints the usage line on stderr and exits
# with status 2.
until [ $# -eq 0 ]; do
  if [ "$1" = "--dry-run" ]; then
    DRY_RUN=true
    shift
  else
    echo "Usage: $0 [--dry-run]" >&2
    exit 2
  fi
done
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
# Write a tagged diagnostic line to stderr (stdout is left untouched).
log() {
  printf '%s\n' " [evaluate-seeds] $*" >&2
}
|
||||
# Report a fatal error on stderr and terminate the script with status 2.
fail() {
  printf '%s\n' " [evaluate-seeds] ERROR: $*" >&2
  exit 2
}
|
||||
|
||||
# =============================================================================
|
||||
# Pre-flight checks
|
||||
# =============================================================================
|
||||
|
||||
# Pre-flight: verify everything the run depends on before doing any work.
# Every failed check goes through fail(), i.e. the script exits with status 2.
[ -f "$MANIFEST" ] || fail "manifest.jsonl not found at $MANIFEST"
[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
# fitness.sh is executed directly (not through an interpreter), so it needs
# the execute bit. Without this check every seed would fail individually with
# status 126 instead of being reported once as an infrastructure error.
[ -x "$FITNESS_SH" ] || fail "fitness.sh is not executable: $FITNESS_SH"
command -v python3 >/dev/null 2>&1 || fail "python3 not found in PATH"
|
||||
|
||||
# =============================================================================
|
||||
# Find null-fitness entries
|
||||
# =============================================================================
|
||||
|
||||
# Collect the `file` value of every manifest entry whose `fitness` is absent or
# null, one value per line. Blank lines and lines that are not valid JSON are
# ignored.
NULL_ENTRIES="$(
  python3 - "$MANIFEST" <<'PYEOF'
import json
import sys

with open(sys.argv[1]) as manifest:
    for raw in manifest:
        text = raw.strip()
        if text == "":
            continue
        try:
            entry = json.loads(text)
            if entry.get('fitness') is None:
                print(entry.get('file', ''))
        except json.JSONDecodeError:
            continue
PYEOF
)"
|
||||
|
||||
# Nothing left to score: say so and finish successfully.
if [[ -z "$NULL_ENTRIES" ]]; then
  log "No null-fitness entries in manifest — nothing to do."
  exit 0
fi

# Count the non-empty lines, then log the pending file names on a single line.
NULL_COUNT="$(grep -c '.' <<< "$NULL_ENTRIES")"
log "Found $NULL_COUNT null-fitness entry/entries: $(tr '\n' ' ' <<< "$NULL_ENTRIES")"
|
||||
|
||||
# --dry-run: list the pending seed files on stdout and stop before anything is
# evaluated.
if $DRY_RUN; then
  echo "Dry run — would evaluate:"
  while IFS= read -r pending; do
    printf ' %s\n' "$pending"
  done <<< "$NULL_ENTRIES"
  exit 0
fi
|
||||
|
||||
# =============================================================================
|
||||
# Evaluate each null-fitness seed and collect results
|
||||
# =============================================================================
|
||||
|
||||
# Number of seeds that could not be scored (missing seed file, fitness.sh
# failure, or unusable output).
FAILURES=0

# SCORES maps a manifest `file` value to its fitness score
# (associative array, bash 4+).
declare -A SCORES=()

# A score must be a plain (optionally signed) integer, because the write-back
# step converts it with int(). Matching is textual, so arbitrarily large values
# are accepted.
SCORE_RE='^[[:blank:]]*[+-]?[0-9]+[[:blank:]]*$'

while IFS= read -r FNAME; do
  [ -z "$FNAME" ] && continue
  SEED_FILE="$SEEDS_DIR/$FNAME"

  if [ ! -f "$SEED_FILE" ]; then
    log "WARNING: seed file not found: $SEED_FILE — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  log "Evaluating $FNAME …"
  SCORE=""
  FITNESS_EC=0
  # stdin comes from /dev/null so that fitness.sh (or anything it spawns)
  # cannot consume the remaining seed names that feed this loop.
  # `|| FITNESS_EC=$?` records the status without tripping `set -e`.
  SCORE=$("$FITNESS_SH" "$SEED_FILE" </dev/null) || FITNESS_EC=$?

  if [ "$FITNESS_EC" -ne 0 ] || [ -z "$SCORE" ]; then
    log "WARNING: fitness.sh failed for $FNAME (exit $FITNESS_EC) — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  # Reject non-integer output here: a single bad value would otherwise abort
  # the write-back and throw away every score collected in this run.
  if ! [[ "$SCORE" =~ $SCORE_RE ]]; then
    log "WARNING: fitness.sh printed a non-integer score for $FNAME: '$SCORE' — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  log " $FNAME → fitness=$SCORE"
  SCORES["$FNAME"]="$SCORE"
done <<< "$NULL_ENTRIES"

# Nothing to write back: leave the manifest untouched and report failure.
if [ "${#SCORES[@]}" -eq 0 ]; then
  log "No seeds were successfully evaluated."
  exit 1
fi
|
||||
|
||||
# =============================================================================
|
||||
# Write results back to manifest.jsonl (atomic temp-file rename)
|
||||
# =============================================================================
|
||||
|
||||
# The temp file is created next to the manifest so the final `mv` is a rename
# inside one directory. The EXIT trap removes it if anything fails first.
MANIFEST_TMP="$(mktemp "${MANIFEST}.XXXXXX")"
trap 'rm -f "$MANIFEST_TMP"' EXIT

# Flatten SCORES into alternating <file> <score> arguments. The values reach
# Python through argv as data, never as interpolated source text, so quotes or
# backslashes in a file name cannot break or inject into the Python program.
SCORE_ARGS=()
for SCORE_KEY in "${!SCORES[@]}"; do
  SCORE_ARGS+=("$SCORE_KEY" "${SCORES[$SCORE_KEY]}")
done

# The ${var[@]+...} form keeps an empty array from tripping `set -u` on
# older bash releases. The heredoc delimiter is quoted: no shell expansion
# happens inside the Python program.
python3 - "$MANIFEST" "$MANIFEST_TMP" ${SCORE_ARGS[@]+"${SCORE_ARGS[@]}"} <<'PYEOF'
import json
import shutil
import sys

manifest_path = sys.argv[1]
tmp_path = sys.argv[2]

# Remaining arguments are alternating <file> <score> pairs.
pairs = sys.argv[3:]
scores = {}
for i in range(0, len(pairs) - 1, 2):
    scores[pairs[i]] = int(pairs[i + 1])

lines_out = []
with open(manifest_path) as f:
    for line in f:
        stripped = line.rstrip('\n')
        if not stripped:
            continue
        try:
            d = json.loads(stripped)
        except json.JSONDecodeError:
            # Keep lines that are not valid JSON exactly as they were.
            lines_out.append(stripped)
            continue
        if not isinstance(d, dict):
            # Valid JSON but not an object: nothing to update, keep verbatim.
            lines_out.append(stripped)
            continue
        fname = d.get('file', '')
        # Only fill in entries that are still unscored.
        if fname in scores and d.get('fitness') is None:
            d['fitness'] = scores[fname]
        lines_out.append(json.dumps(d, separators=(',', ':')))

with open(tmp_path, 'w') as f:
    for line in lines_out:
        f.write(line + '\n')

# mktemp creates the file with mode 0600; carry over the manifest's own
# permission bits so the rename does not silently tighten them.
shutil.copymode(manifest_path, tmp_path)
PYEOF

mv "$MANIFEST_TMP" "$MANIFEST"
# The temp file has been renamed into place; nothing is left to clean up.
trap - EXIT
|
||||
|
||||
# Summarise the run on stderr. Any seed that was skipped or failed turns the
# exit status into 1; otherwise the script exits 0.
EVALUATED="${#SCORES[@]}"
log "Done. Evaluated $EVALUATED seed(s); $FAILURES failure(s)."
log "Results written to $MANIFEST"

if (( FAILURES == 0 )); then
  exit 0
fi
exit 1
|
||||
Loading…
Add table
Add a link
Reference in a new issue