#!/usr/bin/env bash
# =============================================================================
# evaluate-seeds.sh — Score null-fitness manifest entries via fitness.sh
#
# Reads manifest.jsonl, finds every entry with fitness: null, runs fitness.sh
# against the corresponding seed file, and writes the result back into
# manifest.jsonl (atomic temp-file rename).
#
# Usage:
#   ./tools/push3-evolution/evaluate-seeds.sh [--dry-run]
#
# Options:
#   --dry-run   Print which seeds would be evaluated without running fitness.sh
#
# Environment:
#   ANVIL_FORK_URL   Passed through to fitness.sh when Anvil is not already
#                    running. Must point to a Base RPC endpoint.
#
# Exit codes:
#   0  All null-fitness entries evaluated (or nothing to do).
#   1  One or more evaluations failed (partial results may have been written).
#   2  Infrastructure error (missing tool, manifest not found, etc.).
# =============================================================================

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SEEDS_DIR="$SCRIPT_DIR/seeds"
MANIFEST="$SEEDS_DIR/manifest.jsonl"
FITNESS_SH="$SCRIPT_DIR/fitness.sh"
DRY_RUN=false

# =============================================================================
# Argument parsing
# =============================================================================
while [ $# -gt 0 ]; do
  case "$1" in
    --dry-run) DRY_RUN=true; shift ;;
    *) echo "Usage: $0 [--dry-run]" >&2; exit 2 ;;
  esac
done

# =============================================================================
# Helpers
# =============================================================================
# log:  informational message to stderr (keeps stdout clean for callers).
log() { echo " [evaluate-seeds] $*" >&2; }
# fail: fatal infrastructure error — message to stderr, exit 2.
fail() { echo " [evaluate-seeds] ERROR: $*" >&2; exit 2; }

# =============================================================================
# Pre-flight checks
# =============================================================================
[ -f "$MANIFEST" ] || fail "manifest.jsonl not found at $MANIFEST"
[ -f "$FITNESS_SH" ] || fail "fitness.sh not found at $FITNESS_SH"
# FIX: a present-but-non-executable fitness.sh previously surfaced as a
# per-seed failure (exit 126) for every entry and ended with exit 1; per the
# exit-code contract above, a broken tool is an infra error (exit 2), so
# catch it up front before doing any work.
[ -x "$FITNESS_SH" ] || fail "fitness.sh is not executable: $FITNESS_SH"
command -v python3 &>/dev/null || fail "python3 not found in PATH"

# =============================================================================
# Find null-fitness entries
# =============================================================================
# Emits one seed filename per line for every manifest entry whose 'fitness'
# is JSON null; blank and non-JSON manifest lines are ignored here.
NULL_ENTRIES="$(python3 - "$MANIFEST" <<'PYEOF'
import json, sys

manifest_path = sys.argv[1]
with open(manifest_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            d = json.loads(line)
            if d.get('fitness') is None:
                print(d.get('file', ''))
        except json.JSONDecodeError:
            pass
PYEOF
)"

if [ -z "$NULL_ENTRIES" ]; then
  log "No null-fitness entries in manifest — nothing to do."
  exit 0
fi

# grep -c '.' counts non-empty lines; NULL_ENTRIES is known non-empty here,
# so the pipeline cannot fail under pipefail.
NULL_COUNT=$(printf '%s\n' "$NULL_ENTRIES" | grep -c '.')
log "Found $NULL_COUNT null-fitness entry/entries: $(printf '%s\n' "$NULL_ENTRIES" | tr '\n' ' ')"

if $DRY_RUN; then
  echo "Dry run — would evaluate:"
  printf '%s\n' "$NULL_ENTRIES" | while IFS= read -r fname; do
    echo "  $fname"
  done
  exit 0
fi

# =============================================================================
# Evaluate each null-fitness seed and collect results
# =============================================================================
FAILURES=0

# Scores are accumulated in a temp file as tab-separated "filename\tscore"
# lines. Using a file (rather than a shell associative array embedded in a
# heredoc) avoids injecting values into Python source code.
SCORES_FILE="$(mktemp)"
trap 'rm -f "$SCORES_FILE"' EXIT

# Herestring input (not a pipeline) keeps this loop in the current shell, so
# FAILURES updates and the early 'exit 2' below take effect.
while IFS= read -r FNAME; do
  [ -z "$FNAME" ] && continue
  SEED_FILE="$SEEDS_DIR/$FNAME"

  if [ ! -f "$SEED_FILE" ]; then
    log "WARNING: seed file not found: $SEED_FILE — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  log "Evaluating $FNAME …"
  SCORE=""
  FITNESS_EC=0
  SCORE=$("$FITNESS_SH" "$SEED_FILE") || FITNESS_EC=$?

  if [ "$FITNESS_EC" -eq 2 ]; then
    # Exit code 2 = infra error (Anvil down, missing tool, etc.).
    # All subsequent evaluations will fail for the same reason; abort early.
    log "ERROR: fitness.sh reported infra failure (exit 2) for $FNAME — aborting"
    exit 2
  fi

  if [ "$FITNESS_EC" -ne 0 ] || [ -z "$SCORE" ]; then
    log "WARNING: fitness.sh failed for $FNAME (exit $FITNESS_EC) — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  # FIX: the manifest writer below parses scores with Python's int(); a
  # malformed score (e.g. stray log text on fitness.sh's stdout) previously
  # crashed the writeback under set -e and discarded EVERY collected result.
  # Validate here and treat a non-integer score as a per-seed failure instead.
  if ! [[ "$SCORE" =~ ^-?[0-9]+$ ]]; then
    log "WARNING: fitness.sh produced non-integer score for $FNAME: '$SCORE' — skipping"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  log "  $FNAME → fitness=$SCORE"
  printf '%s\t%s\n' "$FNAME" "$SCORE" >> "$SCORES_FILE"
done <<< "$NULL_ENTRIES"

if [ ! -s "$SCORES_FILE" ]; then
  log "No seeds were successfully evaluated."
  exit 1
fi

# =============================================================================
# Write results back to manifest.jsonl (atomic temp-file rename)
# =============================================================================
# The temp file is created next to the manifest so the final mv is an atomic
# same-filesystem rename.
MANIFEST_TMP="$(mktemp "${MANIFEST}.XXXXXX")"
# Update trap to clean up both temp files.
trap 'rm -f "$SCORES_FILE" "$MANIFEST_TMP"' EXIT

python3 - "$MANIFEST" "$MANIFEST_TMP" "$SCORES_FILE" <<'PYEOF'
import json, sys

manifest_path = sys.argv[1]
tmp_path = sys.argv[2]
scores_path = sys.argv[3]

# Load scores from the tab-separated file written by the shell loop.
# Values are plain integers produced by fitness.sh — no shell expansion here.
scores = {}
with open(scores_path) as sf:
    for line in sf:
        line = line.rstrip('\n')
        if '\t' in line:
            fname, score = line.split('\t', 1)
            scores[fname.strip()] = int(score.strip())

# Rewrite the manifest: fill in fitness for scored entries, re-serialize all
# valid JSON lines compactly, and pass non-JSON lines through verbatim.
lines_out = []
with open(manifest_path) as f:
    for line in f:
        stripped = line.rstrip('\n')
        if not stripped:
            continue
        try:
            d = json.loads(stripped)
            fname = d.get('file', '')
            if fname in scores and d.get('fitness') is None:
                d['fitness'] = scores[fname]
            lines_out.append(json.dumps(d, separators=(',', ':')))
        except json.JSONDecodeError:
            lines_out.append(stripped)

with open(tmp_path, 'w') as f:
    for line in lines_out:
        f.write(line + '\n')
PYEOF

mv "$MANIFEST_TMP" "$MANIFEST"
# The manifest temp file is gone after the rename; narrow the trap back to
# the scores file only.
trap 'rm -f "$SCORES_FILE"' EXIT

EVALUATED=$(wc -l < "$SCORES_FILE" | tr -d ' ')
log "Done. Evaluated $EVALUATED seed(s); $FAILURES failure(s)."
log "Results written to $MANIFEST"

if [ "$FAILURES" -gt 0 ]; then
  exit 1
fi
exit 0