fix: Old-format CIDs are warned about but still silently dropped from the pool (#801)
- Change WARNING to explicitly state "legacy CID format ... migration not supported, skipping"
- Expand comment near the startswith('candidate_') guard to document the CID format
contract and explain why re-admission is intentionally out of scope (no surviving
generation_N.jsonl files from runs 1-6 exist in the repo)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a983c5cb16
commit
70ef0eb1bc
2 changed files with 8 additions and 2 deletions
|
|
@@ -819,9 +819,14 @@ for fname in sorted(os.listdir(output_dir)):
|
|||
continue
|
||||
# Canonical CID format is "candidate_XXX" (zero-padded numeric suffix,
|
||||
# e.g. "candidate_001"); gen_idx is derived from the enclosing filename.
|
||||
# Old runs 1–6 used "gen{N}_c{MMM}" — see manifest.jsonl schema (#720).
|
||||
# Old runs 1–6 used "gen{N}_c{MMM}" — that format is intentionally not
|
||||
# re-admitted: no surviving generation_N.jsonl files from those runs exist
|
||||
# in the repo, so migration is out of scope. Any entry that does not match
|
||||
# "candidate_" is skipped permanently.
|
||||
if not cid.startswith('candidate_'):
|
||||
print(f'WARNING: skipping unrecognised CID format {cid!r} in {fname}')
|
||||
print(f'WARNING: legacy CID format {cid!r} in {fname} '
|
||||
f'(gen{{N}}_c{{MMM}} from runs 1-6) — '
|
||||
f'migration not supported, skipping')
|
||||
continue
|
||||
cand_str = cid[len('candidate_'):] # numeric suffix, e.g. "001"
|
||||
push3_path = os.path.join(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue