fix: feat: Push3 evolution — crossover operator (#639)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
openhands 2026-03-13 05:54:48 +00:00
parent 709dfccf7e
commit f8b765a9f8
2 changed files with 125 additions and 10 deletions

View file

@ -7,6 +7,7 @@ import {
swapOperator,
deleteInstruction,
insertInstruction,
subtreeCrossover,
crossover,
mutate,
isValid,
@ -236,6 +237,78 @@ describe('insertInstruction', () => {
});
});
// ---------------------------------------------------------------------------
// subtreeCrossover
// ---------------------------------------------------------------------------
/**
 * Behavioural checks for `subtreeCrossover(a, b)`.
 *
 * The cases below assert that offspring pass `isValid` for list-shaped
 * parents, that `a` comes back unchanged when either parent is not a list,
 * and that repeated calls can yield offspring that differ from parent `a`.
 */
describe('subtreeCrossover', () => {
  it('produces a valid program from two programs with nested list blocks', () => {
    expect(isValid(subtreeCrossover(WITH_IF, WITH_IF))).toBe(true);
  });

  it('falls back to flat crossover when programs have no nested lists', () => {
    // FOUR_OUT contains no parenthesised sub-expressions, only top-level items.
    expect(isValid(subtreeCrossover(FOUR_OUT, FOUR_OUT))).toBe(true);
  });

  it('returns `a` when `b` is not a list', () => {
    const intLiteral: Push3Program = { kind: 'int', value: 42n };
    const offspring = subtreeCrossover(FOUR_OUT, intLiteral);
    // Compared via serialization, so structural equality with `a` is enough here.
    expect(serialize(offspring)).toBe(serialize(FOUR_OUT));
  });

  it('returns `a` when `a` is not a list', () => {
    const intLiteral: Push3Program = { kind: 'int', value: 99n };
    const offspring = subtreeCrossover(intLiteral, FOUR_OUT);
    // A bare int is expected to fail validation by itself ...
    expect(isValid(offspring)).toBe(false);
    // ... and the very same object reference must be handed back.
    expect(offspring).toBe(intLiteral);
  });

  it('produces a valid program from two optimizer programs', () => {
    expect(isValid(subtreeCrossover(optimizer, optimizer))).toBe(true);
  });

  it('can swap a list sub-expression from parent b into parent a', () => {
    // The parents are identical except for the leading constant of the first
    // nested list after EXEC.IF (10 in A, 99 in B). Within 30 attempts at
    // least one offspring is expected to serialize differently from A.
    const parentA = parse(
      '( DYADIC.DUP 91000000000000000000 DYADIC.> EXEC.IF' +
        ' ( 10 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )' +
        ' ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) )',
    );
    const parentB = parse(
      '( DYADIC.DUP 91000000000000000000 DYADIC.> EXEC.IF' +
        ' ( 99 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )' +
        ' ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) )',
    );

    let attemptsLeft = 30;
    let sawDivergence = false;
    // Stop as soon as one divergent offspring shows up or the budget runs out.
    while (!sawDivergence && attemptsLeft > 0) {
      attemptsLeft -= 1;
      const offspring = subtreeCrossover(parentA, parentB);
      expect(isValid(offspring)).toBe(true);
      sawDivergence = serialize(offspring) !== serialize(parentA);
    }
    expect(sawDivergence).toBe(true);
  });

  it('produces diverse offspring across multiple calls on programs with many sub-expressions', () => {
    const distinctOffspring = new Set<string>();
    for (let remaining = 20; remaining > 0; remaining -= 1) {
      const offspring = subtreeCrossover(optimizer, optimizer);
      expect(isValid(offspring)).toBe(true);
      distinctOffspring.add(serialize(offspring));
    }
    // Twenty crossovers of the optimizer with itself should not all collapse
    // to a single serialized form.
    expect(distinctOffspring.size).toBeGreaterThanOrEqual(2);
  });
});
// ---------------------------------------------------------------------------
// crossover
// ---------------------------------------------------------------------------
@ -319,6 +392,7 @@ describe('edge cases', () => {
expect(isValid(swapOperator(EMPTY))).toBe(true);
expect(isValid(deleteInstruction(EMPTY))).toBe(true);
expect(isValid(insertInstruction(EMPTY))).toBe(true);
expect(isValid(subtreeCrossover(EMPTY, EMPTY))).toBe(true);
expect(isValid(crossover(EMPTY, EMPTY))).toBe(true);
expect(isValid(mutate(EMPTY, 3))).toBe(true);
});
@ -328,6 +402,7 @@ describe('edge cases', () => {
expect(isValid(swapOperator(SINGLE_POP))).toBe(true);
expect(isValid(deleteInstruction(SINGLE_POP))).toBe(true);
expect(isValid(insertInstruction(SINGLE_POP))).toBe(true);
expect(isValid(subtreeCrossover(SINGLE_POP, SINGLE_POP))).toBe(true);
expect(isValid(crossover(SINGLE_POP, SINGLE_POP))).toBe(true);
expect(isValid(mutate(SINGLE_POP, 3))).toBe(true);
});