harb/tools/push3-evolution/test/mutate.test.ts

428 lines
16 KiB
TypeScript
Raw Permalink Normal View History

import { describe, it, expect } from 'vitest';
import { readFileSync } from 'fs';
import { join } from 'path';
import { parse } from '../../push3-transpiler/src/parser';
import {
mutateConstant,
swapOperator,
deleteInstruction,
insertInstruction,
subtreeCrossover,
crossover,
mutate,
isValid,
serialize,
getAt,
replaceAt,
} from '../mutate';
import type { Push3Program } from '../mutate';
// ---------------------------------------------------------------------------
// Test programs
// ---------------------------------------------------------------------------
// A minimal valid program that leaves 4 values on the DYADIC stack.
// The stack starts with 8 inputs; we pop 4 (arithmetic), leaving 4 on the stack.
// ( a b c d ) → stack after: inputs[4..7] + result on each step
// Simplest: discard top 4 inputs via POP, leaving the bottom 4.
const FOUR_OUT = parse(
'( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )',
);
// Program with arithmetic operators and constants — useful for swapOperator/mutateConstant.
// Leaves 4 values on stack: uses 4 inputs (slots 4-7 remain as outputs).
const WITH_ARITH = parse(
'( 50 DYADIC.POP 100 DYADIC.+ DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )',
);
// Program with a comparison → EXEC.IF block, leaving 4 outputs.
const WITH_IF = parse(
'( DYADIC.DUP 91000000000000000000 DYADIC.> EXEC.IF ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) )',
);
// Empty program — stack has 8 inputs, transpile pops 4 with defaults.
const EMPTY = parse('( )');
// Single-instruction program.
const SINGLE_POP = parse('( DYADIC.POP )');
// Load the real optimizer_v3 program.
let optimizer: Push3Program;
try {
const src = readFileSync(
join(__dirname, '../../push3-transpiler/optimizer_v3.push3'),
'utf-8',
);
optimizer = parse(src);
} catch {
// Fall back to WITH_ARITH if file is unavailable.
optimizer = WITH_ARITH;
}
// ---------------------------------------------------------------------------
// isValid
// ---------------------------------------------------------------------------
describe('isValid', () => {
it('returns true for a valid program', () => {
expect(isValid(FOUR_OUT)).toBe(true);
});
it('returns true for an empty program (inputs remain on stack)', () => {
expect(isValid(EMPTY)).toBe(true);
});
it('returns false for a non-list node', () => {
const notList: Push3Program = { kind: 'int', value: 42n };
expect(isValid(notList)).toBe(false);
});
it('returns false for a program with stack underflow', () => {
// Tries to pop 9 times from a stack that only has 8 inputs.
const underflow = parse(
'( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )',
);
expect(isValid(underflow)).toBe(false);
});
it('returns false for a program with unknown instruction', () => {
// Inject an invalid instr node directly.
const bad: Push3Program = {
kind: 'list',
items: [{ kind: 'instr', name: 'DYADIC.NONEXISTENT' }],
};
expect(isValid(bad)).toBe(false);
});
});
// ---------------------------------------------------------------------------
// serialize / round-trip
// ---------------------------------------------------------------------------
describe('serialize', () => {
it('round-trips an integer literal', () => {
const p = parse('( 12345 DYADIC.POP )');
expect(serialize(p)).toBe('( 12345 DYADIC.POP )');
});
it('round-trips boolean literals', () => {
const p = parse('( TRUE FALSE )');
expect(serialize(p)).toBe('( TRUE FALSE )');
});
});
// ---------------------------------------------------------------------------
// mutateConstant
// ---------------------------------------------------------------------------
describe('mutateConstant', () => {
it('shifts a constant by the given delta', () => {
const prog = parse('( 50 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = mutateConstant(prog, 5);
// The program should change (the constant 50 should become 55).
expect(serialize(mutated)).not.toBe(serialize(prog));
expect(isValid(mutated)).toBe(true);
});
it('clamps to 0 when delta would make constant negative', () => {
const prog = parse('( 3 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = mutateConstant(prog, -100);
expect(isValid(mutated)).toBe(true);
// Value should be clamped to 0, not negative.
const s = serialize(mutated);
expect(s).not.toContain('-');
});
it('returns original when there are no integer literals', () => {
const noInts = parse('( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = mutateConstant(noInts, 10);
expect(serialize(mutated)).toBe(serialize(noInts));
});
it('produces a valid program on optimizer_v3', () => {
const mutated = mutateConstant(optimizer, 7);
expect(isValid(mutated)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// swapOperator
// ---------------------------------------------------------------------------
describe('swapOperator', () => {
it('swaps ADD to SUB or vice versa', () => {
const prog = parse('( 10 20 DYADIC.+ DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = swapOperator(prog);
expect(isValid(mutated)).toBe(true);
// Should have changed (DYADIC.+ → DYADIC.-)
expect(serialize(mutated)).toContain('DYADIC.-');
});
it('swaps GT to LT', () => {
const prog = parse('( DYADIC.DUP 50 DYADIC.> DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = swapOperator(prog);
expect(isValid(mutated)).toBe(true);
});
it('returns original when no swappable operators exist', () => {
const noSwap = parse('( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const mutated = swapOperator(noSwap);
expect(serialize(mutated)).toBe(serialize(noSwap));
});
it('produces a valid program on optimizer_v3', () => {
const mutated = swapOperator(optimizer);
expect(isValid(mutated)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// deleteInstruction
// ---------------------------------------------------------------------------
describe('deleteInstruction', () => {
it('produces a shorter or equal-length program', () => {
const prog = parse('( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.DUP DYADIC.POP )');
const mutated = deleteInstruction(prog);
expect(isValid(mutated)).toBe(true);
// Either deleted one instruction or returned original (if delete broke validity)
const origLen = serialize(prog).length;
const mutLen = serialize(mutated).length;
expect(mutLen).toBeLessThanOrEqual(origLen);
});
it('returns original for a program with no deletable instructions', () => {
// A program with only EXEC.IF and its branches — no stand-alone non-EXEC.IF instrs
// at a directly deletable position without breaking structure.
const noDelete: Push3Program = {
kind: 'list',
items: [],
};
const mutated = deleteInstruction(noDelete);
expect(serialize(mutated)).toBe(serialize(noDelete));
});
it('produces a valid program on optimizer_v3', () => {
const mutated = deleteInstruction(optimizer);
expect(isValid(mutated)).toBe(true);
});
it('handles single-instruction programs', () => {
// Deleting the only instruction may leave an empty but valid program.
const mutated = deleteInstruction(SINGLE_POP);
expect(isValid(mutated)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// insertInstruction
// ---------------------------------------------------------------------------
describe('insertInstruction', () => {
it('produces a longer program', () => {
const mutated = insertInstruction(FOUR_OUT);
expect(isValid(mutated)).toBe(true);
// Should have grown by 2 tokens (push 0 + DYADIC.POP)
expect(serialize(mutated).length).toBeGreaterThan(serialize(FOUR_OUT).length);
});
it('produces a valid program on optimizer_v3', () => {
const mutated = insertInstruction(optimizer);
expect(isValid(mutated)).toBe(true);
});
it('handles an empty program', () => {
const mutated = insertInstruction(EMPTY);
expect(isValid(mutated)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// subtreeCrossover
// ---------------------------------------------------------------------------
describe('subtreeCrossover', () => {
it('produces a valid program from two programs with nested list blocks', () => {
const child = subtreeCrossover(WITH_IF, WITH_IF);
expect(isValid(child)).toBe(true);
});
it('falls back to flat crossover when programs have no nested lists', () => {
// FOUR_OUT is a flat list with no (…) sub-expressions.
const child = subtreeCrossover(FOUR_OUT, FOUR_OUT);
expect(isValid(child)).toBe(true);
});
it('returns `a` when `b` is not a list', () => {
const notList: Push3Program = { kind: 'int', value: 42n };
const child = subtreeCrossover(FOUR_OUT, notList);
expect(serialize(child)).toBe(serialize(FOUR_OUT));
});
it('returns `a` when `a` is not a list', () => {
const notList: Push3Program = { kind: 'int', value: 99n };
const child = subtreeCrossover(notList, FOUR_OUT);
expect(isValid(child)).toBe(false); // notList is an int, not valid on its own
expect(child).toBe(notList); // returns `a` unchanged
});
it('produces a valid program from two optimizer programs', () => {
const child = subtreeCrossover(optimizer, optimizer);
expect(isValid(child)).toBe(true);
});
it('can swap a list sub-expression from parent b into parent a', () => {
// Two programs differing only in their EXEC.IF true-branch constant.
// Swapping the true branch from B (contains 99) into A (contains 10) must
// produce a child different from A at least once in 30 trials.
const parentA = parse(
'( DYADIC.DUP 91000000000000000000 DYADIC.> EXEC.IF' +
' ( 10 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )' +
' ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) )',
);
const parentB = parse(
'( DYADIC.DUP 91000000000000000000 DYADIC.> EXEC.IF' +
' ( 99 DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )' +
' ( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP ) )',
);
let foundDifferent = false;
for (let i = 0; i < 30; i++) {
const child = subtreeCrossover(parentA, parentB);
expect(isValid(child)).toBe(true);
if (serialize(child) !== serialize(parentA)) {
foundDifferent = true;
break;
}
}
expect(foundDifferent).toBe(true);
});
it('produces diverse offspring across multiple calls on programs with many sub-expressions', () => {
const seen = new Set<string>();
for (let i = 0; i < 20; i++) {
const child = subtreeCrossover(optimizer, optimizer);
expect(isValid(child)).toBe(true);
seen.add(serialize(child));
}
// The optimizer has many nested (…) blocks; expect multiple distinct offspring.
expect(seen.size).toBeGreaterThanOrEqual(2);
});
});
// ---------------------------------------------------------------------------
// crossover
// ---------------------------------------------------------------------------
describe('crossover', () => {
it('produces a valid combined program from two valid parents', () => {
const a = FOUR_OUT;
const b = parse('( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
// crossover may return a or fallback if combined is invalid — that is correct behaviour
const child = crossover(a, b);
expect(isValid(child)).toBe(true);
});
it('returns `a` when `a` is not a list', () => {
const notList: Push3Program = { kind: 'list', items: [] };
const b = FOUR_OUT;
const child = crossover(notList, b);
expect(isValid(child)).toBe(true);
});
it('returns `a` when combined result is invalid', () => {
// It's hard to guarantee invalidity without controlling the split points,
// so we just verify the return is always valid.
const child = crossover(FOUR_OUT, WITH_ARITH);
expect(isValid(child)).toBe(true);
});
it('produces a valid program with two optimizer programs', () => {
const seed = parse('( DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const child = crossover(optimizer, seed);
expect(isValid(child)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// mutate (meta-operator)
// ---------------------------------------------------------------------------
describe('mutate', () => {
it('returns a valid program with rate=0 (identity)', () => {
const result = mutate(FOUR_OUT, 0);
expect(isValid(result)).toBe(true);
expect(serialize(result)).toBe(serialize(FOUR_OUT));
});
it('returns a valid program with rate=1', () => {
const result = mutate(optimizer, 1);
expect(isValid(result)).toBe(true);
});
it('returns a valid program with rate=3', () => {
const result = mutate(optimizer, 3);
expect(isValid(result)).toBe(true);
});
it('returns a valid program with rate=10 (max stress)', () => {
const result = mutate(optimizer, 10);
expect(isValid(result)).toBe(true);
});
it('mutate(optimizer_v3, 3) produces at least 10 distinct valid variants', () => {
const seen = new Set<string>();
const TRIALS = 20;
for (let i = 0; i < TRIALS; i++) {
const variant = mutate(optimizer, 3);
expect(isValid(variant)).toBe(true);
seen.add(serialize(variant));
}
// With 20 trials, expect at least 10 distinct results (mutations are randomized).
expect(seen.size).toBeGreaterThanOrEqual(10);
});
});
// ---------------------------------------------------------------------------
// Edge cases
// ---------------------------------------------------------------------------
describe('edge cases', () => {
it('all operators handle an empty program gracefully', () => {
expect(isValid(mutateConstant(EMPTY, 5))).toBe(true);
expect(isValid(swapOperator(EMPTY))).toBe(true);
expect(isValid(deleteInstruction(EMPTY))).toBe(true);
expect(isValid(insertInstruction(EMPTY))).toBe(true);
expect(isValid(subtreeCrossover(EMPTY, EMPTY))).toBe(true);
expect(isValid(crossover(EMPTY, EMPTY))).toBe(true);
expect(isValid(mutate(EMPTY, 3))).toBe(true);
});
it('all operators handle a single-instruction program gracefully', () => {
expect(isValid(mutateConstant(SINGLE_POP, 1))).toBe(true);
expect(isValid(swapOperator(SINGLE_POP))).toBe(true);
expect(isValid(deleteInstruction(SINGLE_POP))).toBe(true);
expect(isValid(insertInstruction(SINGLE_POP))).toBe(true);
expect(isValid(subtreeCrossover(SINGLE_POP, SINGLE_POP))).toBe(true);
expect(isValid(crossover(SINGLE_POP, SINGLE_POP))).toBe(true);
expect(isValid(mutate(SINGLE_POP, 3))).toBe(true);
});
it('maintains validity across deep stack programs', () => {
// Push 7 extra values then do arithmetic — exercises deeper stack paths.
const deep = parse(
'( 1 2 3 4 5 6 7 DYADIC.+ DYADIC.+ DYADIC.+ DYADIC.+ DYADIC.+ DYADIC.+ DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )',
);
expect(isValid(deep)).toBe(true);
expect(isValid(mutate(deep, 5))).toBe(true);
});
it('getAt and replaceAt are inverse operations', () => {
const prog = parse('( 10 20 DYADIC.+ DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP DYADIC.POP )');
const path = [0]; // first item: int 10
const node = getAt(prog, path);
expect(node.kind).toBe('int');
const restored = replaceAt(prog, path, node);
expect(serialize(restored)).toBe(serialize(prog));
});
});