harb/tools/push3-evolution/mutate.ts

364 lines
11 KiB
TypeScript
Raw Normal View History

/**
* Push3 mutation operators for optimizer evolution.
*
* Implements five mutation operators on Push3 AST programs plus one meta-operator:
* 1. mutateConstant - shift a random integer literal by ±δ
* 2. swapOperator - replace an arithmetic or comparison opcode with its pair (ADD<->SUB, MUL<->DIV, GT<->LT, GTE<->LTE)
* 3. deleteInstruction - remove a random instruction and validate
* 4. insertInstruction - insert a stack-neutral sequence at a random position
* 5. crossover - single-point crossover of two programs at instruction boundaries
* (meta) mutate - apply N random mutations from operators 1-4
*
* All mutations validate the output via the transpiler's stack simulation.
* Invalid mutations silently return the original program.
*/
import { Node } from '../push3-transpiler/src/parser';
import { transpile } from '../push3-transpiler/src/transpiler';
/** Alias for the parser's `Node` AST type; a program is referred to by its root node. */
export type Push3Program = Node;
// ---- Swap map: arithmetic and comparison operator pairs ----
// Each swappable opcode and its counterpart, listed once per pair.
const SWAP_BASE_PAIRS: ReadonlyArray<readonly [string, string]> = [
  ['DYADIC.+', 'DYADIC.-'],
  ['DYADIC.*', 'DYADIC./'],
  ['DYADIC.>', 'DYADIC.<'],
  ['DYADIC.>=', 'DYADIC.<='],
];
// Expanded in both directions so a lookup works from either member of a pair.
const SWAP_PAIRS: Array<[string, string]> = [];
for (const [opcode, partner] of SWAP_BASE_PAIRS) {
  SWAP_PAIRS.push([opcode, partner], [partner, opcode]);
}
// opcode -> the opcode it is swapped with.
const SWAP_MAP = new Map<string, string>(SWAP_PAIRS);
// ---- Random helpers ----
/**
 * Returns `Math.floor(Math.random() * n)`; for a positive integer `n` that is
 * an integer in the range 0..n-1.
 */
function rand(n: number): number {
  const scaled = Math.random() * n;
  return Math.floor(scaled);
}
/**
 * Returns one element of `arr`, chosen at an index drawn from `rand`.
 * Callers in this file check for an empty array before calling.
 */
function pick<T>(arr: T[]): T {
  const chosenIndex = rand(arr.length);
  return arr[chosenIndex];
}
// ---- Immutable tree navigation and update ----
/**
 * Navigate to the node at the given path (sequence of list indices from root).
 *
 * @param root Tree to walk.
 * @param path Child index to follow at each level; an empty path yields `root`.
 * @returns The node found at the end of the path.
 * @throws Error if a step is taken from a non-list node, or if an index does
 *         not address an existing child.
 */
export function getAt(root: Node, path: number[]): Node {
  let cur = root;
  for (const idx of path) {
    if (cur.kind !== 'list') throw new Error('getAt: not a list at path step');
    const next = cur.items[idx];
    // Without this check an out-of-range index would leak `undefined` to the
    // caller (or surface later as an unrelated TypeError).
    if (next === undefined) {
      throw new Error(`getAt: index ${idx} out of range at path step`);
    }
    cur = next;
  }
  return cur;
}
/**
 * Build a copy of the tree in which the node addressed by `path` is `newNode`.
 *
 * Only the lists along the path are re-created; every other subtree is reused
 * by reference. An empty path returns `newNode` itself. If an index on the path
 * matches no child, that level is copied without any replacement.
 *
 * @throws Error if the path steps into a node that is not a list.
 */
export function replaceAt(root: Node, path: number[], newNode: Node): Node {
  if (path.length === 0) {
    return newNode;
  }
  if (root.kind !== 'list') {
    throw new Error('replaceAt: not a list at path step');
  }
  const targetIndex = path[0];
  const remainingPath = path.slice(1);
  const rebuilt: Node[] = [];
  for (let position = 0; position < root.items.length; position++) {
    const child = root.items[position];
    rebuilt.push(
      position === targetIndex ? replaceAt(child, remainingPath, newNode) : child,
    );
  }
  return { kind: 'list', items: rebuilt };
}
/**
 * Build a copy of the tree in which the list at `parentPath` no longer contains
 * the item at position `index`. An index matching no item leaves the list
 * contents unchanged.
 *
 * @throws Error if the node at `parentPath` is not a list.
 */
function deleteAt(root: Node, parentPath: number[], index: number): Node {
  const container = getAt(root, parentPath);
  if (container.kind !== 'list') {
    throw new Error('deleteAt: parent is not a list');
  }
  const survivors: Node[] = [];
  container.items.forEach((child, position) => {
    if (position !== index) survivors.push(child);
  });
  return replaceAt(root, parentPath, { kind: 'list', items: survivors });
}
/**
 * Build a copy of the tree in which the nodes in `inserted` are spliced into
 * the list at `parentPath`, starting at position `index`.
 *
 * @throws Error if the node at `parentPath` is not a list.
 */
function insertAt(
  root: Node,
  parentPath: number[],
  index: number,
  inserted: Node[],
): Node {
  const container = getAt(root, parentPath);
  if (container.kind !== 'list') {
    throw new Error('insertAt: parent is not a list');
  }
  // Work on a copy so the original list is left untouched.
  const widened = container.items.slice();
  widened.splice(index, 0, ...inserted);
  return replaceAt(root, parentPath, { kind: 'list', items: widened });
}
// ---- Node collectors ----
/**
 * Gather the path of every node (the root included) for which `test` returns
 * true, in pre-order. Each path is `prefix` followed by the child indices
 * leading from `root` to the node.
 */
function collectPaths(
  root: Node,
  test: (n: Node) => boolean,
  prefix: number[] = [],
): number[][] {
  const found: number[][] = [];
  const visit = (node: Node, trail: number[]): void => {
    if (test(node)) found.push(trail);
    if (node.kind !== 'list') return;
    for (const [position, child] of node.items.entries()) {
      visit(child, [...trail, position]);
    }
  };
  visit(root, prefix);
  return found;
}
/**
 * Gather a `{ parentPath, index }` record for every list item accepted by
 * `test`, in pre-order. `parentPath` addresses the list holding the item and
 * `index` is the item's position inside that list. The root node itself is
 * never reported because it has no containing list.
 */
function collectListPositions(
  root: Node,
  test: (item: Node, parentItems: Node[], index: number) => boolean,
  currentPath: number[] = [],
): Array<{ parentPath: number[]; index: number }> {
  const found: Array<{ parentPath: number[]; index: number }> = [];
  const visit = (node: Node, trail: number[]): void => {
    if (node.kind !== 'list') return;
    for (let position = 0; position < node.items.length; position++) {
      const child = node.items[position];
      if (test(child, node.items, position)) {
        found.push({ parentPath: trail, index: position });
      }
      // Descend right after testing the item so results stay in pre-order.
      visit(child, [...trail, position]);
    }
  };
  visit(root, currentPath);
  return found;
}
/**
 * Gather the path of every list node in pre-order, starting with the root when
 * the root is itself a list. Each path is `prefix` followed by child indices.
 */
function collectListPaths(root: Node, prefix: number[] = []): number[][] {
  const found: number[][] = [];
  const visit = (node: Node, trail: number[]): void => {
    if (node.kind !== 'list') return;
    found.push(trail);
    for (let position = 0; position < node.items.length; position++) {
      visit(node.items[position], [...trail, position]);
    }
  };
  visit(root, prefix);
  return found;
}
// ---- Validation ----
/**
 * Report whether `program` is accepted by the transpiler.
 *
 * The program is handed to `transpile`; a normal return means valid. A thrown
 * `Error` instance is interpreted as "invalid program" and yields false.
 */
export function isValid(program: Push3Program): boolean {
  try {
    transpile(program);
  } catch (err) {
    // The transpiler is expected to report bad programs (stack underflow,
    // unknown instruction, etc.) as Error instances. Anything else that is
    // thrown, such as a primitive, is re-thrown so real bugs are not hidden.
    if (err instanceof Error) return false;
    throw err;
  }
  return true;
}
// ---- Serialiser (useful for testing / deduplication) ----
/**
 * Render a program as Push3 source text.
 *
 * Integers print in decimal, booleans as `TRUE` / `FALSE`, instructions and
 * names as their own text, and lists as `( item item ... )` with nested items
 * rendered recursively. The output is intended to be re-parseable by the Push3
 * parser (not verified here).
 */
export function serialize(program: Push3Program): string {
  switch (program.kind) {
    case 'list': {
      const parts = program.items.map((child) => serialize(child));
      return `( ${parts.join(' ')} )`;
    }
    case 'name':
      return program.text;
    case 'instr':
      return program.name;
    case 'bool': {
      if (program.value) return 'TRUE';
      return 'FALSE';
    }
    case 'int':
      return String(program.value);
  }
}
// ---- Mutation operators ----
/**
 * Constant perturbation: shift a randomly chosen integer literal by `delta`.
 *
 * The shifted value is clamped at zero, so a constant never becomes negative.
 *
 * @param program Source program (not mutated in-place).
 * @param delta Whole-number amount to add to the chosen constant (may be negative).
 * @returns Mutated program, or the original if `delta` is not an integer, no
 *          int nodes exist, or the mutated program fails validation.
 */
export function mutateConstant(
  program: Push3Program,
  delta: number,
): Push3Program {
  // BigInt() throws a RangeError for NaN, ±Infinity and fractional numbers.
  // Treat such a delta like any other invalid mutation: keep the original.
  if (!Number.isInteger(delta)) return program;
  const paths = collectPaths(program, (n) => n.kind === 'int');
  if (paths.length === 0) return program;
  const path = pick(paths);
  const node = getAt(program, path);
  if (node.kind !== 'int') return program;
  const shifted = node.value + BigInt(delta);
  // Clamp so the constant never goes below zero.
  const clamped = shifted < 0n ? 0n : shifted;
  const replacement: Node = { kind: 'int', value: clamped };
  const mutated = replaceAt(program, path, replacement);
  return isValid(mutated) ? mutated : program;
}
/**
 * Operator swap: replace a randomly chosen arithmetic or comparison opcode with
 * its pair (ADD<->SUB, MUL<->DIV, GT<->LT, GTE<->LTE), as listed in `SWAP_MAP`.
 *
 * The swap pairs are expected to have identical stack effects; the result is
 * validated regardless.
 *
 * @returns Mutated program, or the original if no swappable ops exist or the
 *          mutated program fails validation.
 */
export function swapOperator(program: Push3Program): Push3Program {
  const paths = collectPaths(
    program,
    (n) => n.kind === 'instr' && SWAP_MAP.has(n.name),
  );
  if (paths.length === 0) return program;
  const path = pick(paths);
  const node = getAt(program, path);
  if (node.kind !== 'instr') return program;
  // Guarded lookup instead of a non-null assertion: the collector already
  // filtered on SWAP_MAP membership, but the compiler cannot see that.
  const newName = SWAP_MAP.get(node.name);
  if (newName === undefined) return program;
  const mutated = replaceAt(program, path, { kind: 'instr', name: newName });
  return isValid(mutated) ? mutated : program;
}
/**
 * Instruction deletion: drop one randomly chosen instruction, never `EXEC.IF`.
 *
 * The candidate program is checked with `isValid`; a deletion that leaves the
 * program invalid is discarded.
 *
 * @returns Mutated program, or the original if there is nothing to delete or
 *          the deletion produces an invalid program.
 */
export function deleteInstruction(program: Push3Program): Push3Program {
  // Only instr nodes qualify. EXEC.IF is skipped because removing it would
  // orphan its branches; int/bool/name/list nodes are left alone to preserve
  // program structure.
  const isDeletable = (item: Node): boolean => {
    if (item.kind !== 'instr') return false;
    return item.name !== 'EXEC.IF';
  };
  const candidates = collectListPositions(program, isDeletable);
  if (candidates.length === 0) {
    return program;
  }
  const target = pick(candidates);
  const shortened = deleteAt(program, target.parentPath, target.index);
  if (!isValid(shortened)) {
    return program;
  }
  return shortened;
}
/**
 * Instruction insertion: splice the pair `0`, `DYADIC.POP` into a randomly
 * chosen list of the program at a randomly chosen position.
 *
 * The pair is intended to be stack-neutral (push a value, then discard it):
 *   DYADIC stack before:  [..., X]
 *   After `0`:            [..., X, 0]
 *   After `DYADIC.POP`:   [..., X]
 *
 * The result is checked with `isValid` and discarded if invalid.
 *
 * @returns Mutated program, or the original if the program contains no list or
 *          the insertion is invalid.
 */
export function insertInstruction(program: Push3Program): Push3Program {
  const candidateLists = collectListPaths(program);
  if (candidateLists.length === 0) {
    return program;
  }
  const targetPath = pick(candidateLists);
  const targetList = getAt(program, targetPath);
  if (targetList.kind !== 'list') {
    return program;
  }
  // Any slot from 0 through items.length is allowed; the last one appends.
  const slot = rand(targetList.items.length + 1);
  const pushZero: Node = { kind: 'int', value: 0n };
  const discard: Node = { kind: 'instr', name: 'DYADIC.POP' };
  const widened = insertAt(program, targetPath, slot, [pushZero, discard]);
  if (isValid(widened)) {
    return widened;
  }
  return program;
}
/**
 * Crossover: single-point crossover of two programs at instruction boundaries.
 *
 * Joins `a.items[0..splitA)` with `b.items[splitB..]`, where both split points
 * are chosen at random, and validates the result.
 *
 * @returns The combined program if it validates. Otherwise:
 *          - if `a` is not a list: `a` when it validates, else an empty list;
 *          - if `b` is not a list, or the combination is invalid: `a`.
 */
export function crossover(a: Push3Program, b: Push3Program): Push3Program {
  if (a.kind !== 'list') {
    if (isValid(a)) {
      return a;
    }
    const emptyProgram: Node = { kind: 'list', items: [] };
    return emptyProgram;
  }
  if (b.kind !== 'list') {
    return a;
  }
  // Split points range over 0..length inclusive, so either side may be empty.
  const splitA = rand(a.items.length + 1);
  const splitB = rand(b.items.length + 1);
  const head = a.items.slice(0, splitA);
  const tail = b.items.slice(splitB);
  const offspring: Node = { kind: 'list', items: head.concat(tail) };
  if (isValid(offspring)) {
    return offspring;
  }
  return a;
}
/**
 * Apply `rate` randomly selected mutations from the four single-program
 * operators (mutateConstant, swapOperator, deleteInstruction,
 * insertInstruction).
 *
 * The steps are chained: each one works on the previous step's result. An
 * operator whose mutation fails validation hands back its input, so that step
 * leaves the program unchanged.
 *
 * @param program Input program.
 * @param rate Number of mutations to attempt.
 * @returns Mutated program (always valid when given a valid input).
 */
export function mutate(program: Push3Program, rate: number): Push3Program {
  let current = program;
  let attempts = 0;
  while (attempts < rate) {
    const choice = rand(4);
    if (choice === 0) {
      // Random sign first, then a magnitude in 1..10.
      const sign = rand(2) === 0 ? 1 : -1;
      const magnitude = rand(10) + 1;
      current = mutateConstant(current, sign * magnitude);
    } else if (choice === 1) {
      current = swapOperator(current);
    } else if (choice === 2) {
      current = deleteInstruction(current);
    } else if (choice === 3) {
      current = insertInstruction(current);
    }
    attempts += 1;
  }
  return current;
}