harb/tools/push3-evolution/mutate.ts
openhands f8b765a9f8 fix: feat: Push3 evolution — crossover operator (#639)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 05:54:48 +00:00

403 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Push3 mutation operators for optimizer evolution.
*
* Implements five mutation operators on Push3 AST programs plus one meta-operator:
* 1. mutateConstant — shift a random integer literal by ±δ
* 2. swapOperator — replace an arithmetic opcode with its pair (ADD↔SUB, MUL↔DIV, GT↔LT)
* 3. deleteInstruction — remove a random instruction and validate
* 4. insertInstruction — insert a stack-neutral sequence at a random position
* 5. subtreeCrossover — sub-expression swap: graft a random (…) block from one parent into the other
* crossover — delegates to subtreeCrossover; falls back to single-point flat crossover
* (meta) mutate — apply N random mutations from operators 1-4
*
* All mutations validate the output via the transpiler's stack simulation.
* Invalid mutations silently return the original program.
*/
import { Node } from '../push3-transpiler/src/parser';
import { transpile } from '../push3-transpiler/src/transpiler';
/**
 * Alias for the parser's `Node` type. Every operator in this file accepts and
 * returns programs in this form.
 */
export type Push3Program = Node;
// ---- Swap map: arithmetic and comparison operator pairs ----
/**
 * Opcode replacement table as [from, to] tuples.
 * Each pairing is listed in both directions so a lookup works from either side.
 */
const SWAP_PAIRS: Array<[string, string]> = [
  // addition <-> subtraction
  ['DYADIC.+', 'DYADIC.-'],
  ['DYADIC.-', 'DYADIC.+'],
  // multiplication <-> division
  ['DYADIC.*', 'DYADIC./'],
  ['DYADIC./', 'DYADIC.*'],
  // strict comparisons
  ['DYADIC.>', 'DYADIC.<'],
  ['DYADIC.<', 'DYADIC.>'],
  // inclusive comparisons
  ['DYADIC.>=', 'DYADIC.<='],
  ['DYADIC.<=', 'DYADIC.>='],
];

/** Lookup from an opcode name to the opcode that replaces it. */
const SWAP_MAP = new Map<string, string>();
for (const [from, to] of SWAP_PAIRS) {
  SWAP_MAP.set(from, to);
}
// ---- Random helpers ----
/**
 * Draw a pseudo-random integer using `Math.random()`.
 * For a positive integer `n` the result lies in the half-open range [0, n).
 */
function rand(n: number): number {
  const scaled = Math.random() * n;
  return Math.floor(scaled);
}
/**
 * Return one element of `arr`, chosen at an index drawn from `rand(arr.length)`.
 * An empty array yields `undefined`, so callers check the length first.
 */
function pick<T>(arr: T[]): T {
  const chosenIndex = rand(arr.length);
  return arr[chosenIndex];
}
// ---- Immutable tree navigation and update ----
/**
 * Navigate to the node at the given path (sequence of list indices from root).
 *
 * @param root Tree to descend into.
 * @param path Child index to follow at each level; an empty path yields `root`.
 * @returns The node reached after following every step of `path`.
 * @throws Error if a step is taken from a non-list node, or if a step names an
 *         index that holds no item in the current list.
 */
export function getAt(root: Node, path: number[]): Node {
  let cur = root;
  for (const idx of path) {
    if (cur.kind !== 'list') throw new Error('getAt: not a list at path step');
    const next = cur.items[idx];
    // Without this check a bad index would leak `undefined` typed as Node, or
    // fail on the following step with an unrelated TypeError.
    if (next === undefined) {
      throw new Error(`getAt: index ${idx} out of range at path step`);
    }
    cur = next;
  }
  return cur;
}
/**
 * Build a copy of `root` in which the node addressed by `path` is `newNode`.
 *
 * Only the list nodes along `path` are rebuilt; every sibling subtree is reused
 * by reference. An empty path returns `newNode` itself. If a step names an index
 * that is not present in the list, that list is copied with nothing replaced.
 *
 * @throws Error if a step is taken from a non-list node.
 */
export function replaceAt(root: Node, path: number[], newNode: Node): Node {
  if (path.length === 0) {
    return newNode;
  }
  if (root.kind !== 'list') {
    throw new Error('replaceAt: not a list at path step');
  }
  const target = path[0];
  const remaining = path.slice(1);
  const children = root.items;
  const rebuilt: Node[] = [];
  for (let position = 0; position < children.length; position++) {
    const child = children[position];
    rebuilt.push(
      position === target ? replaceAt(child, remaining, newNode) : child,
    );
  }
  return { kind: 'list', items: rebuilt };
}
/**
 * Build a copy of `root` in which the list found at `parentPath` no longer
 * contains the item at position `index`. An `index` that matches no position
 * leaves the list contents unchanged.
 *
 * @throws Error if the node at `parentPath` is not a list.
 */
function deleteAt(root: Node, parentPath: number[], index: number): Node {
  const container = getAt(root, parentPath);
  if (container.kind !== 'list') {
    throw new Error('deleteAt: parent is not a list');
  }
  const kept: Node[] = [];
  for (let position = 0; position < container.items.length; position++) {
    if (position !== index) {
      kept.push(container.items[position]);
    }
  }
  return replaceAt(root, parentPath, { kind: 'list', items: kept });
}
/**
 * Build a copy of `root` in which the nodes of `inserted` appear, in order,
 * starting at position `index` of the list found at `parentPath`. The items
 * previously at `index` and beyond follow the inserted nodes.
 *
 * @throws Error if the node at `parentPath` is not a list.
 */
function insertAt(
  root: Node,
  parentPath: number[],
  index: number,
  inserted: Node[],
): Node {
  const container = getAt(root, parentPath);
  if (container.kind !== 'list') {
    throw new Error('insertAt: parent is not a list');
  }
  const before = container.items.slice(0, index);
  const after = container.items.slice(index);
  const widened: Node = {
    kind: 'list',
    items: [...before, ...inserted, ...after],
  };
  return replaceAt(root, parentPath, widened);
}
// ---- Node collectors ----
/**
 * Gather the path (index sequence starting at `prefix`) of every node for which
 * `test` returns true, visiting each node before its children.
 *
 * @param root   Subtree to search.
 * @param test   Predicate applied to `root` and to every descendant.
 * @param prefix Path of `root` itself; defaults to the empty path.
 */
function collectPaths(
  root: Node,
  test: (n: Node) => boolean,
  prefix: number[] = [],
): number[][] {
  const found: number[][] = test(root) ? [prefix] : [];
  if (root.kind !== 'list') {
    return found;
  }
  root.items.forEach((child, position) => {
    for (const childPath of collectPaths(child, test, prefix.concat(position))) {
      found.push(childPath);
    }
  });
  return found;
}
/**
 * Gather a {parentPath, index} record for every list item accepted by `test`.
 *
 * `parentPath` addresses the list that holds the item and `index` is the item's
 * slot in that list. `test` receives the item, its sibling array and its slot.
 * A non-list `root` produces no records.
 */
function collectListPositions(
  root: Node,
  test: (item: Node, parentItems: Node[], index: number) => boolean,
  currentPath: number[] = [],
): Array<{ parentPath: number[]; index: number }> {
  const hits: Array<{ parentPath: number[]; index: number }> = [];
  if (root.kind !== 'list') {
    return hits;
  }
  const siblings = root.items;
  siblings.forEach((item, index) => {
    if (test(item, siblings, index)) {
      hits.push({ parentPath: currentPath, index });
    }
    // Descend after recording the item so results stay in document order.
    const nested = collectListPositions(item, test, currentPath.concat(index));
    for (const hit of nested) {
      hits.push(hit);
    }
  });
  return hits;
}
/**
 * Gather the path of every list node in the tree, the root list included,
 * visiting each list before the lists nested inside it.
 *
 * @param root   Subtree to search.
 * @param prefix Path of `root` itself; defaults to the empty path.
 */
function collectListPaths(root: Node, prefix: number[] = []): number[][] {
  const listPaths: number[][] = [];
  const visit = (node: Node, path: number[]): void => {
    if (node.kind !== 'list') {
      return;
    }
    listPaths.push(path);
    node.items.forEach((child, position) => {
      visit(child, [...path, position]);
    });
  };
  visit(root, prefix);
  return listPaths;
}
// ---- Validation ----
/**
 * Report whether `program` is accepted by the transpiler.
 *
 * @returns `true` when `transpile(program)` completes, `false` when it throws
 *          an `Error` instance.
 * @throws Any thrown value that is not an `Error` instance is passed through.
 */
export function isValid(program: Push3Program): boolean {
  try {
    transpile(program);
  } catch (failure) {
    // An Error instance is read as the transpiler rejecting the program.
    if (failure instanceof Error) {
      return false;
    }
    // Anything else (e.g. a thrown primitive) is re-thrown to surface real bugs.
    throw failure;
  }
  return true;
}
// ---- Serialiser (useful for testing / deduplication) ----
/**
 * Render a Push3Program as source text.
 *
 * Integers print via `toString()`, booleans as `TRUE` / `FALSE`, instructions
 * and names as their stored text, and lists as their rendered items joined by
 * single spaces between `( ` and ` )`.
 */
export function serialize(program: Push3Program): string {
  switch (program.kind) {
    case 'list': {
      const rendered: string[] = [];
      for (const item of program.items) {
        rendered.push(serialize(item));
      }
      return `( ${rendered.join(' ')} )`;
    }
    case 'name': {
      return program.text;
    }
    case 'instr': {
      return program.name;
    }
    case 'bool': {
      if (program.value) {
        return 'TRUE';
      }
      return 'FALSE';
    }
    case 'int': {
      return program.value.toString();
    }
  }
}
// ---- Mutation operators ----
/**
 * Constant perturbation: shift a randomly chosen integer literal by `delta`.
 *
 * A shifted value below zero is clamped to 0. The candidate program is checked
 * with `isValid`; if it fails, the original program is returned instead.
 *
 * @param program Source program (not mutated in-place).
 * @param delta   Whole-number amount to add to the chosen constant (may be
 *                negative). A NaN, infinite or fractional value cannot be
 *                converted to a BigInt and results in no mutation.
 * @returns Mutated program, or the original if `delta` is not an integer, no
 *          int nodes exist, the constant would not change, or validation fails.
 */
export function mutateConstant(
  program: Push3Program,
  delta: number,
): Push3Program {
  // BigInt() throws a RangeError for non-integer numbers; treat such a delta
  // as an invalid mutation and hand back the program untouched.
  if (!Number.isInteger(delta)) return program;
  const paths = collectPaths(program, (n) => n.kind === 'int');
  if (paths.length === 0) return program;
  const path = pick(paths);
  const node = getAt(program, path);
  if (node.kind !== 'int') return program;
  const shifted = node.value + BigInt(delta);
  const clamped = shifted < 0n ? 0n : shifted;
  // Nothing changed (e.g. delta 0, or 0 clamped back to 0): skip validation.
  if (clamped === node.value) return program;
  const mutated = replaceAt(program, path, { kind: 'int', value: clamped });
  return isValid(mutated) ? mutated : program;
}
/**
 * Operator swap: pick one instruction whose name is a key of SWAP_MAP and
 * replace it with the mapped opcode (ADD↔SUB, MUL↔DIV, GT↔LT, GTE↔LTE).
 *
 * The candidate program is checked with `isValid` before it is returned.
 *
 * @returns Mutated program, or the original if no swappable instruction exists
 *          or the candidate fails validation.
 */
export function swapOperator(program: Push3Program): Push3Program {
  const isSwappable = (n: Node): boolean =>
    n.kind === 'instr' && SWAP_MAP.has(n.name);
  const candidates = collectPaths(program, isSwappable);
  if (candidates.length === 0) {
    return program;
  }
  const chosen = pick(candidates);
  const target = getAt(program, chosen);
  if (target.kind !== 'instr') {
    return program;
  }
  // Candidates were filtered on SWAP_MAP.has, so the lookup yields a name.
  const counterpart = SWAP_MAP.get(target.name)!;
  const swapped = replaceAt(program, chosen, {
    kind: 'instr',
    name: counterpart,
  });
  if (!isValid(swapped)) {
    return program;
  }
  return swapped;
}
/**
 * Instruction deletion: drop one randomly chosen instruction from the program.
 *
 * Only `instr` items are candidates, and EXEC.IF is never chosen (removing it
 * would orphan its branches). Integers, booleans, names and lists are left
 * alone to preserve program structure. The candidate program is checked with
 * `isValid` before it is returned.
 *
 * @returns Mutated program, or the original if there is nothing to delete or
 *          the candidate fails validation.
 */
export function deleteInstruction(program: Push3Program): Push3Program {
  const isDeletable = (item: Node): boolean =>
    item.kind === 'instr' && item.name !== 'EXEC.IF';
  const candidates = collectListPositions(program, isDeletable);
  if (candidates.length === 0) {
    return program;
  }
  const victim = pick(candidates);
  const trimmed = deleteAt(program, victim.parentPath, victim.index);
  if (isValid(trimmed)) {
    return trimmed;
  }
  return program;
}
/**
 * Instruction insertion: splice the pair `0 DYADIC.POP` into a randomly chosen
 * list of the program at a randomly chosen slot (appending at the end included).
 *
 * The pair is meant to leave stack depth unchanged:
 *   before:            [..., X]
 *   after `0`:         [..., X, 0]
 *   after `DYADIC.POP`: [..., X]
 *
 * The candidate program is checked with `isValid` before it is returned.
 *
 * @returns Mutated program, or the original if the program contains no list or
 *          the candidate fails validation.
 */
export function insertInstruction(program: Push3Program): Push3Program {
  const hostPaths = collectListPaths(program);
  if (hostPaths.length === 0) {
    return program;
  }
  const hostPath = pick(hostPaths);
  const host = getAt(program, hostPath);
  if (host.kind !== 'list') {
    return program;
  }
  // Slots 0..length are all legal; slot === length appends.
  const slot = rand(host.items.length + 1);
  const pushZero: Node = { kind: 'int', value: 0n };
  const discard: Node = { kind: 'instr', name: 'DYADIC.POP' };
  const grown = insertAt(program, hostPath, slot, [pushZero, discard]);
  if (isValid(grown)) {
    return grown;
  }
  return program;
}
/**
 * Sub-expression swap crossover: graft a random nested list from `b` into `a`.
 *
 * Steps:
 *   1. Gather the paths of all nested (non-root) lists in both parents. The
 *      root path is excluded so the whole program is never replaced.
 *   2. If both parents have at least one, make up to MAX_ATTEMPTS tries: pick a
 *      path in each parent, put `b`'s subtree in place of `a`'s, and return the
 *      first child that passes `isValid`.
 *   3. Otherwise, or when every try fails, fall back to a single flat
 *      single-point crossover: a prefix of `a`'s top-level items followed by a
 *      suffix of `b`'s, with both cut points chosen at random.
 *
 * @param a First parent (receives the grafted sub-tree).
 * @param b Second parent (sub-tree donor).
 * @returns A child that passed validation, or `a` if either parent is not a
 *          list or no valid child was produced.
 */
export function subtreeCrossover(a: Push3Program, b: Push3Program): Push3Program {
  if (a.kind !== 'list' || b.kind !== 'list') {
    return a;
  }

  const isNested = (p: number[]): boolean => p.length > 0;
  const recipientSlots = collectListPaths(a).filter(isNested);
  const donorSlots = collectListPaths(b).filter(isNested);

  const MAX_ATTEMPTS = 10;
  const canGraft = recipientSlots.length > 0 && donorSlots.length > 0;
  let attemptsLeft = canGraft ? MAX_ATTEMPTS : 0;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    const slot = pick(recipientSlots);
    const graft = getAt(b, pick(donorSlots));
    const child = replaceAt(a, slot, graft);
    if (isValid(child)) {
      return child;
    }
  }

  // Flat fallback: a.items[0..headCount) followed by b.items[tailStart..].
  const headCount = rand(a.items.length + 1);
  const tailStart = rand(b.items.length + 1);
  const spliced: Node = {
    kind: 'list',
    items: a.items.slice(0, headCount).concat(b.items.slice(tailStart)),
  };
  return isValid(spliced) ? spliced : a;
}
/**
 * Crossover: combine two Push3 programs via sub-expression swap (primary) or
 * flat single-point crossover (fallback when no valid nested-list swap is found).
 *
 * Delegates to subtreeCrossover, which swaps random `(…)` blocks between parents.
 * Both programs must be top-level list nodes; when either is not, `a` is handed
 * back unchanged, matching what subtreeCrossover does for the same input. No
 * replacement program is ever fabricated.
 *
 * @param a First parent (receives genetic material).
 * @param b Second parent (donor).
 * @returns Combined program, or `a` if either input is not a list or the result
 *          fails validation.
 */
export function crossover(a: Push3Program, b: Push3Program): Push3Program {
  if (a.kind !== 'list' || b.kind !== 'list') return a;
  return subtreeCrossover(a, b);
}
/**
 * Run `rate` mutation steps, each using one of the four single-program
 * operators chosen at random: mutateConstant (with a random delta of magnitude
 * 1-10 and random sign), swapOperator, deleteInstruction or insertInstruction.
 *
 * Every step works on the output of the step before it. An operator whose
 * candidate fails validation hands back its input, so that step leaves the
 * program unchanged.
 *
 * @param program Input program.
 * @param rate    Number of mutation steps to attempt.
 * @returns Program after all steps (the input itself when `rate` is 0 or less).
 */
export function mutate(program: Push3Program, rate: number): Push3Program {
  let evolved = program;
  for (let step = 0; step < rate; step += 1) {
    const operator = rand(4);
    if (operator === 0) {
      const sign = rand(2) === 0 ? 1 : -1;
      const magnitude = rand(10) + 1;
      evolved = mutateConstant(evolved, sign * magnitude);
    } else if (operator === 1) {
      evolved = swapOperator(evolved);
    } else if (operator === 2) {
      evolved = deleteInstruction(evolved);
    } else if (operator === 3) {
      evolved = insertInstruction(evolved);
    }
  }
  return evolved;
}