81 lines
2.6 KiB
TypeScript
81 lines
2.6 KiB
TypeScript
/**
|
|
* Push3 parser — converts Push3 source text into an AST.
|
|
*
|
|
* Node types:
|
|
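* { kind: 'int', value: bigint } — integer literal written as plain decimal digits with an optional leading '-' (values as large as 1e18 and beyond are fine; the text "1e18" itself is not an integer token)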
|
|
* { kind: 'bool', value: boolean } — TRUE / FALSE
|
|
* { kind: 'instr', name: string } — DYADIC.+, EXEC.IF, etc.
|
|
* { kind: 'name', text: string } — unbound identifier (TAXRATE etc.)
|
|
* { kind: 'list', items: Node[] } — ( ... )
|
|
*/
|
|
|
|
/** Integer literal; stored as a bigint so large values keep full precision. */
type IntNode = { kind: 'int'; value: bigint };

/** Boolean literal (TRUE / FALSE in source). */
type BoolNode = { kind: 'bool'; value: boolean };

/** Instruction reference such as DYADIC.+ or EXEC.IF. */
type InstrNode = { kind: 'instr'; name: string };

/** Identifier that is neither a literal nor a recognised instruction. */
type NameNode = { kind: 'name'; text: string };

/** Parenthesised sequence of nodes: ( ... ). */
type ListNode = { kind: 'list'; items: Node[] };

/** AST node produced by the parser, discriminated by its `kind` field. */
export type Node = IntNode | BoolNode | InstrNode | NameNode | ListNode;
|
|
|
|
// Prefixes that mark a token as an instruction: a token starting with any of
// these is treated as an instruction name rather than an unbound identifier.
const KNOWN_INSTR_PREFIXES: readonly string[] = [
  'DYADIC.',
  'EXEC.',
  'BOOLEAN.',
  'CODE.',
  'NAME.',
  'INDEX.',
  'INTVECTOR.',
  'FLOATVECTOR.',
  'BOOLVECTOR.',
  'GRAPH.',
];
|
|
|
|
/**
 * Reports whether `token` names an instruction.
 *
 * @param token - A single token from the source text.
 * @returns `true` when the token begins with one of `KNOWN_INSTR_PREFIXES`.
 */
function isInstruction(token: string): boolean {
  return KNOWN_INSTR_PREFIXES.some((prefix) => token.startsWith(prefix));
}
|
|
|
|
/**
 * Splits Push3 source text into a flat list of tokens.
 *
 * A `;;` starts a comment that runs to the end of the line. Parentheses are
 * always tokens of their own; everything else is delimited by whitespace.
 *
 * @param src - Raw source text.
 * @returns The tokens in source order; empty when `src` has no tokens.
 */
function tokenize(src: string): string[] {
  // Replace each comment with a space so the text on either side stays separated.
  let text = src.replace(/;;[^\n]*/g, ' ');

  // Pad parentheses so the whitespace split below isolates them.
  text = text.replace(/\(/g, ' ( ');
  text = text.replace(/\)/g, ' ) ');

  const tokens: string[] = [];
  for (const piece of text.trim().split(/\s+/)) {
    // Splitting an empty string yields one empty piece; drop it.
    if (piece.length > 0) tokens.push(piece);
  }
  return tokens;
}
|
|
|
|
/**
 * Parses exactly one node starting at `tokens[pos]`.
 *
 * @param tokens - Token list (as produced by `tokenize`).
 * @param pos - Index of the first token of the node to parse.
 * @returns A tuple of the parsed node and the index of the first token after it.
 * @throws Error when `pos` is past the end of `tokens`, when a `(` has no
 *   matching `)`, or when a `)` appears where a node is expected.
 */
function parseTokens(tokens: string[], pos: number): [Node, number] {
  const token = tokens[pos];
  if (token === undefined) throw new Error('Unexpected end of tokens');

  // A ')' is only legal as the terminator consumed by the list branch below.
  // Without this guard it would fall through to the end of the function and
  // be returned as a name node whose text is ")".
  if (token === ')') throw new Error('Unmatched )');

  if (token === '(') {
    // Collect child nodes until the matching ')'.
    const items: Node[] = [];
    let i = pos + 1;
    while (i < tokens.length && tokens[i] !== ')') {
      const [node, next] = parseTokens(tokens, i);
      items.push(node);
      i = next;
    }
    // The loop also stops when the tokens run out, i.e. the list was never closed.
    if (tokens[i] !== ')') throw new Error('Unmatched (');
    return [{ kind: 'list', items }, i + 1];
  }

  if (token === 'TRUE') return [{ kind: 'bool', value: true }, pos + 1];
  if (token === 'FALSE') return [{ kind: 'bool', value: false }, pos + 1];

  // Integer literal: optional '-' followed by decimal digits. BigInt keeps
  // values that exceed Number's safe integer range exact.
  if (/^-?\d+$/.test(token)) {
    return [{ kind: 'int', value: BigInt(token) }, pos + 1];
  }

  if (isInstruction(token)) {
    return [{ kind: 'instr', name: token }, pos + 1];
  }

  // Otherwise: unbound name (e.g. TAXRATE, STAKED, DELTAS, EFFIDX)
  return [{ kind: 'name', text: token }, pos + 1];
}
|
|
|
|
/**
 * Parses Push3 source text into a single AST node.
 *
 * The source must contain exactly one top-level item (a list or a single
 * atom); anything left over after it is rejected.
 *
 * @param src - Push3 source text.
 * @returns The root node of the parsed program.
 * @throws Error when the source contains no tokens, when tokens remain after
 *   the first top-level node, or when `parseTokens` rejects the input.
 */
export function parse(src: string): Node {
  const tokens = tokenize(src);
  if (tokens.length === 0) throw new Error('Empty program');

  const [root, end] = parseTokens(tokens, 0);
  if (end === tokens.length) return root;

  throw new Error(`Unexpected tokens after position ${end}: ${tokens[end]}`);
}
|