harb/tools/push3-transpiler/src/parser.ts

81 lines
2.6 KiB
TypeScript

/**
* Push3 parser — converts Push3 source text into an AST.
*
* Node types:
* { kind: 'int', value: bigint } — integer literal: optional '-' then decimal digits, any size (1e18 must be written 1000000000000000000)
* { kind: 'bool', value: boolean } — TRUE / FALSE
* { kind: 'instr', name: string } — DYADIC.+, EXEC.IF, etc.
* { kind: 'name', text: string } — unbound identifier (TAXRATE etc.)
* { kind: 'list', items: Node[] } — ( ... )
*/
/**
 * AST node produced by the parser. A discriminated union: switch on `kind`
 * to find out which payload field is present.
 */
export type Node =
  // Integer literal, held as a bigint.
  | { kind: 'int'; value: bigint }
  // Boolean literal (TRUE / FALSE in source).
  | { kind: 'bool'; value: boolean }
  // Instruction token; `name` is the full token text.
  | { kind: 'instr'; name: string }
  // Any other bare token, kept verbatim in `text`.
  | { kind: 'name'; text: string }
  // Parenthesised sequence of child nodes, in source order.
  | { kind: 'list'; items: Array<Node> };
// Token prefixes that mark an instruction: a token starting with any of
// these strings is classified as an instruction rather than a plain name.
const KNOWN_INSTR_PREFIXES = [
  'DYADIC.',
  'EXEC.',
  'BOOLEAN.',
  'CODE.',
  'NAME.',
  'INDEX.',
  'INTVECTOR.',
  'FLOATVECTOR.',
  'BOOLVECTOR.',
  'GRAPH.',
];
/**
 * Reports whether `token` starts with one of the entries in
 * KNOWN_INSTR_PREFIXES.
 *
 * @param token - a single token string
 * @returns true if any known prefix is a prefix of `token`, false otherwise
 */
function isInstruction(token: string): boolean {
  return KNOWN_INSTR_PREFIXES.some((prefix) => token.startsWith(prefix));
}
/**
 * Splits source text into a flat list of tokens.
 *
 * Everything from `;;` to the end of the line is discarded as a comment.
 * Each `(` and `)` is a token by itself; every other token is a maximal run
 * of characters that are neither whitespace nor parentheses.
 *
 * @param src - raw source text
 * @returns the tokens in source order (empty array when there are none)
 */
function tokenize(src: string): string[] {
  // Replace each comment with a space so the text on either side stays separated.
  const withoutComments = src.replace(/;;[^\n]*/g, ' ');
  // One alternation does the whole job: a lone paren, or a run of
  // non-space, non-paren characters.
  const found = withoutComments.match(/[()]|[^\s()]+/g);
  return found === null ? [] : [...found];
}
/**
 * Parses exactly one node beginning at `tokens[pos]`.
 *
 * A `(` starts a list whose children are parsed recursively up to the
 * matching `)`. Any other token becomes a leaf: TRUE / FALSE are booleans,
 * an optional '-' followed by decimal digits is an integer, a token accepted
 * by isInstruction is an instruction, and everything else is a name.
 *
 * @param tokens - flat token list
 * @param pos - index of the first token of the node to parse
 * @returns a pair of the parsed node and the index of the first token after it
 * @throws Error when `pos` is past the end of `tokens`, when a `(` has no
 *         matching `)`, or when a `)` appears where a node should start
 */
function parseTokens(tokens: string[], pos: number): [Node, number] {
  const token = tokens[pos];
  if (token === undefined) throw new Error('Unexpected end of tokens');
  // A ')' is only legal as the list terminator consumed by the loop below.
  // Reaching it here means there is no open list; without this guard it
  // would fall through and be returned as a name node with text ')'.
  if (token === ')') throw new Error('Unmatched )');
  if (token === '(') {
    // Parse list until matching ')'
    const items: Node[] = [];
    let i = pos + 1;
    while (i < tokens.length && tokens[i] !== ')') {
      const [node, next] = parseTokens(tokens, i);
      items.push(node);
      i = next;
    }
    // The loop also stops when the tokens run out, which means the list was never closed.
    if (tokens[i] !== ')') throw new Error('Unmatched (');
    return [{ kind: 'list', items }, i + 1];
  }
  if (token === 'TRUE') return [{ kind: 'bool', value: true }, pos + 1];
  if (token === 'FALSE') return [{ kind: 'bool', value: false }, pos + 1];
  // Integer literal — may be large, so it is kept as a BigInt.
  if (/^-?\d+$/.test(token)) {
    return [{ kind: 'int', value: BigInt(token) }, pos + 1];
  }
  if (isInstruction(token)) {
    return [{ kind: 'instr', name: token }, pos + 1];
  }
  // Otherwise: unbound name (e.g. TAXRATE, STAKED, DELTAS, EFFIDX)
  return [{ kind: 'name', text: token }, pos + 1];
}
/**
 * Parses a complete program into a single AST node.
 *
 * The source must contain exactly one top-level node (a leaf or a
 * parenthesised list); anything left over after it is rejected.
 *
 * @param src - program source text
 * @returns the root node of the program
 * @throws Error when the source has no tokens, when tokens remain after the
 *         first top-level node, or when parsing that node fails
 */
export function parse(src: string): Node {
  const tokens = tokenize(src);
  if (tokens.length === 0) {
    throw new Error('Empty program');
  }

  const result = parseTokens(tokens, 0);
  const root = result[0];
  const end = result[1];

  // Success only if the single top-level node used up every token.
  if (end === tokens.length) {
    return root;
  }
  throw new Error(`Unexpected tokens after position ${end}: ${tokens[end]}`);
}