Divyansh Prakash, September 2023
NOTE: Please read the previous post to understand the context of this post.
Now that we have learnt how to write stackless recursive functions using trampoline
, and we know that JS's eval
is not stack-safe, it's time to write our own stackless eval
!
The idea is that if the language's evaluator itself is written in a stackless manner, then the implemented language will be free from stack overflow errors.
We will implement a very simple language in which we can define the factorial function and call it.
Here's how our code would look:
["do",
["def", "fact",
["fn", ["x"],
["if", ["<", "x", 2],
1,
["*", "x", ["fact", ["-", "x", 1]]]]]],
["fact", 5]]
The language has the following characteristics:
- syntax represented as JSON
- builtin types:
- int (arbitrary precision integers)
- string (identifiers)
- fn (first class functions)
- special forms:
do
,def
,fn
,if
- builtin fns:
<
,*
,-
// Environment
// ===========
// builtin functions
let TopEnv = {
'<': (x, y) => x < y,
'*': (x, y) => x * y,
'-': (x, y) => x - y,
};
// returns value bound to identifier
// in current scope, else in ancestor scopes
// throws error if not found
function lookup(env, identifier) {
if (env.hasOwnProperty(identifier))
return env[identifier];
else if (env.hasOwnProperty('__parent__'))
return lookup(env.__parent__, identifier);
else
throw new Error(`${identifier} is not defined!`);
}
// Evaluator
// =========
function eval(expr, env) {
// env defaults to TopEnv
env = env || TopEnv;
if (typeof expr === 'number') {
// numbers are parsed as BigInt
return BigInt(expr);
} else if (typeof expr === 'string') {
// strings are treated as identifiers (variable names)
return lookup(env, expr);
} else if (Array.isArray(expr)) {
// arrays are treated as "forms"
// with the first element as the operator
// and the remaining elements as the operands
let [op, ...args] = expr;
if (op === 'fn') {
// user defined function
let [params, body] = args;
return {type: 'function', params, body, env};
} else if (op === 'do') {
// runs several expressions sequentially
// value of the last expression is returned
return args.map((subexpr) => eval(subexpr, env)).at(-1);
} else if (op === 'def') {
// defines a variable in the current env
// and sets it to the given value
let [identifier, val_expr] = args;
env[identifier] = eval(val_expr, env);
} else if (op === 'if') {
// if / else
let [cond_expr, then_expr, else_expr] = args;
if (eval(cond_expr, env))
return eval(then_expr, env);
else
return eval(else_expr, env);
} else {
// function call
let f = eval(op, env);
let arg_vals = args.map((subexpr) => eval(subexpr, env));
return apply(f, arg_vals);
}
} else throw new Error(`invalid expression: ${expr}`);
}
function apply(f, args) {
if (typeof f === 'function')
// builtin function
return f(...args);
else if (f.type === 'function') {
// user defined function
let {params, body, env} = f;
let newEnv = {__parent__: env};
for(let i = 0; i < params.length; i++)
newEnv[params[i]] = args[i];
return eval(body, newEnv);
} else
throw new Error(`${f} is not a function!`);
}
The code should be fairly self-explanatory.
Important things to note:
lookup
is self-tail-recursive and can be written with a loop insteadeval
is highly self-recursiveeval
/apply
are mutually tail-recursive
We can test out our factorial code at the top as follows:
let code =
["do",
["def", "fact",
["fn", ["x"],
["if", ["<", "x", 2],
1,
["*", "x", ["fact", ["-", "x", 1]]]]]],
["fact", 5]];
eval(code); // => 120n
Changing 5
to 20000
gives a stack overflow error as expected.
We now have our work cut out for us:
- TCO
lookup
- write
t_eval
andt_apply
- call
t_eval
withtrampoline
to get stacklesseval
Here's the code:
// Trampoline
// ==========
class Thunk {
constructor(f) {
this.f = f;
}
}
function trampoline(f, ...args) {
let t = new Thunk(() => f(...args));
while(t instanceof Thunk)
t = t.f();
return t;
}
// Environment
// ===========
// builtin functions
let TopEnv = {
'<': (x, y) => x < y,
'*': (x, y) => x * y,
'-': (x, y) => x - y,
};
// TCOd lookup function
// returns value bound to identifier
// in current scope, else in ancestor scopes
// throws error if not found
function lookup(env, identifier) {
while(env) {
if (env.hasOwnProperty(identifier))
return env[identifier];
else
env = env.__parent__;
}
throw new Error(`${identifier} is not defined!`);
}
// Util
// ====
// stackless map
// with_mapped is the callback
// t_mapper is a stackless function:
// (with_fx, x) => thunk
function t_map(with_mapped, t_mapper, arr) {
if (arr.length == 0)
return new Thunk(() => with_mapped([]));
return new Thunk(() => {
let [x, ...more] = arr;
return t_mapper(
(fx) => t_map(
(mapped_more) => new Thunk(() =>
with_mapped([fx].concat(mapped_more))),
t_mapper,
more
),
x)});
}
// Evaluator
// =========
// stackless eval
// with_evaled is the callback
function t_eval(with_evaled, expr, env) {
// env defaults to TopEnv
env = env || TopEnv;
if (typeof expr === 'number') {
// numbers are parsed as BigInt
return new Thunk(() => with_evaled(BigInt(expr)));
} else if (typeof expr === 'string') {
// strings are treated as identifiers (variable names)
return new Thunk(() => with_evaled(lookup(env, expr)));
} else if (Array.isArray(expr)) {
// arrays are treated as "forms"
// with the first element as the operator
// and the remaining elements as the operands
let [op, ...args] = expr;
if (op === 'fn') {
// user defined function
let [params, body] = args;
return new Thunk(() =>
with_evaled(
{type: 'function', params, body, env}));
} else if (op === 'do') {
// runs several expressions sequentially,
// then gives the value of the last expression
// to the callback
function with_subexpr_vals(subexpr_vals) {
return new Thunk(() =>
with_evaled(subexpr_vals.at(-1)));
}
function t_mapper(with_subexpr_val, subexpr) {
return new Thunk(() =>
t_eval(with_subexpr_val, subexpr, env));
}
return new Thunk(() =>
t_map(with_subexpr_vals, t_mapper, args));
} else if (op === 'def') {
// defines a variable in the current env
// and sets it to the given value
let [identifier, val_expr] = args;
function with_val(val) {
return new Thunk(() => {
env[identifier] = val;
return with_evaled();
});
}
return new Thunk(() => t_eval(with_val, val_expr, env));
} else if (op === 'if') {
// if / else
let [cond_expr, then_expr, else_expr] = args;
function with_cond_val(cond_val) {
return new Thunk(() =>
t_eval(with_evaled,
cond_val? then_expr : else_expr,
env));
}
return new Thunk(() =>
t_eval(with_cond_val, cond_expr, env));
} else {
// function call
function with_f(f) {
function with_arg_vals(arg_vals) {
return new Thunk(() =>
t_apply(with_evaled, f, arg_vals));
}
function t_mapper(with_arg_val, arg_expr) {
return new Thunk(() =>
t_eval(with_arg_val, arg_expr, env));
}
return new Thunk(() =>
t_map(with_arg_vals, t_mapper, args));
}
return new Thunk(() => t_eval(with_f, op, env));
}
} else throw new Error(`invalid expression: ${expr}`);
}
function t_apply(with_res, f, args) {
if (typeof f === 'function')
// builtin function
return new Thunk(() => with_res(f(...args)));
else if (f.type === 'function') {
// user defined function
let {params, body, env} = f;
let newEnv = {__parent__: env};
for(let i = 0; i < params.length; i++)
newEnv[params[i]] = args[i];
return new Thunk(() => t_eval(with_res, body, newEnv));
} else
throw new Error(`${f} is not a function!`);
}
// trampolined stackless eval
function eval(expr, env) {
let id = (x) => x;
return trampoline(t_eval, id, expr, env);
}
Let's take it for a spin!
let code =
["do",
["def", "fact",
["fn", ["x"],
["if", ["<", "x", 2],
1,
["*", "x", ["fact", ["-", "x", 1]]]]]],
["fact", 20000]];
eval(code); // => 181920632...0000000000n
et voila! 🎉
Now that our VM itself is stackless, it is not constrained by our machine's stack space.
So now in this language we can write our programs naturally and recursively, without fear of some weird error that's an artefact of physical machine architecture.
Languages such as Scheme have had this feature for ages.
I hope major implementations of JS and Java will also one day be stackless. They are established platforms, and it would be great if they could be better hosts to guest languages like Clojure, and people could finally use concat
without fear.
In a future post, we will see how CPS transforming an evaluator gives mind-bending abilities to the implemented language.
You can play with the interpreter in your browser here.
An async, non-browser-freezing interpreter is also available with trampoline
implemented differently.
Writing a stackless QuickJS fork is left is as an excercise for the reader.
Secret: a subset of JS is stackless
- u/pauseless for pointing out that Go sets hard limits on stack size, even though the stack is dynamic and not machine-dependent. An earlier viersion of this article incorrectly claimed that Go doesn't stack overflow.
- u/moon-chilled for pointing me to Stopify!