tldr; I'm working on a AOT-compiled Javascript implementation called jsc.
Many dynamically typed programming languages have implementations that compile to native binaries:
- Python: Cython
- Common Lisp: SBCL
- Scheme: Chicken Scheme
The benefits of compiling dynamically typed languages are similar to those of compiling statically typed languages:
- Simplified deployment via a single binary
- Simplified foreign-function interfaces
- Predictable performance compared to JIT compiling interpreters
- Performance gains compared to non-JIT compiling interpreters
I (re)discovered a common technique for compiling dynamic languages while developing BSDScheme, an interpreter and compiler for Scheme. In this technique, you use core parts of the runtime code as a library that is imported and referenced by compiled code.
You save time building object-memory representations, memory management, operations, interacting with existing libraries, etc. when an interpreter already exists. The runtime as a library (plus existing parser frontends) allows you to focus solely on code generation of control flow.
The first pass
I wrote the initial version of jsc in Rust using Dave Herman's esprit parser (supports a subset of ES6 that includes all of ES5).
The interesting parts of the runtime are taken care of by V8, e.g.:
V8::String
- a Javascript string objectV8::String::NewFromUtf8(isolate, "hello world!")
- C++ string to Javascript string object
V8::Number
- a Javascript number objectV8::Number::New(isolate, 10)
- C++ double to Javascript number object
- Heap allocations
- Calling convention
And so on.
An example
This first version of jsc could take the following Javascript:
function fib(n, a, b) {
if (n == 0) {
return a;
}
if (n == 1) {
return b;
}
return fib(n - 1, b, a + b);
}
function main() {
console.log(fib(50, 0, 1));
}
And produce the following C++:
#include <string>
#include <iostream>
#include <node.h>
using v8::Array;
using v8::Boolean;
using v8::Context;
using v8::Exception;
using v8::Function;
using v8::FunctionTemplate;
using v8::FunctionCallbackInfo;
using v8::Isolate;
using v8::Local;
using v8::Null;
using v8::Number;
using v8::Object;
using v8::String;
using v8::False;
using v8::True;
using v8::Value;
void fib_0(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
Local<Value> n_1 = args[0];
Local<Value> a_2 = args[1];
Local<Value> b_3 = args[2];
tail_recurse_4:
Local<Context> ctx_5 = isolate->GetCurrentContext();
Local<Object> global_6 = ctx_5->Global();
Local<Function> Boolean_7 = Local<Function>::Cast(global_6->Get(String::NewFromUtf8(isolate, "Boolean")));
String::Utf8Value utf8value_tmp_8(n_1);
std::string string_tmp_9(*utf8value_tmp_8);
String::Utf8Value utf8value_tmp_10(Number::New(isolate, 0));
std::string string_tmp_11(*utf8value_tmp_10);
Local<Value> argv_12[] = { (n_1->IsBoolean() || Number::New(isolate, 0)->IsBoolean()) ? Boolean::New(isolate, n_1->ToBoolean(isolate)->Value() == Number::New(isolate, 0)->ToBoolean(isolate)->Value()) : ((n_1->IsNumber() || Number::New(isolate, 0)->IsNumber()) ? Boolean::New(isolate, n_1->ToNumber(isolate)->Value() == Number::New(isolate, 0)->ToNumber(isolate)->Value()) : ((n_1->IsString() || Number::New(isolate, 0)->IsString()) ? Boolean::New(isolate, string_tmp_9 == string_tmp_11) : (False(isolate)))) };
Local<Value> result_13 = Boolean_7->Call(Null(isolate), 1, argv_12);
if (result_13->ToBoolean()->Value()) {
// return a;
args.GetReturnValue().Set(a_2);
return;
}
Local<Context> ctx_14 = isolate->GetCurrentContext();
Local<Object> global_15 = ctx_14->Global();
Local<Function> Boolean_16 = Local<Function>::Cast(global_15->Get(String::NewFromUtf8(isolate, "Boolean")));
String::Utf8Value utf8value_tmp_17(n_1);
std::string string_tmp_18(*utf8value_tmp_17);
String::Utf8Value utf8value_tmp_19(Number::New(isolate, 1));
std::string string_tmp_20(*utf8value_tmp_19);
Local<Value> argv_21[] = { (n_1->IsBoolean() || Number::New(isolate, 1)->IsBoolean()) ? Boolean::New(isolate, n_1->ToBoolean(isolate)->Value() == Number::New(isolate, 1)->ToBoolean(isolate)->Value()) : ((n_1->IsNumber() || Number::New(isolate, 1)->IsNumber()) ? Boolean::New(isolate, n_1->ToNumber(isolate)->Value() == Number::New(isolate, 1)->ToNumber(isolate)->Value()) : ((n_1->IsString() || Number::New(isolate, 1)->IsString()) ? Boolean::New(isolate, string_tmp_18 == string_tmp_20) : (False(isolate)))) };
Local<Value> result_22 = Boolean_16->Call(Null(isolate), 1, argv_21);
if (result_22->ToBoolean()->Value()) {
// return b;
args.GetReturnValue().Set(b_3);
return;
}
// return fib(n - 1, b, a + b);
Local<Value> arg_23 = (n_1->IsNumber() || Number::New(isolate, 1)->IsNumber()) ? (Number::New(isolate, n_1->ToNumber(isolate)->Value() - Number::New(isolate, 1)->ToNumber(isolate)->Value())) : Local<Number>::Cast(Null(isolate));
Local<Value> arg_24 = b_3;
Local<Value> arg_25 = (a_2->IsString() || b_3->IsString()) ? Local<Value>::Cast(String::Concat(a_2->ToString(), b_3->ToString())) : Local<Value>::Cast((a_2->IsNumber() || b_3->IsNumber()) ? (Number::New(isolate, a_2->ToNumber(isolate)->Value() + b_3->ToNumber(isolate)->Value())) : Local<Number>::Cast(Null(isolate)));
Local<FunctionTemplate> ftpl_27 = FunctionTemplate::New(isolate, fib_0);
Local<Function> fn_26 = ftpl_27->GetFunction();
fn_26->SetName(String::NewFromUtf8(isolate, "fib_0"));
n_1 = arg_23;
a_2 = arg_24;
b_3 = arg_25;
goto tail_recurse_4;
}
void jsc_main(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
tail_recurse_5:
// console.log(fib(50, 0, 1))
Local<Value> dot_parent_7 = isolate->GetCurrentContext()->Global()->Get(String::NewFromUtf8(isolate, "console"));
Local<String> property_8 = String::NewFromUtf8(isolate, "log");
while (dot_parent_7->IsObject() && !dot_parent_7.As<Object>()->HasOwnProperty(isolate->GetCurrentContext(), property_8).ToChecked()) {
dot_parent_7 = dot_parent_7.As<Object>()->GetPrototype();
}
Local<Value> dot_result_6 = dot_parent_7.As<Object>()->Get(isolate->GetCurrentContext(), property_8).ToLocalChecked();
Local<Value> arg_9 = Number::New(isolate, 50);
Local<Value> arg_10 = Number::New(isolate, 0);
Local<Value> arg_11 = Number::New(isolate, 1);
Local<FunctionTemplate> ftpl_13 = FunctionTemplate::New(isolate, fib_0);
Local<Function> fn_12 = ftpl_13->GetFunction();
fn_12->SetName(String::NewFromUtf8(isolate, "fib_0"));
Local<Value> argv_14[] = { arg_9, arg_10, arg_11 };
Local<Value> result_15 = fn_12->Call(Null(isolate), 3, argv_14);
Local<Value> arg_16 = result_15;
Local<Function> fn_17 = Local<Function>::Cast(dot_result_6);
Local<Value> argv_18[] = { arg_16 };
Local<Value> result_19 = fn_17->Call(dot_parent_7, 1, argv_18);
result_19;
}
void Init(Local<Object> exports) {
NODE_SET_METHOD(exports, "jsc_main", jsc_main);
}
NODE_MODULE(NODE_GYP_MODULE_NAME, Init)
This output gets compiled (by jsc) as a Node addon using node-gyp.
The compiled addon is loaded by a single-line Javascript file generated by jsc:
$ rm -rf build
$ jsc fib.js
$ cat build/fib.js
require("build/Release/fib.node").jsc_main()
$ node build/fib.js
12586269025
Analysis
The code was a mess of bad formatting, unnecessary locals, inefficient basic operations (e.g. huge, often unnecessary Boolean conversions), and so on. The unnecessary locals was partially a by-product of single-pass code generation. And the unnecessary conversions was partly due to ignoring types (even types of literals that you don't need Typescript/Flow to provide).
After I got this proof-of-concept working for basic examples, I wanted to rewrite it around destination-driven code generation, a technique by Kent Dybvig used in V8's baseline compiler. And after a few weeks not getting far in a refactor in Rust, I rewrote the compiler in Typescript.
The second pass
Written in Typescript and using the Typescript compiler
API,
this second iteration was built to do destination-driven code
generation and leaf type propagation. Destination-driven code
generation allows a single-pass code generator to reduce redundant
reassignments. And leaf type propagation allows simple, obvious
optimizations such as just calling V8::Boolean::IsTrue()
on a statically-known boolean rather than calling
V8::Value::Equals()
.
Example
Given the same fibonacci Javascript program from before, this iteration produces the following C++:
#include "lib.cc"
void tco_fib(const FunctionCallbackInfo<Value>& _args) {
Isolate* isolate = _args.GetIsolate();
std::vector<Local<Value>> args(_args.Length());;
for (int i = 0; i < _args.Length(); i++) args[i] = _args[i];
tail_recurse_0:
Local<Number> sym_rhs_4 = Number::New(isolate, 0);
Local<Boolean> sym_anon_2 = args[0]->StrictEquals(sym_rhs_4) ? True(isolate) : False(isolate);
if (sym_anon_2->IsTrue()) {
_args.GetReturnValue().Set(args[1]);
return;
}
Local<Number> sym_rhs_11 = Number::New(isolate, 1);
Local<Boolean> sym_anon_9 = args[0]->StrictEquals(sym_rhs_11) ? True(isolate) : False(isolate);
if (sym_anon_9->IsTrue()) {
_args.GetReturnValue().Set(args[2]);
return;
}
Local<Number> sym_rhs_19 = Number::New(isolate, 1);
Local<Value> sym_arg_17 = genericMinus(isolate, args[0], sym_rhs_19);
Local<Value> sym_arg_21 = genericPlus(isolate, args[1], args[2]);
args[0] = sym_arg_17;
args[1] = args[2];
args[2] = sym_arg_21;
goto tail_recurse_0;
return;
}
void jsc_main(const FunctionCallbackInfo<Value>& _args) {
Isolate* isolate = _args.GetIsolate();
std::vector<Local<Value>> args(_args.Length());;
for (int i = 0; i < _args.Length(); i++) args[i] = _args[i];
tail_recurse_1:
Local<Number> sym_arg_29 = Number::New(isolate, 100);
Local<Number> sym_arg_30 = Number::New(isolate, 0);
Local<Number> sym_arg_31 = Number::New(isolate, 1);
Local<Value> sym_args_32[] = { sym_arg_29, sym_arg_30, sym_arg_31 };
Local<Function> sym_fn_33 = FunctionTemplate::New(isolate, tco_fib)->GetFunction();
sym_fn_33->SetName(String::NewFromUtf8(isolate, "tco_fib"));
Local<Value> sym_arg_28 = sym_fn_33->Call(sym_fn_33, 3, sym_args_32);
Local<Value> sym_args_34[] = { sym_arg_28 };
Local<Value> sym_parent_37 = isolate->GetCurrentContext()->Global()->Get(String::NewFromUtf8(isolate, "console"));
Local<Value> sym_anon_36 = sym_parent_37.As<Object>()->Get(String::NewFromUtf8(isolate, "log"));
Local<Function> sym_fn_35 = Local<Function>::Cast(sym_anon_36);
Local<Value> sym_anon_27 = sym_fn_35->Call(sym_fn_35, 1, sym_args_34);
return;
}
void Init(Local<Object> exports) {
NODE_SET_METHOD(exports, "jsc_main", jsc_main);
}
NODE_MODULE(NODE_GYP_MODULE_NAME, Init)
Analysis
Common code (genericPlus
, genericMinus
) and
all imports have been pulled into lib.cc
for clarity. And
the entire result is run through
clang-format if it is
present on the system.
The benefit of leaf type propagation can be seen everywhere a local is
declared that is not Local
and specifically in if
tests on statically known booleans:
...
Local<Boolean> sym_anon_2 = args[0]->StrictEquals(sym_rhs_4) ? True(isolate) : False(isolate);
if (sym_anon_2->IsTrue()) {
...
It's obvious to a human that there is another optimization you could
do here by not wrapping this check in a V8::Boolean
at
all. The only types tracked in destinations are V8 types, not yet C++
types. But not needing to passing this through a bool
toBoolean(Value v)
wrapper is still an improvement.
In general, unboxing has not really been explore. But the ultimate goal is to use Typescript types to produce function- or block-level unboxed versions -- perhaps using a toggle in code to specify safety à la Common Lisp.
Next steps
I broke tests and regressed on syntax support in the Typescript port, so that's the first step. The second step is enough syntax to support more interesting benchmarks than the fibonacci example (which has comparative performance to Node.js/V8 but isn't saying much).
After that:
- Unboxed expressions
- Unboxed blocks
- Foreign-function interface
- Self-hosting
- Node-API compatible runtime without Node
Companion blog post to my talk on an AOT-compiled Javascript implementation built on Typescript https://t.co/0aHVJ9UzYh
— Phil Eaton (@phil_eaton) February 26, 2019