birth: Prebirth self-hosting
This completes bootstrapping for Prebirth Lisp. The next step will be Rebirth, which will replace libprebirth.js, removing hand-written JavaScript entirely.master
parent
a5b53fedf5
commit
35fa13a8a0
|
@ -29,11 +29,17 @@
|
|||
;;; is successful if the output of Birth compiling itself is byte-for-byte
|
||||
;;; identical to the output of compiling Birth with Prebirth.
|
||||
;;;
|
||||
;;; This is largely a 1:1 translation of `prebirth.js'.
|
||||
;;; This is largely a 1:1 translation of `prebirth.js'. See that file for
|
||||
;;; terminology.
|
||||
;;;
|
||||
;;; Note that we're dealing with a small subset of Scheme here, so certain
|
||||
;;; things might be done differently given a proper implementation. See
|
||||
;;; that file for terminology.
|
||||
;;; things might be done differently given a proper implementation.
|
||||
;;;
|
||||
;;; The next step after this is ``Rebirth'': both Prebirth and Birth require
|
||||
;;; the manually written `libprebirth.js' to function. Rebirth will remove
|
||||
;;; that completely, which bootstraps the runtime in its entirety. At that
|
||||
;;; point, all development will be exclusively in Scheme and we can get on
|
||||
;;; with Gibble.
|
||||
|
||||
;; pair selection
|
||||
(define (cadr xs)
|
||||
|
@ -43,7 +49,14 @@
|
|||
(define (caddr xs)
|
||||
(car (cdr (cdr xs))))
|
||||
(define (cadddr xs)
|
||||
(car (cdr (cdr ( cdr xs)))))
|
||||
(car (cdr (cdr (cdr xs)))))
|
||||
(define (caddddr xs)
|
||||
(car (cdr (cdr (cdr (cdr xs))))))
|
||||
(define (cddr xs)
|
||||
(cdr (cdr xs)))
|
||||
|
||||
(define (not x)
|
||||
(if x #f #t))
|
||||
|
||||
;; for convenience
|
||||
(define (js:match-regexp re s)
|
||||
|
@ -58,8 +71,7 @@
|
|||
;;
|
||||
;; This procedure is mutually recursive with `token'. It expects that
|
||||
;; the source SRC will be left-truncated as input is processed. POS exists
|
||||
;; for producing metadata for error reporting---it has no impact on
|
||||
;; parsing.
|
||||
;; for producing metadata for error reporting---it has no impact on parsing.
|
||||
;;
|
||||
;; The result is a list of tokens. See `token' for the format.
|
||||
(define (lex src pos)
|
||||
|
@ -68,7 +80,7 @@
|
|||
(list "")))
|
||||
(ws-len (string-length (car ws)))
|
||||
(trim (substring src ws-len)) ; ignore whitespace, if any
|
||||
(newpos (+ pos ws-len)))
|
||||
(newpos (+ pos ws-len))) ; adj pos to account for removed ws
|
||||
|
||||
(if (string=? "" trim)
|
||||
(list) ; EOF and we're done
|
||||
|
@ -80,12 +92,12 @@
|
|||
(case ch
|
||||
;; comments extend until the end of the line
|
||||
((";") (let ((eol (js:match-regexp "^(.*?)(\\n|$)" trim)))
|
||||
(token "comment" (cadr eol) trim newpos)))
|
||||
(make-token "comment" (cadr eol) trim newpos)))
|
||||
|
||||
;; left and right parenthesis are handled in the same manner:
|
||||
;; they produce distinct tokens with single-character lexemes
|
||||
(("(") (token "open" ch trim newpos))
|
||||
((")") (token "close" ch trim newpos))
|
||||
(("(") (make-token "open" ch trim newpos))
|
||||
((")") (make-token "close" ch trim newpos))
|
||||
|
||||
;; strings are delimited by opening and closing ASCII double
|
||||
;; quotes, which can be escaped with a backslash
|
||||
|
@ -97,14 +109,14 @@
|
|||
;; including quotes as its lexeme, but its value will
|
||||
;; be the value of the string without quotes due to
|
||||
;; the `str' match group (see `token')
|
||||
(token "string" str trim newpos)))
|
||||
(make-token "string" str trim newpos)))
|
||||
|
||||
(else
|
||||
;; anything else is considered a symbol up until whitespace or
|
||||
;; any of the aforementioned delimiters
|
||||
(let ((symbol (js:match-regexp "^[^\\s()\"]+"
|
||||
trim)))
|
||||
(token "symbol" symbol trim newpos))))))))
|
||||
(make-token "symbol" symbol trim newpos))))))))
|
||||
|
||||
|
||||
|
||||
|
@ -135,7 +147,7 @@
|
|||
;; The expected arguments are: the token type TYPE, the match group or
|
||||
;; string MATCH, left-truncated source code SRC, and the position POS
|
||||
;; relative to the original source code.
|
||||
(define (token type match src pos)
|
||||
(define (make-token type match src pos)
|
||||
(let* ((parts (if (list? match) match (list match match)))
|
||||
(lexeme (car parts))
|
||||
;; the value is the first group of the match (indicating what we
|
||||
|
@ -148,17 +160,18 @@
|
|||
|
||||
;; produce token and recurse on `lex', left-truncating the source
|
||||
;; string to discard what we have already processed
|
||||
(cons (list type lexeme value pos)
|
||||
(cons (list (quote token) type lexeme value pos)
|
||||
(lex (substring src len)
|
||||
(+ pos len)))))
|
||||
|
||||
|
||||
;; various accessor procedures for token lists (we're Prebirth Lisp here,
|
||||
;; so no record support or anything fancy!)
|
||||
(define (token-type t) (car t))
|
||||
(define (token-lexeme t) (cadr t))
|
||||
(define (token-value t) (caddr t))
|
||||
(define (token-pos t) (cadddr t))
|
||||
(define (token? t) (and (pair? t) (symbol=? (quote token) (car t))))
|
||||
(define (token-type t) (cadr t))
|
||||
(define (token-lexeme t) (caddr t))
|
||||
(define (token-value t) (cadddr t))
|
||||
(define (token-pos t) (caddddr t))
|
||||
|
||||
|
||||
|
||||
|
@ -175,8 +188,9 @@
|
|||
(define (ast-tree ast) (cadr ast))
|
||||
(define (ast-stack ast) (caddr ast))
|
||||
|
||||
;; perform a leftmost reduction on the token string
|
||||
(define (toks->ast toks)
|
||||
(fold ; then a leftmost reduction on the token string
|
||||
(fold
|
||||
(lambda (token result)
|
||||
(let ((depth (ast-depth result))
|
||||
(xs (ast-tree result))
|
||||
|
@ -228,12 +242,287 @@
|
|||
(ast (toks->ast toks)))
|
||||
(if (zero? (ast-depth ast))
|
||||
(ast-tree ast)
|
||||
;; if we terminate at a non-zero depth, that means there ar still
|
||||
;; open sexps
|
||||
(error (string-append
|
||||
"unexpected end of input at depth "
|
||||
(ast-depth ast))))))
|
||||
|
||||
|
||||
;; Compile Prebirth Lisp AST into ECMAScript.
|
||||
;;
|
||||
;; The AST can be generated with `parse-lisp'.
|
||||
(define (prebirth->ecmascript ast)
|
||||
;; Generate ECMAScript-friendly name from the given id.
|
||||
;;
|
||||
;; A subset of special characters that are acceptable in Scheme are
|
||||
;; converted in an identifiable manner; others are simply converted to `$'
|
||||
;; in a catch-all and therefore could result in conflicts and cannot be
|
||||
;; reliably distinguished from one-another. Remember: this is temporary
|
||||
;; code.
|
||||
(define (tname->id name)
|
||||
(if (js:match (js:regexp "^\\d+$") name)
|
||||
name
|
||||
(string-append
|
||||
"$$" (js:replace (js:regexp "[^a-zA-Z0-9_]" "g")
|
||||
(lambda (c)
|
||||
(case c
|
||||
(("-") "$_$")
|
||||
(("?") "$7$")
|
||||
(("@") "$a$")
|
||||
(("!") "$b$")
|
||||
((">") "$g$")
|
||||
(("#") "$h$")
|
||||
(("*") "$k$")
|
||||
(("<") "$l$")
|
||||
(("&") "$n$")
|
||||
(("%") "$o$")
|
||||
(("+") "$p$")
|
||||
(("=") "$q$")
|
||||
(("^") "$v$")
|
||||
(("/") "$w$")
|
||||
(("$") "$$")
|
||||
(else "$")))
|
||||
name))))
|
||||
|
||||
;; Join a list of strings XS on a delimiter DELIM
|
||||
(define (join delim xs)
|
||||
(if (pair? xs)
|
||||
(fold (lambda (x str)
|
||||
(string-append str delim x))
|
||||
(car xs)
|
||||
(cdr xs))
|
||||
""))
|
||||
|
||||
|
||||
;; Compile parameter list.
|
||||
;;
|
||||
;; This simply takes the value of the symbol and outputs it (formatted),
|
||||
;; delimited by commas.
|
||||
(define (params->es params)
|
||||
(join ", " (map (lambda (t)
|
||||
(tname->id (token-value t)))
|
||||
params)))
|
||||
|
||||
|
||||
;; Compile body s-expressions into ECMAScript
|
||||
;;
|
||||
;; This produces a 1:1 mapping of body XS s-expressions to ES statements,
|
||||
;; recursively. The heavy lifting is done by `sexp->es'.
|
||||
(define (body->es xs)
|
||||
;; recursively process body XS until we're out of pairs
|
||||
(if (not (pair? xs))
|
||||
""
|
||||
(let* ((x (car xs))
|
||||
(rest (cdr xs))
|
||||
(more? (pair? rest)))
|
||||
;; the result is a semicolon-delimited string of statements, with
|
||||
;; the final statement prefixed with `return'
|
||||
(string-append
|
||||
" "
|
||||
(if more? "" "return ") ; prefix with `return' if last body exp
|
||||
(sexp->es x) ";" ; process current body expression
|
||||
(if more? "\n" "")
|
||||
(body->es rest))))) ; recurse
|
||||
|
||||
|
||||
;; Compile procedure definition into an ES function definition
|
||||
;;
|
||||
;; This will fail if the given token is not a `define'.
|
||||
(define (cdfn t)
|
||||
;; e.g. (define (foo ...) body)
|
||||
(let* ((dfn (cadr t))
|
||||
(id (tname->id (token-value (car dfn))))
|
||||
(params (params->es (cdr dfn)))
|
||||
(body (body->es (cddr t))))
|
||||
;; this is the final format---each procedure becomes its own function
|
||||
;; definition in ES
|
||||
(string-append
|
||||
"function " id "(" params ")\n{\n" body "\n};")))
|
||||
|
||||
|
||||
;; Function/procedure aliases and special forms
|
||||
;;
|
||||
;; And here we have what is probably the most grotesque part of this file.
|
||||
;;
|
||||
;; This map allows for a steady transition---items can be removed as they
|
||||
;; are written in Prebirth Lisp. This should give us a sane (but still
|
||||
;; simple) environment with which we can start to self-host.
|
||||
;;
|
||||
;; String values are simple function aliases. Function values take over
|
||||
;; the compilation of that function and allow for defining special forms
|
||||
;; (in place of macro support). The first argument FN is the name of the
|
||||
;; function/procedure/form, and ARS is the list of arguments.
|
||||
;;
|
||||
;; These are by no means meant to be solid implementations; notable
|
||||
;; deficiencies are documented, but don't expect this to work properly in
|
||||
;; every case. They will be replaced with proper R7RS implementations in
|
||||
;; the future (Rebirth).
|
||||
(define (fnmap fn args t)
|
||||
(case fn
|
||||
(("js:console")
|
||||
(string-append "console.log(" (map sexp->es args) ")"))
|
||||
|
||||
;; fortunately ES6+ has native symbol support :)
|
||||
;; we don't (yet?) need list quoting in Prebirth
|
||||
(("quote")
|
||||
(if (pair? (cdr args))
|
||||
(error "quoting lists is not yet supported; sorry!")
|
||||
(string-append "Symbol.for('" (sexp->es args) "')")))
|
||||
|
||||
(("define") (cdfn t))
|
||||
|
||||
(("lambda")
|
||||
(let ((fnargs (car args))
|
||||
(body (cdr args)))
|
||||
(string-append
|
||||
"function(" (join ", " (map sexp->es fnargs)) "){\n"
|
||||
(body->es body)
|
||||
"}")))
|
||||
|
||||
;; simple if statement with optional else, wrapped in a self-executing
|
||||
;; function to simplify code generation (e.g. returning an if)
|
||||
(("if")
|
||||
(let ((pred (car args))
|
||||
(t (cadr args))
|
||||
(f (and (pair? (cddr args))
|
||||
(caddr args))))
|
||||
(string-append
|
||||
"(function(){"
|
||||
"if (_truep(" (sexp->es pred) ")){return " (sexp->es t) ";}"
|
||||
(if (pair? f)
|
||||
(string-append "else{return " (sexp->es f) ";}")
|
||||
"")
|
||||
"})()")))
|
||||
|
||||
;; and short-circuits, so we need to implement it as a special form
|
||||
;; rather than an alias
|
||||
(("and")
|
||||
(string-append
|
||||
"(function(__and){\n"
|
||||
(join "" (map (lambda (expr)
|
||||
(string-append
|
||||
"__and = " (sexp->es expr) "; "
|
||||
"if (!_truep(__and)) return false;\n"))
|
||||
args))
|
||||
"return __and;})()"))
|
||||
|
||||
;; or short-circuits, so we need to implement it as a special form
|
||||
;; rather than an alias
|
||||
(("or")
|
||||
(string-append
|
||||
"(function(__or){\n"
|
||||
(join "" (map (lambda (expr)
|
||||
(string-append
|
||||
"__or = " (sexp->es expr) "; "
|
||||
"if (_truep(__or)) return __or;\n"))
|
||||
args))
|
||||
"return false;})()"))
|
||||
|
||||
;; (let ((binding val) ...) ...body), compiled as a self-executing
|
||||
;; function which allows us to easily represent the return value of
|
||||
;; the entire expression while maintaining local scope
|
||||
(("let*")
|
||||
(let ((bindings (car args))
|
||||
(body (cdr args)))
|
||||
(string-append
|
||||
"(function(){\n"
|
||||
(join "" (map (lambda (binding)
|
||||
(let ((var (car binding))
|
||||
(init (cadr binding)))
|
||||
(string-append " const " (sexp->es var)
|
||||
" = " (sexp->es init) ";\n")))
|
||||
bindings))
|
||||
(body->es body) "\n"
|
||||
" })()")))
|
||||
|
||||
;; similar to the above, but variables cannot reference one-another
|
||||
(("let")
|
||||
(let* ((bindings (car args))
|
||||
(body (cdr args))
|
||||
(fparams (join ", " (map sexp->es
|
||||
(map car bindings))))
|
||||
(fargs (join ", " (map sexp->es
|
||||
(map cadr bindings)))))
|
||||
(string-append "(function(" fparams "){\n"
|
||||
(body->es body) "\n"
|
||||
"})(" fargs ")")))
|
||||
|
||||
;; and here I thought Prebirth Lisp would be simple...but having
|
||||
;; `case' support really keeps things much more tidy, so here we are
|
||||
;; (note that it doesn't support the arrow form, nor does it support
|
||||
;; expressions as data)
|
||||
(("case")
|
||||
(let ((key (car args))
|
||||
(clauses (cdr args)))
|
||||
(string-append
|
||||
"(function(){const _key=" (sexp->es key) ";\n"
|
||||
"switch (_key){\n"
|
||||
(join ""
|
||||
(map (lambda (data exprs)
|
||||
(string-append
|
||||
(if (and (token? data)
|
||||
(string=? "else" (token-lexeme data)))
|
||||
"default:\n"
|
||||
(join ""
|
||||
(map (lambda (datum)
|
||||
(string-append
|
||||
"case " (sexp->es datum) ":\n"))
|
||||
data)))
|
||||
(body->es exprs) "\n"))
|
||||
(map car clauses)
|
||||
(map cdr clauses)))
|
||||
"}})()")))
|
||||
|
||||
;; normal procedure application
|
||||
(else (let* ((idfn (tname->id fn))
|
||||
(argstr (join ", " (map sexp->es args))))
|
||||
(string-append idfn "(" argstr ")")))))
|
||||
|
||||
|
||||
;; Convert s-expressions or scalar into ECMAScript
|
||||
;;
|
||||
;; T may be either an array of tokens or a primitive token (e.g. string,
|
||||
;; symbol). This procedure is applied recursively to T as needed if T is
|
||||
;; a list.
|
||||
;;
|
||||
;; XXX: We hit stack limits ._. we want to compile in browser too! Until
|
||||
;; then, increase Node.js' stack when invoking it from the command line
|
||||
;; (`--stack-size').
|
||||
(define (sexp->es t)
|
||||
(if (not (list? t))
|
||||
(error "unexpected non-list for sexp->es token"))
|
||||
|
||||
(if (token? t)
|
||||
(case (token-type t)
|
||||
;; strings output as-is (note that we don't escape double quotes,
|
||||
;; because the method of escaping them is the same in Scheme as it
|
||||
;; is in ECMAScript---a backslash)
|
||||
(("string") (string-append "\"" (token-value t) "\""))
|
||||
|
||||
;; symbols have the same concerns as procedure definitions: the
|
||||
;; identifiers generated need to be ES-friendly
|
||||
(("symbol") (tname->id (token-value t)))
|
||||
|
||||
(else (error
|
||||
(string-append
|
||||
"cannot compile unknown token `" (token-type t) "'"))))
|
||||
|
||||
;; otherwise, process the expression
|
||||
(fnmap (token-value (car t))
|
||||
(cdr t)
|
||||
t)))
|
||||
|
||||
;; output libprebirth and compiled output, wrapped in a self-executing
|
||||
;; function to limit scope
|
||||
(string-append "(function(){"
|
||||
(js:file->string "build-aux/bootstrap/libprebirth.js") "\n\n"
|
||||
(join "\n\n" (map sexp->es ast))
|
||||
"})();"))
|
||||
|
||||
|
||||
;; at this point, this program can parse itself and output a CST (sans
|
||||
;; whitespace)
|
||||
(js:console
|
||||
(parse-lisp (js:stdin->string)))
|
||||
(js:console (prebirth->ecmascript
|
||||
(parse-lisp
|
||||
(js:file->string "/dev/stdin"))))
|
||||
|
|
|
@ -34,10 +34,15 @@
|
|||
* By convention, everything is prefixed with `js:' in Prebirth (not `es:',
|
||||
* because we're using JavaScript-specific features). For character
|
||||
* transformation rules, see `Compiler#_idFromName' in `prebirth.js'.
|
||||
*
|
||||
* These implementations are largely flawed in some manner, but that's okay,
|
||||
* because they do their job. Some flaws are noted.
|
||||
*/
|
||||
|
||||
const $h$t = true;
|
||||
const $h$f = false;
|
||||
const $$$h$t = true;
|
||||
const $$$h$f = false;
|
||||
|
||||
const $$symbol$q$$7$ = ( a, b ) => ( ( typeof a === 'symbol' ) && ( a === b ) );
|
||||
|
||||
const argToArr = args => Array.prototype.slice.call( args );
|
||||
|
||||
|
@ -120,17 +125,25 @@ const $$zero$7$ = x => x === 0;
|
|||
// warning: fold here only supports one list
|
||||
const $$fold = ( f, init, xs ) =>
|
||||
xs.reduce( ( prev, x ) => f( x, prev ), init );
|
||||
// warning: map here uses the length of the first list, not the shortest
|
||||
// (we implement this in ES for now so that we don't have to augment
|
||||
// Prebirth Lisp to support the "rest" procedure definition syntax)
|
||||
const $$map = ( f, ...xs ) =>
|
||||
xs[ 0 ].map(
|
||||
( _, i ) => f.apply( null,
|
||||
xs.map( x => x[ i ] ) ) );
|
||||
|
||||
|
||||
// Node.js stuff
|
||||
const fs = require( 'fs' );
|
||||
|
||||
// stdin->string
|
||||
const $$js$stdin$_$$g$string = () =>
|
||||
fs.readFileSync( '/dev/stdin' ).toString();
|
||||
const $$js$file$_$$g$string = ( path ) =>
|
||||
fs.readFileSync( path ).toString();
|
||||
|
||||
const $$js$regexp = ( s, opts ) => new RegExp( s, opts );
|
||||
const $$js$match = ( r, s ) => s.match( r ) || false;
|
||||
const $$js$regexp = ( s, opts ) => new RegExp( s, opts );
|
||||
const $$js$match = ( r, s ) => s.match( r ) || false;
|
||||
const $$js$replace = ( r, repl, s ) => s.replace( r, repl );
|
||||
|
||||
|
||||
/** =============== end of libprebirth =============== **/
|
||||
|
|
|
@ -330,12 +330,12 @@ class Compiler
|
|||
// map every definition to a ES function definition and delimit them
|
||||
// (for readability) by two newlines
|
||||
return tree.map( this._sexpToEs.bind( this ) )
|
||||
.join( "\n\n" ) + "\n";
|
||||
.join( "\n\n" );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compile function definition into a ES function definition
|
||||
* Compile procedure definition into a ES function definition
|
||||
*
|
||||
* This will fail if the given token is not a `define'.
|
||||
*
|
||||
|
@ -345,13 +345,6 @@ class Compiler
|
|||
*/
|
||||
_cdfn( t )
|
||||
{
|
||||
// an application must be an s-expression
|
||||
if ( !Array.isArray( t ) ) {
|
||||
throw Error(
|
||||
`\`${name}' application expected, found symbol \`${t.value}'`
|
||||
);
|
||||
}
|
||||
|
||||
// e.g. (define (foo ...) body)
|
||||
const [ , [ { value: name }, ...params ], ...body ] = t;
|
||||
|
||||
|
@ -359,7 +352,7 @@ class Compiler
|
|||
const paramjs = this._paramsToEs( params );
|
||||
const bodyjs = this._bodyToEs( body );
|
||||
|
||||
// this is the final format---each function becomes its own function
|
||||
// this is the final format---each procedure becomes its own function
|
||||
// definition in ES
|
||||
return `function ${id}(${paramjs})\n{\n${bodyjs}\n};`;
|
||||
}
|
||||
|
@ -525,7 +518,7 @@ class Compiler
|
|||
|
||||
|
||||
/**
|
||||
* Function aliases and special forms
|
||||
* Function/procedure aliases and special forms
|
||||
*
|
||||
* And here we have what is probably the most grotesque part of this
|
||||
* file. Saved the best for last.
|
||||
|
@ -542,7 +535,7 @@ class Compiler
|
|||
*
|
||||
* These are by no means meant to be solid implementations; notable
|
||||
* deficiencies are documented, but don't expect this to work properly in
|
||||
* any case. They will be replaced with proper R7RS implementations in the
|
||||
* every case. They will be replaced with proper R7RS implementations in the
|
||||
* future (after Birth).
|
||||
*
|
||||
* @type {Object}
|
||||
|
@ -574,20 +567,20 @@ const fnmap = {
|
|||
// and short-circuits, so we need to implement it as a special form
|
||||
// rather than an alias
|
||||
'and': ( args, stoes ) =>
|
||||
"(function(){\n" +
|
||||
"(function(__and){\n" +
|
||||
args.map( ( expr, i ) =>
|
||||
`const _and${i} = ${stoes(expr)}; ` +
|
||||
`if (!_truep(_and${i})) return false;\n`
|
||||
`__and = ${stoes(expr)}; ` +
|
||||
`if (!_truep(__and)) return false;\n`
|
||||
).join( '' ) +
|
||||
`return _and${args.length-1};})()`,
|
||||
`return __and;})()`,
|
||||
|
||||
// or short-circuits, so we need to implement it as a special form
|
||||
// rather than an alias
|
||||
'or': ( args, stoes ) =>
|
||||
"(function(){\n" +
|
||||
"(function(__or){\n" +
|
||||
args.map( ( expr, i ) =>
|
||||
`const _or${i} = ${stoes(expr)}; ` +
|
||||
`if (_truep(_or${i})) return _or${i};\n`
|
||||
`__or = ${stoes(expr)}; ` +
|
||||
`if (_truep(__or)) return __or;\n`
|
||||
).join( '' ) +
|
||||
"return false;})()",
|
||||
|
||||
|
|
Loading…
Reference in New Issue