birth: AST generation

Just about ready for that sloppy code generation!

* build-aux/bootstrap/birth.scm: Update file header documentation.
  Add some whitespace between existing procedures.
  Invoke `parse-lisp' as the program in place of `lex', producing an AST as
    output to the console.
  (cadddr): Add procedure.
  (token-{type,lexeme,value,pos}): Add procedures.
  (parse-lisp): Add procedure (contains other procedures).
* build-aux/bootstrap/libprebirth.js
  ($$append): Add function (append).
  ($$$_$): Correct implementation (-).
  ($$zero$7$): Add predicate (zero?).
  ($$fold): Add function (fold).
* build-aux/bootstrap/prebirth.js (parseLisp): Lowercase some errors.
  (Compiler): Update class docblock.
  (fnmap)[lambda]: Add `lambda' form.
master
Mike Gerwitz 2017-09-02 01:30:13 -04:00
parent dd34498808
commit 3310241a94
Signed by: mikegerwitz
GPG Key ID: 8C917B7F5DC51BA2
3 changed files with 122 additions and 11 deletions

View File

@ -25,12 +25,15 @@
;;; This is the Prebirth Lisp implementation of the JavaScript Prebirth
;;; compiler, found in `prebirth.js'---that compiler can be used to compile
;;; this compiler, which can then be used to compile itself, completing the
;;; bootstrapping process. This process is termed "Birth".
;;; bootstrapping process. This process is termed "Birth", and the process
;;; is successful if the output of Birth compiling itself is byte-for-byte
;;; identical to the output of compiling Birth with Prebirth.
;;;
;;; This is largely a 1:1 translation of `prebirth.js'. Note that we're
;;; dealing with a small subset of Scheme here, so certain things might be
;;; done differently given a proper implementation. See that file for
;;; terminology.
;;; This is largely a 1:1 translation of `prebirth.js'.
;;;
;;; Note that we're dealing with a small subset of Scheme here, so certain
;;; things might be done differently given a proper implementation. See
;;; that file for terminology.
;; pair selection
(define (cadr xs)
@ -39,7 +42,10 @@
(car (car (cdr xs))))
(define (caddr xs)
(car (cdr (cdr xs))))
;; fourth element of XS: skip the first pair, then take the third element
;; of what remains
(define (cadddr xs)
  (caddr (cdr xs)))
;; convenience: build a regexp from RE with `js:regexp' and hand it,
;; together with the string S, to `js:match'
(define (js:match-regexp re s)
  (let ((compiled (js:regexp re)))
    (js:match compiled s)))
@ -101,6 +107,7 @@
(token "symbol" symbol trim newpos))))))))
;; Throw an error with a window of surrounding source code.
;;
;; The "window" is simply ten characters to the left and right of the
@ -112,6 +119,7 @@
src)))
;; Produce a token and recurse.
;;
;; The token will be concatenated with the result of the mutually
@ -145,7 +153,87 @@
(+ pos len)))))
;; Positional accessors for tokens.  Prebirth Lisp has no records, so a
;; token is a plain list and each accessor simply names one position in it:
;; type first, then lexeme, value, and pos.
(define (token-type tok)
  (car tok))
(define (token-lexeme tok)
  (cadr tok))
(define (token-value tok)
  (caddr tok))
(define (token-pos tok)
  (cadddr tok))
;; Produce an AST from the given string SRC of sexps
;;
;; This is essentially the CST with whitespace removed. It first invokes
;; the lexer to produce a token string from the input sexps SRC. From this,
;; it verifies only proper nesting (that SRC does not close sexps too early
;; and that EOF isn't reached before all sexps are closed) and produces an
;; AST that is an isomorphism of the original sexps.
;;
;; The value produced is the tree portion of the final parser state.  A
;; premature close or an unrecognized token type is reported through
;; `parse-error' (with SRC and the token position); input that ends with
;; sexps still open is reported through `error'.
(define (parse-lisp src)
;; accessor methods to make you and me less consfused
;;
;; The state threaded through the fold below is a three-element list:
;; the current nesting depth, the list being built, and a stack of the
;; enclosing (suspended) lists.
(define (ast-depth ast) (car ast))
(define (ast-tree ast) (cadr ast))
(define (ast-stack ast) (caddr ast))
;; Reduce the token list TOKS into a final (depth tree stack) state.
(define (toks->ast toks)
(fold ; then a leftmost reduction on the token string
;; TOKEN is the next token; RESULT is the state accumulated so far
(lambda (token result)
(let ((depth (ast-depth result))
(xs (ast-tree result))
(stack (ast-stack result))
(type (token-type token))
(pos (token-pos token)))
;; there are very few token types to deal with (again, this is a
;; very simple bootstrap lisp)
(case type
;; ignore comments
(("comment") result)
;; when beginning a new expression, place the expression
;; currently being processed onto a stack, allocate a new list,
;; and we'll continue processing into that new list
(("open") (list (+ depth 1)
(list)
(cons xs stack)))
;; once we reach the end of the expression, pop the parent off of
;; the stack and append the new list to it
;;
;; a depth of zero here means there is no open expression left to
;; close, so the input is unbalanced
(("close") (if (zero? depth)
(parse-error src pos
"unexpected closing parenthesis")
(list (- depth 1)
(append (car stack) (list xs))
(cdr stack))))
;; strings and symbols (we cheat and just consider everything,
;; including numbers and such, to be symbols) are just copied
;; in place
;;
;; note that the whole token (not just its value) is appended to the
;; list currently being built
(("string" "symbol") (list depth
(append xs (list token))
stack))
;; we should never encounter anything else unless there's a bug
;; in the tokenizer or we forget a token type above
(else (parse-error
src pos (string-append
"unexpected token `" type "'"))))))
(list 0 (list) (list)) ; initial 0 depth; empty tree; expr stack
toks))
;; lex the input SRC and pass it to `toks->ast' to generate the AST;
;; if the depth is non-zero after we're done, then we're unbalanced.
(let* ((toks (lex src 0))
(ast (toks->ast toks)))
(if (zero? (ast-depth ast))
(ast-tree ast)
;; NOTE(review): `ast-depth' yields a number here, so this presumably
;; relies on `string-append' accepting a non-string argument in this
;; implementation -- confirm against libprebirth
(error (string-append
"unexpected end of input at depth "
(ast-depth ast))))))
;; at this point, this program can parse itself and output a CST (sans
;; whitespace)
(js:console
(lex (js:stdin->string) 0))
(parse-lisp (js:stdin->string)))

View File

@ -78,6 +78,14 @@ const $$cons = ( item, list ) => _assertList( list ) && [ item ].concat( list )
const $$car = xs => _assertPair( xs ) && xs[ 0 ];
const $$cdr = xs => _assertPair( xs ) && xs.slice( 1 );
// Concatenate zero or more lists into a single new list (Scheme `append`).
//
// The reduction is seeded with an empty list so that calling with no
// arguments yields the empty list (rather than `reduce` throwing on an
// empty array), and so that every argument---including the first---is
// checked with `_assertList`.  The result is always a fresh array; none of
// the arguments are modified.
//
// warning: blows up if any items are non-lists, whereas the proper RnRS
// implementation will set the cdr to the final item even if it's not a pair
function $$append()
{
    return argToArr( arguments ).reduce(
        ( xs, x ) => xs.concat( _assertList( x ) && x ),
        []
    );
}
const $$list$7$ = xs => Array.isArray( xs );
const $$pair$7$ = xs => Array.isArray( xs ) && ( xs.length > 0 );
@ -101,8 +109,17 @@ function $$$p$()
}
// Subtraction (Scheme `-`).
//
// With two or more arguments, each subsequent argument is subtracted from
// the first, left to right: (- 10 1 2) => 7.  With exactly one argument the
// value is negated: (- 5) => -5.  With no arguments there is nothing to
// subtract from and the result is `undefined`.
//
// The fold must be seeded with the first argument rather than 0; seeding
// with 0 would compute 0 - a - b - ... and negate the first operand.
function $$$_$()
{
    const [ first, ...rest ] = argToArr( arguments );

    // unary minus is negation, not the identity
    if ( rest.length === 0 ) {
        return ( first === undefined ) ? undefined : -first;
    }

    return rest.reduce( ( ( x, y ) => x - y ), first );
}
// zero?: true only when N is strictly equal to the number zero (no type
// coercion is performed, so "0", null, and false are all rejected)
const $$zero$7$ = ( n ) => ( 0 === n );
// SRFI-1 `fold`: walk XS from left to right, calling F with the current
// element followed by the value accumulated so far (starting from INIT);
// whatever F returns becomes the new accumulated value, and the last one is
// the result.
//
// warning: fold here only supports one list
const $$fold = ( f, init, xs ) => {
    let acc = init;

    xs.forEach( item => {
        acc = f( item, acc );
    } );

    return acc;
};
// Node.js stuff

View File

@ -105,7 +105,7 @@ class Parser
case 'close':
if ( depth === 0 ) {
this._error(
src, pos, `Unexpected closing parenthesis`
src, pos, `unexpected closing parenthesis`
);
}
@ -129,7 +129,7 @@ class Parser
// should never happen unless there's a bug in the tokenizer
// or we forget a token type above
default:
this._error( src, pos, `Unexpected token '${type}'` );
this._error( src, pos, `unexpected token '${type}'` );
}
}, [ 0, [], [] ] );
@ -137,7 +137,7 @@ class Parser
// are still open sexps
if ( depth > 0 ) {
throw SyntaxError(
`Unexpected end of input at depth ${depth}`
`unexpected end of input at depth ${depth}`
);
}
@ -289,7 +289,8 @@ class Parser
* Dumb compiler to transform AST into ECMAScript
*
* This is a really dumb code generator: it takes the AST and essentially
* transforms it 1:1 wherever possible into the target language.
* transforms it 1:1 wherever possible into the target language. There is
* no intermediate representation (e.g. an ES AST).
*
* This is nothing like what we actually want the _ultimate_ compiler to do
* after Birth, but it gets us to a point where we can self-host on a basic
@ -547,6 +548,11 @@ class Compiler
const fnmap = {
'js:console': 'console.log',
'lambda': ( [ args, ...body ], stoes, btoes ) =>
"function(" + args.map( stoes ).join( ", " ) + "){\n" +
btoes( body ) +
"}",
// simple if statement with optional else, wrapped in a self-executing
// function to simplify code generation (e.g. returning an if)
'if': ( [ pred, t, f ], stoes ) =>