birth: AST generation
Just about ready for that sloppy code generation! * build-aux/bootstrap/birth.scm: Update file header documentation. Add some whitespace between existing procedures. Invoke `parse-lisp' as the program in place of `lex', producing an AST as output to the console. (cadddr): Add procedure. (token-{type,lexeme,value,pos}): Add procedures. (parse-lisp): Add procedure (contains other procedures). * build-aux/bootstrap/libprebirth.js ($$append): Add function (append). ($$$_$): Correct implementation (-). ($$zero$7$): Add predicate (zero?). ($$fold): Add function (fold). * build-aux/bootstrap/prebirth.js (parseLisp): Lowercase some errors. (Compiler): Update class docblock. (fnmap)[lambda]: Add `lambda' form.master
parent
dd34498808
commit
3310241a94
|
@ -25,12 +25,15 @@
|
|||
;;; This is the Prebirth Lisp implementation of the JavaScript Prebirth
|
||||
;;; compiler, found in `prebirth.js'---that compiler can be used to compile
|
||||
;;; this compiler, which can then be used to compile itself, completing the
|
||||
;;; bootstrapping process. This process is termed "Birth".
|
||||
;;; bootstrapping process. This process is termed "Birth", and the process
|
||||
;;; is successful if the output of Birth compiling itself is byte-for-byte
|
||||
;;; identical to the output of compiling Birth with Prebirth.
|
||||
;;;
|
||||
;;; This is largely a 1:1 translation of `prebirth.js'. Note that we're
|
||||
;;; dealing with a small subset of Scheme here, so certain things might be
|
||||
;;; done differently given a proper implementation. See that file for
|
||||
;;; terminology.
|
||||
;;; This is largely a 1:1 translation of `prebirth.js'.
|
||||
;;;
|
||||
;;; Note that we're dealing with a small subset of Scheme here, so certain
|
||||
;;; things might be done differently given a proper implementation. See
|
||||
;;; that file for terminology.
|
||||
|
||||
;; pair selection
|
||||
(define (cadr xs)
|
||||
|
@ -39,7 +42,10 @@
|
|||
(car (car (cdr xs))))
|
||||
;; Third element of the list XS (the car of its second tail).
(define (caddr xs)
  (car
    (cdr
      (cdr xs))))
|
||||
;; Fourth element of the list XS, expressed as the third element of its
;; tail.
(define (cadddr xs)
  (caddr (cdr xs)))
|
||||
|
||||
;; for convenience
|
||||
;; Build a regexp object from RE with `js:regexp' and hand it, together
;; with the string S, to `js:match'; whatever `js:match' yields is returned
;; unchanged.
(define (js:match-regexp re s)
  (let ((compiled (js:regexp re)))
    (js:match compiled s)))
|
||||
|
||||
|
@ -101,6 +107,7 @@
|
|||
(token "symbol" symbol trim newpos))))))))
|
||||
|
||||
|
||||
|
||||
;; Throw an error with a window of surrounding source code.
|
||||
;;
|
||||
;; The "window" is simply ten characters to the left and right of the
|
||||
|
@ -112,6 +119,7 @@
|
|||
src)))
|
||||
|
||||
|
||||
|
||||
;; Produce a token and recurse.
|
||||
;;
|
||||
;; The token will be concatenated with the result of the mutually
|
||||
|
@ -145,7 +153,87 @@
|
|||
(+ pos len)))))
|
||||
|
||||
|
||||
;; various accessor procedures for token lists (we're Prebirth Lisp here,
|
||||
;; so no record support or anything fancy!)
|
||||
;; first element: the token type (the parser dispatches on values such as
;; "open", "close", "string", "symbol" and "comment")
(define (token-type tok)
  (car tok))

;; second element: the lexeme
(define (token-lexeme tok)
  (cadr tok))

;; third element: the value
(define (token-value tok)
  (caddr tok))

;; fourth element: the position (handed to `parse-error' by the parser)
(define (token-pos tok)
  (cadddr tok))
|
||||
|
||||
|
||||
|
||||
;; Produce an AST from the given string SRC of sexps
|
||||
;;
|
||||
;; This is essentially the CST with whitespace removed. It first invokes
|
||||
;; the lexer to produce a token string from the input sexps SRC. From this,
|
||||
;; it verifies only proper nesting (that SRC does not close sexps too early
|
||||
;; and that EOF isn't reached before all sexps are closed) and produces an
|
||||
;; AST that is an isomorphism of the original sexps.
|
||||
(define (parse-lisp src)
  ;; the parser state is a three-element list (depth tree stack); these
  ;; selectors give its parts a name
  (define (ast-depth state)
    (car state))
  (define (ast-tree state)
    (cadr state))
  (define (ast-stack state)
    (caddr state))

  ;; reduce the token list TOKS, left to right, into a final parser state
  (define (toks->ast toks)
    (fold
      (lambda (tok state)
        (let ((depth   (ast-depth state))
              (tree    (ast-tree state))
              (parents (ast-stack state))
              (kind    (token-type tok))
              (pos     (token-pos tok)))

          ;; only a handful of token types exist in this bootstrap lisp
          (case kind
            ;; comments leave the state untouched
            (("comment")
             state)

            ;; a new expression begins: push the list being built onto
            ;; the stack of parents and continue with a fresh, empty list
            (("open")
             (list (+ depth 1)
                   (list)
                   (cons tree parents)))

            ;; an expression ends: pop its parent and append the finished
            ;; list to it; closing at depth zero means there was no
            ;; matching open
            (("close")
             (if (zero? depth)
                 (parse-error src pos "unexpected closing parenthesis")
                 (list (- depth 1)
                       (append (car parents) (list tree))
                       (cdr parents))))

            ;; strings and symbols (numbers and the like are treated as
            ;; symbols too) are appended to the current list as-is
            (("string" "symbol")
             (list depth
                   (append tree (list tok))
                   parents))

            ;; anything else indicates a tokenizer bug or a token type
            ;; that was not handled above
            (else
             (parse-error src
                          pos
                          (string-append "unexpected token `" kind "'"))))))

      ;; seed state: depth 0, empty tree, empty stack of parents
      (list 0 (list) (list))
      toks))

  ;; lex SRC, reduce the tokens, and insist that every opened expression
  ;; was closed again (final depth of zero) before handing back the tree
  (let* ((tokens (lex src 0))
         (final  (toks->ast tokens)))
    (if (zero? (ast-depth final))
        (ast-tree final)
        (error (string-append
                 "unexpected end of input at depth "
                 (ast-depth final))))))
|
||||
|
||||
|
||||
;; at this point, this program can parse itself and output a CST (sans
|
||||
;; whitespace)
|
||||
;; Program entry point: read all of standard input, parse it with
;; `parse-lisp', and write the resulting tree to the console.  The earlier
;; `lex' invocation is superseded by `parse-lisp' (which lexes internally)
;; and must not remain inside this form.
(js:console
  (parse-lisp (js:stdin->string)))
|
||||
|
|
|
@ -78,6 +78,14 @@ const $$cons = ( item, list ) => _assertList( list ) && [ item ].concat( list )
|
|||
// Scheme `car`: the first element of XS.  XS is first passed through
// _assertPair; should that check yield a falsy value rather than throwing,
// the falsy value itself is returned.
const $$car = xs =>
{
    const checked = _assertPair( xs );

    return checked && xs[ 0 ];
};
|
||||
// Scheme `cdr`: a new array holding every element of XS after the first.
// XS is first passed through _assertPair; should that check yield a falsy
// value rather than throwing, the falsy value itself is returned.
const $$cdr = xs =>
{
    const checked = _assertPair( xs );

    return checked && xs.slice( 1 );
};
|
||||
|
||||
// Scheme `append`: concatenate any number of lists into a new list.
//
// With no arguments the empty list is returned.  Every argument, including
// the first, is checked with _assertList before being concatenated.
//
// warning: blows up if any items are non-lists, whereas the proper RnRS
// implementation will set the cdr to the final item even if it's not a pair
function $$append()
{
    // the explicit [] seed makes the zero-argument call well-defined and
    // routes the first argument through the same assertion as the rest
    return argToArr( arguments )
        .reduce( ( xs, x ) => xs.concat( _assertList( x ) && x ), [] );
}
|
||||
|
||||
// Scheme `list?`: lists are represented as arrays, so this is true exactly
// when XS is an array (empty or not).
const $$list$7$ = function( xs )
{
    return Array.isArray( xs );
};
|
||||
// Scheme `pair?`: true only for a non-empty array.
const $$pair$7$ = xs =>
{
    if ( !Array.isArray( xs ) ) {
        return false;
    }

    return xs.length > 0;
};
|
||||
|
||||
|
@ -101,8 +109,17 @@ function $$$p$()
|
|||
}
|
||||
// Scheme `-`.
//
// With two or more arguments, every subsequent argument is subtracted from
// the first, left to right.  With a single argument the result is its
// negation.  Calling it with no arguments is an error, as there is nothing
// to subtract from.
function $$$_$()
{
    const args = argToArr( arguments );

    if ( args.length === 0 ) {
        throw TypeError( "-: expecting at least one argument" );
    }

    const first = args.shift();

    // unary form negates rather than returning the argument unchanged
    if ( args.length === 0 ) {
        return -first;
    }

    // seed with the first argument; seeding with 0 would compute
    // 0 - a - b - ... instead of a - b - ...
    return args.reduce( ( ( x, y ) => x - y ), first );
}
|
||||
// Scheme `zero?`: strict comparison against the number 0, so no type
// coercion takes place (the string "0" is not zero).
const $$zero$7$ = function( x )
{
    return 0 === x;
};
|
||||
|
||||
// SRFI-1 `fold`: left-to-right reduction over one or more lists.
//
// F is invoked once per position as f( x1, ..., xn, acc ), receiving one
// element from each of the n lists followed by the accumulated value, which
// starts out as INIT; its return value becomes the next accumulator.  With
// several lists, iteration stops as soon as the shortest one is exhausted.
// With a single list this is f( x, acc ) for each element in order.
const $$fold = ( f, init, ...lists ) =>
{
    if ( lists.length === 0 ) {
        throw TypeError( "fold: expecting at least one list" );
    }

    if ( !lists.every( xs => Array.isArray( xs ) ) ) {
        throw TypeError( "fold: expecting list" );
    }

    const len = Math.min( ...lists.map( xs => xs.length ) );
    let acc = init;

    for ( let i = 0; i < len; i++ ) {
        acc = f( ...lists.map( xs => xs[ i ] ), acc );
    }

    return acc;
};
|
||||
|
||||
|
||||
// Node.js stuff
|
||||
|
|
|
@ -105,7 +105,7 @@ class Parser
|
|||
case 'close':
|
||||
if ( depth === 0 ) {
|
||||
this._error(
|
||||
src, pos, `Unexpected closing parenthesis`
|
||||
src, pos, `unexpected closing parenthesis`
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -129,7 +129,7 @@ class Parser
|
|||
// should never happen unless there's a bug in the tokenizer
|
||||
// or we forget a token type above
|
||||
default:
|
||||
this._error( src, pos, `Unexpected token '${type}'` );
|
||||
this._error( src, pos, `unexpected token '${type}'` );
|
||||
}
|
||||
}, [ 0, [], [] ] );
|
||||
|
||||
|
@ -137,7 +137,7 @@ class Parser
|
|||
// are still open sexps
|
||||
if ( depth > 0 ) {
|
||||
throw SyntaxError(
|
||||
`Unexpected end of input at depth ${depth}`
|
||||
`unexpected end of input at depth ${depth}`
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -289,7 +289,8 @@ class Parser
|
|||
* Dumb compiler to transform AST into ECMAScript
|
||||
*
|
||||
* This is a really dumb code generator: it takes the AST and essentially
|
||||
* transforms it 1:1 wherever possible into the target language.
|
||||
* transforms it 1:1 wherever possible into the target language. There is
|
||||
* no intermediate representation (e.g. an ES AST).
|
||||
*
|
||||
* This is nothing like what we actually want the _ultimate_ compiler to do
|
||||
* after Birth, but it gets us to a point where we can self-host on a basic
|
||||
|
@ -547,6 +548,11 @@ class Compiler
|
|||
const fnmap = {
|
||||
'js:console': 'console.log',
|
||||
|
||||
'lambda': ( [ args, ...body ], stoes, btoes ) =>
|
||||
"function(" + args.map( stoes ).join( ", " ) + "){\n" +
|
||||
btoes( body ) +
|
||||
"}",
|
||||
|
||||
// simple if statement with optional else, wrapped in a self-executing
|
||||
// function to simplify code generation (e.g. returning an if)
|
||||
'if': ( [ pred, t, f ], stoes ) =>
|
||||
|
|
Loading…
Reference in New Issue