birth,prebirth: Non-recursive lexing to prevent stack exhaustion

This needs to run in the browser too, where we have no control over
stack limits.

* build-aux/bootstrap/birth.scm (lex): Non-recursive strategy (loop with
  mutable list).
  (make-token): Update doc. Produce list of token, new string, and
  position. Don't recurse.
  (body->es): Add `ret' param. Only produce `return' statement if new
  param is set.
  (cdfn): Use it.
  (fnmap) [js:while, js:break]: Add forms.
  [lambda, let, case]: Use new `body->es' `ret' param.
  [let*]: Define JS variables in output using `let' instead of `const'
  to permit mutating with new `set!' form. Use new `body->es' `ret'
  param.
  [set!]: Add form.
  (prebirth->ecmascript): Adjust libprebirth path to be relative to
  self.
* build-aux/bootstrap/libprebirth.js ($$append$b$): Add `append!'
  procedure.
  ($$js$regexp, $$js$match, $$js$replace): Move a few lines up.
  (fs): Provide stub if `require' is not defined.
* build-aux/bootstrap/prebirth.js (_lex): Non-recursive strategy (loop
  with array-appending).
  (_token): No more mutual recursion with `#_lex'. Return new string
  and position.
  (_bodyToEs): Add `ret' param. Only produce `return' statement if new
  param is set.
  (fnmap) [js:while, js:break]: Add forms.
  [let*]: Define JS variables in output using `let' instead of `const'
  to permit mutating with new `set!' form. Use new `body->es' `ret'
  param.
  [set!]: Add form.
parent 35fa13a8a0
commit 431b18e1df
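As a quick orientation before the diff: the refactor replaces a recursive lexer (which built its token list on the call stack) with a loop that appends to an array and mutates the source string and position. A minimal standalone sketch of that pattern follows; it is illustrative only (simplified token shape, strings and comments omitted), not the project code.

// Sketch of the non-recursive lexing strategy described above.
// Not the project code: token fields are simplified and string/comment
// handling is omitted.
function lexSketch( src, pos = 0 )
{
    const toks = [];

    while ( true ) {
        // skip leading whitespace, tracking position for error reporting
        const ws = src.match( /^\s+/ ) || [ "" ];
        src  = src.substr( ws[ 0 ].length );
        pos += ws[ 0 ].length;

        if ( src === '' ) {
            break;               // EOF and we're done
        }

        // parens are single-character lexemes; everything else is a symbol
        const lexeme = /^[()]/.test( src )
            ? src[ 0 ]
            : ( src.match( /^[^\s()"]+/ ) || [ src[ 0 ] ] )[ 0 ];

        toks.push( { lexeme: lexeme, pos: pos } );

        // left-truncate the source and advance the position---this replaces
        // the recursive call that used to return the remainder
        src  = src.substr( lexeme.length );
        pos += lexeme.length;
    }

    return toks;
}

// lexSketch( "(add 1 2)" ) yields five tokens: (, add, 1, 2, )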
build-aux/bootstrap/birth.scm

@@ -69,54 +69,75 @@
 ;; This is the lexer. Whitespace is ignored. The grammar consists of
 ;; simple s-expressions.
 ;;
-;; This procedure is mutually recursive with `token'. It expects that
-;; the source SRC will be left-truncated as input is processed. POS exists
-;; for producing metadata for error reporting---it has no impact on parsing.
+;; Tokens are produced with `make-token'. The source SRC will be
+;; left-truncated as input is processed. POS exists for producing metadata
+;; for error reporting---it has no impact on parsing.
+;;
+;; This implementation was originally recursive and immutable, but the stack
+;; was being exhausted, so it was refactored into an inferior
+;; implementation. Note the use of `js:while' and `js:break'---these are
+;; quick fixes to the problem of stack exhaustion in browsers (where we have
+;; no control over the stack limit); proper tail call support will come
+;; later when we have a decent architecture in place.
 ;;
 ;; The result is a list of tokens. See `token' for the format.
 (define (lex src pos)
-(let* ((ws (or (js:match-regexp "^\\s+"
-src)
-(list "")))
-(ws-len (string-length (car ws)))
-(trim (substring src ws-len)) ; ignore whitespace, if any
-(newpos (+ pos ws-len))) ; adj pos to account for removed ws
+(let ((toks (list)))
+(js:while #t ; browser stack workaround
+(let* ((ws (or (js:match-regexp "^\\s+"
+src)
+(list "")))
+(ws-len (string-length (car ws)))
+(trim (substring src ws-len)) ; ignore whitespace, if any
+(newpos (+ pos ws-len))) ; adj pos to account for removed ws

 (if (string=? "" trim)
-(list) ; EOF and we're done
+(js:break) ; EOF and we're done

 ;; normally we'd use `string-ref' here, but then we'd have to
 ;; implement Scheme characters, so let's keep this simple and keep
 ;; with strings
-(let ((ch (substring trim 0 1)))
-(case ch
+(let* ((ch (substring trim 0 1))
+(t (case ch
 ;; comments extend until the end of the line
 ((";") (let ((eol (js:match-regexp "^(.*?)(\\n|$)" trim)))
 (make-token "comment" (cadr eol) trim newpos)))

-;; left and right parenthesis are handled in the same manner:
-;; they produce distinct tokens with single-character lexemes
-(("(") (make-token "open" ch trim newpos))
-((")") (make-token "close" ch trim newpos))
+;; left and right parenthesis are handled in the same
+;; manner: they produce distinct tokens with
+;; single-character lexemes
+(("(") (make-token "open" ch trim newpos))
+((")") (make-token "close" ch trim newpos))

-;; strings are delimited by opening and closing ASCII double
-;; quotes, which can be escaped with a backslash
-(("\"") (let ((str (js:match-regexp "^\"(|.*?[^\\\\])\""
-trim)))
-(or str (parse-error
-src pos "missing closing string delimiter"))
-;; a string token consists of the entire string
-;; including quotes as its lexeme, but its value will
-;; be the value of the string without quotes due to
-;; the `str' match group (see `token')
-(make-token "string" str trim newpos)))
+;; strings are delimited by opening and closing ASCII
+;; double quotes, which can be escaped with a
+;; backslash
+(("\"") (let ((str (js:match-regexp "^\"(|.*?[^\\\\])\""
+trim)))
+(or str (parse-error
+src pos
+"missing closing string delimiter"))
+;; a string token consists of the entire
+;; string including quotes as its lexeme,
+;; but its value will be the value of the
+;; string without quotes due to the `str'
+;; match group (see `token')
+(make-token "string" str trim newpos)))

 (else
-;; anything else is considered a symbol up until whitespace or
-;; any of the aforementioned delimiters
-(let ((symbol (js:match-regexp "^[^\\s()\"]+"
-trim)))
-(make-token "symbol" symbol trim newpos))))))))
+;; anything else is considered a symbol up until
+;; whitespace or any of the aforementioned
+;; delimiters
+(let ((symbol (js:match-regexp "^[^\\s()\"]+"
+trim)))
+(make-token "symbol" symbol trim newpos))))))
+
+;; yikes---see notes in docblock with regards to why
+;; we're using mutators here
+(set! toks (append toks (list (car t))))
+(set! src (cadr t))
+(set! pos (caddr t))))))
+toks))


@@ -132,14 +153,7 @@


-;; Produce a token and recurse.
-;;
-;; The token will be concatenated with the result of the mutually
-;; recursive procedure `lex'.
-;;
-;; For the record: I'm not fond of mutual recursion from a clarity
-;; standpoint, but this is how the abstraction evolved to de-duplicate
-;; code, and I don't much feel like refactoring it.
+;; Produce a token, left-truncate src, and update pos.
 ;;
 ;; Unlike the JS Prebirth implementation which uses a key/value object,
 ;; we're just using a simple list.

@@ -160,9 +174,9 @@

 ;; produce token and recurse on `lex', left-truncating the source
 ;; string to discard what we have already processed
-(cons (list (quote token) type lexeme value pos)
-(lex (substring src len)
-(+ pos len)))))
+(list (list (quote token) type lexeme value pos)
+(substring src len)
+(+ pos len))))


 ;; various accessor procedures for token lists (we're Prebirth Lisp here,

@@ -309,21 +323,21 @@
 ;;
 ;; This produces a 1:1 mapping of body XS s-expressions to ES statements,
 ;; recursively. The heavy lifting is done by `sexp->es'.
-(define (body->es xs)
+(define (body->es xs ret)
 ;; recursively process body XS until we're out of pairs
 (if (not (pair? xs))
 ""
 (let* ((x (car xs))
 (rest (cdr xs))
-(more? (pair? rest)))
+(more? (or (not ret) (pair? rest))))
 ;; the result is a semicolon-delimited string of statements, with
-;; the final statement prefixed with `return'
+;; the final statement prefixed with `return' unless (not ret)
 (string-append
 " "
 (if more? "" "return ") ; prefix with `return' if last body exp
 (sexp->es x) ";" ; process current body expression
-(if more? "\n" "")
-(body->es rest))))) ; recurse
+(if (pair? rest) "\n" "")
+(body->es rest ret))))) ; recurse


 ;; Compile procedure definition into an ES function definition

@@ -334,7 +348,7 @@
 (let* ((dfn (cadr t))
 (id (tname->id (token-value (car dfn))))
 (params (params->es (cdr dfn)))
-(body (body->es (cddr t))))
+(body (body->es (cddr t) #t)))
 ;; this is the final format---each procedure becomes its own function
 ;; definition in ES
 (string-append

@@ -363,6 +377,19 @@
 (("js:console")
 (string-append "console.log(" (map sexp->es args) ")"))

+;; yes, there are more important things to do until we get to the
+;; point where it's worth implementing proper tail calls
+(("js:while")
+(let ((pred (car args))
+(body (cdr args)))
+(string-append
+"(function(__whilebrk){"
+"while (" (sexp->es pred) "){\n"
+(body->es body #f) " if (__whilebrk) break;\n"
+"}\n"
+"})(false)")))
+(("js:break") "__whilebrk=true")
+
 ;; fortunately ES6+ has native symbol support :)
 ;; we don't (yet?) need list quoting in Prebirth
 (("quote")

@@ -377,7 +404,7 @@
 (body (cdr args)))
 (string-append
 "function(" (join ", " (map sexp->es fnargs)) "){\n"
-(body->es body)
+(body->es body #t)
 "}")))

 ;; simple if statement with optional else, wrapped in a self-executing

@@ -430,10 +457,10 @@
 (join "" (map (lambda (binding)
 (let ((var (car binding))
 (init (cadr binding)))
-(string-append " const " (sexp->es var)
+(string-append " let " (sexp->es var)
 " = " (sexp->es init) ";\n")))
 bindings))
-(body->es body) "\n"
+(body->es body #t) "\n"
 " })()")))

 ;; similar to the above, but variables cannot reference one-another

@@ -445,7 +472,7 @@
 (fargs (join ", " (map sexp->es
 (map cadr bindings)))))
 (string-append "(function(" fparams "){\n"
-(body->es body) "\n"
+(body->es body #t) "\n"
 "})(" fargs ")")))

 ;; and here I thought Prebirth Lisp would be simple...but having

@@ -469,11 +496,16 @@
 (string-append
 "case " (sexp->es datum) ":\n"))
 data)))
-(body->es exprs) "\n"))
+(body->es exprs #t) "\n"))
 (map car clauses)
 (map cdr clauses)))
 "}})()")))

+(("set!")
+(let ((varid (car args))
+(val (cadr args)))
+(string-append (sexp->es varid) " = " (sexp->es val))))
+
 ;; normal procedure application
 (else (let* ((idfn (tname->id fn))
 (argstr (join ", " (map sexp->es args))))

@@ -516,7 +548,7 @@
 ;; output libprebirth and compiled output, wrapped in a self-executing
 ;; function to limit scope
 (string-append "(function(){"
-(js:file->string "build-aux/bootstrap/libprebirth.js") "\n\n"
+(js:file->string "libprebirth.js") "\n\n"
 (join "\n\n" (map sexp->es ast))
 "})();"))

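For reference, the ES that the new `js:while'/`js:break' templates above produce has the shape of the hand-assembled sketch below; this is illustrative only (the conditional break is filled in by hand), not captured compiler output.

// Hand-assembled sketch of the generated ES: the loop body runs inside a
// flag-carrying IIFE, `(js:break)' merely sets the flag, and the trailing
// `if (__whilebrk) break;' emitted after the body honours it.
let n = 0;                        // `let*' now emits `let', so `set!' can mutate n

(function(__whilebrk){
    while (true){
        n = n + 1;                // (set! n (+ n 1))
        __whilebrk = ( n === 3 ); // stands in for a conditional (js:break)
        if (__whilebrk) break;
    }
})(false);

console.log( n );                 // 3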
build-aux/bootstrap/libprebirth.js

@@ -83,6 +83,11 @@ const $$cons = ( item, list ) => _assertList( list ) && [ item ].concat( list )
 const $$car = xs => _assertPair( xs ) && xs[ 0 ];
 const $$cdr = xs => _assertPair( xs ) && xs.slice( 1 );

+// warning: this should technically set the cdr to the next element, and
+// should accept any number of arguments, but that's not what we're doing
+// here (note that an empty list is not a pair and therefore has no cdr)
+const $$append$b$ = ( dest, xs ) => ( dest.length === 0 ) ? [] : dest.push( xs );
+
 // warning: blows up if any items are non-lists, whereas the proper RnRS
 // implementation will set the cdr to the final item even if it's not a pair
 function $$append()

@@ -134,16 +139,18 @@ const $$map = ( f, ...xs ) =>
 xs.map( x => x[ i ] ) ) );


-// Node.js stuff
-const fs = require( 'fs' );
-
-// stdin->string
-const $$js$file$_$$g$string = ( path ) =>
-fs.readFileSync( path ).toString();
-
 const $$js$regexp = ( s, opts ) => new RegExp( s, opts );
 const $$js$match = ( r, s ) => s.match( r ) || false;
 const $$js$replace = ( r, repl, s ) => s.replace( r, repl );


+const fs = ( typeof require !== 'undefined' )
+? require( 'fs' )
+: { readFileSync: path => window.fsdata[ path ] };
+
+// stdin->string
+const $$js$file$_$$g$string = ( path ) =>
+fs.readFileSync( path ).toString();
+
+
 /** =============== end of libprebirth =============== **/

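In a browser, where `require' is undefined, the `fs' stub above reads from `window.fsdata'; one way a page might preload that map is sketched below. The fetch-based wiring and the served path are assumptions for illustration, not part of the commit.

// Assumed browser-side wiring (not part of the commit): preload file
// contents into window.fsdata so the stubbed readFileSync can find them.
window.fsdata = {};

fetch( 'libprebirth.js' )
    .then( res => res.text() )
    .then( src =>
    {
        window.fsdata[ 'libprebirth.js' ] = src;

        // mirrors the stub in the diff above
        const fs = { readFileSync: path => window.fsdata[ path ] };

        // this is the path that `prebirth->ecmascript' now passes to
        // js:file->string (relative to self)
        console.log( fs.readFileSync( 'libprebirth.js' ).length );
    } );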
build-aux/bootstrap/prebirth.js

@@ -177,10 +177,13 @@ class Parser
 * This is the lexer. Whitespace is ignored. The grammar consists of
 * simple s-expressions.
 *
-* This function is mutually recursive with `#_token'. It expects that
-* the source SRC will be left-truncated as input is
-* processed. POS exists for producing metadata for error
-* reporting---it has no impact on parsing.
+* Tokens are produced with `#_token'. The source SRC will be
+* left-truncated as input is processed. POS exists for producing
+* metadata for error reporting---it has no impact on parsing.
+*
+* This implementation was originally recursive and immutable, but the
+* stack was being exhausted, so it was refactored into an inferior
+* implementation.
 *
 * @param {string} src source code
 * @param {number} pos position (character offset) in source

@@ -189,63 +192,74 @@ class Parser
 */
 _lex( src, pos = 0 )
 {
-// ignore whitespace, if any
-const ws = src.match( /^\s+/ ) || [ "" ];
-const trim = src.substr( ws[ 0 ].length );
+const toks = [];

-// adjust position to account for any removed whitespace
-pos += ws[ 0 ].length;
+// process until EOF (see break)
+while ( true ) {
+// ignore whitespace, if any
+const ws = src.match( /^\s+/ ) || [ "" ];
+const trim = src.substr( ws[ 0 ].length );

-// EOF and we're done
-if ( trim === '' ) {
-return [];
-}
+// adjust position to account for any removed whitespace
+pos += ws[ 0 ].length;

-// comment until end of line
-if ( trim[ 0 ] === ';' ) {
-const eol = trim.match( /^(.*?)(\n|$)/ );
-return this._token( 'comment', eol[ 1 ], trim, pos );
-}
-
-// left and right parenthesis are handled in the same manner: they
-// produce distinct tokens with single-character lexemes
-if ( trim[ 0 ] === '(' ) {
-return this._token( 'open', '(', trim, pos );
-}
-if ( trim[ 0 ] === ')' ) {
-return this._token( 'close', ')', trim, pos );
-}
-
-// strings are delimited by opening and closing ASCII double quotes,
-// which can be escaped with a backslash
-if ( trim[ 0 ] === '"' ) {
-const str = trim.match( /^"(|.*?[^\\])"/ );
-if ( !str ) {
-this._error( src, pos, "missing closing string delimiter" );
+// EOF and we're done
+if ( trim === '' ) {
+break;
 }

-// a string token consists of the entire string including quotes
-// as its lexeme, but its value will be the value of the string
-// without quotes due to the `str' match group (see `#_token')
-return this._token( 'string', str, trim, pos );
+let t = null;
+
+// comment until end of line
+if ( trim[ 0 ] === ';' ) {
+const eol = trim.match( /^(.*?)(\n|$)/ );
+t = this._token( 'comment', eol[ 1 ], trim, pos );
+}
+
+// left and right parenthesis are handled in the same manner: they
+// produce distinct tokens with single-character lexemes
+else if ( trim[ 0 ] === '(' ) {
+t = this._token( 'open', '(', trim, pos );
+}
+else if ( trim[ 0 ] === ')' ) {
+t = this._token( 'close', ')', trim, pos );
+}
+
+// strings are delimited by opening and closing ASCII double quotes,
+// which can be escaped with a backslash
+else if ( trim[ 0 ] === '"' ) {
+const str = trim.match( /^"(|.*?[^\\])"/ );
+if ( !str ) {
+this._error( src, pos, "missing closing string delimiter" );
+}
+
+// a string token consists of the entire string including quotes
+// as its lexeme, but its value will be the value of the string
+// without quotes due to the `str' match group (see `#_token')
+t = this._token( 'string', str, trim, pos );
+}
+
+// anything else is considered a symbol up until whitespace or any
+// of the aforementioned delimiters
+else {
+const symbol = trim.match( /^[^\s()"]+/ );
+t = this._token( 'symbol', symbol, trim, pos );
+}
+
+const [ tok, newsrc, newpos ] = t;
+
+// add token, left-truncate src, update pos
+toks.push( tok );
+src = newsrc;
+pos = newpos;
 }

-// anything else is considered a symbol up until whitespace or any
-// of the aforementioned delimiters
-const symbol = trim.match( /^[^\s()"]+/ );
-return this._token( 'symbol', symbol, trim, pos );
+return toks;
 }


 /**
-* Produce a token and recurse
-*
-* The token will be concatenated with the result of the mutually
-* recursive method `_lex'.
-*
-* For the record: I'm not fond of mutual recursion from a clarity
-* standpoint, but this is how the abstraction evolved to de-duplicate
-* code, and I don't much feel like refactoring it.
+* Produce a token, left-truncate src, and update pos
 *
 * @param {string} type token type
 * @param {string|Array} match lexeme match

@@ -272,14 +286,13 @@ class Parser
 pos: pos
 };

-// continue producing tokens by recursing, left-truncating the
-// source string to discard what we have already processed
-return [ token ].concat(
-this._lex(
+// produce token, left-truncating the source string to discard what
+// we have already processed
+return [
+token,
 src.substr( lexeme.length ),
-( pos + lexeme.length )
-)
-);
+( pos + lexeme.length ),
+];
 }
 };


@@ -431,7 +444,7 @@ class Compiler
 *
 * @return {string} compiled BODY
 */
-_bodyToEs( body )
+_bodyToEs( body, ret = true )
 {
 // the body must be an array of expressions (this should always be
 // the case unless we have a bug in the compiler)

@@ -447,8 +460,8 @@ class Compiler
 // the final expression
 return js.map( ( s, i ) =>
 {
-const ret = ( i === ( js.length - 1 ) ) ? "return " : "";
-return ` ${ret}${s};`;
+const retstmt = ( ret && i === ( js.length - 1 ) ) ? "return " : "";
+return ` ${retstmt}${s};`;
 } ).join( '\n' );
 }


@@ -543,6 +556,16 @@ class Compiler
 const fnmap = {
 'js:console': 'console.log',

+// yes, there are more important things to do until we get to the point
+// where it's worth implementing proper tail calls
+'js:while': ( [ pred, ...body ], stoes, btoes ) =>
+"(function(__whilebrk){" +
+`while (${stoes(pred)}){\n` +
+`${btoes(body, false)} if (__whilebrk) break;\n` +
+"}\n" +
+"})(false)",
+'js:break': () => '__whilebrk=true',
+
 // fortunately ES6+ has native symbol support :)
 // we don't (yet?) need list quoting in Prebirth
 'quote': ( x, stoes ) => Array.isArray( x.value )

@@ -590,7 +613,7 @@ const fnmap = {
 'let*': ( [ bindings, ...body ], stoes, btoes ) =>
 "(function(){\n" +
 bindings
-.map( ([ x, val ]) => ` const ${stoes(x)} = ${stoes(val)};\n` )
+.map( ([ x, val ]) => ` let ${stoes(x)} = ${stoes(val)};\n` )
 .join( '' ) +
 btoes( body ) + "\n" +
 " })()",

@@ -624,7 +647,11 @@ const fnmap = {
 btoes( exprs ) + "\n"
 ).join( '' ) +
 "}" +
-"})()"
+"})()",
+
+// basic mutator (variable assignment)
+'set!': ( [ varid, val ], stoes ) =>
+`${stoes(varid)} = ${stoes(val)}`,
 };

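A minimal sketch (illustrative names, mirroring the `_bodyToEs' change above) of why the `ret' parameter exists: a lambda body still wants its final statement prefixed with `return', but a `js:while' body must not, or the loop IIFE would return on its first pass and the break flag would never be consulted.

const bodyToEs = ( stmts, ret = true ) =>
    stmts.map( ( s, i ) =>
    {
        // prefix only the final statement, and only when a return is wanted
        const retstmt = ( ret && i === ( stmts.length - 1 ) ) ? "return " : "";
        return `  ${retstmt}${s};`;
    } ).join( '\n' );

console.log( bodyToEs( [ "f(x)", "x + 1" ] ) );        // last line gets `return'
console.log( bodyToEs( [ "f(x)", "x + 1" ], false ) ); // loop bodies: no `return'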