2017-08-21 02:20:03 -04:00
|
|
|
|
/**
|
|
|
|
|
* Bootstrap Gibble Lisp ("Prebirth")
|
|
|
|
|
*
|
|
|
|
|
* Copyright (C) 2017 Mike Gerwitz
|
|
|
|
|
*
|
|
|
|
|
* This file is part of Gibble.
|
|
|
|
|
*
|
|
|
|
|
* Gibble is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU Affero General Public License as
|
|
|
|
|
* published by the Free Software Foundation, either version 3 of the
|
|
|
|
|
* License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*
|
|
|
|
|
* THIS IS TEMPORARY CODE that will be REWRITTEN IN GIBBLE LISP ITSELF after
|
|
|
|
|
* a very basic bootstrap is complete. It is retained as an important
|
|
|
|
|
* artifact for those who wish to build Gibble from scratch without using
|
|
|
|
|
* another version of Gibble itself. This is called "self-hosting".
|
|
|
|
|
*
|
|
|
|
|
* Rather than producing a sophisticated self-hosting language, this
|
|
|
|
|
* language will be a terribly incomplete and inadequate version of what
|
|
|
|
|
* will ultimately become a formidable and competent language.
|
|
|
|
|
*
|
|
|
|
|
* I refer to this entire complication process as "Prebirth".¹ The "Birth"
|
|
|
|
|
* of Gibble is the act of reimplementing this Prebirth in a Prebirth
|
|
|
|
|
* version of Gibble Lisp itself. It's the chicken-and-egg paradox, without
|
|
|
|
|
* the paradox.²
|
|
|
|
|
*
|
|
|
|
|
* Gibble Lisp is _not_ the most primitive language that will be understood
|
|
|
|
|
* by the system---it is too high-level. After Birth, the language can
|
|
|
|
|
* devolve into something more powerful and workable.
|
|
|
|
|
*
|
|
|
|
|
* Some minor terminology:
|
|
|
|
|
* - AST: Abstract Syntax Tree, a processed form of the CST.
|
|
|
|
|
* - CST: Concrete Syntax Tree, a 1-1 conversion of source input to
|
|
|
|
|
* tokens.
|
|
|
|
|
* - token: an object produced by the lexer that represents a portion of
|
|
|
|
|
* the input language
|
|
|
|
|
* - lexer: sometimes called a ``tokenizer''---produces tokens by applying
|
|
|
|
|
* the grammar to a string of input.
|
|
|
|
|
* - grammar: a definition of the language (syntax).
|
|
|
|
|
* - lexeme: the portion of the original source string associated with a
|
|
|
|
|
* given token.
|
|
|
|
|
* - LL(0): Left-to-right, Leftmost derivation, 0 tokens lookahead
|
|
|
|
|
* - sexp: symbolic expression, (involving (lots (of (((parentheses))))))
|
|
|
|
|
*
|
|
|
|
|
* Excited? Great! My extemporaneous rambling is costing me more time than
|
|
|
|
|
* I spent making this damn thing! (No, really, it is.)
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
'use strict';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A very rudimentary (and extremely permissive) LL(0) Lisp parser
|
|
|
|
|
*
|
|
|
|
|
* This provides just enough to get by. It transforms lists into nested
|
|
|
|
|
* arrays of tokens with some very basic error checking (e.g. for proper
|
|
|
|
|
* nesting). This is not a general-purpose lisp parser.
|
|
|
|
|
*/
|
|
|
|
|
class Parser
{
    /**
     * Produce an AST from the given string SRC of sexps
     *
     * The lexer (`#_lex') first converts SRC into a token string.  That
     * token string is then reduced left-to-right into nested arrays:  an
     * opening parenthesis begins a new array, a closing parenthesis
     * completes it and appends it to its parent, and string and symbol
     * tokens are copied into the array currently being built.  Comment
     * tokens are discarded.  Only proper nesting is verified (SRC must not
     * close sexps too early, and EOF must not be reached before all sexps
     * are closed).
     *
     * @param {string} src input Lisp
     *
     * @throws {SyntaxError} on improper sexp nesting or unknown token type
     *
     * @return {Array} primitive abstract syntax tree of SRC
     */
    parseLisp( src )
    {
        // token string from lexing
        const toks = this._lex( src );

        // perform a leftmost reduction on the token string; the state is
        // [ nesting depth, sexp being built, stack of parent sexps ]
        const [ depth, ast ] = toks.reduce( ( result, token ) =>
        {
            const [ level, xs, stack ] = result;
            const { type, pos }        = token;

            // there are very few token types to deal with (again, this is
            // a very simple bootstrap lisp)
            switch ( type )
            {
                // ignore comments
                case 'comment':
                    return result;

                // closing parenthesis (end of sexp)
                case 'close': {
                    if ( level === 0 ) {
                        this._error(
                            src, pos, `unexpected closing parenthesis`
                        );
                    }

                    // the sexp is complete; add to its parent, reduce depth
                    const parent = stack.pop();
                    parent.push( xs );

                    return [ ( level - 1 ), parent, stack ];
                }

                // opening parenthesis (start of sexp)
                case 'open':
                    stack.push( xs );
                    return [ ( level + 1 ), [], stack ];

                // symbol or primitive; just copy the token in place
                case 'string':
                case 'symbol':
                    xs.push( token );
                    return [ level, xs, stack ];

                // should never happen unless there's a bug in the tokenizer
                // or we forget a token type above (`#_error' always throws)
                default:
                    this._error( src, pos, `unexpected token '${type}'` );
            }
        }, [ 0, [], [] ] );

        // if we terminate at a non-zero depth, that means there
        // are still open sexps
        if ( depth > 0 ) {
            throw new SyntaxError(
                `unexpected end of input at depth ${depth}`
            );
        }

        // the result is a set of tokens organized into ES arrays
        // isomorphic to the original sexp structure (the same structure)
        return ast;
    }


    /**
     * Throw a SyntaxError with a window of surrounding source code
     *
     * The "window" is the character at offset POS of SRC along with up to
     * ten characters on either side of it, clamped to the bounds of SRC.
     * Every newline within the window is replaced by a space so that the
     * message stays on a single line.
     *
     * @param {string} src source code (sexps)
     * @param {number} pos position of error (offset into SRC)
     * @param {string} msg error message
     *
     * @throws {SyntaxError} always
     *
     * @return {undefined} never returns normally
     */
    _error( src, pos, msg )
    {
        const radius = 10;

        // `slice' takes an end index (unlike `substr', which takes a
        // length); the start is clamped so that a small POS does not wrap
        // around and count from the end of SRC
        const window = src
            .slice( Math.max( 0, pos - radius ), pos + radius + 1 )
            .replace( /\n/g, " " );

        throw new SyntaxError( `${msg}: '${window}'` );
    }


    /**
     * Convert source input into a string of tokens
     *
     * This is the lexer.  Whitespace is ignored.  The grammar consists of
     * simple s-expressions.
     *
     * Tokens are produced with `#_token'.  A local copy of the source is
     * left-truncated as input is processed, while SRC itself is retained
     * for error reporting.  POS exists for producing metadata for error
     * reporting---it has no impact on parsing.
     *
     * This implementation was originally recursive and immutable, but the
     * stack was being exhausted, so it was refactored into a loop.
     *
     * @param {string} src source code
     * @param {number} pos position (character offset) of SRC's first
     *                     character
     *
     * @throws {SyntaxError} on a string missing its closing delimiter
     *
     * @return {Array} string of tokens
     */
    _lex( src, pos = 0 )
    {
        const toks = [];

        // remaining (left-truncated) input and the offset of its start
        let rest   = src;
        let offset = pos;

        // process until EOF (see break)
        while ( true ) {
            // ignore whitespace, if any
            const ws   = rest.match( /^\s+/ ) || [ "" ];
            const trim = rest.slice( ws[ 0 ].length );

            // adjust position to account for any removed whitespace
            offset += ws[ 0 ].length;

            // EOF and we're done
            if ( trim === '' ) {
                break;
            }

            let t = null;

            // comment until end of line (the newline itself is left for
            // the whitespace handling above); matching on "anything but a
            // newline" also copes with a carriage return before it
            if ( trim[ 0 ] === ';' ) {
                const comment = trim.match( /^[^\n]*/ )[ 0 ];
                t = this._token( 'comment', comment, trim, offset );
            }

            // left and right parenthesis are handled in the same manner: they
            // produce distinct tokens with single-character lexemes
            else if ( trim[ 0 ] === '(' ) {
                t = this._token( 'open', '(', trim, offset );
            }
            else if ( trim[ 0 ] === ')' ) {
                t = this._token( 'close', ')', trim, offset );
            }

            // strings are delimited by opening and closing ASCII double quotes,
            // which can be escaped with a backslash
            else if ( trim[ 0 ] === '"' ) {
                const str = trim.match( /^"(|.*?[^\\])"/ );

                if ( !str ) {
                    // OFFSET is relative to POS, so translate it back into
                    // an index of the untruncated SRC for the error window
                    this._error(
                        src,
                        ( offset - pos ),
                        "missing closing string delimiter"
                    );
                }

                // a string token consists of the entire string including quotes
                // as its lexeme, but its value will be the value of the string
                // without quotes due to the `str' match group (see `#_token')
                t = this._token( 'string', str, trim, offset );
            }

            // anything else is considered a symbol up until whitespace or any
            // of the aforementioned delimiters
            else {
                const symbol = trim.match( /^[^\s()"]+/ );
                t = this._token( 'symbol', symbol, trim, offset );
            }

            const [ tok, newsrc, newpos ] = t;

            // add token, left-truncate remaining input, update offset
            toks.push( tok );
            rest   = newsrc;
            offset = newpos;
        }

        return toks;
    }


    /**
     * Produce a token, left-truncate src, and update pos
     *
     * MATCH is either the lexeme itself or a regular expression match
     * whose first element is the lexeme and whose (optional) first group
     * is the token value.  When there is no group, the value is the lexeme.
     *
     * @param {string}       type  token type
     * @param {string|Array} match lexeme match
     * @param {string}       src   source code string, left-truncated
     * @param {number}       pos   offset relative to original src
     *
     * @return {Array} triple of the token, SRC with the lexeme removed
     *                 from its left, and POS advanced past the lexeme
     */
    _token( type, match, src, pos )
    {
        const parts = ( Array.isArray( match ) )
            ? match
            : [ match ];

        // the value is the first group of the match (indicating what we
        // are actually interested in), and the lexeme is the full match,
        // which might include, for example, string delimiters
        const [ lexeme, value ] = parts;

        const token = {
            type:   type,
            lexeme: lexeme,
            value:  ( value === undefined ) ? lexeme : value,
            pos:    pos
        };

        // produce token, left-truncating the source string to discard what
        // we have already processed
        return [
            token,
            src.slice( lexeme.length ),
            ( pos + lexeme.length ),
        ];
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Dumb compiler to transform AST into ECMAScript
|
|
|
|
|
*
|
|
|
|
|
* This is a really dumb code generator: it takes the AST and essentially
|
2017-09-02 01:30:13 -04:00
|
|
|
|
* transforms it 1:1 wherever possible into the target language. There is
|
|
|
|
|
* no intermediate representation (e.g. an ES AST).
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
|
|
|
|
* This is nothing like what we actually want the _ultimate_ compiler to do
|
|
|
|
|
* after Birth, but it gets us to a point where we can self-host on a basic
|
|
|
|
|
* Prebirth language and evolve from there.
|
|
|
|
|
*
|
|
|
|
|
* The code generation can be pretty much summed up by the last line of
|
|
|
|
|
* `Compiler#_cdfn'.
|
|
|
|
|
*/
|
|
|
|
|
class Compiler
|
|
|
|
|
{
|
2017-08-29 01:42:15 -04:00
|
|
|
|
/**
 * Initialize with function map
 *
 * FNMAP is stored on the instance unmodified.  It is intended for
 * mapping certain functions into other names or forms; for example,
 * `js:console' may map to `console.log' and `if' to an `if'
 * statement+expression.
 *
 * @param {Object} fnmap function map
 */
constructor( fnmap )
{
    // kept by reference; no copy is made
    this._fnmap = fnmap;
}
|
|
|
|
|
|
|
|
|
|
|
2017-08-21 02:20:03 -04:00
|
|
|
|
/**
|
|
|
|
|
* Compile AST into ECMAScript
|
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* Every function is mapped 1:1 to a function in ECMAScript. So, we
|
|
|
|
|
* just map all root children (which are expected to be Scheme-style
|
|
|
|
|
* shorthand function definitions) to functions.
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* @param {Array} tree root containing top-level function definitions
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*/
|
|
|
|
|
compile( tree )
|
|
|
|
|
{
|
|
|
|
|
// map every definition to a ES function definition and delimit them
|
|
|
|
|
// (for readability) by two newlines
|
2017-09-02 01:25:31 -04:00
|
|
|
|
return tree.map( this._sexpToEs.bind( this ) )
|
2017-09-21 13:37:16 -04:00
|
|
|
|
.join( "\n\n" );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2017-09-21 13:37:16 -04:00
|
|
|
|
* Compile procedure definition into a ES function definition
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* This will fail if the given token is not a `define'.
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
|
|
|
|
* @param {Object} t token
|
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* @return {string} compiled function definition
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*/
|
|
|
|
|
_cdfn( t )
|
|
|
|
|
{
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// e.g. (define (foo ...) body)
|
|
|
|
|
const [ , [ { value: name }, ...params ], ...body ] = t;
|
2017-08-21 02:20:03 -04:00
|
|
|
|
|
2017-09-21 02:19:38 -04:00
|
|
|
|
const id = this._idFromName( name );
|
2017-08-28 00:45:34 -04:00
|
|
|
|
const paramjs = this._paramsToEs( params );
|
|
|
|
|
const bodyjs = this._bodyToEs( body );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
|
2017-09-21 13:37:16 -04:00
|
|
|
|
// this is the final format---each procedure becomes its own function
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// definition in ES
|
|
|
|
|
return `function ${id}(${paramjs})\n{\n${bodyjs}\n};`;
|
2017-08-21 02:39:09 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* Compile parameter list
|
2017-08-21 02:39:09 -04:00
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* This simply takes the value of the symbol and outputs it (formatted),
|
|
|
|
|
* delimited by commas.
|
2017-08-21 02:39:09 -04:00
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* @param {Array} args token parameter list
|
2017-08-21 02:39:09 -04:00
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* @return {string} compiled parameter list
|
2017-08-21 02:39:09 -04:00
|
|
|
|
*/
|
2017-08-28 00:45:34 -04:00
|
|
|
|
_paramsToEs( args )
|
2017-08-21 02:39:09 -04:00
|
|
|
|
{
|
2017-08-28 00:45:34 -04:00
|
|
|
|
return args.map( ({ value: name }) => this._idFromName( name ) )
|
|
|
|
|
.join( ", " );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Generate ECMAScript-friendly name from the given id
|
|
|
|
|
*
|
2017-08-29 01:40:47 -04:00
|
|
|
|
* A subset of special characters that are acceptable in Scheme are
|
|
|
|
|
* converted in an identifiable manner; others are simply converted to
|
|
|
|
|
* `$' in a catch-all and therefore could result in conflicts and cannot
|
|
|
|
|
* be reliably distinguished from one-another. Remember: this is
|
|
|
|
|
* temporary code.
|
|
|
|
|
*
|
2017-09-21 02:19:38 -04:00
|
|
|
|
* @param {string} name source name
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
|
|
|
|
* @return {string} ES-friendly identifier
|
|
|
|
|
*/
|
2017-09-21 02:19:38 -04:00
|
|
|
|
_idFromName( name )
|
2017-08-21 02:20:03 -04:00
|
|
|
|
{
|
2017-09-21 02:19:38 -04:00
|
|
|
|
if ( /^\d+$/.test( name ) ) {
|
|
|
|
|
return name;
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-29 01:40:47 -04:00
|
|
|
|
// just some common ones; will fall back to `$' below
|
|
|
|
|
const conv = {
|
|
|
|
|
'-': '$_$',
|
|
|
|
|
'?': '$7$',
|
|
|
|
|
'@': '$a$',
|
|
|
|
|
'!': '$b$',
|
|
|
|
|
'>': '$g$',
|
|
|
|
|
'#': '$h$',
|
|
|
|
|
'*': '$k$',
|
|
|
|
|
'<': '$l$',
|
|
|
|
|
'&': '$n$',
|
|
|
|
|
'%': '$o$',
|
|
|
|
|
'+': '$p$',
|
|
|
|
|
'=': '$q$',
|
|
|
|
|
'^': '$v$',
|
|
|
|
|
'/': '$w$',
|
|
|
|
|
'$': '$$',
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if ( name === undefined ) {
|
|
|
|
|
throw SyntaxError( "Missing identifier name" );
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-21 02:19:38 -04:00
|
|
|
|
return '$$' + name.replace( /[^a-zA-Z0-9_]/g, c => conv[ c ] || '$' );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Compile body s-expressions into ECMAScript
|
|
|
|
|
*
|
|
|
|
|
* This produces a 1:1 mapping of BODY s-expressions to ES statements,
|
|
|
|
|
* recursively. The heavy lifting is done by `#_sexpToEs'.
|
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* @param {Array} body s-expressions representing function body
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
|
|
|
|
* @return {string} compiled BODY
|
|
|
|
|
*/
|
birth,prebirth: Non-recursive lexing to prevent stack exhaustion
This needs to run in the browser too, where we have no control over stack
limits.
* build-aux/bootstrap/birth.scm
(lex): Non-recursive strategy (loop with mutable list).
(make-token): Update doc. Produce list of token, new string, and
position. Don't recurse.
(body->es): Add `ret' param. Only produce `return' statement if new param
is set.
(cdfn): Use it.
(fnmap)
[js:while, js:break]: Add forms.
[lambda, let, case]: Use new `body->es' `ret' param.
[let*]: Define JS variables in output using `let' instead of `const' to
permit mutating with new `set!' form. Use new `body->es' `ret' param.
[set!]: Add form.
(prebirth->ecmascript): Adjust libprebirth path to be relative to self.
* build-aux/bootstrap/libprebirth.js
($$append$b$): Add `append!' procedure.
($$js$regexp, $$js$match, $$js$replace): Move a few lines up.
(fs): Provide stub if `require' is not defined.
* build-aux/bootstrap/prebirth.js
(_lex): Non-recursive strategy (loop with array-appending).
(_token): No more mutual recursion with `#_lex'. Return new string
and position.
(_bodyToEs): Add `ret' param. Only produce `return' statement if new
param is set.
(fnmap) [js:while, js:break]: Add forms.
[let*]: Define JS variables in output using `let' instead of `const' to
permit mutating with new `set!' form. Use new `body->es' `ret' param.
[set!]: Add form.
2017-10-09 00:59:11 -04:00
|
|
|
|
_bodyToEs( body, ret = true )
|
2017-08-21 02:20:03 -04:00
|
|
|
|
{
|
|
|
|
|
// the body must be an array of expressions (this should always be
|
|
|
|
|
// the case unless we have a bug in the compiler)
|
|
|
|
|
if ( !Array.isArray( body ) ) {
|
|
|
|
|
throw Error( "body must be an Array" );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// process each s-expression in BODY
|
|
|
|
|
const js = body.map( this._sexpToEs.bind( this ) );
|
|
|
|
|
|
|
|
|
|
// the result (that is, an array of compiled s-expressions) is
|
|
|
|
|
// joined semicolon-delimited, with a `return' statement preceding
|
|
|
|
|
// the final expression
|
2017-08-29 23:46:00 -04:00
|
|
|
|
return js.map( ( s, i ) =>
|
2017-08-21 02:20:03 -04:00
|
|
|
|
{
|
birth,prebirth: Non-recursive lexing to prevent stack exhaustion
This needs to run in the browser too, where we have no control over stack
limits.
* build-aux/bootstrap/birth.scm
(lex): Non-recursive strategy (loop with mutable list).
(make-token): Update doc. Produce list of token, new string, and
position. Don't recurse.
(body->es): Add `ret' param. Only produce `return' statement if new param
is set.
(cdfn): Use it.
(fnmap)
[js:while, js:break]: Add forms.
[lambda, let, case]: Use new `body->es' `ret' param.
[let*]: Define JS variables in output using `let' instead of `const' to
permit mutating with new `set!' form. Use new `body->es' `ret' param.
[set!]: Add form.
(prebirth->ecmascript): Adjust libprebirth path to be relative to self.
* build-aux/bootstrap/libprebirth.js
($$append$b$): Add `append!' procedure.
($$js$regexp, $$js$match, $$js$replace): Move a few lines up.
(fs): Provide stub if `require' is not defined.
* build-aux/bootstrap/prebirth.js
(_lex): Non-recursive strategy (loop with array-appending).
(_token): No more mutual recursion with `#_lex'. Return new string
and position.
(_bodyToEs): Add `ret' param. Only produce `return' statement if new
param is set.
(fnmap) [js:while, js:break]: Add forms.
[let*]: Define JS variables in output using `let' instead of `const' to
permit mutating with new `set!' form. Use new `body->es' `ret' param.
[set!]: Add form.
2017-10-09 00:59:11 -04:00
|
|
|
|
const retstmt = ( ret && i === ( js.length - 1 ) ) ? "return " : "";
|
|
|
|
|
return ` ${retstmt}${s};`;
|
2017-08-29 23:46:00 -04:00
|
|
|
|
} ).join( '\n' );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert s-expression or scalar into ECMAScript
|
|
|
|
|
*
|
|
|
|
|
* T may be either an array of tokens or a primitive token (e.g. string,
|
|
|
|
|
* symbol). This method is applied recursively to T as needed if T is
|
|
|
|
|
* an array.
|
|
|
|
|
*
|
|
|
|
|
* @param {Array|Object} t tokens representing s-expressions/scalars
|
|
|
|
|
*
|
|
|
|
|
* @return {string} compiled s-expression/scalar
|
|
|
|
|
*/
|
|
|
|
|
_sexpToEs( t )
|
|
|
|
|
{
|
|
|
|
|
// just output symbols as identifiers as-is for now
|
|
|
|
|
if ( !Array.isArray( t ) ) {
|
|
|
|
|
switch ( t.type )
|
|
|
|
|
{
|
|
|
|
|
// strings are output as-is (note that we don't escape
|
|
|
|
|
// double quotes, because the method of escaping them is the
|
|
|
|
|
// same in Scheme as it is in ECMAScript---a backslash)
|
|
|
|
|
case 'string':
|
|
|
|
|
return `"${t.value}"`;
|
|
|
|
|
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// symbols have the same concerns as function definitions: the
|
2017-08-21 02:20:03 -04:00
|
|
|
|
// identifiers generated need to be ES-friendly
|
|
|
|
|
case 'symbol':
|
|
|
|
|
return this._idFromName( t.value );
|
|
|
|
|
|
|
|
|
|
default:
|
2017-08-28 00:45:34 -04:00
|
|
|
|
throw Error( `Cannot compile unknown token \`${t.type}'` );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-02 01:25:31 -04:00
|
|
|
|
if ( t[ 0 ].value === 'define' ) {
|
|
|
|
|
return this._cdfn( t );
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// simple function application (fn ...args)
|
|
|
|
|
const [ { value: fn }, ...args ] = t;
|
2017-08-21 02:20:03 -04:00
|
|
|
|
|
2017-08-29 01:42:15 -04:00
|
|
|
|
const mapentry = this._fnmap[ fn ];
|
|
|
|
|
|
|
|
|
|
// if the fnmap contains a function entry, then it will handle the
|
|
|
|
|
// remaining processing
|
|
|
|
|
if ( mapentry && ( typeof mapentry === 'function' ) ) {
|
|
|
|
|
return mapentry(
|
|
|
|
|
args,
|
|
|
|
|
this._sexpToEs.bind( this ),
|
|
|
|
|
this._bodyToEs.bind( this )
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-21 02:20:03 -04:00
|
|
|
|
// convert all remaining symbols (after the symbol representing the
|
|
|
|
|
// function application) into arguments by parsing their sexps or
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// scalar values
|
2017-08-29 01:42:15 -04:00
|
|
|
|
const idfn = mapentry || this._idFromName( fn, true );
|
2017-08-28 00:45:34 -04:00
|
|
|
|
const argstr = args.map( arg => this._sexpToEs( arg ) ).join( ", " );
|
2017-08-21 02:20:03 -04:00
|
|
|
|
|
2017-08-28 00:45:34 -04:00
|
|
|
|
// final function application
|
2017-08-21 02:20:03 -04:00
|
|
|
|
return `${idfn}(${argstr})`;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2017-08-29 01:42:15 -04:00
|
|
|
|
/**
|
2017-09-21 13:37:16 -04:00
|
|
|
|
* Function/procedure aliases and special forms
|
2017-08-29 01:42:15 -04:00
|
|
|
|
*
|
2017-08-31 00:40:46 -04:00
|
|
|
|
* And here we have what is probably the most grotesque part of this
|
|
|
|
|
* file. Saved the best for last.
|
|
|
|
|
*
|
2017-08-29 01:42:15 -04:00
|
|
|
|
* This map allows for a steady transition---items can be removed as they
|
|
|
|
|
* are written in Prebirth Lisp. This should give us a sane (but still
|
|
|
|
|
* simple) environment with which we can start to self-host.
|
|
|
|
|
*
|
|
|
|
|
* String values are simple function aliases. Function values take over the
|
2017-08-31 00:40:46 -04:00
|
|
|
|
* compilation of that function and allow for defining special forms (in
|
|
|
|
|
* place of macro support). The first argument to the function is the list
|
|
|
|
|
* of raw arguments (not yet compiled); the second argument is
|
|
|
|
|
* `Compiler#_sexpToEs'; and the third is `Compiler#_bodyToEs'.
|
2017-08-29 01:42:15 -04:00
|
|
|
|
*
|
2017-08-31 00:40:46 -04:00
|
|
|
|
* These are by no means meant to be solid implementations; notable
|
|
|
|
|
* deficiencies are documented, but don't expect this to work properly in
|
2017-09-21 13:37:16 -04:00
|
|
|
|
* every case. They will be replaced with proper R7RS implementations in the
|
2017-08-31 00:40:46 -04:00
|
|
|
|
* future (after Birth).
|
2017-08-29 01:42:15 -04:00
|
|
|
|
*
|
|
|
|
|
* @type {Object}
|
|
|
|
|
*/
|
|
|
|
|
const fnmap = {
    // plain alias: applications of `js:console' compile to `console.log'
    'js:console': 'console.log',

    // yes, there are more important things to do until we get to the point
    // where it's worth implementing proper tail calls
    //
    // the loop is wrapped in a function whose sole parameter is the break
    // flag (initially false); the flag is checked after each iteration of
    // the body, which is compiled without a `return' statement
    'js:while': ( [ pred, ...body ], stoes, btoes ) =>
        "(function(__whilebrk){" +
            `while (${stoes(pred)}){\n` +
                `${btoes(body, false)} if (__whilebrk) break;\n` +
            "}\n" +
        "})(false)",

    // sets the flag that is checked by the `js:while' output above
    'js:break': () => '__whilebrk=true',

    // fortunately ES6+ has native symbol support :)
    // we don't (yet?) need list quoting in Prebirth
    //
    // special forms receive the _list_ of raw arguments, so the quoted
    // datum has to be taken from the head of that list; a datum that is
    // itself a list (an array) is rejected
    'quote': ( [ x ], stoes ) =>
    {
        if ( Array.isArray( x ) ) {
            throw SyntaxError(
                "quoting lists is not yet supported; sorry!"
            );
        }

        return `Symbol.for('${stoes(x)}')`;
    },

    // anonymous function; the final body expression is returned
    'lambda': ( [ args, ...body ], stoes, btoes ) =>
        "function(" + args.map( stoes ).join( ", " ) + "){\n" +
            btoes( body ) +
        "}",

    // simple if statement with optional else, wrapped in a self-executing
    // function to simplify code generation (e.g. returning an if)
    'if': ( [ pred, t, f ], stoes ) =>
        "(function(){" +
            `if (_truep(${stoes(pred)})){return ${stoes(t)};}` +
            ( ( f === undefined ) ? '' : `else{return ${stoes(f)};}` ) +
        "})()",

    // and short-circuits, so we need to implement it as a special form
    // rather than an alias
    'and': ( args, stoes ) =>
        "(function(__and){\n" +
            args.map( expr =>
                `__and = ${stoes(expr)}; ` +
                `if (!_truep(__and)) return false;\n`
            ).join( '' ) +
        `return __and;})()`,

    // or short-circuits, so we need to implement it as a special form
    // rather than an alias
    'or': ( args, stoes ) =>
        "(function(__or){\n" +
            args.map( expr =>
                `__or = ${stoes(expr)}; ` +
                `if (_truep(__or)) return __or;\n`
            ).join( '' ) +
        "return false;})()",

    // (let* ((binding val) ...) ...body), compiled as a self-executing
    // function which allows us to easily represent the return value of the
    // entire expression while maintaining local scope; bindings are
    // emitted in order as `let' declarations, so each may reference the
    // ones before it
    'let*': ( [ bindings, ...body ], stoes, btoes ) =>
        "(function(){\n" +
            bindings
                .map( ([ x, val ]) => ` let ${stoes(x)} = ${stoes(val)};\n` )
                .join( '' ) +
            btoes( body ) + "\n" +
        " })()",

    // similar to the above, but variables cannot reference one-another:
    // the bindings become the parameters of the function and the values
    // become the arguments that it is applied to
    'let': ( [ bindings, ...body ], stoes, btoes ) =>
        "(function(" +
            bindings.map( ([ x ]) => stoes( x ) ).join( ", " ) +
        "){\n" +
            btoes( body ) + "\n" +
        "})(" +
            bindings.map( ([ , val ]) => stoes( val ) ).join( ", " ) +
        ")",

    // and here I thought Prebirth Lisp would be simple...but having `case'
    // support really keeps things much more tidy, so here we are (note that
    // it doesn't support the arrow form, nor does it support expressions as
    // data)
    'case': ( [ key, ...clauses ], stoes, btoes ) =>
        "(function(){" +
            `const _key=${stoes(key)};\n` +
            "switch (_key){\n" +
            clauses.map( ([ data, ...exprs ]) =>
                // warning: doesn't support expressions as data!
                ( ( data.lexeme === "else" )
                    ? "default:\n"
                    : data.map(
                        datum => `case ${stoes(datum)}:\n`
                    ).join( '' )
                ) +
                btoes( exprs ) + "\n"
            ).join( '' ) +
            "}" +
        "})()",

    // basic mutator (variable assignment)
    'set!': ( [ varid, val ], stoes ) =>
        `${stoes(varid)} = ${stoes(val)}`,
};
|
|
|
|
|
|
|
|
|
|
|
2017-10-22 01:17:16 -04:00
|
|
|
|
/**
|
|
|
|
|
* Facade to make compilation as easy as `Prebirth#compile'
|
|
|
|
|
*/
|
|
|
|
|
class Prebirth
{
    /**
     * Turn Prebirth Lisp source into a standalone ECMAScript program
     *
     * SRC is parsed and then compiled using the special forms and aliases
     * of `fnmap'.  LIB (which should be the contents of libprebirth.js) is
     * placed ahead of the compiled code, separated from it by a blank
     * line, and the pair is enclosed in a self-executing function so that
     * nothing leaks out of its scope.
     *
     * @param {string} src Prebirth Lisp source to compile
     * @param {string} lib libprebirth.js
     *
     * @return {string} compiler output with trailing newline
     */
    compile( src, lib )
    {
        const parser   = new Parser();
        const compiler = new Compiler( fnmap );

        const ast      = parser.parseLisp( src );
        const compiled = compiler.compile( ast );

        return `(function(){${lib}\n\n${compiled}})();\n`;
    }
}
|
|
|
|
|
|
|
|
|
|
|
2017-08-21 02:20:03 -04:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Prebirth was originally intended to be run via the command line using
|
|
|
|
|
* Node.js. But it doesn't have to be. If you want, feel free to run it in
|
2017-10-22 01:17:16 -04:00
|
|
|
|
* your web browser; you'll just have to instantiate your own Prebirth.
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*/
|
|
|
|
|
( () =>
{
    // without a `process' object (e.g. in a web browser) there is nothing
    // to do automatically; the user instantiates Prebirth themselves
    const hasprocess = ( typeof process !== 'undefined' );

    if ( !hasprocess ) {
        return;
    }

    const { readFileSync } = require( 'fs' );
    const slurp = path => readFileSync( path ).toString();

    // the Lisp source is read from standard input, and the support library
    // from alongside this script
    const lisp  = slurp( '/dev/stdin' );
    const libjs = slurp( `${__dirname}/libprebirth.js` );

    // compile and output to stdout
    const output = ( new Prebirth() ).compile( lisp, libjs );

    process.stdout.write( output );
} )();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now that we have output, the next step is the hard part: rewriting this
|
|
|
|
|
* file in Prebirth Lisp. As I mentioned, this process is called
|
|
|
|
|
* "Birth". It's at this point that we have to decide on basic
|
|
|
|
|
* abstractions---we are starting from scratch. The initial implementation
|
|
|
|
|
* is therefore unlikely to be as concise and elegant as Prebirth
|
|
|
|
|
* itself---it will be refactored.
|
|
|
|
|
*
|
|
|
|
|
* Here is an example Hello, World!:
|
|
|
|
|
*
|
2017-08-28 00:45:34 -04:00
|
|
|
|
* (define (hello x)
|
|
|
|
|
* (js:console "Hello," x "!"))
|
2017-08-21 02:20:03 -04:00
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* ¹ This term should invoke visuals of an abstract being entering existence
|
|
|
|
|
* in some strange nonlinear-time² kind of way. If you thought of
|
|
|
|
|
* something less pleasant, well, I'm sorry you went through that.
|
|
|
|
|
*
|
|
|
|
|
* ² Because we're dealing with nonlinear time!¹ This would be some bizarre
|
|
|
|
|
* recursive footnote crap if it weren't for that.²
|
|
|
|
|
*/
|