diff --git a/README.md b/README.md index 0ca8302..9dd6749 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,12 @@ of any kind. --> +**This project is very incomplete, has stalled, and is unlikely to be +completed.** + Ulambda Scheme (also abbreviated "Y Scheme") is a self-hosting Scheme. The primary compiler target is JavaScript. -There's a lot more to be said, but that story will evolve over time. - ``` \\ // \\\ \\ // \\\ @@ -23,3 +24,26 @@ There's a lot more to be said, but that story will evolve over time. \\\ /// \\\ /// ``` + +## Bootstrapping +Ulambda is designed to be bootstrappable by anyone in the environment in +which it is designed to run: the web browser. Alternatively, it can be +bootstrapped using Node.js. + +### Via Web Browser +Simply visit `bootstrap/bootstrap.html` in a modern browser and follow the +provided instructions. + +You can also view it at . + +### Via Node.js +``` +$ ./autogen.sh && ./configure && make +``` + + +## Current State +As mentioned above, this project has stalled. The current state of +development is summarized in the Viability Test Suite in +[`bootstrap/rebirth/test.scm`](./bootstrap/rebirth/test.scm). + diff --git a/bootstrap/Bootstrap.js b/bootstrap/Bootstrap.js index faf2194..6d3fdda 100644 --- a/bootstrap/Bootstrap.js +++ b/bootstrap/Bootstrap.js @@ -116,29 +116,32 @@ class Bootstrap */ _birth() { - return this._loadPaths( [ - [ "birth.scm", "Birth" ], - [ "libprebirth.js", "libprebirth" ], - ] ) - .then( ( [ scm, lib ] ) => + return this._loadPaths( + [ + [ "birth.scm", "Birth" ], + [ "libprebirth.js", "libprebirth" ], + ].concat( Bootstrap._rebirthDeps ) + ) + .then( files => { this._strout( 'prebirthDesc' ); - const preout = this._prebirth.compile( scm, lib ); + const preout = this._prebirth.compile( + files[ "birth.scm" ], + files[ "libprebirth.js" ], + ); - return [ preout, scm, lib ]; + return [ preout, files ]; } ) - .then( ( [ birthjs, scm, lib ] ) => + .then( ( [ birthjs, files ] ) => { this._strout( 'prebirthComplete', birthjs.length ); this._strout( 'birthCompiled' ); this._strout( 'birthSelfCompiling' ); - const birthf = this._makeCompiler( birthjs, { - "libprebirth.js": lib - } ); + const birthf = this._makeCompiler( birthjs, files ); - const [ e, birthout ] = birthf( scm ); + const [ e, birthout ] = birthf( files[ "birth.scm" ] ); if ( e ) { throw e; @@ -236,17 +239,10 @@ class Bootstrap */ _rebirth( birth ) { - return this._loadPaths( [ - [ "rebirth.scm", "Rebirth" ], - [ "rebirth/es.scm" ], - [ "rebirth/relibprebirth.scm" ], - [ "rebirth/macro.scm" ], - ] ).then( ( [ scm, es, relibprebirth, macro ] ) => - this._compileRebirth( birth, scm, { - "rebirth/es.scm": es, - "rebirth/relibprebirth.scm": relibprebirth, - "rebirth/macro.scm": macro, - } ) + return this._loadPaths( + [ [ "rebirth.scm", "Rebirth" ] ].concat( Bootstrap._rebirthDeps ) + ).then( files => + this._compileRebirth( birth, files[ "rebirth.scm" ], files ) ); } @@ -332,7 +328,7 @@ class Bootstrap return this._loadPaths( [ [ "rebirth/test.scm", "Rebirth Viability Test" ], ] ) - .then( ( [ scm ] ) => + .then( ( { "rebirth/test.scm": scm } ) => { this._strout( 'rebirthTestCompiling' ); @@ -372,7 +368,7 @@ class Bootstrap * * @param {Array} paths file paths * - * @return {Promise} resolved with file contents or failure + * @return {Promise} resolved with map of paths to associated data */ _loadPaths( paths ) { @@ -380,7 +376,10 @@ class Bootstrap paths.map( ( [ path, desc ] ) => this._loadPath( path, desc ) ) - ); + ).then( files => files.reduce( + ( files, [ path, data ] ) => ( ( files[ path ] = data ), files ), + {} + ) ); } @@ -396,7 +395,7 @@ class Bootstrap * @param {string} path file path * @param {string=} desc file description for logging * - * @return {Promise} promise of string file contents + * @return {Promise} promise of array including path and string data */ _loadPath( path, desc = "" ) { @@ -407,7 +406,7 @@ class Bootstrap { this._strout( 'loadedf', path, data.length ); - return data; + return [ path, data ]; } ); } @@ -505,6 +504,24 @@ class Bootstrap } +/** + * Rebirth file dependencies + * + * These are all the files (include)'d by Rebirth. This is unfortunately + * necessary because of how the filesystem abstraction works + * cross-environment, otherwise the browser would have to be blocking for + * synchronous I/O (which we do not want, as it hangs the browser). + * + * @type {Array>} + */ +Bootstrap._rebirthDeps = [ + [ "rebirth/es.scm" ], + [ "rebirth/relibprebirth.scm" ], + [ "rebirth/macro.scm" ], + [ "rebirth/compiler.scm" ], +]; + + /** * Output strings in an easily accessible map * diff --git a/bootstrap/birth.scm b/bootstrap/birth.scm index 33db85e..c95d979 100644 --- a/bootstrap/birth.scm +++ b/bootstrap/birth.scm @@ -30,13 +30,14 @@ ;;; ;;; This is the Prebirth Lisp implementation of the JavaScript Prebirth ;;; compiler, found in `prebirth.js'---that compiler can be used to compile -;;; this compiler, which can then be used to compile itself, completing the -;;; bootstrapping process. This process is termed "Birth", and the process -;;; is successful if the output of Birth compiling itself is byte-for-byte -;;; identical to the output of compiling Birth with Prebirth. +;;; this compiler, which can then be used to compile itself. This process +;;; is termed "Birth", and the process is successful if the output of Birth +;;; compiling itself is byte-for-byte identical to the output of compiling +;;; Birth with Prebirth. ;;; ;;; This is largely a 1:1 translation of `prebirth.js'. See that file for -;;; terminology. +;;; terminology. Minor differences do exist for convenience in +;;; transitioning to the next phase (such as `include' and `cond-expand'). ;;; ;;; Note that we're dealing with a small subset of Scheme here, so certain ;;; things might be done differently given a proper implementation. @@ -44,7 +45,7 @@ ;;; The next step after this is ``Rebirth'': both Prebirth and Birth require ;;; the manually written `libprebirth.js' to function. Rebirth will remove ;;; that completely, which bootstraps the runtime in its entirety. At that -;;; point, all development will be exclusively in Scheme and we can get on +;;; point, all development will be exclusively in a Lisp and we can get on ;;; with Ulambda Scheme. ;;; @@ -278,7 +279,7 @@ ;; Compile Birth Lisp AST into ECMAScript. ;; ;; The AST can be generated with `parse-lisp'. -(define (birth->ecmascript ast) +(define (birth->ecmascript ast libprebirth) ;; Generate ECMAScript-friendly name from the given id. ;; ;; A subset of special characters that are acceptable in Scheme are @@ -400,6 +401,10 @@ ""))) args))) + (("include") (birth->ecmascript + (parse-lisp + (es:file->string (token-value (car args)))))) + ;; yes, there are more important things to do until we get to the ;; point where it's worth implementing proper tail calls (("es:while") @@ -564,16 +569,26 @@ (cdr t) t))) + ;; compile AST + (join "\n\n" (map sexp->es ast))) + + +;; Compile Birth Lisp AST into an ECMAScript program. +;; +;; This compiles the AST into ECMAScript using `birth->ecmascript' and +;; then wraps it in a self-executing function to limit scope and create the +;; toplevel environment. +(define (birth->ecmascript-prog ast env-es) ;; output libprebirth and compiled output, wrapped in a self-executing ;; function to limit scope (string-append "(function(){" (es:file->string "libprebirth.js") "\n\n" - (join "\n\n" (map sexp->es ast)) + (birth->ecmascript ast) "})();")) ;; at this point, this program can parse itself and output a CST (sans ;; whitespace) -(es:console (birth->ecmascript +(es:console (birth->ecmascript-prog (parse-lisp (es:file->string "/dev/stdin")))) diff --git a/bootstrap/rebirth.scm b/bootstrap/rebirth.scm index 30fcbdd..0dc247f 100644 --- a/bootstrap/rebirth.scm +++ b/bootstrap/rebirth.scm @@ -38,7 +38,7 @@ ;;; completes the raw, self-hosting bootstrapping process. ;;; ;;; To continue with the creepy birthing puns, you can consider libprebirth -;; to be the umbilical cord. After Birth, it's still attached---here we +;;; to be the umbilical cord. After Birth, it's still attached---here we ;;; cut it. ;;; ;;; Of course, bootstrapping can't end there: we need a fully functioning @@ -49,12 +49,13 @@ ;;; Note that we're dealing with a small subset of Scheme here, so certain ;;; things might be done differently given a proper implementation. ;;; -;;; This is an exact copy of `birth.scm', modified to introduce additional -;;; features. This is important, since Birth is a 1:1 translation of the -;;; Prebirth compiler and needs to stay that way. This fork allows us to -;;; vary as much as we want from the initial implementation. See the commit -;;; history for this file for more information as to how it evolved (the -;;; first commit is the direct copy before actual code changes). +;;; This started as an exact copy of `birth.scm', modified to introduce +;;; additional features. This is important, since Birth is mostly a 1:1 +;;; translation of the Prebirth compiler and needs to stay that way. This +;;; fork allows us to vary as much as we want from the initial +;;; implementation. See the commit history for this file for more +;;; information as to how it evolved (the first commit is the direct copy +;;; before actual code changes). ;;; ;;; This file follows a narrative (from Birth to Reⁿbirth), but it's more of ;;; a choose-your-adventure-book-style narrative: order of certain @@ -68,805 +69,20 @@ -;; So, to begin, goto STEP 0! ----------------, -;; V -(cond-expand - (include - (include "rebirth/es.scm") ;; STEP 2 (start at STEP 0) <--, - (include "rebirth/relibprebirth.scm") ;; STEP 0 (start here) / - (include "rebirth/macro.scm"))) ;; STEP 1 (then go to STEP 2) -` +;; So, to begin, goto STEP 0! -------------, +;; V +(include "rebirth/es.scm") ;; STEP 2 (start at STEP 0) <--, +(include "rebirth/relibprebirth.scm") ;; STEP 0 (start here) / +(include "rebirth/macro.scm") ;; STEP 1 (then go to STEP 2) -` - -;; pair selection -(define (cadr xs) - (car (cdr xs))) -(define (caadr xs) - (car (car (cdr xs)))) -(define (caddr xs) - (car (cdr (cdr xs)))) -(define (cadddr xs) - (car (cdr (cdr (cdr xs))))) -(define (caddddr xs) - (car (cdr (cdr (cdr (cdr xs)))))) -(define (cddr xs) - (cdr (cdr xs))) - -(define (not x) - (if x #f #t)) - -;; for convenience -(define (es:match-regexp re s) - (es:match (es:regexp re) s)) - - - -;; Convert source input into a string of tokens. +;; The runtime is then initialized and we can proceed with defining the +;; compiler. ;; -;; This is the lexer. Whitespace is ignored. The grammar consists of -;; simple s-expressions. -;; -;; Tokens are produced with `make-token'. The source SRC will be -;; left-truncated as input is processed. POS exists for producing metadata -;; for error reporting---it has no impact on parsing. -;; -;; This implementation was originally recursive and immutable, but the stack -;; was being exhausted, so it was refactored into an inferior -;; implementation. Note the use of `es:while' and `es:break'---these are -;; quick fixes to the problem of stack exhaustion in browsers (where we have -;; no control over the stack limit); proper tail call support will come -;; later when we have a decent architecture in place. -;; -;; The result is a list of tokens. See `token' for the format. -(define (lex src pos) - (let ((toks (list))) - (es:while #t ; browser stack workaround - (let* ((ws (or (es:match-regexp "^\\s+" - src) - (list ""))) - (ws-len (string-length (car ws))) - (trim (substring src ws-len)) ; ignore whitespace, if any - (newpos (+ pos ws-len))) ; adj pos to account for removed ws - - (if (string=? "" trim) - (es:break) ; EOF and we're done - - ;; normally we'd use `string-ref' here, but then we'd have to - ;; implement Scheme characters, so let's keep this simple and keep - ;; with strings - (let* ((ch (substring trim 0 1)) - (t (case ch - ;; comments extend until the end of the line - ((";") (let ((eol (es:match-regexp "^(.*?)(\\n|$)" trim))) - (make-token "comment" (cadr eol) trim newpos))) - - ;; left and right parenthesis are handled in the same - ;; manner: they produce distinct tokens with - ;; single-character lexemes - (("(") (make-token "open" ch trim newpos)) - ((")") (make-token "close" ch trim newpos)) - - ;; strings are delimited by opening and closing ASCII - ;; double quotes, which can be escaped with a - ;; backslash - (("\"") (let ((str (es:match-regexp - "^\"(|(?:.|\\\n)*?[^\\\\])\"" - trim))) - (or str (parse-error - src pos - "missing closing string delimiter")) - ;; a string token consists of the entire - ;; string including quotes as its lexeme, - ;; but its value will be the value of the - ;; string without quotes due to the `str' - ;; match group (see `token') - (make-token "string" str trim newpos))) - - (else - ;; anything else is considered a symbol up until - ;; whitespace or any of the aforementioned - ;; delimiters - (let ((symbol (es:match-regexp "^[^\\s()\"]+" - trim))) - (make-token "symbol" symbol trim newpos)))))) - - ;; yikes---see notes in docblock with regards to why - ;; we're using mutators here - (set! toks (append toks (list (car t)))) - (set! src (cadr t)) - (set! pos (caddr t)))))) - toks)) - - - -;; Throw an error with a window of surrounding source code. -;; -;; The "window" is simply ten characters to the left and right of the -;; first character of the source input SRC that resulted in the error. -;; It's a little more than useless. -(define (parse-error src pos msg) - (let ((window (substring src (- pos 10) (+ pos 10)))) - (error (string-append msg " (pos " pos "): " window) - src))) - - - -;; Produce a token, left-truncate src, and update pos. -;; -;; Unlike the JS Prebirth implementation which uses a key/value object, -;; we're just using a simple list. -;; -;; The expected arguments are: the token type TYPE, the match group or -;; string MATCH, left-truncated source code SRC, and the position POS -;; relative to the original source code. -(define (make-token type match src pos) - (let* ((parts (if (list? match) match (list match match))) - (lexeme (car parts)) - ;; the value is the first group of the match (indicating what we - ;; are actually interested in), and the lexeme is the full match, - ;; which might include, for example, string delimiters - (value (or (and (pair? (cdr parts)) - (cadr parts)) - lexeme)) - (len (string-length lexeme))) - - ;; produce token and recurse on `lex', left-truncating the source - ;; string to discard what we have already processed - (list (list (quote token) type lexeme value pos) - (substring src len) - (+ pos len)))) - - -;; various accessor procedures for token lists (we're Prebirth Lisp here, -;; so no record support or anything fancy!) -(define (token? t) (and (pair? t) (symbol=? (quote token) (car t)))) -(define (token-type t) (cadr t)) -(define (token-lexeme t) (caddr t)) -(define (token-value t) (cadddr t)) -(define (token-pos t) (caddddr t)) - - - -;; Produce an AST from the given string SRC of sexps -;; -;; This is essentially the CST with whitespace removed. It first invokes -;; the lexer to produce a token string from the input sexps SRC. From this, -;; it verifies only proper nesting (that SRC does not close sexps too early -;; and that EOF isn't reached before all sexps are closed) and produces an -;; AST that is an isomorphism of the original sexps. -(define (parse-lisp src) - ;; accessor methods to make you and me less confused - (define (ast-depth ast) (car ast)) - (define (ast-tree ast) (cadr ast)) - (define (ast-stack ast) (caddr ast)) - - - ;; perform a leftmost reduction on the token string - (define (toks->ast toks) - (fold - (lambda (token result) - (let ((depth (ast-depth result)) - (xs (ast-tree result)) - (stack (ast-stack result)) - (type (token-type token)) - (pos (token-pos token))) - - ;; there are very few token types to deal with (again, this is a - ;; very simple bootstrap lisp) - (case type - ;; ignore comments - (("comment") result) - - ;; when beginning a new expression, place the expression - ;; currently being processed onto a stack, allocate a new list, - ;; and we'll continue processing into that new list - (("open") (list (+ depth 1) - (list) - (cons xs stack))) - - ;; once we reach the end of the expression, pop the parent off of - ;; the stack and append the new list to it - (("close") (if (zero? depth) - (parse-error src pos - "unexpected closing parenthesis") - (list (- depth 1) - (append (car stack) (list xs)) - (cdr stack)))) - - ;; strings and symbols (we cheat and just consider everything, - ;; including numbers and such, to be symbols) are just copied - ;; in place - (("string" "symbol") (list depth - (append xs (list token)) - stack)) - - ;; we should never encounter anything else unless there's a bug - ;; in the tokenizer or we forget a token type above - (else (parse-error - src pos (string-append - "unexpected token `" type "'")))))) - (list 0 (list) (list)) ; initial 0 depth; empty tree; expr stack - toks)) - - - ;; lex the input SRC and pass it to `toks->ast' to generate the AST; - ;; if the depth is non-zero after we're done, then we're unbalanced. - (let* ((toks (lex src 0)) - (ast (toks->ast toks))) - (if (zero? (ast-depth ast)) - (ast-tree ast) - ;; if we terminate at a non-zero depth, that means there ar still - ;; open sexps - (error (string-append - "unexpected end of input at depth " - (ast-depth ast)))))) - - -;; Generate ECMAScript-friendly parameter name for the given token T. -;; -;; The generated name will not have any environment references and is -;; suitable only for the immediate scope. -(define (tparam->es t) - (tname->id (token-value t))) - - -;; Predicate determining whether NAME should be output verbatim as an -;; ECMAScript identifier. -;; -;; This only returns #t for numbers. -(define (tname-verbatim? name) - (es:match (es:regexp "^-?(\\d+(\\.\\d+)?|\\.\\d+)$") name)) - - -;; Generate ECMAScript to reference the variable associated with the token T -;; in the current environment. -;; -;; The "current" environment is relative to whatever context into which the -;; caller places this generated code---that is, the environment is -;; resolved by the runtime environment. -;; -;; If macro support is not yet compiled in, then this returns the identifier -;; name _without_ the environment, just as Birth. -(define (env-ref t) - (let ((name (if (token? t) - (token-value t) - t))) - (if (tname-verbatim? name) - name - (cond-expand - (cdfn-macro - (string-append "_env." (tname->id name))) - (else - (tname->id name)))))) - - -;; Generate ECMAScript-friendly name from the given id. -;; -;; A subset of special characters that are acceptable in Scheme are -;; converted in an identifiable manner; others are simply converted to `$' -;; in a catch-all and therefore could result in conflicts and cannot be -;; reliably distinguished from one-another. Remember: this is temporary -;; code. -(define (tname->id name) - (if (tname-verbatim? name) - name - (string-append - "$$" (es:replace (es:regexp "[^a-zA-Z0-9_]" "g") - (lambda (c) - (case c - (("-") "$_$") - (("?") "$7$") - (("@") "$a$") - (("!") "$b$") - ((">") "$g$") - (("#") "$h$") - (("*") "$k$") - (("<") "$l$") - (("&") "$n$") - (("%") "$o$") - (("+") "$p$") - (("=") "$q$") - (("^") "$v$") - (("/") "$w$") - (("$") "$$") - (else "$"))) - name)))) - -;; Join a list of strings XS on a delimiter DELIM -(define (join delim xs) - (if (pair? xs) - (fold (lambda (x str) - (string-append str delim x)) - (car xs) - (cdr xs)) - "")) - - -;; Compile parameter list. -;; -;; This takes the value of the symbol and outputs it (formatted), delimited -;; by commas. -;; -;; Since we do not support actual pairs (yet), the "." syntax that normally -;; denotes the cdr is retained and presents itself here. The form "(arg1, -;; arg2 . rest)" creates a list `rest' containing all remaining arguments -;; after that point. Conveniently, ECMAScript Harmony supports this -;; natively with the "..." syntax. -(define (params->es params) - (define (%param-conv params) - (let* ((param (car params)) - (name (token-value param)) - (id (tname->id name)) - (rest (cdr params))) - (if (string=? name ".") - (list (string-append - "..." (car (%param-conv rest)))) - (if (pair? rest) - (cons id (%param-conv rest)) - (list id))))) - - (if (pair? params) - (join "," (%param-conv params)) - "")) - - -;; Compile body s-expressions into ECMAScript -;; -;; This produces a 1:1 mapping of body XS s-expressions to ES statements, -;; recursively. The heavy lifting is done by `sexp->es'. -(define (body->es xs ret) - ;; recursively process body XS until we're out of pairs - (if (not (pair? xs)) - "" - (let* ((x (car xs)) - (rest (cdr xs)) - (more? (or (not ret) (pair? rest)))) - ;; the result is a semicolon-delimited string of statements, with - ;; the final statement prefixed with `return' unless (not ret) - (string-append - " " - (if more? "" "return ") ; prefix with `return' if last body exp - (sexp->es x) ";" ; process current body expression - (if (pair? rest) "\n" "") - (body->es rest ret))))) ; recurse - - -;; Place parameters PARAMS into the current environment. -;; -;; This is ugly so that Rebirth can support multiple implementations at -;; once---those with environment support and those without. -(define (env-params params) - (join "\n" - (map (lambda (param) - (if (string=? (token-value param) ".") - "" ; next param is the cdr - (string-append (env-ref param) " = " - (tparam->es param) ";"))) - params))) - - -;; Compile variable or procedure definition into ES -;; -;; This performs a crude check to determine whether a procedure definition -;; was supplied: if the cadr of the given token T is itself token, then it -;; is considered to be a variable. -(define (cdfn t) - (if (token? (cadr t)) - (cdfn-var t) ;; (define foo ...) - (cdfn-proc t #f))) ;; (define (foo ...) ...) - - -;; Compile variable definition into ES -;; -;; This compiles the token T into a simple let-assignment. -(define (cdfn-var t) - (let* ((dfn (cadr t)) - (id (tname->id (token-value dfn))) - (value (sexp->es (caddr t)))) - (string-append "let " id "=" value ";_env." id " = " id))) - - -;; Compile procedure definition into an ES function definition -;; -;; This will fail if the given token is not a `define'. -;; -;; The output does something peculiar: it not only assigns to the active -;; scope, but also to the root of the environment, which has the effect of -;; making the procedure available to _everything_. The reason for this is a -;; kluge to make procedures available to macros during compilation without -;; having to wait for a rebirth repass. But this does have its issues and -;; it's important to understand that this is a temporary solution until -;; Ulambda has some level of static analysis. -;; -;; Care needs to be taken to make sure, regardless of scope, procedures of -;; the same name are not defined if used within macros, otherwise the latter -;; (again, regardless of scope) in the file will take precedence. This -;; behavior will not be observed by execution of compiled code, though, -;; because the scope will have the correct version. However, if a procedure -;; is _not_ in scope, then rather than being undefined, the one assigned to -;; root would be available. -;; -;; The generated ECMAScript is evaluated immediately to make it available to -;; macros during the compilation process. -(define (cdfn-proc t id-override) - ;; e.g. (define (foo ...) body) - (let* ((dfn (cadr t)) - (id (or id-override - (tname->id (token-value (car dfn))))) - (named? (not (string=? id ""))) - (params (cdr dfn)) - (fparams (params->es params)) - (fenv (env-params params)) - (body (body->es (cddr t) #t))) - ;; this is the final format---each procedure becomes its own function - ;; definition in ES - (let ((es (string-append - "function " id "(" fparams ")\n{" - "return (function(_env){\n" fenv "\n" - body - "\n})(Object.create(_env));}" - (if named? - (string-append ";_env." id " = " id - ";_env.root." id " = " id ";") - "")))) - ;; Immediately evaluate to make available to macros during - ;; compilation. See procedure notes above. - (cond-expand - (string->es - (if named? (string->es "eval($$es)")))) - es))) - - -;; Quote an expression -;; -;; If SEXP is a token, produce an ECMAScript Symbol. Otherwise, -;; recursively apply to each element in the list. -;; -;; TODO: This implementation isn't wholly correct---numbers, for example, -;; should not be converted to symbols, as they already are one. -(define (quote-sexp sexp) - (if (token? sexp) - (case (token-type sexp) - (("string") (sexp->es sexp)) - (else - (string-append "Symbol.for('" (token-value sexp) "')"))) - (string-append - "[" (join "," (map quote-sexp sexp)) "]"))) - - -;; Quasiquote an expression -;; -;; A quasiquoted expression acts just like a quoted expression with one -;; notable exception---quoting can be escaped using special forms. For -;; example, each of these are equivalent: -;; -;; (quasiquote (a 1 2 (unquote (eq? 3 4)))) -;; (list (quote a) 1 2 (eq? 3 4)) -;; (quasiquote (a (unquote-splicing (list 1 2)) (unquote (eq? 3 4)))) -;; -;; TODO/WARNING: Normally "(quasiquote a (unquote-splicing b))" would -;; produce "(a . b)" in a proper Lisp, but we do not yet support proper -;; pairs at the time that this procedure was written; all cdrs are assumed -;; to be lists. So do not do that---always splice lists. -(define (quasiquote-sexp sexp) - ;; get type of token at car of pair, unless not a pair - (define (%sexp-maybe-type sexp) - (and (pair? sexp) - (token? (car sexp)) - (token-value (car sexp)))) - - ;; recursively process the sexp, handling various types of unquoting - (define (%quote-maybe sexp delim) - (if (pair? sexp) - (let* ((item (car sexp)) - (rest (cdr sexp)) - (type (%sexp-maybe-type item)) - (add-delim (not (or (string=? type "unquote-splicing") - (string=? type "unquote@"))))) - (string-append - (case type - ;; escape quoting, nest within - (("unquote") - (string-append (if delim "," "") - (sexp->es (cadr item)))) - - ;; escape quoting, splice list into parent expression - ;; (lazy kluge warning), along with an alias for brevity - ;; given that we lack the ",@" syntax right now - (("unquote-splicing" "unquote@") - (string-append - "]).concat(" (sexp->es (cadr item)) ").concat([")) - - ;; anything else, we're still quasiquoting recursively - (else (string-append (if delim "," "") - (quasiquote-sexp item)))) - - ;; continue processing this list - (%quote-maybe rest add-delim))) - "")) - - ;; tokens fall back to normal quoting - (if (token? sexp) - (quote-sexp sexp) - (string-append - "([" (%quote-maybe sexp #f) "])"))) - - -;; Statically expand expressions based on implementation features -;; -;; Support for `cond-expand' allows Rebirth to introduce new features each -;; time that it is compiled. If matched, expressions will be evaluated as -;; if they were entered in place of the `cond-expand' itself; otherwise, -;; the entire `cond-expand' expression as a whole will be discarded. -;; -;; Birth will always discard `cond-expand' expressions unless they contain -;; an `else' clause, which permits us to compile on the first pass without -;; error. -(define (expand-cond-expand args) - (if (pair? args) - (let* ((clause (car args)) - (feature (token-value (car clause))) - (body (cdr clause))) - ;; now we get meta - (cond-expand - (string->es - (case feature - (("string->es" "include" "else") (body->es body #f)) - (else (if (es:defined? feature) - (body->es body #f) - (expand-cond-expand (cdr args)))))) - ;; if we're not yet compiled with Rebirth, then string->es will - ;; not yet be available---but it _will_ be in Rebirth, so - ;; compile cond-expand such that it marks it as supported - (else - (case feature - ;; these two are always supported in Rebirth Lisp - (("string->es" "include" "else") (body->es body #f)) - ;; keep recursing until we find something (this allows us to - ;; short-circuit, most notably with "else") - (else - (expand-cond-expand (cdr args))))))) - "")) - - -;; Determine whether the given name NAME represents a macro. -;; -;; If `string->es' is not yet supported, then this procedure always -;; yields `#f'. Otherwise, the compiler runtime `_env.macros' is consulted. -;; -;; See `cdfn-macro' for more information. -(define (macro? name) - (cond-expand - (string->es - (string->es "_env.macros[$$name] !== undefined")) - (else #f))) - - -;; Determine if FN is a procedure or macro and apply it accordingly with -;; arguments ARGS. -;; -;; These actions represent two separate environments: If a macro, then the -;; call needs to be executed immediately within the context of the compiler -;; runtime. Otherwise, procedure applications are simply compiled to be -;; produced with the rest of the compiler output and will be run at a later -;; time within the context of the compiled program. -(define (apply-proc-or-macro fn args) - (if (macro? fn) - (string->es "_env.macros[$$fn].apply(null,$$args)") - ;; Procedures are produced as part of the compiler output. - (let ((argstr (join ", " (map sexp->es args)))) - (string-append (env-ref fn) "(" argstr ")")))) - - -;; Primitive special forms. -;; -;; These are forms that cannot be re-written as macros because of -;; chicken-and-egg issues. Since the Rebirth compiler is temporary, we're -;; not going to worry about getting rid of the rest of these. -;; -;; String values are simple function aliases. Function values take over -;; the compilation of that function and allow for defining special forms -;; (in place of macro support). The first argument FN is the name of the -;; function/procedure/form, and ARGS is the list of arguments. -(define (fnmap fn args t) - (case fn - ;; output raw code into the compiled ECMAScript (what could go wrong?) - (("string->es") - (token-value (car args))) - - ;; very primitive cond-expand - (("cond-expand") (expand-cond-expand args)) - - ;; Note that the unquote forms are only valid within a quasiquote; see - ;; that procedure for the handling of those forms. Since we do not - ;; support the special prefix form, we also offer "`quote" as a - ;; shorthand for quasiquote. - (("quote") (quote-sexp (car args))) - (("quasiquote" "`quote") (quasiquote-sexp (car args))) - - (("define") (cdfn t)) - (("define-macro") (cdfn-macro t)) ; not defined until string->es cond - - ;; Defining `include' is trivial right now since we're not doing any - ;; sort of static analysis---we just need to start compilation of the - ;; requested file and inline it right where we are. Note that this - ;; doesn't enclose the file in `begin', so this isn't yet proper. - (("include") (rebirth->ecmascript - (parse-lisp - (es:file->string (token-value (car args)))))) - - ;; If we have macro support (`cdfn-macro'), then assume that they exist - ;; and try to use them; otherwise, continue to use built-in forms, which - ;; have been moved into `fnmap-premacro'). - (else - (cond-expand - (cdfn-macro (apply-proc-or-macro fn args)) - (else (fnmap-premacro fn args t)))))) - - -;; Special forms to be removed on future Rebirth pass in favor of macros -;; -;; See Step 2 above for the replacement macro definitions. -(cond-expand - (cdfn-macro) ; our cond-expand does not support `else' - (else - (define (fnmap-premacro fn args t) - (case fn - (("es:console") - (string-append "console.log(" (map sexp->es args) ")")) - (("es:error") - (string-append "console.error(" (map sexp->es args) ")")) - - ;; yes, there are more important things to do until we get to the - ;; point where it's worth implementing proper tail calls - (("es:while") - (let ((pred (car args)) - (body (cdr args))) - (string-append - "(function(__whilebrk){" - "while (" (sexp->es pred) "){\n" - (body->es body #f) " if (__whilebrk) break;\n" - "}\n" - "})(false)"))) - (("es:break") "__whilebrk=true") - - (("lambda") - (let ((fnargs (car args)) - (body (cdr args))) - (string-append - "function(" (join ", " (map sexp->es fnargs)) "){\n" - (body->es body #t) - "}"))) - - ;; simple if statement with optional else, wrapped in a self-executing - ;; function to simplify code generation (e.g. returning an if) - (("if") - (let ((pred (car args)) - (t (cadr args)) - (f (and (pair? (cddr args)) - (caddr args)))) - (string-append - "(function(){" - "if (_truep(" (sexp->es pred) ")){return " (sexp->es t) ";}" - (or (and (pair? f) - (string-append "else{return " (sexp->es f) ";}")) - "") - "})()"))) - - ;; and short-circuits, so we need to implement it as a special form - ;; rather than an alias - (("and") - (string-append - "(function(__and){\n" - (join "" (map (lambda (expr) - (string-append - "__and = " (sexp->es expr) "; " - "if (!_truep(__and)) return false;\n")) - args)) - "return __and;})()")) - - ;; or short-circuits, so we need to implement it as a special form - ;; rather than an alias - (("or") - (string-append - "(function(__or){\n" - (join "" (map (lambda (expr) - (string-append - "__or = " (sexp->es expr) "; " - "if (_truep(__or)) return __or;\n")) - args)) - "return false;})()")) - - ;; (let ((binding val) ...) ...body), compiled as a self-executing - ;; function which allows us to easily represent the return value of - ;; the entire expression while maintaining local scope. - (("let*") - (let ((bindings (car args)) - (body (cdr args))) - (string-append - "(function(){\n" - (join "" (map (lambda (binding) - (string-append - " let " (sexp->es (car binding)) - " = " (sexp->es (cadr binding)) ";\n")) - bindings)) - (body->es body #t) "\n" - " })()"))) - - ;; similar to the above, but variables cannot reference one-another - (("let") - (let* ((bindings (car args)) - (body (cdr args)) - (fparams (join ", " (map sexp->es - (map car bindings)))) - (fargs (join ", " (map sexp->es - (map cadr bindings))))) - (string-append "(function(" fparams "){\n" - (body->es body #t) "\n" - "})(" fargs ")"))) - - ;; and here I thought Prebirth Lisp would be simple...but having - ;; `case' support really keeps things much more tidy, so here we are - ;; (note that it doesn't support the arrow form, nor does it support - ;; expressions as data) - (("case") - (let ((key (car args)) - (clauses (cdr args))) - (string-append - "(function(){const _key=" (sexp->es key) ";\n" - "switch (_key){\n" - (join "" - (map (lambda (data exprs) - (string-append - (if (and (token? data) - (string=? "else" (token-lexeme data))) - "default:\n" - (join "" - (map (lambda (datum) - (string-append - "case " (sexp->es datum) ":\n")) - data))) - (body->es exprs #t) "\n")) - (map car clauses) - (map cdr clauses))) - "}})()"))) - - (("set!") - (let ((varid (car args)) - (val (cadr args))) - (string-append (sexp->es varid) " = " (sexp->es val)))) - - ;; procedure or macro - (else (apply-proc-or-macro fn args)))))) - - -;; Convert s-expressions or scalar into ECMAScript -;; -;; T may be either an array of tokens or a primitive token (e.g. string, -;; symbol). This procedure is applied recursively to T as needed if T is -;; a list. -(define (sexp->es t) - (if (not (list? t)) - (error "unexpected non-list for sexp->es token")) - - (if (token? t) - (case (token-type t) - ;; strings output as-is (note that we don't escape double quotes, - ;; because the method of escaping them is the same in Scheme as it - ;; is in ECMAScript---a backslash) - (("string") (string-append "\"" (token-value t) "\"")) - - ;; symbols have the same concerns as procedure definitions: the - ;; identifiers generated need to be ES-friendly - (("symbol") (env-ref t)) - - (else (error - (string-append - "cannot compile unknown token `" (token-type t) "'")))) - - ;; otherwise, process the expression - (fnmap (token-value (car t)) - (cdr t) - t))) +;; Most of the compiler has been extracted into a separate file so that it +;; can be used by other programs; this is necessary because the macro system +;; uses those definitions, so naturally any program using macros will +;; require that the compiler definitions be imported. +(include "rebirth/compiler.scm") ;; Compile Rebirth Lisp AST into ECMAScript. @@ -887,15 +103,6 @@ (rebirth->ecmascript ast) "})(" env-es ");")) -;; An empty environment. -;; -;; This holds a reference to itself as `root' so that we can access the top -;; of the prototype chain easily. The reason for this is a kluge to give -;; macros access to procedures as they are defined (without having to wait -;; until the execution of a new version of rebirth). See `cdfn-proc'. -(define (es:empty-env) - "(function(){let o = {macros:{}}; o.root = o; return o;})()") - ;; at this point, this program can parse itself and output a CST (sans ;; whitespace) diff --git a/bootstrap/rebirth/compiler.scm b/bootstrap/rebirth/compiler.scm new file mode 100644 index 0000000..ffe79d4 --- /dev/null +++ b/bootstrap/rebirth/compiler.scm @@ -0,0 +1,676 @@ +;;;; ECMAScript compiler for Rebirth Lisp +;;;; +;;;; Copyright (C) 2017, 2018 Mike Gerwitz +;;;; +;;;; This file is part of Ulambda Scheme. +;;;; +;;;; Ulambda Scheme is free software: you can redistribute it and/or modify +;;;; it under the terms of the GNU Affero General Public License as +;;;; published by the Free Software Foundation, either version 3 of the +;;;; License, or (at your option) any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Affero General Public License +;;;; along with this program. If not, see . +;;;; + + + +;;; Commentary: + +;;; THIS IS BOOTSTRAP CODE INTENDED FOR USE ONLY IN REBIRTH. +;;; +;;; +;;; This defines the lexer, parser, and core of the compiler. Note that +;;; most of the actual compiling happens with the macros defined in +;;; `es.scm'. +;;; +;;; The core structure persists from Birth, along with much of the original +;;; code. Certain parts have been simplified or moved into other files, +;;; while other parts have been extended. +;;; + +;;; Code: + + + +;; Convert source input into a string of tokens. +;; +;; This is the lexer. Whitespace is ignored. The grammar consists of +;; simple s-expressions. +;; +;; Tokens are produced with `make-token'. The source SRC will be +;; left-truncated as input is processed. POS exists for producing metadata +;; for error reporting---it has no impact on parsing. +;; +;; This implementation was originally recursive and immutable, but the stack +;; was being exhausted, so it was refactored into an inferior +;; implementation. Note the use of `es:while' and `es:break'---these are +;; quick fixes to the problem of stack exhaustion in browsers (where we have +;; no control over the stack limit); proper tail call support will come +;; later when we have a decent architecture in place. +;; +;; The result is a list of tokens. See `token' for the format. +(define (lex src pos) + (define (es:match-regexp re s) + (es:match (es:regexp re) s)) + + (let ((toks (list))) + (es:while #t ; browser stack workaround + (let* ((ws (or (es:match-regexp "^\\s+" + src) + (list ""))) + (ws-len (string-length (car ws))) + (trim (substring src ws-len)) ; ignore whitespace, if any + (newpos (+ pos ws-len))) ; adj pos to account for removed ws + + (if (string=? "" trim) + (es:break) ; EOF and we're done + + ;; normally we'd use `string-ref' here, but then we'd have to + ;; implement Scheme characters, so let's keep this simple and keep + ;; with strings + (let* ((ch (substring trim 0 1)) + (t (case ch + ;; comments extend until the end of the line + ((";") (let ((eol (es:match-regexp "^(.*?)(\\n|$)" trim))) + (make-token "comment" (cadr eol) trim newpos))) + + ;; left and right parenthesis are handled in the same + ;; manner: they produce distinct tokens with + ;; single-character lexemes + (("(") (make-token "open" ch trim newpos)) + ((")") (make-token "close" ch trim newpos)) + + ;; strings are delimited by opening and closing ASCII + ;; double quotes, which can be escaped with a + ;; backslash + (("\"") (let ((str (es:match-regexp + "^\"(|(?:.|\\\n)*?[^\\\\])\"" + trim))) + (or str (parse-error + src pos + "missing closing string delimiter")) + ;; a string token consists of the entire + ;; string including quotes as its lexeme, + ;; but its value will be the value of the + ;; string without quotes due to the `str' + ;; match group (see `token') + (make-token "string" str trim newpos))) + + (else + ;; anything else is considered a symbol up until + ;; whitespace or any of the aforementioned + ;; delimiters + (let ((symbol (es:match-regexp "^[^\\s()\"]+" + trim))) + (make-token "symbol" symbol trim newpos)))))) + + ;; yikes---see notes in docblock with regards to why + ;; we're using mutators here + (set! toks (append toks (list (car t)))) + (set! src (cadr t)) + (set! pos (caddr t)))))) + toks)) + + + +;; Throw an error with a window of surrounding source code. +;; +;; The "window" is simply ten characters to the left and right of the +;; first character of the source input SRC that resulted in the error. +;; It's a little more than useless. +(define (parse-error src pos msg) + (let ((window (substring src (- pos 10) (+ pos 10)))) + (error (string-append msg " (pos " pos "): " window) + src))) + + + +;; Produce a token, left-truncate src, and update pos. +;; +;; Unlike the JS Prebirth implementation which uses a key/value object, +;; we're just using a simple list. +;; +;; The expected arguments are: the token type TYPE, the match group or +;; string MATCH, left-truncated source code SRC, and the position POS +;; relative to the original source code. +(define (make-token type match src pos) + (let* ((parts (if (list? match) match (list match match))) + (lexeme (car parts)) + ;; the value is the first group of the match (indicating what we + ;; are actually interested in), and the lexeme is the full match, + ;; which might include, for example, string delimiters + (value (or (and (pair? (cdr parts)) + (cadr parts)) + lexeme)) + (len (string-length lexeme))) + + ;; produce token and recurse on `lex', left-truncating the source + ;; string to discard what we have already processed + (list (list (quote token) type lexeme value pos) + (substring src len) + (+ pos len)))) + + +;; various accessor procedures for token lists (we're Birth Lisp here, +;; so no record support or anything fancy!) +(define (token? t) (and (pair? t) (symbol=? (quote token) (car t)))) +(define (token-type t) (cadr t)) +(define (token-lexeme t) (caddr t)) +(define (token-value t) (cadddr t)) +(define (token-pos t) (caddddr t)) + + +;; Produce an AST from the given string SRC of sexps +;; +;; This is essentially the CST with whitespace removed. It first invokes +;; the lexer to produce a token string from the input sexps SRC. From this, +;; it verifies only proper nesting (that SRC does not close sexps too early +;; and that EOF isn't reached before all sexps are closed) and produces an +;; AST that is an isomorphism of the original sexps. +(define (parse-lisp src) + ;; accessor methods to make you and me less confused + (define (ast-depth ast) (car ast)) + (define (ast-tree ast) (cadr ast)) + (define (ast-stack ast) (caddr ast)) + + + ;; perform a leftmost reduction on the token string + (define (toks->ast toks) + (fold + (lambda (token result) + (let ((depth (ast-depth result)) + (xs (ast-tree result)) + (stack (ast-stack result)) + (type (token-type token)) + (pos (token-pos token))) + + ;; there are very few token types to deal with (again, this is a + ;; very simple bootstrap lisp) + (case type + ;; ignore comments + (("comment") result) + + ;; when beginning a new expression, place the expression + ;; currently being processed onto a stack, allocate a new list, + ;; and we'll continue processing into that new list + (("open") (list (+ depth 1) + (list) + (cons xs stack))) + + ;; once we reach the end of the expression, pop the parent off of + ;; the stack and append the new list to it + (("close") (if (zero? depth) + (parse-error src pos + "unexpected closing parenthesis") + (list (- depth 1) + (append (car stack) (list xs)) + (cdr stack)))) + + ;; strings and symbols (we cheat and just consider everything, + ;; including numbers and such, to be symbols) are just copied + ;; in place + (("string" "symbol") (list depth + (append xs (list token)) + stack)) + + ;; we should never encounter anything else unless there's a bug + ;; in the tokenizer or we forget a token type above + (else (parse-error + src pos (string-append + "unexpected token `" type "'")))))) + (list 0 (list) (list)) ; initial 0 depth; empty tree; expr stack + toks)) + + + ;; lex the input SRC and pass it to `toks->ast' to generate the AST; + ;; if the depth is non-zero after we're done, then we're unbalanced. + (let* ((toks (lex src 0)) + (ast (toks->ast toks))) + (if (zero? (ast-depth ast)) + (ast-tree ast) + ;; if we terminate at a non-zero depth, that means there ar still + ;; open sexps + (error (string-append + "unexpected end of input at depth " + (ast-depth ast)))))) + + +;; Generate ECMAScript-friendly parameter name for the given token T. +;; +;; The generated name will not have any environment references and is +;; suitable only for the immediate scope. +(define (tparam->es t) + (tname->id (token-value t))) + + +;; Predicate determining whether NAME should be output verbatim as an +;; ECMAScript identifier. +;; +;; This only returns #t for numbers. +(define (tname-verbatim? name) + (es:match (es:regexp "^-?(\\d+(\\.\\d+)?|\\.\\d+)$") name)) + + +;; Generate ECMAScript to reference the variable associated with the token T +;; in the current environment. +;; +;; The "current" environment is relative to whatever context into which the +;; caller places this generated code---that is, the environment is +;; resolved by the runtime environment. +;; +;; If macro support is not yet compiled in, then this returns the identifier +;; name _without_ the environment, just as Birth. +(define (env-ref t) + (let ((name (if (token? t) + (token-value t) + t))) + (if (tname-verbatim? name) + name + (cond-expand + (cdfn-macro + (string-append "_env." (tname->id name))) + (else + (tname->id name)))))) + + +;; Generate ECMAScript-friendly name from the given id. +;; +;; A subset of special characters that are acceptable in Scheme are +;; converted in an identifiable manner; others are simply converted to `$' +;; in a catch-all and therefore could result in conflicts and cannot be +;; reliably distinguished from one-another. Remember: this is temporary +;; code. +(define (tname->id name) + (if (tname-verbatim? name) + name + (string-append + "$$" (es:replace (es:regexp "[^a-zA-Z0-9_]" "g") + (lambda (c) + (case c + (("-") "$_$") + (("?") "$7$") + (("@") "$a$") + (("!") "$b$") + ((">") "$g$") + (("#") "$h$") + (("*") "$k$") + (("<") "$l$") + (("&") "$n$") + (("%") "$o$") + (("+") "$p$") + (("=") "$q$") + (("^") "$v$") + (("/") "$w$") + (("$") "$$") + (else "$"))) + name)))) + +;; Join a list of strings XS on a delimiter DELIM +(define (join delim xs) + (if (pair? xs) + (fold (lambda (x str) + (string-append str delim x)) + (car xs) + (cdr xs)) + "")) + + +;; Compile parameter list. +;; +;; This takes the value of the symbol and outputs it (formatted), delimited +;; by commas. +;; +;; Since we do not support actual pairs (yet), the "." syntax that normally +;; denotes the cdr is retained and presents itself here. The form "(arg1, +;; arg2 . rest)" creates a list `rest' containing all remaining arguments +;; after that point. Conveniently, ECMAScript Harmony supports this +;; natively with the "..." syntax. +(define (params->es params) + (define (%param-conv params) + (let* ((param (car params)) + (name (token-value param)) + (id (tname->id name)) + (rest (cdr params))) + (if (string=? name ".") + (list (string-append + "..." (car (%param-conv rest)))) + (if (pair? rest) + (cons id (%param-conv rest)) + (list id))))) + + (if (pair? params) + (join "," (%param-conv params)) + "")) + + +;; Compile body s-expressions into ECMAScript +;; +;; This produces a 1:1 mapping of body XS s-expressions to ES statements, +;; recursively. The heavy lifting is done by `sexp->es'. +(define (body->es xs ret) + ;; recursively process body XS until we're out of pairs + (if (not (pair? xs)) + "" + (let* ((x (car xs)) + (rest (cdr xs)) + (more? (or (not ret) (pair? rest)))) + ;; the result is a semicolon-delimited string of statements, with + ;; the final statement prefixed with `return' unless (not ret) + (string-append + " " + (if more? "" "return ") ; prefix with `return' if last body exp + (sexp->es x) ";" ; process current body expression + (if (pair? rest) "\n" "") + (body->es rest ret))))) ; recurse + + +;; Place parameters PARAMS into the current environment. +;; +;; This is ugly so that Rebirth can support multiple implementations at +;; once---those with environment support and those without. +(define (env-params params) + (join "\n" + (map (lambda (param) + (if (string=? (token-value param) ".") + "" ; next param is the cdr + (string-append (env-ref param) " = " + (tparam->es param) ";"))) + params))) + + +;; Compile variable or procedure definition into ES +;; +;; This performs a crude check to determine whether a procedure definition +;; was supplied: if the cadr of the given token T is itself token, then it +;; is considered to be a variable. +(define (cdfn t) + (if (token? (cadr t)) + (cdfn-var t) ;; (define foo ...) + (cdfn-proc t #f))) ;; (define (foo ...) ...) + + +;; Compile variable definition into ES +;; +;; This compiles the token T into a simple let-assignment. +(define (cdfn-var t) + (let* ((dfn (cadr t)) + (id (tname->id (token-value dfn))) + (value (sexp->es (caddr t)))) + (string-append "let " id "=" value ";_env." id " = " id))) + + +;; Compile procedure definition into an ES function definition +;; +;; This will fail if the given token is not a `define'. +;; +;; The output does something peculiar: it not only assigns to the active +;; scope, but also to the root of the environment, which has the effect of +;; making the procedure available to _everything_. The reason for this is a +;; kluge to make procedures available to macros during compilation without +;; having to wait for a rebirth repass. But this does have its issues and +;; it's important to understand that this is a temporary solution until +;; Ulambda has some level of static analysis. +;; +;; Care needs to be taken to make sure, regardless of scope, procedures of +;; the same name are not defined if used within macros, otherwise the latter +;; (again, regardless of scope) in the file will take precedence. This +;; behavior will not be observed by execution of compiled code, though, +;; because the scope will have the correct version. However, if a procedure +;; is _not_ in scope, then rather than being undefined, the one assigned to +;; root would be available. +;; +;; The generated ECMAScript is evaluated immediately to make it available to +;; macros during the compilation process. +(define (cdfn-proc t id-override) + ;; e.g. (define (foo ...) body) + (let* ((dfn (cadr t)) + (id (or id-override + (tname->id (token-value (car dfn))))) + (named? (not (string=? id ""))) + (params (cdr dfn)) + (fparams (params->es params)) + (fenv (env-params params)) + (body (body->es (cddr t) #t))) + ;; this is the final format---each procedure becomes its own function + ;; definition in ES + (let ((es (string-append + "function " id "(" fparams ")\n{" + "return (function(_env){\n" fenv "\n" + body + "\n})(Object.create(_env));}" + (if named? + (string-append ";_env." id " = " id + ";_env.root." id " = " id ";") + "")))) + ;; Immediately evaluate to make available to macros during + ;; compilation. See procedure notes above. + (cond-expand + (string->es + (if named? (string->es "eval($$es)")))) + es))) + + +;; Quote an expression +;; +;; If SEXP is a token, produce an ECMAScript Symbol. Otherwise, +;; recursively apply to each element in the list. +;; +;; TODO: This implementation isn't wholly correct---numbers, for example, +;; should not be converted to symbols, as they already are one. +(define (quote-sexp sexp) + (if (token? sexp) + (case (token-type sexp) + (("string") (sexp->es sexp)) + (else + (string-append "Symbol.for('" (token-value sexp) "')"))) + (string-append + "[" (join "," (map quote-sexp sexp)) "]"))) + + +;; Quasiquote an expression +;; +;; A quasiquoted expression acts just like a quoted expression with one +;; notable exception---quoting can be escaped using special forms. For +;; example, each of these are equivalent: +;; +;; (quasiquote (a 1 2 (unquote (eq? 3 4)))) +;; (list (quote a) 1 2 (eq? 3 4)) +;; (quasiquote (a (unquote-splicing (list 1 2)) (unquote (eq? 3 4)))) +;; +;; TODO/WARNING: Normally "(quasiquote a (unquote-splicing b))" would +;; produce "(a . b)" in a proper Lisp, but we do not yet support proper +;; pairs at the time that this procedure was written; all cdrs are assumed +;; to be lists. So do not do that---always splice lists. +(define (quasiquote-sexp sexp) + ;; get type of token at car of pair, unless not a pair + (define (%sexp-maybe-type sexp) + (and (pair? sexp) + (token? (car sexp)) + (token-value (car sexp)))) + + ;; recursively process the sexp, handling various types of unquoting + (define (%quote-maybe sexp delim) + (if (pair? sexp) + (let* ((item (car sexp)) + (rest (cdr sexp)) + (type (%sexp-maybe-type item)) + (add-delim (not (or (string=? type "unquote-splicing") + (string=? type "unquote@"))))) + (string-append + (case type + ;; escape quoting, nest within + (("unquote") + (string-append (if delim "," "") + (sexp->es (cadr item)))) + + ;; escape quoting, splice list into parent expression + ;; (lazy kluge warning), along with an alias for brevity + ;; given that we lack the ",@" syntax right now + (("unquote-splicing" "unquote@") + (string-append + "]).concat(" (sexp->es (cadr item)) ").concat([")) + + ;; anything else, we're still quasiquoting recursively + (else (string-append (if delim "," "") + (quasiquote-sexp item)))) + + ;; continue processing this list + (%quote-maybe rest add-delim))) + "")) + + ;; tokens fall back to normal quoting + (if (token? sexp) + (quote-sexp sexp) + (string-append + "([" (%quote-maybe sexp #f) "])"))) + + +;; Statically expand expressions based on implementation features +;; +;; Support for `cond-expand' allows Rebirth to introduce new features each +;; time that it is compiled. If matched, expressions will be evaluated as +;; if they were entered in place of the `cond-expand' itself; otherwise, +;; the entire `cond-expand' expression as a whole will be discarded. +;; +;; Birth will always discard `cond-expand' expressions unless they contain +;; an `else' clause, which permits us to compile on the first pass without +;; error. +(define (expand-cond-expand args) + (if (pair? args) + (let* ((clause (car args)) + (feature (token-value (car clause))) + (body (cdr clause))) + ;; now we get meta + (cond-expand + (string->es + (case feature + (("string->es" "else") (body->es body #f)) + (else (if (es:defined? feature) + (body->es body #f) + (expand-cond-expand (cdr args)))))) + ;; if we're not yet compiled with Rebirth, then string->es will + ;; not yet be available---but it _will_ be in Rebirth, so + ;; compile cond-expand such that it marks it as supported + (else + (case feature + ;; these two are always supported in Rebirth Lisp + (("string->es" "else") (body->es body #f)) + ;; keep recursing until we find something (this allows us to + ;; short-circuit, most notably with "else") + (else + (expand-cond-expand (cdr args))))))) + "")) + + +;; Determine whether the given name NAME represents a macro. +;; +;; If `string->es' is not yet supported, then this procedure always +;; yields `#f'. Otherwise, the compiler runtime `_env.macros' is consulted. +;; +;; See `cdfn-macro' for more information. +(define (macro? name) + (cond-expand + (string->es + (string->es "_env.macros[$$name] !== undefined")) + (else #f))) + + +;; Determine if FN is a procedure or macro and apply it accordingly with +;; arguments ARGS. +;; +;; These actions represent two separate environments: If a macro, then the +;; call needs to be executed immediately within the context of the compiler +;; runtime. Otherwise, procedure applications are simply compiled to be +;; produced with the rest of the compiler output and will be run at a later +;; time within the context of the compiled program. +(define (apply-proc-or-macro fn args) + (if (macro? fn) + (string->es "_env.macros[$$fn].apply(null,$$args)") + ;; Procedures are produced as part of the compiler output. + (let ((argstr (join ", " (map sexp->es args)))) + (string-append (env-ref fn) "(" argstr ")")))) + + +;; Primitive special forms. +;; +;; These are forms that cannot be re-written as macros because of +;; chicken-and-egg issues. Since the Rebirth compiler is temporary, we're +;; not going to worry about getting rid of the rest of these. +;; +;; String values are simple function aliases. Function values take over +;; the compilation of that function and allow for defining special forms +;; (in place of macro support). The first argument FN is the name of the +;; function/procedure/form, and ARGS is the list of arguments. +;; +;; For the forms previously defined here in Birth, see `fnmap-premacro' in +;; `es.scm'. +(define (fnmap fn args t) + (case fn + ;; output raw code into the compiled ECMAScript (what could go wrong?) + (("string->es") + (token-value (car args))) + + ;; very primitive cond-expand + (("cond-expand") (expand-cond-expand args)) + + ;; Note that the unquote forms are only valid within a quasiquote; see + ;; that procedure for the handling of those forms. Since we do not + ;; support the special prefix form, we also offer "`quote" as a + ;; shorthand for quasiquote. + (("quote") (quote-sexp (car args))) + (("quasiquote" "`quote") (quasiquote-sexp (car args))) + + (("define") (cdfn t)) + (("define-macro") (cdfn-macro t)) ; not defined until string->es cond + + ;; Defining `include' is trivial right now since we're not doing any + ;; sort of static analysis---we just need to start compilation of the + ;; requested file and inline it right where we are. Note that this + ;; doesn't enclose the file in `begin', so this isn't yet proper. + (("include") (rebirth->ecmascript + (parse-lisp + (es:file->string (token-value (car args)))))) + + ;; If we have macro support (`cdfn-macro'), then assume that they exist + ;; and try to use them; otherwise, continue to use built-in forms, which + ;; have been moved into `fnmap-premacro'). + (else + (cond-expand + (cdfn-macro (apply-proc-or-macro fn args)) + (else (fnmap-premacro fn args t)))))) + + +;; Convert s-expressions or scalar into ECMAScript +;; +;; T may be either an array of tokens or a primitive token (e.g. string, +;; symbol). This procedure is applied recursively to T as needed if T is +;; a list. +(define (sexp->es t) + (if (not (list? t)) + (error "unexpected non-list for sexp->es token")) + + (if (token? t) + (case (token-type t) + ;; strings output as-is (note that we don't escape double quotes, + ;; because the method of escaping them is the same in Scheme as it + ;; is in ECMAScript---a backslash) + (("string") (string-append "\"" (token-value t) "\"")) + + ;; symbols have the same concerns as procedure definitions: the + ;; identifiers generated need to be ES-friendly + (("symbol") (env-ref t)) + + (else (error + (string-append + "cannot compile unknown token `" (token-type t) "'")))) + + ;; otherwise, process the expression + (fnmap (token-value (car t)) + (cdr t) + t))) diff --git a/bootstrap/rebirth/es.scm b/bootstrap/rebirth/es.scm index 1a46c8d..b66de3c 100644 --- a/bootstrap/rebirth/es.scm +++ b/bootstrap/rebirth/es.scm @@ -74,8 +74,38 @@ +;; Pair selection (these were part of birth.scm). +(define (cadr xs) + (car (cdr xs))) +(define (caadr xs) + (car (car (cdr xs)))) +(define (caddr xs) + (car (cdr (cdr xs)))) +(define (cadddr xs) + (car (cdr (cdr (cdr xs))))) +(define (caddddr xs) + (car (cdr (cdr (cdr (cdr xs)))))) +(define (cddr xs) + (cdr (cdr xs))) + + + +;; An empty environment. +;; +;; This holds a reference to itself as `root' so that we can access the top +;; of the prototype chain easily. The reason for this is a kluge to give +;; macros access to procedures as they are defined (without having to wait +;; until the execution of a new version of rebirth). See `cdfn-proc'. +(define (es:empty-env) + "(function(){let o = {macros:{}}; o.root = o; return o;})()") + + + (cond-expand (cdfn-macro + ;; We have macro support, so we can substitute the original Birth + ;; definitions with macro equivalents. + (define-macro (%es:native-apply fn . args) (`quote (string->es @@ -91,7 +121,6 @@ (define (es:json-stringify value) (string->es "JSON.stringify($$value)")) - ;; Expand the body BODY into a new environment. Environments are ;; currently handled by the ES runtime, so this is easy. (define-macro (es:envf env . body) @@ -210,6 +239,9 @@ (map cdr clauses))) "}})()") + ;; this was part of birth.scm + (define (not x) + (if x #f #t)) ;; We can just re-use `let' for `begin' since it already does exactly ;; what we need it to @@ -266,4 +298,147 @@ (`quote (%es:setenv (%es:env) (unquote (tname->id (token-lexeme varid))) - (unquote val)))))) + (unquote val))))) + + + + (else + ;; We do _not_ have macro support, so fall back to the original + ;; definitions used by Birth. + (define (fnmap-premacro fn args t) + (case fn + (("es:console") + (string-append "console.log(" (map sexp->es args) ")")) + (("es:error") + (string-append "console.error(" (map sexp->es args) ")")) + + ;; yes, there are more important things to do until we get to the + ;; point where it's worth implementing proper tail calls + (("es:while") + (let ((pred (car args)) + (body (cdr args))) + (string-append + "(function(__whilebrk){" + "while (" (sexp->es pred) "){\n" + (body->es body #f) " if (__whilebrk) break;\n" + "}\n" + "})(false)"))) + (("es:break") "__whilebrk=true") + + (("lambda") + (let ((fnargs (car args)) + (body (cdr args))) + (string-append + "function(" (join ", " (map sexp->es fnargs)) "){\n" + (body->es body #t) + "}"))) + + ;; simple if statement with optional else, wrapped in a self-executing + ;; function to simplify code generation (e.g. returning an if) + (("if") + (let ((pred (car args)) + (t (cadr args)) + (f (and (pair? (cddr args)) + (caddr args)))) + (string-append + "(function(){" + "if (_truep(" (sexp->es pred) ")){return " (sexp->es t) ";}" + (or (and (pair? f) + (string-append "else{return " (sexp->es f) ";}")) + "") + "})()"))) + + ;; and short-circuits, so we need to implement it as a special form + ;; rather than an alias + (("and") + (string-append + "(function(__and){\n" + (join "" (map (lambda (expr) + (string-append + "__and = " (sexp->es expr) "; " + "if (!_truep(__and)) return false;\n")) + args)) + "return __and;})()")) + + ;; or short-circuits, so we need to implement it as a special form + ;; rather than an alias + (("or") + (string-append + "(function(__or){\n" + (join "" (map (lambda (expr) + (string-append + "__or = " (sexp->es expr) "; " + "if (_truep(__or)) return __or;\n")) + args)) + "return false;})()")) + + ;; (let ((binding val) ...) ...body), compiled as a self-executing + ;; function which allows us to easily represent the return value of + ;; the entire expression while maintaining local scope. + (("let*") + (let ((bindings (car args)) + (body (cdr args))) + (string-append + "(function(){\n" + (join "" (map (lambda (binding) + (string-append + " let " (sexp->es (car binding)) + " = " (sexp->es (cadr binding)) ";\n")) + bindings)) + (body->es body #t) "\n" + " })()"))) + + ;; similar to the above, but variables cannot reference one-another + (("let") + (let* ((bindings (car args)) + (body (cdr args)) + (fparams (join ", " (map sexp->es + (map car bindings)))) + (fargs (join ", " (map sexp->es + (map cadr bindings))))) + (string-append "(function(" fparams "){\n" + (body->es body #t) "\n" + "})(" fargs ")"))) + + ;; and here I thought Prebirth Lisp would be simple...but having + ;; `case' support really keeps things much more tidy, so here we are + ;; (note that it doesn't support the arrow form, nor does it support + ;; expressions as data) + (("case") + (let ((key (car args)) + (clauses (cdr args))) + (string-append + "(function(){const _key=" (sexp->es key) ";\n" + "switch (_key){\n" + (join "" + (map (lambda (data exprs) + (string-append + (if (and (token? data) + (string=? "else" (token-lexeme data))) + "default:\n" + (join "" + (map (lambda (datum) + (string-append + "case " (sexp->es datum) ":\n")) + data))) + (body->es exprs #t) "\n")) + (map car clauses) + (map cdr clauses))) + "}})()"))) + + (("set!") + (let ((varid (car args)) + (val (cadr args))) + (string-append (sexp->es varid) " = " (sexp->es val)))) + + ;; procedure or macro + (else (apply-proc-or-macro fn args)))) + + ;; This was part of birth.scm. This is included here in the `else' + ;; clause because it is defined using `if': On early Rebirth passes, + ;; `if' is built into the compiler and so it is available + ;; immediately. But on future passes, `if' is defined in this file, and + ;; so we cannot define `not' until that point (see the duplicate + ;; definition of `not' above). + (define (not x) + (if x #f #t)))) diff --git a/bootstrap/rebirth/r5rs-syntax.scm b/bootstrap/rebirth/r5rs-syntax.scm new file mode 100644 index 0000000..a11444c --- /dev/null +++ b/bootstrap/rebirth/r5rs-syntax.scm @@ -0,0 +1,56 @@ +;;; Syntatic forms for R⁵RS Scheme +;;; +;;; Copyright (C) 2018 Mike Gerwitz +;;; +;;; This file is part of Ulambda Scheme. +;;; +;;; Ulambda Scheme is free software: you can redistribute it and/or modify +;;; it under the terms of the GNU Affero General Public License as +;;; published by the Free Software Foundation, either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with this program. If not, see . +;;; +;;; These are syntatic forms suitable for use in `(null-environment 5)'. +;;; +;;; This will eventually be shared between Rebirth and Ulambda. It assumes +;;; that macro support exists. + +;; convention: %% compiler internal + +(define-macro (%%apply-procedure proc-name . args)) +(define-macro (%%apply-?-procedure proc-name . args)) + +(define-macro (%%apply-macro macro-name . args)) +(define-macro (%%apply-?-macro macro-name . args)) + + + +(define-macro (lambda fnargs . body) + (`quote + (PROC (unquote fnargs) (unquote body)))) + +(define-macro (let bindings . body) + (if (empty? bindings) + body + (list (quote LET) + (caar bindings) + (cadr (car bindings)) + (let (cdr bindings) body)))) + + (define-es-macro (let* bindings . body) + "(function(){\n" + (join "" (map (lambda (binding) + (string-append + "let " (tparam->es (car binding)) ; TODO: BC; remove + " = " (env-ref (car binding)) + " = " (sexp->es (cadr binding)) ";\n")) + bindings)) + (body->es body #t) "\n" + " })()") diff --git a/bootstrap/rebirth/relibprebirth.scm b/bootstrap/rebirth/relibprebirth.scm index 596f19c..3e3a7b4 100644 --- a/bootstrap/rebirth/relibprebirth.scm +++ b/bootstrap/rebirth/relibprebirth.scm @@ -41,10 +41,6 @@ ;;; not_ include libprebirth in its output, we can define the libprebirth ;;; primitives ourselves in Rebirth Lisp. Cut the cord. ;;; -;;; Some of these definitions aren't valid: variable arguments, for -;;; example, aren't represented _at all_---the `define' form will be -;;; properly implemented in the future to correct this. -;;; ;;; Code: diff --git a/bootstrap/rebirth/test.scm b/bootstrap/rebirth/test.scm index 4783364..6995595 100644 --- a/bootstrap/rebirth/test.scm +++ b/bootstrap/rebirth/test.scm @@ -52,8 +52,7 @@ (include "rebirth/es.scm") (include "rebirth/relibprebirth.scm") (include "rebirth/macro.scm") -(define (not x) - (if x #f #t)) +(include "rebirth/compiler.scm") @@ -630,6 +629,6 @@ 10) (skip "eval inheriting environment" (let ((x (quote xsym))) - (eval (list x) + (eval (`quote (list (unquote x))) (es:inherit-env))) (quote xsym))))