From 62ab1ee7326e856d7dbfa63a82b625e521bff655 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Sun, 7 Jan 2018 00:11:17 -0500 Subject: [PATCH] rebirth: Primitive environment support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that this doesn't yet provide any useful abstractions for creating discrete environments---it merely provides support for them moving forward. Hopefully. This is the next big step toward rewriting the compiler as a series of macro passes. I'll write more on this later; it has profound consequences (well, as a jump from Rebirth into Ulambda, which is a name that I haven't mentioned until now). It was yesterday that I also got word that I'll be speaking at LibrePlanet 2018 in March. This is great news, but unfortunate news for this project---it has had very little of my time to begin with, and now it's going to have even less until after the conference. * build-aux/bootstrap/rebirth.scm: Add toplevel note about Ulambda. Add notes to "Step 2" regarding environments. (es:envf, %es:env): Add macros. (lambda, let*, let, set!): Macro forms use environments. (%es:has-own-prop, %es:proto-of, %es:envobj-for, %es:setenv): New procedures for `set!' macro. (es:null?): Add procedure. (_macros): Remove ES global (now using `_env.macros'). (cdfn-macro, macro?): Use `_env.macros' in place of `_macros'. (tparam->es, tname-verbatim?, env-ref, env-params): Add procedures. (tname->id): Use `tname-verbatim?', which extracts the digit check. (cdfn-var, cdfn-proc): Also assign to current environment. (apply-proc-or-macro): Use `_env.macros'. Apply function from environment. (sexp->es)[symbol]: Generate environment reference. (rebirth->ecmascript): Generate toplevel environment. 
--- build-aux/bootstrap/rebirth.scm | 202 ++++++++++++++++++++++++++------ 1 file changed, 166 insertions(+), 36 deletions(-) diff --git a/build-aux/bootstrap/rebirth.scm b/build-aux/bootstrap/rebirth.scm index d8f1c15..71d94e9 100644 --- a/build-aux/bootstrap/rebirth.scm +++ b/build-aux/bootstrap/rebirth.scm @@ -37,6 +37,12 @@ ;;; Rererebirth, or Re*birth, or Reⁿbirth---it is a recursively self-hosting ;;; compiler. It adds features to itself each time it compiles itself. ;;; +;;; The ultimate goal after all of those compilation steps is to produce a +;;; compiler that can support the actual Scheme to be used by Gibble: +;;; Ulambda Scheme (abbreviated "Y Scheme", for an upside-down lambda, where +;;; "Y" stands for the Y combinator). More on the name in the future +;;; (perhaps see the Ulambda documentation). +;;; ;;; Note that we're dealing with a small subset of Scheme here, so certain ;;; things might be done differently given a proper implementation. ;;; @@ -81,6 +87,22 @@ ;; case. They will be replaced with proper R7RS implementations in the ;; future. ;; +;; These macros have references to `_env', representing the current +;; environment. It is at this point that we also add primitive environment +;; support---this is essential as we move forward into purely macro-based +;; compilation passes, since we need to be able to have discrete +;; environments to run each of those passes in. More on that later. +;; +;; Scheme creates a new environment for every level of scope. Each +;; environment inherits the one above it, which produces the familiar +;; lexical scoping. As it turns out, this structure is represented +;; perfectly by ECMAScript's prototype model---a reference to a key on a +;; prototype chain is transparently proxied up the chain until it is +;; found. Therefore, environments are chained using a simple +;; `Object.create'. 
For now, anyway---that's the easy solution for now, but +;; in the future (Ulambda) we're likely to have a heap instead, once we have +;; static analysis. +;; ;; Initially, everything here was a near-exact copy of the `fnmap-premacro' ;; forms, re-arranged as needed for compilation (see limitations of ;; `cdfn-macro'), so all changes are clearly visible in the repository @@ -103,6 +125,17 @@ (`quote (string->es (unquote@ body)))) + ;; Expand the body BODY into a new environment inherited from the current + ;; environment. Environments are currently handled by the ES runtime, so + ;; this is easy. + (define-macro (es:envf . body) + (`quote + (string-append + "(function(_env){" + "return " + (unquote@ body) + "})(Object.create(_env))"))) + (define-macro (define-es-macro decl . body) (quasiquote (define-macro (unquote decl) @@ -110,6 +143,9 @@ (quote string->es) (string-append (unquote@ body)))))) + ;; Reference to current environment object. + (define-es-macro (%es:env) "_env") + ;; Don't worry---basic tail call support (at least for recursion) is ;; nearing, and then we can get rid of this ugly thing. (define-es-macro (es:while pred . body) @@ -122,28 +158,33 @@ "__whilebrk=true") (define-es-macro (lambda fnargs . body) - "function(" (join ", " (map sexp->es fnargs)) "){\n" - (body->es body #t) - "}") + (es:envf + "function(" (join ", " (map tparam->es fnargs)) "){\n" + (env-params fnargs) + (body->es body #t) + "}")) (define-es-macro (let* bindings . body) "(function(){\n" (join "" (map (lambda (binding) (string-append - " let " (sexp->es (car binding)) + "let " (tparam->es (car binding)) ; TODO: BC; remove + " = " (env-ref (car binding)) " = " (sexp->es (cadr binding)) ";\n")) bindings)) (body->es body #t) "\n" " })()") (define-es-macro (let bindings . 
body) - (let* ((fparams (join ", " (map sexp->es - (map car bindings)))) - (fargs (join ", " (map sexp->es - (map cadr bindings))))) - (string-append "(function(" fparams "){\n" - (body->es body #t) "\n" - "})(" fargs ")"))) + (let* ((params (map car bindings)) + (fparams (join ", " (map tparam->es params))) + (args (map cadr bindings)) + (fargs (map sexp->es args))) + (string-append (es:envf + "(function(" fparams "){\n" + (env-params params) + (body->es body #t) "\n" + "})(" fargs ")")))) (define-es-macro (and . args) "(function(__and){\n" @@ -193,8 +234,39 @@ (map cdr clauses))) "}})()") - (define-es-macro (set! varid val) - (sexp->es varid) " = " (sexp->es val)))) + ;; We unfortunately have to worry about environment mutability in the + ;; current implementation. Since variables within environments are + ;; implemented using ECMAScript's prototype chain, any sets affect the + ;; object that the assignment is performed _on_, _not_ the prototype that + ;; contains the key being set. Therefore, we have to traverse up the + ;; prototype chain until we find the correct value, and set it on that + ;; object. + ;; + ;; There are other ways to accomplish this. For example, we could + ;; define setters for each variable and then not worry about + ;; traversing. However, since set! is rare, we wouldn't want to incur a + ;; performance hit for every single variable. + (define (%es:has-own-prop o id) + (string->es "Object.hasOwnProperty.call($$o, $$id)")) + (define (%es:proto-of o) + (string->es "Object.getPrototypeOf($$o)")) + (define (%es:envobj-for env id) + (if (and (string=? (es:typeof env) "object") + (not (es:null? env))) + (if (%es:has-own-prop env id) + env + (%es:envobj-for (%es:proto-of env) id)) + (error (string-append "unknown variable: `" id "'")))) + (define (%es:setenv env id val) + (let ((envo (%es:envobj-for env id))) + (string->es "$$envo[$$id] = $$val"))) + + ;; set! is then a simple application of `%es:setenv'. + (define-macro (set! 
varid val) + (`quote + (%es:setenv (%es:env) + (unquote (tname->id (token-lexeme varid))) + (unquote val)))))) @@ -230,6 +302,8 @@ (define (es:defined? x) (let ((id (tname->id x))) (string->es "eval('typeof ' + $$id) !== 'undefined'"))) + (define (es:null? x) + (string->es "$$x === null")) (define (es:typeof x) (string->es "typeof $$x")) @@ -343,6 +417,7 @@ (define (es:replace r repl s) (string->es "$$s.replace($$r, $$repl)")) + (define *fsdata* (if (string->es "typeof __fsinit === 'undefined'") (string->es "{}") @@ -404,9 +479,6 @@ ;; enlighten the curious reader. (cond-expand (string->es - ;; Stores macros for compiler runtime. - (string->es "const _macros = {}") - (define (cdfn-macro sexp) (define (%make-macro-proc sexp) ;; The syntax for a macro definition is the same as a procedure @@ -446,7 +518,7 @@ ;; abstractions that would make this dissertation unnecessary. (string->es "eval('(' + $$proc$_$es + ')')"))) - ;; We then store the macro by name in memory in `_macros'. When + ;; We then store the macro by name in memory in `_env.macros'. When ;; invoked, it will apply the result of the above generated procedure ;; to `macro-compile-result' (defined below), which will produce the ;; ECMAScript code resulting from the macro application. @@ -484,7 +556,7 @@ (let ((macro-proc (%make-macro-proc sexp)) (macro-id (token-value (caadr sexp)))) ; XXX (string->es - "_macros[$$macro$_$id] = function(){ + "_env.macros[$$macro$_$id] = function(){ return $$macro$_$compile$_$result( $$macro$_$proc.apply(this,arguments))};") ;; Because the macro procedure was evaluated at runtime, it would @@ -821,6 +893,44 @@ (ast-depth ast)))))) +;; Generate ECMAScript-friendly parameter name for the given token T. +;; +;; The generated name will not have any environment references and is +;; suitable only for the immediate scope. +(define (tparam->es t) + (tname->id (token-value t))) + + +;; Predicate determining whether NAME should be output verbatim as an +;; ECMAScript identifier. 
+;; +;; This only returns #t for numbers. +(define (tname-verbatim? name) + (es:match (es:regexp "^\\d+$") name)) + + +;; Generate ECMAScript to reference the variable associated with the token T +;; in the current environment. +;; +;; The "current" environment is relative to whatever context into which the +;; caller places this generated code---that is, the environment is +;; resolved by the runtime environment. +;; +;; If macro support is not yet compiled in, then this returns the identifier +;; name _without_ the environment, just as Birth. +(define (env-ref t) + (let ((name (if (token? t) + (token-value t) + t))) + (if (tname-verbatim? name) + name + (cond-expand + (cdfn-macro + (string-append "_env." (tname->id name))) + (else + (tname->id name)))))) + + ;; Generate ECMAScript-friendly name from the given id. ;; ;; A subset of special characters that are acceptable in Scheme are @@ -829,7 +939,7 @@ ;; reliably distinguished from one-another. Remember: this is temporary ;; code. (define (tname->id name) - (if (es:match (es:regexp "^\\d+$") name) + (if (tname-verbatim? name) name (string-append "$$" (es:replace (es:regexp "[^a-zA-Z0-9_]" "g") @@ -912,6 +1022,20 @@ (body->es rest ret))))) ; recurse +;; Place parameters PARAMS into the current environment. +;; +;; This is ugly so that Rebirth can support multiple implementations at +;; once---those with environment support and those without. +(define (env-params params) + (join "\n" + (map (lambda (param) + (if (string=? (token-value param) ".") + "" ; next param is the cdr + (string-append (env-ref param) " = " + (tparam->es param) ";"))) + params))) + + ;; Compile variable or procedure definition into ES ;; ;; This performs a crude check to determine whether a procedure definition @@ -930,7 +1054,7 @@ (let* ((dfn (cadr t)) (id (tname->id (token-value dfn))) (value (sexp->es (caddr t)))) - (string-append "let " id "=" value))) + (string-append "let " id "=" value ";_env." 
id " = " id))) ;; Compile procedure definition into an ES function definition @@ -938,15 +1062,21 @@ ;; This will fail if the given token is not a `define'. (define (cdfn-proc t id-override) ;; e.g. (define (foo ...) body) - (let* ((dfn (cadr t)) - (id (or id-override - (tname->id (token-value (car dfn))))) - (params (params->es (cdr dfn))) - (body (body->es (cddr t) #t))) + (let* ((dfn (cadr t)) + (id (or id-override + (tname->id (token-value (car dfn))))) + (params (cdr dfn)) + (fparams (params->es params)) + (fenv (env-params params)) + (body (body->es (cddr t) #t))) ;; this is the final format---each procedure becomes its own function ;; definition in ES (string-append - "function " id "(" params ")\n{\n" body "\n}"))) + "function " id "(" fparams ")\n{" + "return (function(_env){\n" fenv "\n" + body + "\n})(Object.create(_env));}" + (if (string=? id "") "" (string-append ";_env." id " = " id))))) ;; Quote an expression @@ -1064,13 +1194,13 @@ ;; Determine whether the given name NAME represents a macro. ;; ;; If `string->es' is not yet supported, then this procedure always -;; yields `#f'. Otherwise, the compiler runtime `_macros' is consulted. +;; yields `#f'. Otherwise, the compiler runtime `_env.macros' is consulted. ;; ;; See `cdfn-macro' for more information. (define (macro? name) (cond-expand (string->es - (string->es "_macros[$$name] !== undefined")) + (string->es "_env.macros[$$name] !== undefined")) (else #f))) @@ -1084,11 +1214,10 @@ ;; time within the context of the compiled program. (define (apply-proc-or-macro fn args) (if (macro? fn) - (string->es "_macros[$$fn].apply(null,$$args)") + (string->es "_env.macros[$$fn].apply(null,$$args)") ;; Procedures are produced as part of the compiler output. - (let* ((idfn (tname->id fn)) - (argstr (join ", " (map sexp->es args)))) - (string-append idfn "(" argstr ")")))) + (let ((argstr (join ", " (map sexp->es args)))) + (string-append (env-ref fn) "(" argstr ")")))) ;; Primitive special forms. 
@@ -1283,7 +1412,7 @@ ;; symbols have the same concerns as procedure definitions: the ;; identifiers generated need to be ES-friendly - (("symbol") (tname->id (token-value t))) + (("symbol") (env-ref t)) (else (error (string-append @@ -1300,10 +1429,11 @@ ;; The AST can be generated with `parse-lisp'. (define (rebirth->ecmascript ast) ;; compiled output, wrapped in a self-executing function to limit scope - ;; (note that we no longer depend on libprebirth) - (string-append "(function(){" + ;; and create the toplevel environment (note that we no longer depend on + ;; libprebirth) + (string-append "(function(_env){" (join "\n\n" (map sexp->es ast)) - "})();")) + "})({macros:{}});")) ;; at this point, this program can parse itself and output a CST (sans