rebirth: Primitive environment support

Note that this doesn't yet provide any useful abstractions for creating
discrete environments---it merely provides support for them moving
forward.  Hopefully.

This is the next big step toward rewriting the compiler as a series of macro
passes.  I'll write more on this later; it has profound consequences (well,
as a jump from Rebirth into Ulambda, which is a name that I haven't
mentioned until now).

It was yesterday that I also got word that I'll be speaking at
LibrePlanet 2018 in March.  This is great news, but unfortunate news for
this project---it has had very little time to begin with, and now
it's going to have even less until after the conference.

* build-aux/bootstrap/rebirth.scm: Add toplevel note about Ulambda.
    Add notes to "Step 2" regarding environments.
  (es:envf, %es:env): Add macros.
  (lambda, let*, let, set!): Macro forms use environments.
  (%es:has-own-prop, %es:proto-of, %es:envobj-for, %es:setenv): New
    procedures for `set!' macro.
  (es:null?): Add procedure.
  (_macros): Remove ES global (now using `_env.macros').
  (cdfn-macro, macro?): Use `_env.macros' in place of `macros'.
  (tparam->es, tname-verbatim?, env-ref, env-params): Add procedures.
  (tname->id): Use `tname-verbatim?', which extracts the digit check.
  (cdfn-var, cdfn-proc): Also assign to current environment.
  (apply-proc-or-macro): Use `_env.macros'.  Apply function from
    environment.
  (sexp->es)[symbol]: Generate environment reference.
  (rebirth->ecmascript): Generate toplevel environment.
master
Mike Gerwitz 2018-01-07 00:11:17 -05:00
parent 01990614cc
commit 62ab1ee732
Signed by: mikegerwitz
GPG Key ID: 8C917B7F5DC51BA2
1 changed files with 166 additions and 36 deletions

View File

@ -37,6 +37,12 @@
;;; Rererebirth, or Re*birth, or Reⁿbirth---it is a recursively self-hosting
;;; compiler. It adds features to itself each time it compiles itself.
;;;
;;; The ultimate goal after all of those compilation steps is to produce a
;;; compiler that can support the actual Scheme to be used by Gibble:
;;; Ulambda Scheme (abbreviated "Y Scheme", for an upside-down lambda, where
;;; "Y" stands for the Y combinator). More on the name in the future
;;; (perhaps see the Ulambda documentation).
;;;
;;; Note that we're dealing with a small subset of Scheme here, so certain
;;; things might be done differently given a proper implementation.
;;;
@ -81,6 +87,22 @@
;; case. They will be replaced with proper R7RS implementations in the
;; future.
;;
;; These macros have references to `_env', representing the current
;; environment. It is at this point that we also add primitive environment
;; support---this is essential as we move forward into purely macro-based
;; compilation passes, since we need to be able to have discrete
;; environments to run each of those passes in. More on that later.
;;
;; Scheme creates a new environment for every level of scope. Each
;; environment inherits the one above it, which produces the familiar
;; lexical scoping. As it turns out, this structure is represented
;; perfectly by ECMAScript's prototype model---a reference to a key on a
;; prototype chain is transparently proxied up the chain until it is
;; found. Therefore, environments are chained using a simple
;; `Object.create'. That's the easy solution for now, anyway; in the
;; future (Ulambda) we're likely to have a heap instead, once we have
;; static analysis.
;;
;; Initially, everything here was a near-exact copy of the `fnmap-premacro'
;; forms, re-arranged as needed for compilation (see limitations of
;; `cdfn-macro'), so all changes are clearly visible in the repository
@ -103,6 +125,17 @@
(`quote
(string->es (unquote@ body))))
;; Expand the body BODY into a new environment inherited from the current
;; environment. Environments are currently handled by the ES runtime, so
;; this is easy.
;;
;; The expansion is a `string-append' form producing an immediately-invoked
;; ES function whose `_env' parameter shadows the enclosing `_env' and is
;; bound to `Object.create(_env)', i.e. a fresh object whose prototype is the
;; enclosing environment.
;;
;; BODY is a list of string-producing expressions that are concatenated
;; directly after "return ", so together they must form a single ES
;; _expression_ (not a sequence of statements).
;;
;; Note that the new environment is created when the generated expression is
;; evaluated; if BODY is itself a function expression, the environment is
;; _not_ re-created each time that function is later called.
(define-macro (es:envf . body)
(`quote
(string-append
"(function(_env){"
"return "
(unquote@ body)
"})(Object.create(_env))")))
(define-macro (define-es-macro decl . body)
(quasiquote
(define-macro (unquote decl)
@ -110,6 +143,9 @@
(quote string->es)
(string-append (unquote@ body))))))
;; Reference to current environment object.
;;
;; Expands to the bare ES identifier `_env'. Which environment that denotes
;; is decided by ES lexical scoping at the point where the expansion is
;; placed: the innermost enclosing generated function that takes `_env' as
;; a parameter.
(define-es-macro (%es:env) "_env")
;; Don't worry---basic tail call support (at least for recursion) is
;; nearing, and then we can get rid of this ugly thing.
(define-es-macro (es:while pred . body)
@ -122,28 +158,33 @@
"__whilebrk=true")
;; Compile a `lambda' form into an ES function expression.
;;
;; FNARGS become ordinary ES parameters (see `tparam->es') and are then
;; copied into the environment by `env-params' so that BODY, whose variable
;; references are compiled by `env-ref', can see them.
;;
;; A fresh environment inheriting from the _defining_ environment is created
;; on every invocation, which is the same shape that `cdfn-proc' generates
;; for named procedures. Wrapping the whole function expression in
;; `es:envf' instead would create the environment only once, when the
;; lambda is evaluated: every call would then share a single `_env', so
;; recursive or re-entrant calls would overwrite one another's parameters
;; and closures created by different calls would all observe the values of
;; the most recent call.
;;
;; `Object.create(_env)' is evaluated in the outer generated function, so it
;; refers to the lexically enclosing `_env'; only the inner function sees
;; the new one. As with `cdfn-proc', BODY runs inside that inner function,
;; so ES `this' and `arguments' there belong to the inner function.
;;
;; The result is parenthesized so that it is always parsed as an expression.
(define-es-macro (lambda fnargs . body)
  "(function(" (join ", " (map tparam->es fnargs)) "){\n"
  "return (function(_env){\n"
  (env-params fnargs) "\n"
  (body->es body #t)
  "\n})(Object.create(_env));})")
(define-es-macro (let* bindings . body)
"(function(){\n"
(join "" (map (lambda (binding)
(string-append
" let " (sexp->es (car binding))
"let " (tparam->es (car binding)) ; TODO: BC; remove
" = " (env-ref (car binding))
" = " (sexp->es (cadr binding)) ";\n"))
bindings))
(body->es body #t) "\n"
" })()")
;; Compile a `let' form into an immediately-invoked ES function.
;;
;; BINDINGS is a list of (name value) pairs. The names become the ES
;; parameters of the generated function and the compiled values become its
;; arguments; `env-params' then copies each parameter into the new
;; environment created by `es:envf' so that BODY can reference it through
;; `_env'.
;;
;; Both FPARAMS and FARGS are joined into comma-separated _strings_ before
;; being spliced into the output. Passing the raw list of compiled
;; arguments to `string-append' only works where the host happens to
;; stringify a list with commas, so the join is done explicitly.
(define-es-macro (let bindings . body)
  (let* ((params  (map car bindings))
         (fparams (join ", " (map tparam->es params)))
         (args    (map cadr bindings))
         (fargs   (join ", " (map sexp->es args))))
    (string-append (es:envf
                    "(function(" fparams "){\n"
                    (env-params params)
                    (body->es body #t) "\n"
                    "})(" fargs ")"))))
(define-es-macro (and . args)
"(function(__and){\n"
@ -193,8 +234,39 @@
(map cdr clauses)))
"}})()")
(define-es-macro (set! varid val)
(sexp->es varid) " = " (sexp->es val))))
;; We unfortunately have to worry about environment mutability in the
;; current implementation. Since variables within environments are
;; implemented using ECMAScript's prototype chain, any sets affect the
;; object that the assignment is performed _on_, _not_ the prototype that
;; contains the key being set. Therefore, we have to traverse up the
;; prototype chain until we find the correct value, and set it on that
;; object.
;;
;; There are other ways to accomplish this. For example, we could
;; define setters for each variable and then not worry about
;; traversing. However, since set! is rare, we wouldn't want to incur a
;; performance hit for every single variable.
;; Whether the ES object O has ID as one of its _own_ properties, as
;; opposed to one inherited through its prototype chain.
;;
;; `$$o' and `$$id' in the ES string are the compiled names of the
;; parameters O and ID, so the parameter names must not be changed without
;; also changing the string.
(define (%es:has-own-prop o id)
(string->es "Object.hasOwnProperty.call($$o, $$id)"))
;; Yield the prototype of the ES object O (for an environment, that is the
;; environment it inherits from).
;;
;; `$$o' in the ES string is the compiled name of the parameter O.
(define (%es:proto-of o)
(string->es "Object.getPrototypeOf($$o)"))
;; Find the environment object that actually holds the variable ID,
;; beginning the search at ENV and walking up its prototype chain.
;;
;; Yields the first object in the chain that has ID as an own property.
;; If the chain is exhausted---that is, the value reached is no longer a
;; non-null object---an "unknown variable" error is raised.
(define (%es:envobj-for env id)
;; ES `typeof null' is "object", so null has to be excluded explicitly
(if (and (string=? (es:typeof env) "object")
(not (es:null? env)))
(if (%es:has-own-prop env id)
env
;; not defined on this object; try the environment it inherits from
(%es:envobj-for (%es:proto-of env) id))
(error (string-append "unknown variable: `" id "'"))))
;; Assign VAL to the variable ID on whichever environment object in ENV's
;; prototype chain owns it (see `%es:envobj-for'), rather than on ENV
;; itself, so that the assignment does not create a shadowing property.
;;
;; An error is raised by `%es:envobj-for' if no environment owns ID.
;;
;; `$$envo', `$$id' and `$$val' in the ES string are the compiled names of
;; the local ENVO and the parameters ID and VAL.
(define (%es:setenv env id val)
(let ((envo (%es:envobj-for env id)))
(string->es "$$envo[$$id] = $$val")))
;; set! is then a simple application of `%es:setenv'.
(define-macro (set! varid val)
(`quote
(%es:setenv (%es:env)
(unquote (tname->id (token-lexeme varid)))
(unquote val))))))
@ -230,6 +302,8 @@
;; Whether the name X refers to something defined in ES at runtime.
;;
;; X is converted to its ES identifier with `tname->id', and the check is
;; performed by evaluating `typeof <identifier>' through ES `eval' and
;; comparing the result against 'undefined'.
(define (es:defined? x)
(let ((id (tname->id x)))
(string->es "eval('typeof ' + $$id) !== 'undefined'")))
;; Whether X is the ES value `null'.
;;
;; The comparison is strict (`==='), so ES `undefined' is not considered
;; null.
(define (es:null? x)
(string->es "$$x === null"))
;; Yield the result of the ES `typeof' operator applied to X (a string such
;; as "object").
(define (es:typeof x)
(string->es "typeof $$x"))
@ -343,6 +417,7 @@
;; Apply the ES `replace' method of S with pattern R and replacement REPL,
;; yielding the result.
;;
;; Note the argument order: the subject S comes last.
(define (es:replace r repl s)
(string->es "$$s.replace($$r, $$repl)"))
(define *fsdata*
(if (string->es "typeof __fsinit === 'undefined'")
(string->es "{}")
@ -404,9 +479,6 @@
;; enlighten the curious reader.
(cond-expand
(string->es
;; Stores macros for compiler runtime.
(string->es "const _macros = {}")
(define (cdfn-macro sexp)
(define (%make-macro-proc sexp)
;; The syntax for a macro definition is the same as a procedure
@ -446,7 +518,7 @@
;; abstractions that would make this dissertation unnecessary.
(string->es "eval('(' + $$proc$_$es + ')')")))
;; We then store the macro by name in memory in `_macros'. When
;; We then store the macro by name in memory in `_env.macros'. When
;; invoked, it will apply the result of the above generated procedure
;; to `macro-compile-result' (defined below), which will produce the
;; ECMAScript code resulting from the macro application.
@ -484,7 +556,7 @@
(let ((macro-proc (%make-macro-proc sexp))
(macro-id (token-value (caadr sexp)))) ; XXX
(string->es
"_macros[$$macro$_$id] = function(){
"_env.macros[$$macro$_$id] = function(){
return $$macro$_$compile$_$result(
$$macro$_$proc.apply(this,arguments))};")
;; Because the macro procedure was evaluated at runtime, it would
@ -821,6 +893,44 @@
(ast-depth ast))))))
;; Generate ECMAScript-friendly parameter name for the given token T.
;;
;; The name is the result of applying `tname->id' to the token's value.
;;
;; The generated name will not have any environment references and is
;; suitable only for the immediate scope. Use `env-ref' when a reference
;; through the current environment is needed instead.
(define (tparam->es t)
(tname->id (token-value t)))
;; Predicate determining whether NAME should be output verbatim as an
;; ECMAScript identifier.
;;
;; This only matches names consisting entirely of one or more decimal
;; digits (no sign, decimal point, or exponent). The value yielded is
;; whatever `es:match' produces for the pattern.
(define (tname-verbatim? name)
(es:match (es:regexp "^\\d+$") name))
;; Generate ECMAScript to reference the variable associated with the token T
;; in the current environment.
;;
;; T may be either a token, in which case its `token-value' is used as the
;; name, or the name itself.
;;
;; The "current" environment is relative to whatever context into which the
;; caller places this generated code---that is, the environment is
;; resolved by the runtime environment.
;;
;; Names for which `tname-verbatim?' holds are yielded unchanged, with no
;; environment prefix.
;;
;; If macro support is not yet compiled in, then this returns the identifier
;; name _without_ the environment, just as Birth does.
(define (env-ref t)
(let ((name (if (token? t)
(token-value t)
t)))
(if (tname-verbatim? name)
name
;; `cdfn-macro' being available is used as the indicator that the
;; compiler doing the expansion supports macros (and so environments)
(cond-expand
(cdfn-macro
(string-append "_env." (tname->id name)))
(else
(tname->id name))))))
;; Generate ECMAScript-friendly name from the given id.
;;
;; A subset of special characters that are acceptable in Scheme are
@ -829,7 +939,7 @@
;; reliably distinguished from one-another. Remember: this is temporary
;; code.
(define (tname->id name)
(if (es:match (es:regexp "^\\d+$") name)
(if (tname-verbatim? name)
name
(string-append
"$$" (es:replace (es:regexp "[^a-zA-Z0-9_]" "g")
@ -912,6 +1022,20 @@
(body->es rest ret))))) ; recurse
;; Place parameters PARAMS into the current environment.
;;
;; PARAMS is a list of parameter tokens. For each one, an ES assignment
;; statement of the form `<env-ref> = <tparam->es>;' is generated, and the
;; statements are joined with newlines.
;;
;; This is ugly so that Rebirth can support multiple implementations at
;; once---those with environment support and those without. Without
;; environment support, `env-ref' yields the same bare identifier as
;; `tparam->es', so each statement is a harmless self-assignment.
(define (env-params params)
(join "\n"
(map (lambda (param)
;; the "." of a dotted parameter list is not itself a parameter, so
;; nothing is generated for it (leaving an empty line in the output)
(if (string=? (token-value param) ".")
"" ; next param is the cdr
(string-append (env-ref param) " = "
(tparam->es param) ";")))
params)))
;; Compile variable or procedure definition into ES
;;
;; This performs a crude check to determine whether a procedure definition
@ -930,7 +1054,7 @@
(let* ((dfn (cadr t))
(id (tname->id (token-value dfn)))
(value (sexp->es (caddr t))))
(string-append "let " id "=" value)))
(string-append "let " id "=" value ";_env." id " = " id)))
;; Compile procedure definition into an ES function definition
@ -938,15 +1062,21 @@
;; This will fail if the given token is not a `define'.
(define (cdfn-proc t id-override)
;; e.g. (define (foo ...) body)
(let* ((dfn (cadr t))
(id (or id-override
(tname->id (token-value (car dfn)))))
(params (params->es (cdr dfn)))
(body (body->es (cddr t) #t)))
(let* ((dfn (cadr t))
(id (or id-override
(tname->id (token-value (car dfn)))))
(params (cdr dfn))
(fparams (params->es params))
(fenv (env-params params))
(body (body->es (cddr t) #t)))
;; this is the final format---each procedure becomes its own function
;; definition in ES
(string-append
"function " id "(" params ")\n{\n" body "\n}")))
"function " id "(" fparams ")\n{"
"return (function(_env){\n" fenv "\n"
body
"\n})(Object.create(_env));}"
(if (string=? id "") "" (string-append ";_env." id " = " id)))))
;; Quote an expression
@ -1064,13 +1194,13 @@
;; Determine whether the given name NAME represents a macro.
;;
;; If `string->es' is not yet supported, then this procedure always
;; yields `#f'. Otherwise, the compiler runtime `_macros' is consulted.
;; yields `#f'. Otherwise, the compiler runtime `_env.macros' is consulted.
;;
;; See `cdfn-macro' for more information.
(define (macro? name)
(cond-expand
(string->es
(string->es "_macros[$$name] !== undefined"))
(string->es "_env.macros[$$name] !== undefined"))
(else #f)))
@ -1084,11 +1214,10 @@
;; time within the context of the compiled program.
(define (apply-proc-or-macro fn args)
(if (macro? fn)
(string->es "_macros[$$fn].apply(null,$$args)")
(string->es "_env.macros[$$fn].apply(null,$$args)")
;; Procedures are produced as part of the compiler output.
(let* ((idfn (tname->id fn))
(argstr (join ", " (map sexp->es args))))
(string-append idfn "(" argstr ")"))))
(let ((argstr (join ", " (map sexp->es args))))
(string-append (env-ref fn) "(" argstr ")"))))
;; Primitive special forms.
@ -1283,7 +1412,7 @@
;; symbols have the same concerns as procedure definitions: the
;; identifiers generated need to be ES-friendly
(("symbol") (tname->id (token-value t)))
(("symbol") (env-ref t))
(else (error
(string-append
@ -1300,10 +1429,11 @@
;; The AST can be generated with `parse-lisp'.
(define (rebirth->ecmascript ast)
;; compiled output, wrapped in a self-executing function to limit scope
;; (note that we no longer depend on libprebirth)
(string-append "(function(){"
;; and create the toplevel environment (note that we no longer depend on
;; libprebirth)
(string-append "(function(_env){"
(join "\n\n" (map sexp->es ast))
"})();"))
"})({macros:{}});"))
;; at this point, this program can parse itself and output a CST (sans