rebirth: Unencapsulate all procedures

Many of the procedures were encapsulated within `prebirth->ecmascript';
these have been moved out so that they can be accessed by other procedures,
allowing me to organize the code how I please.

Which also makes me realize that the procedure name is incorrect.

* build-aux/bootstrap/rebirth.scm: Move all procedures out of
    `prebirth->ecmascript'.
master
Mike Gerwitz 2017-12-08 23:55:16 -05:00
parent a839301a12
commit 0f9b034a82
Signed by: mikegerwitz
GPG Key ID: 8C917B7F5DC51BA2
1 changed files with 380 additions and 378 deletions

View File

@ -382,6 +382,7 @@
(define (ast-tree ast) (cadr ast))
(define (ast-stack ast) (caddr ast))
;; perform a leftmost reduction on the token string
(define (toks->ast toks)
(fold
@ -443,388 +444,389 @@
(ast-depth ast))))))
;; Generate ECMAScript-friendly name from the given id.
;;
;; A subset of special characters that are acceptable in Scheme are
;; converted in an identifiable manner; others are simply converted to `$'
;; in a catch-all and therefore could result in conflicts and cannot be
;; reliably distinguished from one-another. Remember: this is temporary
;; code.
(define (tname->id name)
(if (js:match (js:regexp "^\\d+$") name)
name
(string-append
"$$" (js:replace (js:regexp "[^a-zA-Z0-9_]" "g")
(lambda (c)
(case c
(("-") "$_$")
(("?") "$7$")
(("@") "$a$")
(("!") "$b$")
((">") "$g$")
(("#") "$h$")
(("*") "$k$")
(("<") "$l$")
(("&") "$n$")
(("%") "$o$")
(("+") "$p$")
(("=") "$q$")
(("^") "$v$")
(("/") "$w$")
(("$") "$$")
(else "$")))
name))))
;; Join a list of strings XS on a delimiter DELIM
(define (join delim xs)
(if (pair? xs)
(fold (lambda (x str)
(string-append str delim x))
(car xs)
(cdr xs))
""))
;; Compile parameter list.
;;
;; This simply takes the value of the symbol and outputs it (formatted),
;; delimited by commas.
(define (params->es params)
(join ", " (map (lambda (t)
(tname->id (token-value t)))
params)))
;; Compile body s-expressions into ECMAScript
;;
;; This produces a 1:1 mapping of body XS s-expressions to ES statements,
;; recursively. The heavy lifting is done by `sexp->es'.
(define (body->es xs ret)
;; recursively process body XS until we're out of pairs
(if (not (pair? xs))
""
(let* ((x (car xs))
(rest (cdr xs))
(more? (or (not ret) (pair? rest))))
;; the result is a semicolon-delimited string of statements, with
;; the final statement prefixed with `return' unless (not ret)
(string-append
" "
(if more? "" "return ") ; prefix with `return' if last body exp
(sexp->es x) ";" ; process current body expression
(if (pair? rest) "\n" "")
(body->es rest ret))))) ; recurse
;; Compile variable or procedure definition into ES
;;
;; This performs a crude check to determine whether a procedure definition
;; was supplied: if the cadr of the given token T is itself token, then it
;; is considered to be a variable.
(define (cdfn t)
(if (token? (cadr t))
(cdfn-var t) ;; (define foo ...)
(cdfn-proc t))) ;; (define (foo ...) ...)
;; Compile variable definition into ES
;;
;; This compiles the token T into a simple let-assignment.
(define (cdfn-var t)
(let* ((dfn (cadr t))
(id (tname->id (token-value dfn)))
(value (sexp->es (caddr t))))
(string-append "let " id "=" value)))
;; Compile procedure definition into an ES function definition
;;
;; This will fail if the given token is not a `define'.
(define (cdfn-proc t)
;; e.g. (define (foo ...) body)
(let* ((dfn (cadr t))
(id (tname->id (token-value (car dfn))))
(params (params->es (cdr dfn)))
(body (body->es (cddr t) #t)))
;; this is the final format---each procedure becomes its own function
;; definition in ES
(string-append
"function " id "(" params ")\n{\n" body "\n};")))
;; Quote an expression
;;
;; If SEXP is a token, produce an ECMAScript Symbol. Otherwise,
;; recursively apply to each element in the list.
;;
;; TODO: This implementation isn't wholly correct---numbers, for example,
;; should not be converted to symbols, as they already are one.
(define (quote-sexp sexp)
(if (token? sexp)
(string-append "Symbol.for('" (sexp->es sexp) "')")
(string-append
"[" (join "," (map quote-sexp sexp)) "]")))
;; Quasiquote an expression
;;
;; A quasiquoted expression acts just like a quoted expression with one
;; notable exception---quoting can be escaped using special forms. For
;; example, each of these are equivalent:
;;
;; (quasiquote (a 1 2 (unquote (eq? 3 4))))
;; (list (quote a) 1 2 (eq? 3 4))
;; (quasiquote (a (unquote-splicing (list 1 2)) (unquote (eq? 3 4))))
;;
;; TODO/WARNING: Normally "(quasiquote a (unquote-splicing b))" would
;; produce "(a . b)" in a proper Lisp, but we do not yet support proper
;; pairs at the time that this procedure was written; all cdrs are assumed
;; to be lists. So do not do that---always splice lists.
(define (quasiquote-sexp sexp)
;; get type of token at car of pair, unless not a pair
(define (-sexp-maybe-type sexp)
(and (pair? sexp)
(token? (car sexp))
(token-value (car sexp))))
;; recursively process the sexp, handling various types of unquoting
(define (-quote-maybe sexp delim)
(if (pair? sexp)
(let* ((item (car sexp))
(rest (cdr sexp))
(type (-sexp-maybe-type item))
(add-delim (not (string=? type "unquote-splicing"))))
(string-append
(case type
;; escape quoting, nest within
(("unquote")
(string-append (if delim "," "")
(sexp->es (cadr item))))
;; escape quoting, splice list into parent expression
;; (lazy kluge warning)
(("unquote-splicing")
(string-append
"]).concat(" (sexp->es (cadr item)) ").concat(["))
;; anything else, we're still quasiquoting recursively
(else (string-append (if delim "," "")
(quasiquote-sexp item))))
;; continue processing this list
(-quote-maybe rest add-delim)))
""))
;; tokens fall back to normal quoting
(if (token? sexp)
(quote-sexp sexp)
(string-append
"([" (-quote-maybe sexp #f) "])")))
;; Function/procedure aliases and special forms
;;
;; And here we have what is probably the most grotesque part of this file.
;;
;; This map allows for a steady transition---items can be removed as they
;; are written in Prebirth Lisp. This should give us a sane (but still
;; simple) environment with which we can start to self-host.
;;
;; String values are simple function aliases. Function values take over
;; the compilation of that function and allow for defining special forms
;; (in place of macro support). The first argument FN is the name of the
;; function/procedure/form, and ARS is the list of arguments.
;;
;; These are by no means meant to be solid implementations; notable
;; deficiencies are documented, but don't expect this to work properly in
;; every case. They will be replaced with proper R7RS implementations in
;; the future (Rebirth).
(define (fnmap fn args t)
(case fn
(("js:console")
(string-append "console.log(" (map sexp->es args) ")"))
(("js:error")
(string-append "console.error(" (map sexp->es args) ")"))
;; very primitive cond-expand
(("cond-expand")
(let* ((clause (car args))
(feature (token-value (car clause)))
(body (cdr clause)))
(case feature
(("string->es") (body->es body #f))
(else ""))))
;; output raw code into the compiled ECMAScript (what could go wrong?)
(("string->es")
(token-value (car args)))
;; yes, there are more important things to do until we get to the
;; point where it's worth implementing proper tail calls
(("js:while")
(let ((pred (car args))
(body (cdr args)))
(string-append
"(function(__whilebrk){"
"while (" (sexp->es pred) "){\n"
(body->es body #f) " if (__whilebrk) break;\n"
"}\n"
"})(false)")))
(("js:break") "__whilebrk=true")
;; note that the unquote forms are only valid within a quasiquote; see
;; that procedure for the handling of those forms
(("quote") (quote-sexp (car args)))
(("quasiquote") (quasiquote-sexp (car args)))
(("define") (cdfn t))
(("lambda")
(let ((fnargs (car args))
(body (cdr args)))
(string-append
"function(" (join ", " (map sexp->es fnargs)) "){\n"
(body->es body #t)
"}")))
;; simple if statement with optional else, wrapped in a self-executing
;; function to simplify code generation (e.g. returning an if)
(("if")
(let ((pred (car args))
(t (cadr args))
(f (and (pair? (cddr args))
(caddr args))))
(string-append
"(function(){"
"if (_truep(" (sexp->es pred) ")){return " (sexp->es t) ";}"
(if (pair? f)
(string-append "else{return " (sexp->es f) ";}")
"")
"})()")))
;; and short-circuits, so we need to implement it as a special form
;; rather than an alias
(("and")
(string-append
"(function(__and){\n"
(join "" (map (lambda (expr)
(string-append
"__and = " (sexp->es expr) "; "
"if (!_truep(__and)) return false;\n"))
args))
"return __and;})()"))
;; or short-circuits, so we need to implement it as a special form
;; rather than an alias
(("or")
(string-append
"(function(__or){\n"
(join "" (map (lambda (expr)
(string-append
"__or = " (sexp->es expr) "; "
"if (_truep(__or)) return __or;\n"))
args))
"return false;})()"))
;; (let ((binding val) ...) ...body), compiled as a self-executing
;; function which allows us to easily represent the return value of
;; the entire expression while maintaining local scope
(("let*")
(let ((bindings (car args))
(body (cdr args)))
(string-append
"(function(){\n"
(join "" (map (lambda (binding)
(let ((var (car binding))
(init (cadr binding)))
(string-append " let " (sexp->es var)
" = " (sexp->es init) ";\n")))
bindings))
(body->es body #t) "\n"
" })()")))
;; similar to the above, but variables cannot reference one-another
(("let")
(let* ((bindings (car args))
(body (cdr args))
(fparams (join ", " (map sexp->es
(map car bindings))))
(fargs (join ", " (map sexp->es
(map cadr bindings)))))
(string-append "(function(" fparams "){\n"
(body->es body #t) "\n"
"})(" fargs ")")))
;; and here I thought Prebirth Lisp would be simple...but having
;; `case' support really keeps things much more tidy, so here we are
;; (note that it doesn't support the arrow form, nor does it support
;; expressions as data)
(("case")
(let ((key (car args))
(clauses (cdr args)))
(string-append
"(function(){const _key=" (sexp->es key) ";\n"
"switch (_key){\n"
(join ""
(map (lambda (data exprs)
(string-append
(if (and (token? data)
(string=? "else" (token-lexeme data)))
"default:\n"
(join ""
(map (lambda (datum)
(string-append
"case " (sexp->es datum) ":\n"))
data)))
(body->es exprs #t) "\n"))
(map car clauses)
(map cdr clauses)))
"}})()")))
(("set!")
(let ((varid (car args))
(val (cadr args)))
(string-append (sexp->es varid) " = " (sexp->es val))))
;; normal procedure application
(else (let* ((idfn (tname->id fn))
(argstr (join ", " (map sexp->es args))))
(string-append idfn "(" argstr ")")))))
;; Convert s-expressions or scalar into ECMAScript
;;
;; T may be either an array of tokens or a primitive token (e.g. string,
;; symbol). This procedure is applied recursively to T as needed if T is
;; a list.
(define (sexp->es t)
(if (not (list? t))
(error "unexpected non-list for sexp->es token"))
(if (token? t)
(case (token-type t)
;; strings output as-is (note that we don't escape double quotes,
;; because the method of escaping them is the same in Scheme as it
;; is in ECMAScript---a backslash)
(("string") (string-append "\"" (token-value t) "\""))
;; symbols have the same concerns as procedure definitions: the
;; identifiers generated need to be ES-friendly
(("symbol") (tname->id (token-value t)))
(else (error
(string-append
"cannot compile unknown token `" (token-type t) "'"))))
;; otherwise, process the expression
(fnmap (token-value (car t))
(cdr t)
t)))
;; Compile Prebirth Lisp AST into ECMAScript.
;;
;; The AST can be generated with `parse-lisp'.
(define (prebirth->ecmascript ast)
;; Generate ECMAScript-friendly name from the given id.
;;
;; A subset of special characters that are acceptable in Scheme are
;; converted in an identifiable manner; others are simply converted to `$'
;; in a catch-all and therefore could result in conflicts and cannot be
;; reliably distinguished from one-another. Remember: this is temporary
;; code.
(define (tname->id name)
(if (js:match (js:regexp "^\\d+$") name)
name
(string-append
"$$" (js:replace (js:regexp "[^a-zA-Z0-9_]" "g")
(lambda (c)
(case c
(("-") "$_$")
(("?") "$7$")
(("@") "$a$")
(("!") "$b$")
((">") "$g$")
(("#") "$h$")
(("*") "$k$")
(("<") "$l$")
(("&") "$n$")
(("%") "$o$")
(("+") "$p$")
(("=") "$q$")
(("^") "$v$")
(("/") "$w$")
(("$") "$$")
(else "$")))
name))))
;; Join a list of strings XS on a delimiter DELIM
(define (join delim xs)
(if (pair? xs)
(fold (lambda (x str)
(string-append str delim x))
(car xs)
(cdr xs))
""))
;; Compile parameter list.
;;
;; This simply takes the value of the symbol and outputs it (formatted),
;; delimited by commas.
(define (params->es params)
(join ", " (map (lambda (t)
(tname->id (token-value t)))
params)))
;; Compile body s-expressions into ECMAScript
;;
;; This produces a 1:1 mapping of body XS s-expressions to ES statements,
;; recursively. The heavy lifting is done by `sexp->es'.
(define (body->es xs ret)
;; recursively process body XS until we're out of pairs
(if (not (pair? xs))
""
(let* ((x (car xs))
(rest (cdr xs))
(more? (or (not ret) (pair? rest))))
;; the result is a semicolon-delimited string of statements, with
;; the final statement prefixed with `return' unless (not ret)
(string-append
" "
(if more? "" "return ") ; prefix with `return' if last body exp
(sexp->es x) ";" ; process current body expression
(if (pair? rest) "\n" "")
(body->es rest ret))))) ; recurse
;; Compile variable or procedure definition into ES
;;
;; This performs a crude check to determine whether a procedure definition
;; was supplied: if the cadr of the given token T is itself token, then it
;; is considered to be a variable.
(define (cdfn t)
(if (token? (cadr t))
(cdfn-var t) ;; (define foo ...)
(cdfn-proc t))) ;; (define (foo ...) ...)
;; Compile variable definition into ES
;;
;; This compiles the token T into a simple let-assignment.
(define (cdfn-var t)
(let* ((dfn (cadr t))
(id (tname->id (token-value dfn)))
(value (sexp->es (caddr t))))
(string-append "let " id "=" value)))
;; Compile procedure definition into an ES function definition
;;
;; This will fail if the given token is not a `define'.
(define (cdfn-proc t)
;; e.g. (define (foo ...) body)
(let* ((dfn (cadr t))
(id (tname->id (token-value (car dfn))))
(params (params->es (cdr dfn)))
(body (body->es (cddr t) #t)))
;; this is the final format---each procedure becomes its own function
;; definition in ES
(string-append
"function " id "(" params ")\n{\n" body "\n};")))
;; Quote an expression
;;
;; If SEXP is a token, produce an ECMAScript Symbol. Otherwise,
;; recursively apply to each element in the list.
;;
;; TODO: This implementation isn't wholly correct---numbers, for example,
;; should not be converted to symbols, as they already are one.
(define (quote-sexp sexp)
(if (token? sexp)
(string-append "Symbol.for('" (sexp->es sexp) "')")
(string-append
"[" (join "," (map quote-sexp sexp)) "]")))
;; Quasiquote an expression
;;
;; A quasiquoted expression acts just like a quoted expression with one
;; notable exception---quoting can be escaped using special forms. For
;; example, each of these are equivalent:
;;
;; (quasiquote (a 1 2 (unquote (eq? 3 4))))
;; (list (quote a) 1 2 (eq? 3 4))
;; (quasiquote (a (unquote-splicing (list 1 2)) (unquote (eq? 3 4))))
;;
;; TODO/WARNING: Normally "(quasiquote a (unquote-splicing b))" would
;; produce "(a . b)" in a proper Lisp, but we do not yet support proper
;; pairs at the time that this procedure was written; all cdrs are assumed
;; to be lists. So do not do that---always splice lists.
(define (quasiquote-sexp sexp)
;; get type of token at car of pair, unless not a pair
(define (-sexp-maybe-type sexp)
(and (pair? sexp)
(token? (car sexp))
(token-value (car sexp))))
;; recursively process the sexp, handling various types of unquoting
(define (-quote-maybe sexp delim)
(if (pair? sexp)
(let* ((item (car sexp))
(rest (cdr sexp))
(type (-sexp-maybe-type item))
(add-delim (not (string=? type "unquote-splicing"))))
(string-append
(case type
;; escape quoting, nest within
(("unquote")
(string-append (if delim "," "")
(sexp->es (cadr item))))
;; escape quoting, splice list into parent expression
;; (lazy kluge warning)
(("unquote-splicing")
(string-append
"]).concat(" (sexp->es (cadr item)) ").concat(["))
;; anything else, we're still quasiquoting recursively
(else (string-append (if delim "," "")
(quasiquote-sexp item))))
;; continue processing this list
(-quote-maybe rest add-delim)))
""))
;; tokens fall back to normal quoting
(if (token? sexp)
(quote-sexp sexp)
(string-append
"([" (-quote-maybe sexp #f) "])")))
;; Function/procedure aliases and special forms
;;
;; And here we have what is probably the most grotesque part of this file.
;;
;; This map allows for a steady transition---items can be removed as they
;; are written in Prebirth Lisp. This should give us a sane (but still
;; simple) environment with which we can start to self-host.
;;
;; String values are simple function aliases. Function values take over
;; the compilation of that function and allow for defining special forms
;; (in place of macro support). The first argument FN is the name of the
;; function/procedure/form, and ARS is the list of arguments.
;;
;; These are by no means meant to be solid implementations; notable
;; deficiencies are documented, but don't expect this to work properly in
;; every case. They will be replaced with proper R7RS implementations in
;; the future (Rebirth).
(define (fnmap fn args t)
(case fn
(("js:console")
(string-append "console.log(" (map sexp->es args) ")"))
(("js:error")
(string-append "console.error(" (map sexp->es args) ")"))
;; very primitive cond-expand
(("cond-expand")
(let* ((clause (car args))
(feature (token-value (car clause)))
(body (cdr clause)))
(case feature
(("string->es") (body->es body #f))
(else ""))))
;; output raw code into the compiled ECMAScript (what could go wrong?)
(("string->es")
(token-value (car args)))
;; yes, there are more important things to do until we get to the
;; point where it's worth implementing proper tail calls
(("js:while")
(let ((pred (car args))
(body (cdr args)))
(string-append
"(function(__whilebrk){"
"while (" (sexp->es pred) "){\n"
(body->es body #f) " if (__whilebrk) break;\n"
"}\n"
"})(false)")))
(("js:break") "__whilebrk=true")
;; note that the unquote forms are only valid within a quasiquote; see
;; that procedure for the handling of those forms
(("quote") (quote-sexp (car args)))
(("quasiquote") (quasiquote-sexp (car args)))
(("define") (cdfn t))
(("lambda")
(let ((fnargs (car args))
(body (cdr args)))
(string-append
"function(" (join ", " (map sexp->es fnargs)) "){\n"
(body->es body #t)
"}")))
;; simple if statement with optional else, wrapped in a self-executing
;; function to simplify code generation (e.g. returning an if)
(("if")
(let ((pred (car args))
(t (cadr args))
(f (and (pair? (cddr args))
(caddr args))))
(string-append
"(function(){"
"if (_truep(" (sexp->es pred) ")){return " (sexp->es t) ";}"
(if (pair? f)
(string-append "else{return " (sexp->es f) ";}")
"")
"})()")))
;; and short-circuits, so we need to implement it as a special form
;; rather than an alias
(("and")
(string-append
"(function(__and){\n"
(join "" (map (lambda (expr)
(string-append
"__and = " (sexp->es expr) "; "
"if (!_truep(__and)) return false;\n"))
args))
"return __and;})()"))
;; or short-circuits, so we need to implement it as a special form
;; rather than an alias
(("or")
(string-append
"(function(__or){\n"
(join "" (map (lambda (expr)
(string-append
"__or = " (sexp->es expr) "; "
"if (_truep(__or)) return __or;\n"))
args))
"return false;})()"))
;; (let ((binding val) ...) ...body), compiled as a self-executing
;; function which allows us to easily represent the return value of
;; the entire expression while maintaining local scope
(("let*")
(let ((bindings (car args))
(body (cdr args)))
(string-append
"(function(){\n"
(join "" (map (lambda (binding)
(let ((var (car binding))
(init (cadr binding)))
(string-append " let " (sexp->es var)
" = " (sexp->es init) ";\n")))
bindings))
(body->es body #t) "\n"
" })()")))
;; similar to the above, but variables cannot reference one-another
(("let")
(let* ((bindings (car args))
(body (cdr args))
(fparams (join ", " (map sexp->es
(map car bindings))))
(fargs (join ", " (map sexp->es
(map cadr bindings)))))
(string-append "(function(" fparams "){\n"
(body->es body #t) "\n"
"})(" fargs ")")))
;; and here I thought Prebirth Lisp would be simple...but having
;; `case' support really keeps things much more tidy, so here we are
;; (note that it doesn't support the arrow form, nor does it support
;; expressions as data)
(("case")
(let ((key (car args))
(clauses (cdr args)))
(string-append
"(function(){const _key=" (sexp->es key) ";\n"
"switch (_key){\n"
(join ""
(map (lambda (data exprs)
(string-append
(if (and (token? data)
(string=? "else" (token-lexeme data)))
"default:\n"
(join ""
(map (lambda (datum)
(string-append
"case " (sexp->es datum) ":\n"))
data)))
(body->es exprs #t) "\n"))
(map car clauses)
(map cdr clauses)))
"}})()")))
(("set!")
(let ((varid (car args))
(val (cadr args)))
(string-append (sexp->es varid) " = " (sexp->es val))))
;; normal procedure application
(else (let* ((idfn (tname->id fn))
(argstr (join ", " (map sexp->es args))))
(string-append idfn "(" argstr ")")))))
;; Convert s-expressions or scalar into ECMAScript
;;
;; T may be either an array of tokens or a primitive token (e.g. string,
;; symbol). This procedure is applied recursively to T as needed if T is
;; a list.
(define (sexp->es t)
(if (not (list? t))
(error "unexpected non-list for sexp->es token"))
(if (token? t)
(case (token-type t)
;; strings output as-is (note that we don't escape double quotes,
;; because the method of escaping them is the same in Scheme as it
;; is in ECMAScript---a backslash)
(("string") (string-append "\"" (token-value t) "\""))
;; symbols have the same concerns as procedure definitions: the
;; identifiers generated need to be ES-friendly
(("symbol") (tname->id (token-value t)))
(else (error
(string-append
"cannot compile unknown token `" (token-type t) "'"))))
;; otherwise, process the expression
(fnmap (token-value (car t))
(cdr t)
t)))
;; compiled output, wrapped in a self-executing function to limit scope
;; (note that we no longer depend on libprebirth)
(string-append "(function(){"