677 lines
26 KiB
Scheme
677 lines
26 KiB
Scheme
|
;;;; ECMAScript compiler for Rebirth Lisp
|
|||
|
;;;;
|
|||
|
;;;; Copyright (C) 2017, 2018 Mike Gerwitz
|
|||
|
;;;;
|
|||
|
;;;; This file is part of Ulambda Scheme.
|
|||
|
;;;;
|
|||
|
;;;; Ulambda Scheme is free software: you can redistribute it and/or modify
|
|||
|
;;;; it under the terms of the GNU Affero General Public License as
|
|||
|
;;;; published by the Free Software Foundation, either version 3 of the
|
|||
|
;;;; License, or (at your option) any later version.
|
|||
|
;;;;
|
|||
|
;;;; This program is distributed in the hope that it will be useful,
|
|||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
;;;; GNU General Public License for more details.
|
|||
|
;;;;
|
|||
|
;;;; You should have received a copy of the GNU Affero General Public License
|
|||
|
;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
|
;;;;
|
|||
|
|
|||
|
|
|||
|
|
|||
|
;;; Commentary:
|
|||
|
|
|||
|
;;; THIS IS BOOTSTRAP CODE INTENDED FOR USE ONLY IN REBIRTH.
|
|||
|
;;;
|
|||
|
;;;
|
|||
|
;;; This defines the lexer, parser, and core of the compiler. Note that
|
|||
|
;;; most of the actual compiling happens with the macros defined in
|
|||
|
;;; `es.scm'.
|
|||
|
;;;
|
|||
|
;;; The core structure persists from Birth, along with much of the original
|
|||
|
;;; code. Certain parts have been simplified or moved into other files,
|
|||
|
;;; while other parts have been extended.
|
|||
|
;;;
|
|||
|
|
|||
|
;;; Code:
|
|||
|
|
|||
|
|
|||
|
|
|||
|
;; Convert source input into a string of tokens.
|
|||
|
;;
|
|||
|
;; This is the lexer. Whitespace is ignored. The grammar consists of
|
|||
|
;; simple s-expressions.
|
|||
|
;;
|
|||
|
;; Tokens are produced with `make-token'. The source SRC will be
|
|||
|
;; left-truncated as input is processed. POS exists for producing metadata
|
|||
|
;; for error reporting---it has no impact on parsing.
|
|||
|
;;
|
|||
|
;; This implementation was originally recursive and immutable, but the stack
|
|||
|
;; was being exhausted, so it was refactored into an inferior
|
|||
|
;; implementation. Note the use of `es:while' and `es:break'---these are
|
|||
|
;; quick fixes to the problem of stack exhaustion in browsers (where we have
|
|||
|
;; no control over the stack limit); proper tail call support will come
|
|||
|
;; later when we have a decent architecture in place.
|
|||
|
;;
|
|||
|
;; The result is a list of tokens. See `token' for the format.
|
|||
|
(define (lex src pos)
|
|||
|
(define (es:match-regexp re s)
|
|||
|
(es:match (es:regexp re) s))
|
|||
|
|
|||
|
(let ((toks (list)))
|
|||
|
(es:while #t ; browser stack workaround
|
|||
|
(let* ((ws (or (es:match-regexp "^\\s+"
|
|||
|
src)
|
|||
|
(list "")))
|
|||
|
(ws-len (string-length (car ws)))
|
|||
|
(trim (substring src ws-len)) ; ignore whitespace, if any
|
|||
|
(newpos (+ pos ws-len))) ; adj pos to account for removed ws
|
|||
|
|
|||
|
(if (string=? "" trim)
|
|||
|
(es:break) ; EOF and we're done
|
|||
|
|
|||
|
;; normally we'd use `string-ref' here, but then we'd have to
|
|||
|
;; implement Scheme characters, so let's keep this simple and keep
|
|||
|
;; with strings
|
|||
|
(let* ((ch (substring trim 0 1))
|
|||
|
(t (case ch
|
|||
|
;; comments extend until the end of the line
|
|||
|
((";") (let ((eol (es:match-regexp "^(.*?)(\\n|$)" trim)))
|
|||
|
(make-token "comment" (cadr eol) trim newpos)))
|
|||
|
|
|||
|
;; left and right parenthesis are handled in the same
|
|||
|
;; manner: they produce distinct tokens with
|
|||
|
;; single-character lexemes
|
|||
|
(("(") (make-token "open" ch trim newpos))
|
|||
|
((")") (make-token "close" ch trim newpos))
|
|||
|
|
|||
|
;; strings are delimited by opening and closing ASCII
|
|||
|
;; double quotes, which can be escaped with a
|
|||
|
;; backslash
|
|||
|
(("\"") (let ((str (es:match-regexp
|
|||
|
"^\"(|(?:.|\\\n)*?[^\\\\])\""
|
|||
|
trim)))
|
|||
|
(or str (parse-error
|
|||
|
src pos
|
|||
|
"missing closing string delimiter"))
|
|||
|
;; a string token consists of the entire
|
|||
|
;; string including quotes as its lexeme,
|
|||
|
;; but its value will be the value of the
|
|||
|
;; string without quotes due to the `str'
|
|||
|
;; match group (see `token')
|
|||
|
(make-token "string" str trim newpos)))
|
|||
|
|
|||
|
(else
|
|||
|
;; anything else is considered a symbol up until
|
|||
|
;; whitespace or any of the aforementioned
|
|||
|
;; delimiters
|
|||
|
(let ((symbol (es:match-regexp "^[^\\s()\"]+"
|
|||
|
trim)))
|
|||
|
(make-token "symbol" symbol trim newpos))))))
|
|||
|
|
|||
|
;; yikes---see notes in docblock with regards to why
|
|||
|
;; we're using mutators here
|
|||
|
(set! toks (append toks (list (car t))))
|
|||
|
(set! src (cadr t))
|
|||
|
(set! pos (caddr t))))))
|
|||
|
toks))
|
|||
|
|
|||
|
|
|||
|
|
|||
|
;; Throw an error with a window of surrounding source code.
|
|||
|
;;
|
|||
|
;; The "window" is simply ten characters to the left and right of the
|
|||
|
;; first character of the source input SRC that resulted in the error.
|
|||
|
;; It's a little more than useless.
|
|||
|
(define (parse-error src pos msg)
|
|||
|
(let ((window (substring src (- pos 10) (+ pos 10))))
|
|||
|
(error (string-append msg " (pos " pos "): " window)
|
|||
|
src)))
|
|||
|
|
|||
|
|
|||
|
|
|||
|
;; Produce a token, left-truncate src, and update pos.
|
|||
|
;;
|
|||
|
;; Unlike the JS Prebirth implementation which uses a key/value object,
|
|||
|
;; we're just using a simple list.
|
|||
|
;;
|
|||
|
;; The expected arguments are: the token type TYPE, the match group or
|
|||
|
;; string MATCH, left-truncated source code SRC, and the position POS
|
|||
|
;; relative to the original source code.
|
|||
|
(define (make-token type match src pos)
|
|||
|
(let* ((parts (if (list? match) match (list match match)))
|
|||
|
(lexeme (car parts))
|
|||
|
;; the value is the first group of the match (indicating what we
|
|||
|
;; are actually interested in), and the lexeme is the full match,
|
|||
|
;; which might include, for example, string delimiters
|
|||
|
(value (or (and (pair? (cdr parts))
|
|||
|
(cadr parts))
|
|||
|
lexeme))
|
|||
|
(len (string-length lexeme)))
|
|||
|
|
|||
|
;; produce token and recurse on `lex', left-truncating the source
|
|||
|
;; string to discard what we have already processed
|
|||
|
(list (list (quote token) type lexeme value pos)
|
|||
|
(substring src len)
|
|||
|
(+ pos len))))
|
|||
|
|
|||
|
|
|||
|
;; various accessor procedures for token lists (we're Birth Lisp here,
|
|||
|
;; so no record support or anything fancy!)
|
|||
|
(define (token? t) (and (pair? t) (symbol=? (quote token) (car t))))
|
|||
|
(define (token-type t) (cadr t))
|
|||
|
(define (token-lexeme t) (caddr t))
|
|||
|
(define (token-value t) (cadddr t))
|
|||
|
(define (token-pos t) (caddddr t))
|
|||
|
|
|||
|
|
|||
|
;; Produce an AST from the given string SRC of sexps
|
|||
|
;;
|
|||
|
;; This is essentially the CST with whitespace removed. It first invokes
|
|||
|
;; the lexer to produce a token string from the input sexps SRC. From this,
|
|||
|
;; it verifies only proper nesting (that SRC does not close sexps too early
|
|||
|
;; and that EOF isn't reached before all sexps are closed) and produces an
|
|||
|
;; AST that is an isomorphism of the original sexps.
|
|||
|
(define (parse-lisp src)
|
|||
|
;; accessor methods to make you and me less confused
|
|||
|
(define (ast-depth ast) (car ast))
|
|||
|
(define (ast-tree ast) (cadr ast))
|
|||
|
(define (ast-stack ast) (caddr ast))
|
|||
|
|
|||
|
|
|||
|
;; perform a leftmost reduction on the token string
|
|||
|
(define (toks->ast toks)
|
|||
|
(fold
|
|||
|
(lambda (token result)
|
|||
|
(let ((depth (ast-depth result))
|
|||
|
(xs (ast-tree result))
|
|||
|
(stack (ast-stack result))
|
|||
|
(type (token-type token))
|
|||
|
(pos (token-pos token)))
|
|||
|
|
|||
|
;; there are very few token types to deal with (again, this is a
|
|||
|
;; very simple bootstrap lisp)
|
|||
|
(case type
|
|||
|
;; ignore comments
|
|||
|
(("comment") result)
|
|||
|
|
|||
|
;; when beginning a new expression, place the expression
|
|||
|
;; currently being processed onto a stack, allocate a new list,
|
|||
|
;; and we'll continue processing into that new list
|
|||
|
(("open") (list (+ depth 1)
|
|||
|
(list)
|
|||
|
(cons xs stack)))
|
|||
|
|
|||
|
;; once we reach the end of the expression, pop the parent off of
|
|||
|
;; the stack and append the new list to it
|
|||
|
(("close") (if (zero? depth)
|
|||
|
(parse-error src pos
|
|||
|
"unexpected closing parenthesis")
|
|||
|
(list (- depth 1)
|
|||
|
(append (car stack) (list xs))
|
|||
|
(cdr stack))))
|
|||
|
|
|||
|
;; strings and symbols (we cheat and just consider everything,
|
|||
|
;; including numbers and such, to be symbols) are just copied
|
|||
|
;; in place
|
|||
|
(("string" "symbol") (list depth
|
|||
|
(append xs (list token))
|
|||
|
stack))
|
|||
|
|
|||
|
;; we should never encounter anything else unless there's a bug
|
|||
|
;; in the tokenizer or we forget a token type above
|
|||
|
(else (parse-error
|
|||
|
src pos (string-append
|
|||
|
"unexpected token `" type "'"))))))
|
|||
|
(list 0 (list) (list)) ; initial 0 depth; empty tree; expr stack
|
|||
|
toks))
|
|||
|
|
|||
|
|
|||
|
;; lex the input SRC and pass it to `toks->ast' to generate the AST;
|
|||
|
;; if the depth is non-zero after we're done, then we're unbalanced.
|
|||
|
(let* ((toks (lex src 0))
|
|||
|
(ast (toks->ast toks)))
|
|||
|
(if (zero? (ast-depth ast))
|
|||
|
(ast-tree ast)
|
|||
|
;; if we terminate at a non-zero depth, that means there ar still
|
|||
|
;; open sexps
|
|||
|
(error (string-append
|
|||
|
"unexpected end of input at depth "
|
|||
|
(ast-depth ast))))))
|
|||
|
|
|||
|
|
|||
|
;; Generate ECMAScript-friendly parameter name for the given token T.
|
|||
|
;;
|
|||
|
;; The generated name will not have any environment references and is
|
|||
|
;; suitable only for the immediate scope.
|
|||
|
(define (tparam->es t)
|
|||
|
(tname->id (token-value t)))
|
|||
|
|
|||
|
|
|||
|
;; Predicate determining whether NAME should be output verbatim as an
|
|||
|
;; ECMAScript identifier.
|
|||
|
;;
|
|||
|
;; This only returns #t for numbers.
|
|||
|
(define (tname-verbatim? name)
|
|||
|
(es:match (es:regexp "^-?(\\d+(\\.\\d+)?|\\.\\d+)$") name))
|
|||
|
|
|||
|
|
|||
|
;; Generate ECMAScript to reference the variable associated with the token T
|
|||
|
;; in the current environment.
|
|||
|
;;
|
|||
|
;; The "current" environment is relative to whatever context into which the
|
|||
|
;; caller places this generated code---that is, the environment is
|
|||
|
;; resolved by the runtime environment.
|
|||
|
;;
|
|||
|
;; If macro support is not yet compiled in, then this returns the identifier
|
|||
|
;; name _without_ the environment, just as Birth.
|
|||
|
(define (env-ref t)
|
|||
|
(let ((name (if (token? t)
|
|||
|
(token-value t)
|
|||
|
t)))
|
|||
|
(if (tname-verbatim? name)
|
|||
|
name
|
|||
|
(cond-expand
|
|||
|
(cdfn-macro
|
|||
|
(string-append "_env." (tname->id name)))
|
|||
|
(else
|
|||
|
(tname->id name))))))
|
|||
|
|
|||
|
|
|||
|
;; Generate ECMAScript-friendly name from the given id.
|
|||
|
;;
|
|||
|
;; A subset of special characters that are acceptable in Scheme are
|
|||
|
;; converted in an identifiable manner; others are simply converted to `$'
|
|||
|
;; in a catch-all and therefore could result in conflicts and cannot be
|
|||
|
;; reliably distinguished from one-another. Remember: this is temporary
|
|||
|
;; code.
|
|||
|
(define (tname->id name)
|
|||
|
(if (tname-verbatim? name)
|
|||
|
name
|
|||
|
(string-append
|
|||
|
"$$" (es:replace (es:regexp "[^a-zA-Z0-9_]" "g")
|
|||
|
(lambda (c)
|
|||
|
(case c
|
|||
|
(("-") "$_$")
|
|||
|
(("?") "$7$")
|
|||
|
(("@") "$a$")
|
|||
|
(("!") "$b$")
|
|||
|
((">") "$g$")
|
|||
|
(("#") "$h$")
|
|||
|
(("*") "$k$")
|
|||
|
(("<") "$l$")
|
|||
|
(("&") "$n$")
|
|||
|
(("%") "$o$")
|
|||
|
(("+") "$p$")
|
|||
|
(("=") "$q$")
|
|||
|
(("^") "$v$")
|
|||
|
(("/") "$w$")
|
|||
|
(("$") "$$")
|
|||
|
(else "$")))
|
|||
|
name))))
|
|||
|
|
|||
|
;; Join a list of strings XS on a delimiter DELIM
|
|||
|
(define (join delim xs)
|
|||
|
(if (pair? xs)
|
|||
|
(fold (lambda (x str)
|
|||
|
(string-append str delim x))
|
|||
|
(car xs)
|
|||
|
(cdr xs))
|
|||
|
""))
|
|||
|
|
|||
|
|
|||
|
;; Compile parameter list.
|
|||
|
;;
|
|||
|
;; This takes the value of the symbol and outputs it (formatted), delimited
|
|||
|
;; by commas.
|
|||
|
;;
|
|||
|
;; Since we do not support actual pairs (yet), the "." syntax that normally
|
|||
|
;; denotes the cdr is retained and presents itself here. The form "(arg1,
|
|||
|
;; arg2 . rest)" creates a list `rest' containing all remaining arguments
|
|||
|
;; after that point. Conveniently, ECMAScript Harmony supports this
|
|||
|
;; natively with the "..." syntax.
|
|||
|
(define (params->es params)
|
|||
|
(define (%param-conv params)
|
|||
|
(let* ((param (car params))
|
|||
|
(name (token-value param))
|
|||
|
(id (tname->id name))
|
|||
|
(rest (cdr params)))
|
|||
|
(if (string=? name ".")
|
|||
|
(list (string-append
|
|||
|
"..." (car (%param-conv rest))))
|
|||
|
(if (pair? rest)
|
|||
|
(cons id (%param-conv rest))
|
|||
|
(list id)))))
|
|||
|
|
|||
|
(if (pair? params)
|
|||
|
(join "," (%param-conv params))
|
|||
|
""))
|
|||
|
|
|||
|
|
|||
|
;; Compile body s-expressions into ECMAScript
|
|||
|
;;
|
|||
|
;; This produces a 1:1 mapping of body XS s-expressions to ES statements,
|
|||
|
;; recursively. The heavy lifting is done by `sexp->es'.
|
|||
|
(define (body->es xs ret)
|
|||
|
;; recursively process body XS until we're out of pairs
|
|||
|
(if (not (pair? xs))
|
|||
|
""
|
|||
|
(let* ((x (car xs))
|
|||
|
(rest (cdr xs))
|
|||
|
(more? (or (not ret) (pair? rest))))
|
|||
|
;; the result is a semicolon-delimited string of statements, with
|
|||
|
;; the final statement prefixed with `return' unless (not ret)
|
|||
|
(string-append
|
|||
|
" "
|
|||
|
(if more? "" "return ") ; prefix with `return' if last body exp
|
|||
|
(sexp->es x) ";" ; process current body expression
|
|||
|
(if (pair? rest) "\n" "")
|
|||
|
(body->es rest ret))))) ; recurse
|
|||
|
|
|||
|
|
|||
|
;; Place parameters PARAMS into the current environment.
|
|||
|
;;
|
|||
|
;; This is ugly so that Rebirth can support multiple implementations at
|
|||
|
;; once---those with environment support and those without.
|
|||
|
(define (env-params params)
|
|||
|
(join "\n"
|
|||
|
(map (lambda (param)
|
|||
|
(if (string=? (token-value param) ".")
|
|||
|
"" ; next param is the cdr
|
|||
|
(string-append (env-ref param) " = "
|
|||
|
(tparam->es param) ";")))
|
|||
|
params)))
|
|||
|
|
|||
|
|
|||
|
;; Compile variable or procedure definition into ES
|
|||
|
;;
|
|||
|
;; This performs a crude check to determine whether a procedure definition
|
|||
|
;; was supplied: if the cadr of the given token T is itself token, then it
|
|||
|
;; is considered to be a variable.
|
|||
|
(define (cdfn t)
|
|||
|
(if (token? (cadr t))
|
|||
|
(cdfn-var t) ;; (define foo ...)
|
|||
|
(cdfn-proc t #f))) ;; (define (foo ...) ...)
|
|||
|
|
|||
|
|
|||
|
;; Compile variable definition into ES
|
|||
|
;;
|
|||
|
;; This compiles the token T into a simple let-assignment.
|
|||
|
(define (cdfn-var t)
|
|||
|
(let* ((dfn (cadr t))
|
|||
|
(id (tname->id (token-value dfn)))
|
|||
|
(value (sexp->es (caddr t))))
|
|||
|
(string-append "let " id "=" value ";_env." id " = " id)))
|
|||
|
|
|||
|
|
|||
|
;; Compile procedure definition into an ES function definition
|
|||
|
;;
|
|||
|
;; This will fail if the given token is not a `define'.
|
|||
|
;;
|
|||
|
;; The output does something peculiar: it not only assigns to the active
|
|||
|
;; scope, but also to the root of the environment, which has the effect of
|
|||
|
;; making the procedure available to _everything_. The reason for this is a
|
|||
|
;; kluge to make procedures available to macros during compilation without
|
|||
|
;; having to wait for a rebirth repass. But this does have its issues and
|
|||
|
;; it's important to understand that this is a temporary solution until
|
|||
|
;; Ulambda has some level of static analysis.
|
|||
|
;;
|
|||
|
;; Care needs to be taken to make sure, regardless of scope, procedures of
|
|||
|
;; the same name are not defined if used within macros, otherwise the latter
|
|||
|
;; (again, regardless of scope) in the file will take precedence. This
|
|||
|
;; behavior will not be observed by execution of compiled code, though,
|
|||
|
;; because the scope will have the correct version. However, if a procedure
|
|||
|
;; is _not_ in scope, then rather than being undefined, the one assigned to
|
|||
|
;; root would be available.
|
|||
|
;;
|
|||
|
;; The generated ECMAScript is evaluated immediately to make it available to
|
|||
|
;; macros during the compilation process.
|
|||
|
(define (cdfn-proc t id-override)
|
|||
|
;; e.g. (define (foo ...) body)
|
|||
|
(let* ((dfn (cadr t))
|
|||
|
(id (or id-override
|
|||
|
(tname->id (token-value (car dfn)))))
|
|||
|
(named? (not (string=? id "")))
|
|||
|
(params (cdr dfn))
|
|||
|
(fparams (params->es params))
|
|||
|
(fenv (env-params params))
|
|||
|
(body (body->es (cddr t) #t)))
|
|||
|
;; this is the final format---each procedure becomes its own function
|
|||
|
;; definition in ES
|
|||
|
(let ((es (string-append
|
|||
|
"function " id "(" fparams ")\n{"
|
|||
|
"return (function(_env){\n" fenv "\n"
|
|||
|
body
|
|||
|
"\n})(Object.create(_env));}"
|
|||
|
(if named?
|
|||
|
(string-append ";_env." id " = " id
|
|||
|
";_env.root." id " = " id ";")
|
|||
|
""))))
|
|||
|
;; Immediately evaluate to make available to macros during
|
|||
|
;; compilation. See procedure notes above.
|
|||
|
(cond-expand
|
|||
|
(string->es
|
|||
|
(if named? (string->es "eval($$es)"))))
|
|||
|
es)))
|
|||
|
|
|||
|
|
|||
|
;; Quote an expression
|
|||
|
;;
|
|||
|
;; If SEXP is a token, produce an ECMAScript Symbol. Otherwise,
|
|||
|
;; recursively apply to each element in the list.
|
|||
|
;;
|
|||
|
;; TODO: This implementation isn't wholly correct---numbers, for example,
|
|||
|
;; should not be converted to symbols, as they already are one.
|
|||
|
(define (quote-sexp sexp)
|
|||
|
(if (token? sexp)
|
|||
|
(case (token-type sexp)
|
|||
|
(("string") (sexp->es sexp))
|
|||
|
(else
|
|||
|
(string-append "Symbol.for('" (token-value sexp) "')")))
|
|||
|
(string-append
|
|||
|
"[" (join "," (map quote-sexp sexp)) "]")))
|
|||
|
|
|||
|
|
|||
|
;; Quasiquote an expression
|
|||
|
;;
|
|||
|
;; A quasiquoted expression acts just like a quoted expression with one
|
|||
|
;; notable exception---quoting can be escaped using special forms. For
|
|||
|
;; example, each of these are equivalent:
|
|||
|
;;
|
|||
|
;; (quasiquote (a 1 2 (unquote (eq? 3 4))))
|
|||
|
;; (list (quote a) 1 2 (eq? 3 4))
|
|||
|
;; (quasiquote (a (unquote-splicing (list 1 2)) (unquote (eq? 3 4))))
|
|||
|
;;
|
|||
|
;; TODO/WARNING: Normally "(quasiquote a (unquote-splicing b))" would
|
|||
|
;; produce "(a . b)" in a proper Lisp, but we do not yet support proper
|
|||
|
;; pairs at the time that this procedure was written; all cdrs are assumed
|
|||
|
;; to be lists. So do not do that---always splice lists.
|
|||
|
(define (quasiquote-sexp sexp)
|
|||
|
;; get type of token at car of pair, unless not a pair
|
|||
|
(define (%sexp-maybe-type sexp)
|
|||
|
(and (pair? sexp)
|
|||
|
(token? (car sexp))
|
|||
|
(token-value (car sexp))))
|
|||
|
|
|||
|
;; recursively process the sexp, handling various types of unquoting
|
|||
|
(define (%quote-maybe sexp delim)
|
|||
|
(if (pair? sexp)
|
|||
|
(let* ((item (car sexp))
|
|||
|
(rest (cdr sexp))
|
|||
|
(type (%sexp-maybe-type item))
|
|||
|
(add-delim (not (or (string=? type "unquote-splicing")
|
|||
|
(string=? type "unquote@")))))
|
|||
|
(string-append
|
|||
|
(case type
|
|||
|
;; escape quoting, nest within
|
|||
|
(("unquote")
|
|||
|
(string-append (if delim "," "")
|
|||
|
(sexp->es (cadr item))))
|
|||
|
|
|||
|
;; escape quoting, splice list into parent expression
|
|||
|
;; (lazy kluge warning), along with an alias for brevity
|
|||
|
;; given that we lack the ",@" syntax right now
|
|||
|
(("unquote-splicing" "unquote@")
|
|||
|
(string-append
|
|||
|
"]).concat(" (sexp->es (cadr item)) ").concat(["))
|
|||
|
|
|||
|
;; anything else, we're still quasiquoting recursively
|
|||
|
(else (string-append (if delim "," "")
|
|||
|
(quasiquote-sexp item))))
|
|||
|
|
|||
|
;; continue processing this list
|
|||
|
(%quote-maybe rest add-delim)))
|
|||
|
""))
|
|||
|
|
|||
|
;; tokens fall back to normal quoting
|
|||
|
(if (token? sexp)
|
|||
|
(quote-sexp sexp)
|
|||
|
(string-append
|
|||
|
"([" (%quote-maybe sexp #f) "])")))
|
|||
|
|
|||
|
|
|||
|
;; Statically expand expressions based on implementation features
|
|||
|
;;
|
|||
|
;; Support for `cond-expand' allows Rebirth to introduce new features each
|
|||
|
;; time that it is compiled. If matched, expressions will be evaluated as
|
|||
|
;; if they were entered in place of the `cond-expand' itself; otherwise,
|
|||
|
;; the entire `cond-expand' expression as a whole will be discarded.
|
|||
|
;;
|
|||
|
;; Birth will always discard `cond-expand' expressions unless they contain
|
|||
|
;; an `else' clause, which permits us to compile on the first pass without
|
|||
|
;; error.
|
|||
|
(define (expand-cond-expand args)
|
|||
|
(if (pair? args)
|
|||
|
(let* ((clause (car args))
|
|||
|
(feature (token-value (car clause)))
|
|||
|
(body (cdr clause)))
|
|||
|
;; now we get meta
|
|||
|
(cond-expand
|
|||
|
(string->es
|
|||
|
(case feature
|
|||
|
(("string->es" "else") (body->es body #f))
|
|||
|
(else (if (es:defined? feature)
|
|||
|
(body->es body #f)
|
|||
|
(expand-cond-expand (cdr args))))))
|
|||
|
;; if we're not yet compiled with Rebirth, then string->es will
|
|||
|
;; not yet be available---but it _will_ be in Rebirth, so
|
|||
|
;; compile cond-expand such that it marks it as supported
|
|||
|
(else
|
|||
|
(case feature
|
|||
|
;; these two are always supported in Rebirth Lisp
|
|||
|
(("string->es" "else") (body->es body #f))
|
|||
|
;; keep recursing until we find something (this allows us to
|
|||
|
;; short-circuit, most notably with "else")
|
|||
|
(else
|
|||
|
(expand-cond-expand (cdr args)))))))
|
|||
|
""))
|
|||
|
|
|||
|
|
|||
|
;; Determine whether the given name NAME represents a macro.
|
|||
|
;;
|
|||
|
;; If `string->es' is not yet supported, then this procedure always
|
|||
|
;; yields `#f'. Otherwise, the compiler runtime `_env.macros' is consulted.
|
|||
|
;;
|
|||
|
;; See `cdfn-macro' for more information.
|
|||
|
(define (macro? name)
|
|||
|
(cond-expand
|
|||
|
(string->es
|
|||
|
(string->es "_env.macros[$$name] !== undefined"))
|
|||
|
(else #f)))
|
|||
|
|
|||
|
|
|||
|
;; Determine if FN is a procedure or macro and apply it accordingly with
|
|||
|
;; arguments ARGS.
|
|||
|
;;
|
|||
|
;; These actions represent two separate environments: If a macro, then the
|
|||
|
;; call needs to be executed immediately within the context of the compiler
|
|||
|
;; runtime. Otherwise, procedure applications are simply compiled to be
|
|||
|
;; produced with the rest of the compiler output and will be run at a later
|
|||
|
;; time within the context of the compiled program.
|
|||
|
(define (apply-proc-or-macro fn args)
|
|||
|
(if (macro? fn)
|
|||
|
(string->es "_env.macros[$$fn].apply(null,$$args)")
|
|||
|
;; Procedures are produced as part of the compiler output.
|
|||
|
(let ((argstr (join ", " (map sexp->es args))))
|
|||
|
(string-append (env-ref fn) "(" argstr ")"))))
|
|||
|
|
|||
|
|
|||
|
;; Primitive special forms.
|
|||
|
;;
|
|||
|
;; These are forms that cannot be re-written as macros because of
|
|||
|
;; chicken-and-egg issues. Since the Rebirth compiler is temporary, we're
|
|||
|
;; not going to worry about getting rid of the rest of these.
|
|||
|
;;
|
|||
|
;; String values are simple function aliases. Function values take over
|
|||
|
;; the compilation of that function and allow for defining special forms
|
|||
|
;; (in place of macro support). The first argument FN is the name of the
|
|||
|
;; function/procedure/form, and ARGS is the list of arguments.
|
|||
|
;;
|
|||
|
;; For the forms previously defined here in Birth, see `fnmap-premacro' in
|
|||
|
;; `es.scm'.
|
|||
|
(define (fnmap fn args t)
|
|||
|
(case fn
|
|||
|
;; output raw code into the compiled ECMAScript (what could go wrong?)
|
|||
|
(("string->es")
|
|||
|
(token-value (car args)))
|
|||
|
|
|||
|
;; very primitive cond-expand
|
|||
|
(("cond-expand") (expand-cond-expand args))
|
|||
|
|
|||
|
;; Note that the unquote forms are only valid within a quasiquote; see
|
|||
|
;; that procedure for the handling of those forms. Since we do not
|
|||
|
;; support the special prefix form, we also offer "`quote" as a
|
|||
|
;; shorthand for quasiquote.
|
|||
|
(("quote") (quote-sexp (car args)))
|
|||
|
(("quasiquote" "`quote") (quasiquote-sexp (car args)))
|
|||
|
|
|||
|
(("define") (cdfn t))
|
|||
|
(("define-macro") (cdfn-macro t)) ; not defined until string->es cond
|
|||
|
|
|||
|
;; Defining `include' is trivial right now since we're not doing any
|
|||
|
;; sort of static analysis---we just need to start compilation of the
|
|||
|
;; requested file and inline it right where we are. Note that this
|
|||
|
;; doesn't enclose the file in `begin', so this isn't yet proper.
|
|||
|
(("include") (rebirth->ecmascript
|
|||
|
(parse-lisp
|
|||
|
(es:file->string (token-value (car args))))))
|
|||
|
|
|||
|
;; If we have macro support (`cdfn-macro'), then assume that they exist
|
|||
|
;; and try to use them; otherwise, continue to use built-in forms, which
|
|||
|
;; have been moved into `fnmap-premacro').
|
|||
|
(else
|
|||
|
(cond-expand
|
|||
|
(cdfn-macro (apply-proc-or-macro fn args))
|
|||
|
(else (fnmap-premacro fn args t))))))
|
|||
|
|
|||
|
|
|||
|
;; Convert s-expressions or scalar into ECMAScript
|
|||
|
;;
|
|||
|
;; T may be either an array of tokens or a primitive token (e.g. string,
|
|||
|
;; symbol). This procedure is applied recursively to T as needed if T is
|
|||
|
;; a list.
|
|||
|
(define (sexp->es t)
|
|||
|
(if (not (list? t))
|
|||
|
(error "unexpected non-list for sexp->es token"))
|
|||
|
|
|||
|
(if (token? t)
|
|||
|
(case (token-type t)
|
|||
|
;; strings output as-is (note that we don't escape double quotes,
|
|||
|
;; because the method of escaping them is the same in Scheme as it
|
|||
|
;; is in ECMAScript---a backslash)
|
|||
|
(("string") (string-append "\"" (token-value t) "\""))
|
|||
|
|
|||
|
;; symbols have the same concerns as procedure definitions: the
|
|||
|
;; identifiers generated need to be ES-friendly
|
|||
|
(("symbol") (env-ref t))
|
|||
|
|
|||
|
(else (error
|
|||
|
(string-append
|
|||
|
"cannot compile unknown token `" (token-type t) "'"))))
|
|||
|
|
|||
|
;; otherwise, process the expression
|
|||
|
(fnmap (token-value (car t))
|
|||
|
(cdr t)
|
|||
|
t)))
|