;;; Prebirth Lisp implemented in Prebirth Lisp (self-hosting) ;;; ;;; Copyright (C) 2017 Mike Gerwitz ;;; ;;; This file is part of Gibble. ;;; ;;; Gibble is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU Affero General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU Affero General Public License ;;; along with this program. If not, see . ;;; ;;; THIS IS TEMPORARY CODE that will be REWRITTEN IN GIBBLE LISP ITSELF after ;;; a very basic bootstrap is complete. It is retained as an important ;;; artifact for those who wish to build Gibble from scratch without using ;;; another version of Gibble itself. This is called "self-hosting". ;;; ;;; This is the Prebirth Lisp implementation of the JavaScript Prebirth ;;; compiler, found in `prebirth.js'---that compiler can be used to compile ;;; this compiler, which can then be used to compile itself, completing the ;;; bootstrapping process. This process is termed "Birth". ;;; ;;; This is largely a 1:1 translation of `prebirth.js'. Note that we're ;;; dealing with a small subset of Scheme here, so certain things might be ;;; done differently given a proper implementation. See that file for ;;; terminology. ;; pair selection (define (cadr xs) (car (cdr xs))) (define (caadr xs) (car (car (cdr xs)))) (define (caddr xs) (car (cdr (cdr xs)))) (define (js:match-regexp re s) (js:match (js:regexp re) s)) ;; Convert source input into a string of tokens. ;; ;; This is the lexer. Whitespace is ignored. The grammar consists of ;; simple s-expressions. ;; ;; This procedure is mutually recursive with `token'. It expects that ;; the source SRC will be left-truncated as input is processed. POS exists ;; for producing metadata for error reporting---it has no impact on ;; parsing. ;; ;; The result is a list of tokens. See `token' for the format. (define (lex src pos) (let* ((ws (or (js:match-regexp "^\\s+" src) (list ""))) (ws-len (string-length (car ws))) (trim (substring src ws-len)) ; ignore whitespace, if any (newpos (+ pos ws-len))) (if (string=? "" trim) (list) ; EOF and we're done ;; normally we'd use `string-ref' here, but then we'd have to ;; implement Scheme characters, so let's keep this simple and keep ;; with strings (let ((ch (substring trim 0 1))) (case ch ;; comments extend until the end of the line ((";") (let ((eol (js:match-regexp "^(.*?)(\\n|$)" trim))) (token "comment" (cadr eol) trim newpos))) ;; left and right parenthesis are handled in the same manner: ;; they produce distinct tokens with single-character lexemes (("(") (token "open" ch trim newpos)) ((")") (token "close" ch trim newpos)) ;; strings are delimited by opening and closing ASCII double ;; quotes, which can be escaped with a backslash (("\"") (let ((str (js:match-regexp "^\"(|.*?[^\\\\])\"" trim))) (or str (parse-error src pos "missing closing string delimiter")) ;; a string token consists of the entire string ;; including quotes as its lexeme, but its value will ;; be the value of the string without quotes due to ;; the `str' match group (see `token') (token "string" str trim newpos))) (else ;; anything else is considered a symbol up until whitespace or ;; any of the aforementioned delimiters (let ((symbol (js:match-regexp "^[^\\s()\"]+" trim))) (token "symbol" symbol trim newpos)))))))) ;; Throw an error with a window of surrounding source code. ;; ;; The "window" is simply ten characters to the left and right of the ;; first character of the source input SRC that resulted in the error. ;; It's a little more than useless. (define (parse-error src pos msg) (let ((window (substring src (- pos 10) (+ pos 10)))) (error (string-append msg " (pos " pos "): " window) src))) ;; Produce a token and recurse. ;; ;; The token will be concatenated with the result of the mutually ;; recursive procedure `lex'. ;; ;; For the record: I'm not fond of mutual recursion from a clarity ;; standpoint, but this is how the abstraction evolved to de-duplicate ;; code, and I don't much feel like refactoring it. ;; ;; Unlike the JS Prebirth implementation which uses a key/value object, ;; we're just using a simple list. ;; ;; The expected arguments are: the token type TYPE, the match group or ;; string MATCH, left-truncated source code SRC, and the position POS ;; relative to the original source code. (define (token type match src pos) (let* ((parts (if (list? match) match (list match match))) (lexeme (car parts)) ;; the value is the first group of the match (indicating what we ;; are actually interested in), and the lexeme is the full match, ;; which might include, for example, string delimiters (value (or (and (pair? (cdr parts)) (cadr parts)) lexeme)) (len (string-length lexeme))) ;; produce token and recurse on `lex', left-truncating the source ;; string to discard what we have already processed (cons (list type lexeme value pos) (lex (substring src len) (+ pos len))))) ;; at this point, this program can parse itself and output a CST (sans ;; whitespace) (js:console (lex (js:stdin->string) 0))