TAMER: xmlo reader

This introduces the reader for xmlo files produced by the XSLT-based
compiler.  It is an initial implementation but is not complete; see future
commits.
master
Mike Gerwitz 2020-01-09 10:55:55 -05:00
parent db52fcdb30
commit a929c8cae4
7 changed files with 1704 additions and 1 deletions

View File

@ -0,0 +1,244 @@
// Legacy IR
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Legacy IR faithful to the XSLT-based compiler.
//!
//! This represents the intermediate format (IR) used by the `xmlo` files
//! (see [`crate::obj::xmlo`]) originally produced by the XSLT-based
//! compiler.
//! It consists largely of metadata for object symbols.
//!
//! This IR should be converted into a higher-level IR quickly,
//! especially considering that it will be going away in the future.
use crate::sym::Symbol;
use std::convert::TryFrom;
use std::result::Result;
/// Symbol attributes.
///
/// This is a subset of the attributes available on the
/// `preproc:sym` nodes;
/// more will be added as needed.
///
/// Not all symbols share the same set of attributes,
/// so this represents the union of all possible attribute sets.
/// Attributes that may be absent for a given symbol are therefore
/// wrapped in [`Option`].
///
/// Due to the number of possible attributes,
/// this is not an opaque type.
/// Consequently,
/// valid values should be enforced by Rust's type system.
///
/// Since this type derives [`Default`],
/// a default value has every optional attribute set to `None` and
/// `extern_` set to `false`.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct SymAttrs<'i> {
/// Relative path to the package that defined this symbol.
///
/// Object files store relative paths so that they are somewhat
/// portable—the
/// entire project root should be able to be relocated.
pub src: Option<&'i Symbol<'i>>,
/// Symbol type.
///
/// The type describes the purpose of the symbol and determines both how
/// it is compiled and its location in the final executable.
/// See [`SymType`] for the recognized types.
pub ty: Option<SymType>,
/// Number of dimensions.
///
/// This determines the number of subscripts needed to access a scalar
/// value.
/// A value of `0` indicates a scalar;
/// a value of `1` indicates a vector;
/// a value of `2` indicates a matrix;
/// and a value of `n` indicates a multi-dimensional array of
/// depth `n`.
pub dim: Option<u8>,
/// Type of underlying data.
///
/// This is not a primitive,
/// and mostly represents whether or not floating point computations
/// will take place.
/// See [`SymDtype`] for the recognized data types.
pub dtype: Option<SymDtype>,
/// Whether the symbol's location will be determined at link-time.
///
/// Externs allow symbols to be referenced without having yet been given
/// a concrete definition,
/// provided that an eventual concrete definition matches the
/// provided declaration.
/// The linker (see [`crate::ld`]) is responsible for ensuring that the
/// extern is satisfied and properly located in the final executable.
///
/// The trailing underscore is needed because `extern` is a reserved
/// keyword in Rust.
pub extern_: bool,
}
/// Legacy symbol types.
///
/// This enum represents all symbol types represented in the `xmlo` files.
/// They are overly specialized and will be deprecated in favor of more
/// generalized dependent types in later IRs.
///
/// Values are produced from the raw `preproc:sym/@type` attribute bytes
/// via the [`TryFrom<&[u8]>`](TryFrom) implementation below.
#[derive(Debug, PartialEq, Eq)]
pub enum SymType {
    /// Classification generator (from `lv:classify/@yields`).
    Cgen,
    /// Classification (from `lv:classify/@as`).
    Class,
    /// Constant (from `lv:const/@name`).
    Const,
    /// Function (from `lv:function/@name`).
    Func,
    /// Generator (from `lv:rate/@generates`).
    Gen,
    /// Local function parameter (from `lv:function/lv:param/@name`).
    Lparam,
    /// Global parameter (from `lv:param/@name`).
    Param,
    /// Scalar calculation result (from `lv:rate/@yields`).
    Rate,
    /// Template (from `lv:template/@name`).
    Tpl,
    /// Typedef (from `lv:type/@name`).
    Type,
    /// Input map head (meta symbol generated by compiler for each input map).
    MapHead,
    /// Input field→param mapping (from `lvm:map`, `lvm:pass`).
    Map,
    /// Input map tail (meta symbol generated by compiler for each input map).
    MapTail,
    /// Return map head (meta symbol generated by compiler for each return map).
    RetMapHead,
    /// Return param→field mapping (from `lvm:map`, `lvm:pass`).
    RetMap,
    /// Return map tail (meta symbol generated by compiler for each return map).
    RetMapTail,
    /// Arbitrary metadata (from `lv:meta`).
    Meta,
    /// Rating worksheet (generated by compiler for worksheet packages).
    Worksheet,
}

impl TryFrom<&[u8]> for SymType {
    type Error = String;

    /// Determine symbol type from source `preproc:sym/@type`.
    ///
    /// This raises source `xmlo` data into this IR.
    /// See [`crate::obj::xmlo::reader`].
    ///
    /// The comparison is an exact, case-sensitive byte match.
    ///
    /// # Errors
    /// If `value` is not one of the recognized type strings,
    /// an error message containing the offending value is returned.
    /// If that value is not valid UTF-8,
    /// the placeholder `(invalid UTF8)` appears in its place.
    fn try_from(value: &[u8]) -> Result<SymType, Self::Error> {
        match value {
            b"cgen" => Ok(SymType::Cgen),
            b"class" => Ok(SymType::Class),
            b"const" => Ok(SymType::Const),
            b"func" => Ok(SymType::Func),
            b"gen" => Ok(SymType::Gen),
            b"lparam" => Ok(SymType::Lparam),
            b"param" => Ok(SymType::Param),
            b"rate" => Ok(SymType::Rate),
            b"tpl" => Ok(SymType::Tpl),
            b"type" => Ok(SymType::Type),
            b"retmap:head" => Ok(SymType::RetMapHead),
            b"retmap" => Ok(SymType::RetMap),
            b"retmap:tail" => Ok(SymType::RetMapTail),
            b"map:head" => Ok(SymType::MapHead),
            b"map" => Ok(SymType::Map),
            b"map:tail" => Ok(SymType::MapTail),
            b"meta" => Ok(SymType::Meta),
            b"worksheet" => Ok(SymType::Worksheet),
            // Borrow the input as `&str` for the message rather than
            // copying it into an owned `String`; the only allocation is
            // the formatted error itself.
            _ => Err(format!(
                "unknown symbol type `{}`",
                std::str::from_utf8(value).unwrap_or("(invalid UTF8)")
            )),
        }
    }
}
/// Underlying datatype.
///
/// This is the type of scalar data stored within the given symbol.
///
/// *NB:* This was _not enforced_ by the XSLT-based compiler.
///
/// Values are produced from the raw `preproc:sym/@dtype` attribute bytes
/// via the [`TryFrom<&[u8]>`](TryFrom) implementation below.
#[derive(Debug, PartialEq, Eq)]
pub enum SymDtype {
    /// {⊥,⊤} = {0,1} ⊂ ℤ
    Boolean,
    /// ℤ
    Integer,
    /// ℝ
    Float,
    /// ∅
    Empty,
}

impl TryFrom<&[u8]> for SymDtype {
    type Error = String;

    /// Determine data type from source `preproc:sym/@dtype`.
    ///
    /// This raises source `xmlo` data into this IR.
    /// See [`crate::obj::xmlo::reader`].
    ///
    /// The comparison is an exact, case-sensitive byte match.
    ///
    /// # Errors
    /// If `value` is not one of the recognized dtype strings,
    /// an error message containing the offending value is returned.
    /// If that value is not valid UTF-8,
    /// the placeholder `(invalid UTF8)` appears in its place.
    fn try_from(value: &[u8]) -> Result<SymDtype, Self::Error> {
        match value {
            b"boolean" => Ok(SymDtype::Boolean),
            b"integer" => Ok(SymDtype::Integer),
            b"float" => Ok(SymDtype::Float),
            b"empty" => Ok(SymDtype::Empty),
            // Borrow the input as `&str` for the message rather than
            // copying it into an owned `String`; the only allocation is
            // the formatted error itself.
            _ => Err(format!(
                "unknown symbol dtype `{}`",
                std::str::from_utf8(value).unwrap_or("(invalid UTF8)")
            )),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // The full string→variant mapping is deliberately not duplicated
    // here, since that would mean maintaining it in two places;
    // exhaustive coverage is left to integration tests.

    /// A known type string is raised into its variant.
    #[test]
    fn symtype_from_u8() {
        let input: &[u8] = b"cgen";

        assert_eq!(Ok(SymType::Cgen), SymType::try_from(input));
    }

    /// An unrecognized type string fails with a message that includes
    /// the offending value.
    #[test]
    fn symtype_failure_from_unknown_u8() {
        let input: &[u8] = b"unknown";
        let result = SymType::try_from(input);

        if let Err(msg) = &result {
            assert!(msg.contains("unknown"));
        } else {
            panic!("expected error: {:?}", result);
        }
    }

    /// A known dtype string is raised into its variant.
    #[test]
    fn symdtype_from_u8() {
        let input: &[u8] = b"integer";

        assert_eq!(Ok(SymDtype::Integer), SymDtype::try_from(input));
    }

    /// An unrecognized dtype string fails with a message that includes
    /// the offending value.
    #[test]
    fn symdtype_failure_from_unknown_u8() {
        let input: &[u8] = b"unknownd";
        let result = SymDtype::try_from(input);

        if let Err(msg) = &result {
            assert!(msg.contains("unknownd"));
        } else {
            panic!("expected error: {:?}", result);
        }
    }
}

View File

@ -0,0 +1,41 @@
// Intermediate representations (IRs)
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Intermediate representations for TAME programs.
//!
//! [Intermediate representations][ir] (IRs) are data structures used to
//! represent source code in a manner most suitable for a particular phase
//! of compilation.
//! A single IR may be used by multiple compilation phases,
//! or by multiple systems (e.g. various compilers or [linkers][]).
//!
//! [ir]: https://en.wikipedia.org/wiki/Intermediate_representation
//! [linkers]: crate::ld
//!
//! Each IR is responsible for raising lower-level IRs or source formats.
//!
//! Summary of IRs
//! --------------
//! Each input language begins as an [abstract syntax tree][ast] (AST),
//! produced by the parser.
//! For TAME languages that are XML-based,
//! the production of the AST is handled by [`quick_xml`],
//! and is effectively the same as the source XML.
//!
//! [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
pub mod legacyir;

View File

@ -17,5 +17,7 @@
//! An incremental rewrite of TAME in Rust.
pub mod ir;
pub mod ld;
pub mod obj;
pub mod sym;

View File

@ -0,0 +1,34 @@
// Object files
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Object file construction and processing.
//!
//! An _[object file][]_ contains relocatable compiled code, symbol tables,
//! and other information produced by the compiler.
//! It is the responsibility of the [linker](super::ld) to construct a final
//! executable from these files.
//!
//! [object file]: https://en.wikipedia.org/wiki/Object_file
//!
//! The only object file currently supported by TAMER is the [`xmlo`]
//! format,
//! produced by the XSLT compiler.
//! It will likely be replaced with [ELF] object files in the future.
//!
//! [ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
pub mod xmlo;

View File

@ -0,0 +1,75 @@
// xmlo object files
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! `xmlo` object file construction and processing.
//!
//! This object file format exists for compatibility with the old compiler
//! written in XSLT;
//! it will be removed in the future.
//!
//!
//! `xmlo` Object Files
//! ===================
//! An `xmlo` object file is produced by the compiler for each source file.
//! It is a terribly inefficient object format and will be eliminated in the
//! future.
//! The format is XML because the original compiler was written in XSLT.
//!
//! The general structure of an `xmlo` file consists of:
//! - Package metadata as attributes on the root node;
//! - A symbol table along with symbol metadata;
//! - Symbol dependencies (as [adjacency lists][]);
//! - Compiled JavaScript fragments for each applicable symbol; and
//! - Expanded source XML.
//!
//! [adjacency lists]: https://en.wikipedia.org/wiki/Adjacency_list
//!
//! For example (with some extra information omitted):
//!
//! ```xml
//! <package xmlns="http://www.lovullo.com/rater"
//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
//! title="Example Package"
//! name="example/package"
//! __rootpath="../"
//! preproc:elig-class-yields="isEligexamplepackage">
//! <!-- Symbol table -->
//! <preproc:symtable>
//! <preproc:sym name=":class:some-sym" type="class" ... />
//! <!-- ... -->
//! </preproc:symtable>
//!
//! <!-- Dependency graph (adjacency lists) -->
//! <preproc:sym-deps>
//! <preproc:sym-dep name=":class:some-sym">
//! <preproc:sym-ref name="someOtherSym" />
//! <!-- ... -->
//! </preproc:sym-dep>
//! </preproc:sym-deps>
//!
//! <!-- Compiled JS fragments -->
//! <preproc:fragments>
//! <preproc:fragment id=":class:some-sym">
//! classes['some-sym'] = '...generated JS code...';
//! </preproc:fragment>
//! </preproc:fragments>
//!
//! <!-- Expanded src -->
//! </package>
//! ```
pub mod reader;

File diff suppressed because it is too large Load Diff

View File

@ -437,7 +437,9 @@ pub trait Interner<'i> {
/// [`std::str::from_utf8_unchecked`].
/// It is provided for convenience when interning from trusted binary
/// data
/// (such as object files).
/// (such as [object files][]).
///
/// [object files]: crate::obj
unsafe fn intern_utf8_unchecked(&'i self, value: &[u8]) -> &'i Symbol<'i> {
// SAFETY: `std::str::from_utf8_unchecked` performs no validation, so the
// caller of this `unsafe fn` must guarantee that `value` is valid UTF-8.
self.intern(std::str::from_utf8_unchecked(value))
}