tamer: frontend: Clean up unused modules

These were part of a POC for frontends quite some time ago.  Some portions
of this concept may be reintroduced, but this was pre-XIR.

DEV-10413
main
Mike Gerwitz 2022-04-07 14:21:08 -04:00
parent 99aacaf7ca
commit 942bf66231
5 changed files with 0 additions and 535 deletions

View File

@ -1,56 +0,0 @@
// TAME frontends
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Frontends for the TAME programming language.
//!
//! A _frontend_ represents a source language.
//! The principal frontend for TAME is the XML-based package specification
//! language ([`XmlFrontendParser`]).
//!
//! Parsing
//! =======
//! [Parsers](parser) for frontends are expected to fulfill three primary
//! roles:
//!
//! 1. Produce a sequence of tokens from a source input (see [`Token`]);
//! 2. Perform no implicit copying of source buffer data (zero-copy); and
//! 3. Attempt recovery to continue parsing in the event of an error.
//!
//! Recovery allows the parser to find and report more errors at once,
//! rather than requiring a developer to correct and recompile one error
//! at a time.
//! Recovery further makes parsers suitable for static analysis in
//! situations where correctness is non-critical,
//! such as for linting; checkstyle; and language servers.
//!
//! Parsers are expected to be scannerless
//! (that is, not require a separate scanning/lexing process),
//! or to at least encapsulate lexing.
//!
//! *TODO*: Mention IR and guide reader to the next steps in the pipeline.
mod parser;
mod xml;
pub use parser::{
ClosedByteInterval, FrontendError, FrontendEvent, FrontendParser,
FrontendResult, Token,
};
pub use xml::XmlFrontendParser;

View File

@ -1,215 +0,0 @@
// TAME frontend parser
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Recovering, zero-copy, scannerless parsers for TAME frontends.
//!
//! See the [parent module](super) for more information.
use std::{borrow::Cow, fmt::Display};
/// Recovering, zero-copy, scannerless parser.
///
/// `T` is the frontend-specific token kind carried by each emitted
/// [`Token`],
/// and `E` is the frontend-specific error type.
///
/// Note that the lifetime exists on this trait due to the lack of GATs,
/// which prevents us from having a lifetime tied to `parse_next`;
/// this is the same problem that we have with `Iterator`.
/// An alternative would be to forego a trait at all for parsers,
/// but that only pushes the problem up the chain.
/// Remember that the parser produces short-lived tokens that are intended
/// to be immediately lowered,
/// and this problem doesn't exist at lower levels where data are owned
/// by a given IR.
pub trait FrontendParser<'l, T, E> {
/// Human-readable short description of parser.
///
/// TAME consists of a number of source languages, so this should be
/// sufficient to state what parser was chosen for a given source
/// file.
fn desc() -> &'static str;
/// Attempt to parse the next token.
///
/// A [`FrontendEvent::Token`] contains common information about the
/// encountered lexeme and source byte interval,
/// but the token kind is frontend-specific.
///
/// When a parsing error occurs,
/// frontends are encouraged to self-correct if possible.
/// If this is able to happen,
/// [`FrontendEvent::RecoverableError`] will be emitted with zero or
/// more tokens that may be used in place of the erroneous input to
/// possibly continue parsing in a useful way;
/// this can be used for further static analysis or error
/// checking.
///
/// If the end of the file
/// (or end of the parsable region of a file)
/// has been reached,
/// [`FrontendEvent::Eof`] will be emitted,
/// unless a [`FrontendEvent::RecoverableError`] has been previously
/// emitted,
/// in which case [`FrontendError::EofWithRecoverableErrors`]
/// indicates that the caller should take special care in
/// determining whether parsing should be considered to be a
/// failure.
fn parse_next(&'l mut self) -> FrontendResult<FrontendEvent<'l, T, E>, E>;
}
/// Raw input string associated with a token.
///
/// The bytes are held in a [`Cow`],
/// so a lexeme may either borrow them or own a copy of them.
#[derive(Debug, PartialEq, Eq)]
pub struct Lexeme<'a>(Cow<'a, [u8]>);
/// Inclusive range of source byte offsets associated with a token.
///
/// Both endpoints belong to the interval:
/// the first field is the starting offset and the second field is the
/// ending offset.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct ClosedByteInterval<T: Copy = usize>(pub T, pub T);

/// Builds an interval from a `(start, end)` pair of offsets.
impl<T: Copy> From<(T, T)> for ClosedByteInterval<T> {
    fn from((start, end): (T, T)) -> Self {
        ClosedByteInterval(start, end)
    }
}
/// A lexeme combined with a type (kind) and location.
///
/// The `interval` represents the starting and ending offset, inclusive, of
/// the lexeme used to produce this token.
/// The `kind` is the token type,
/// specific to each individual frontend.
/// The lifetime `'l` is that of the data borrowed by the `lexeme`.
///
/// Tokens are intended to be short-lived and lowered into another
/// intermediate representation (IR) for further processing and analysis.
#[derive(Debug, PartialEq, Eq)]
pub struct Token<'l, T> {
/// Token type and associated data.
///
/// The token kind represents the parsed information and should always
/// be used in place of the lexeme (which may not be available),
/// unless referring back to the source input.
pub kind: T,
/// Raw input from which the token was generated.
///
/// A lexeme may not be available if a token was generated by the
/// compiler in a manner that is not associated with any source
/// input.
///
/// Since frontend parsers are zero-copy by default,
/// a lexeme may be available only immediately after a token is
/// emitted,
/// unless the caller wishes to copy its value.
pub lexeme: Option<Lexeme<'l>>,
/// Starting and ending offset of the lexeme, inclusive.
///
/// An interval may not be available if a token was generated by the
/// compiler in a manner that is not associated with any source
/// input.
///
/// A note on terminology: we use "interval" instead of "span" here,
/// because the latter is intended to hold slightly different data as
/// part of a lower-level IR.
pub interval: Option<ClosedByteInterval>,
}
/// Result of attempting to parse input for the next token.
#[derive(Debug, PartialEq)]
pub enum FrontendEvent<'l, T, E> {
/// Successfully parsed token.
Token(Token<'l, T>),
/// An error occurred,
/// but zero or more tokens are provided in an attempt to self-correct
/// so parsing may continue.
///
/// The provided interval represents all source bytes consumed for all
/// recovery tokens;
/// parsing will continue at the next byte after the end of that
/// interval.
/// The recovery tokens may very well be nonsense;
/// the goal is to continue parsing to find more errors,
/// not to infer a correct program.
RecoverableError {
/// Source error.
source: E,
/// Starting and ending offset of all bytes associated with this
/// error, inclusive.
///
/// Note that recovery tokens may not have interval information if
/// their source input is not sensible.
interval: ClosedByteInterval,
/// Zero or more tokens that may be substituted in place of the
/// erroneous input in an attempt to continue parsing.
///
/// These recovery tokens are not guaranteed to be successful,
/// nor can they be used to confidently repair a program with
/// parse errors.
recovery_tokens: Vec<Token<'l, T>>,
},
/// End of the file has been reached with no recoverable errors.
///
/// See also [`FrontendError::EofWithRecoverableErrors`].
Eof,
}
/// Error attempting to parse input for the next token.
#[derive(Debug, PartialEq, Eq)]
pub enum FrontendError<E> {
/// An error occurred during parsing and the parser was either unable to
/// determine how to recover or did not attempt recovery.
UnrecoverableError {
/// Source error.
source: E,
/// Starting and ending byte offsets of source input that produced
/// the error.
interval: ClosedByteInterval,
},
/// EOF reached with recoverable errors.
///
/// This error indicates that the end of the file has been reached,
/// but recoverable errors have been previously emitted,
/// and so parsing should fail.
/// If the caller chooses to ignore this error and accept the recovery
/// tokens,
/// the emitted tokens may not represent a valid program.
/// However,
/// if parsing was performed for syntax checking or static analysis,
/// then this error might be able to be safely ignored.
///
/// See also [`FrontendEvent::Eof`].
EofWithRecoverableErrors,
}
impl<E> Display for FrontendError<E> {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(fmt, "TODO fmt")
}
}
impl<E: std::fmt::Debug> std::error::Error for FrontendError<E> {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
None
}
}
/// Result of a frontend parsing operation,
/// with failures reported as a [`FrontendError`] wrapping the
/// frontend-specific error type `E`.
pub type FrontendResult<T, E> = Result<T, FrontendError<E>>;

View File

@ -1,158 +0,0 @@
// XML frontend
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! XML frontend for the TAME programming language.
use super::{
ClosedByteInterval, FrontendError, FrontendEvent, FrontendParser,
FrontendResult, Token,
};
use crate::tpwrap::quick_xml::Error as XmlError;
use quick_xml::events::Event as XmlEvent;
use quick_xml::Reader as XmlReader;
use std::fmt::Display;
use std::io::BufRead;
/// Parser for XML-based sources.
///
/// `B` is the buffered reader from which XML input is read.
pub struct XmlFrontendParser<B>
where
B: BufRead,
{
/// XML parser.
reader: XmlReader<B>,
/// Buffer for all XML data besides namespaces.
buf: Vec<u8>,
/// Buffer for namespace data.
nsbuf: Vec<u8>,
}
impl<B> XmlFrontendParser<B>
where
    B: BufRead,
{
    /// Create a parser that reads XML from `buf_read`.
    ///
    /// Both internal buffers start out empty.
    pub fn new(buf_read: B) -> Self {
        Self {
            reader: XmlReader::from_reader(buf_read),
            buf: Vec::new(),
            nsbuf: Vec::new(),
        }
    }

    /// Compute the closed byte interval representing the bytes associated
    /// with the given [`XmlEvent`].
    ///
    /// Only [`XmlEvent::Empty`] yields the range from `pos_start` through
    /// `pos_cur - 1`;
    /// every other event collapses to the single offset `pos_start`.
    fn calc_interval(
        pos_start: usize,
        pos_cur: usize,
        ev: &XmlEvent,
    ) -> ClosedByteInterval {
        let pos_end = if matches!(ev, XmlEvent::Empty(_)) {
            // Closed interval: the final byte, not the byte after it.
            pos_cur - 1
        } else {
            pos_start
        };

        ClosedByteInterval(pos_start, pos_end)
    }
}
impl<'l, B> FrontendParser<'l, XmlToken<'l>, XmlFrontendError>
    for XmlFrontendParser<B>
where
    B: BufRead,
{
    fn desc() -> &'static str {
        "XML-based package specification language"
    }

    /// Read the next XML event and wrap it as a frontend event.
    ///
    /// An XML EOF becomes [`FrontendEvent::Eof`];
    /// any other event becomes a [`FrontendEvent::Token`] carrying the raw
    /// namespaced event,
    /// no lexeme,
    /// and an interval computed from the reader position before and after
    /// the read.
    /// A reader failure becomes [`FrontendError::UnrecoverableError`] whose
    /// interval runs from the starting position to the reader's current
    /// position.
    fn parse_next(&'l mut self) -> XmlFrontendResult<XmlFrontendEvent<'l>> {
        let reader = &mut self.reader;
        let pos_start = reader.buffer_position();

        match reader.read_namespaced_event(&mut self.buf, &mut self.nsbuf) {
            Ok((_, XmlEvent::Eof)) => Ok(FrontendEvent::Eof),

            Ok((ns, ev)) => {
                let interval = Self::calc_interval(
                    pos_start,
                    reader.buffer_position(),
                    &ev,
                );

                Ok(FrontendEvent::Token(Token {
                    kind: XmlToken::RawXmlEvent((ns, ev)),
                    lexeme: None,
                    interval: Some(interval),
                }))
            }

            Err(e) => Err(FrontendError::UnrecoverableError {
                source: XmlFrontendError::XmlError(e.into()),
                interval: ClosedByteInterval(
                    pos_start,
                    reader.buffer_position(),
                ),
            }),
        }
    }
}
/// [`FrontendEvent`] specialized to the XML frontend's token and error
/// types.
pub type XmlFrontendEvent<'l> =
FrontendEvent<'l, XmlToken<'l>, XmlFrontendError>;
/// Raw bytes of an XML namespace.
type Namespace<'a> = &'a [u8];
/// An XML event paired with an optional namespace.
type NamespacedXmlEvent<'a> = (Option<Namespace<'a>>, XmlEvent<'a>);
/// Token kind emitted by the XML frontend.
#[derive(Debug)]
pub enum XmlToken<'l> {
/// Unprocessed XML event together with its optional namespace.
RawXmlEvent(NamespacedXmlEvent<'l>),
}
/// Error produced by the XML frontend.
#[derive(Debug, PartialEq)]
pub enum XmlFrontendError {
/// Wrapped [`XmlError`],
/// such as a failure reported while reading an XML event.
XmlError(XmlError),
}
/// Displays the wrapped error verbatim.
impl Display for XmlFrontendError {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Single-variant enum, so this destructuring is irrefutable.
        let Self::XmlError(inner) = self;

        Display::fmt(inner, fmt)
    }
}
/// Exposes the wrapped error as the underlying cause.
impl std::error::Error for XmlFrontendError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Single-variant enum, so this destructuring is irrefutable.
        let Self::XmlError(cause) = self;

        Some(cause)
    }
}
/// Wraps anything convertible into an [`XmlError`].
impl<E> From<E> for XmlFrontendError
where
    E: Into<XmlError>,
{
    fn from(err: E) -> Self {
        let wrapped: XmlError = err.into();

        Self::XmlError(wrapped)
    }
}
/// [`FrontendResult`] specialized to [`XmlFrontendError`].
pub type XmlFrontendResult<T> = FrontendResult<T, XmlFrontendError>;
#[cfg(test)]
mod test;

View File

@ -1,103 +0,0 @@
// Tests for XML frontend
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// NB: Due to the complexity and verbosity of mocking XML events,
// these tests are coupled with the XML parser.
// Care should be taken to try to mitigate minor changes to the library's
// output so as not to make these tests overly fragile.
use super::*;
// Subject under test for every test case in this file.
type Sut<B> = XmlFrontendParser<B>;
// TODO: This exists only for initial testing.
//   Empty files shouldn't be valid,
//     since they don't give the parser enough information as to what type
//     of file it is.
#[test]
fn emits_eof_for_empty_file() {
    // Zero-length input.
    let mut sut = Sut::new(&b""[..]);

    let result = sut.parse_next();

    assert!(matches!(result, Ok(FrontendEvent::Eof)));
}
// Raw tokens are needed until the parser is complete so that they can be
// echoed back out.
#[test]
fn produces_raw_xml_events_as_tokens() -> Result<(), Box<dyn std::error::Error>>
{
    let stub_data: &[u8] = br#"<valid-xml xmlns="foons" />"#;
    let mut sut = Sut::new(stub_data);

    loop {
        // Anything other than a raw XML token is a test failure.
        let (ns, ev, interval) = match sut.parse_next()? {
            FrontendEvent::Token(Token {
                kind: XmlToken::RawXmlEvent((ns, ev)),
                interval,
                ..
            }) => (ns, ev, interval),
            other => panic!("Unexpected: {:?}", other),
        };

        // Only namespaced events are of interest.
        if ns.is_none() {
            continue;
        }

        // Interval should be the starting byte offset to the offset
        // of the final byte, not the byte after it.
        assert!(matches!(
            interval,
            Some(ClosedByteInterval(0, hi))
                if hi == stub_data.len() - 1
        ));

        if let XmlEvent::Empty(start) = ev {
            assert_eq!(start.name(), b"valid-xml");
            break;
        }
    }

    Ok(())
}
// An unterminated comment must surface as an unrecoverable XML error whose
// interval begins at the offset where the comment starts.
#[test]
fn produces_error_on_xml_parse_failure() {
    let stub_data: &[u8] = b"<ok /><!-- EOF in comment";
    let mut sut = Sut::new(stub_data);

    loop {
        // Skip over successfully parsed events until the error appears.
        let err = match sut.parse_next() {
            Ok(FrontendEvent::Eof) => panic!("Expected error"),
            Ok(_) => continue,
            Err(err) => err,
        };

        match err {
            FrontendError::UnrecoverableError {
                source: XmlFrontendError::XmlError(_),
                interval: ClosedByteInterval(x, y),
            } if x == 6 && y >= x => break,
            _ => panic!("Error mismatch: {:?}", err),
        }
    }
}

View File

@ -76,9 +76,6 @@ extern crate static_assertions;
#[macro_use]
extern crate lazy_static;
#[cfg(feature = "wip-frontends")]
pub mod frontend;
#[macro_use]
pub mod xir;