tamer: frontend: Clean up unused modules
These were part of a POC for frontends quite some time ago. Some portions of this concept may be reintroduced, but this was pre-XIR. DEV-10413
main
parent
99aacaf7ca
commit
942bf66231
|
@ -1,56 +0,0 @@
|
|||
// TAME frontends
|
||||
//
|
||||
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Frontends for the TAME programming language.
|
||||
//!
|
||||
//! A _frontend_ represents a source language.
|
||||
//! The principal frontend for TAME is the XML-based package specification
|
||||
//! language ([`XmlFrontendParser`]).
|
||||
//!
|
||||
//! Parsing
|
||||
//! =======
|
||||
//! [Parsers](parser) for frontends are expected to fulfill three primary
|
||||
//! roles:
|
||||
//!
|
||||
//! 1. Produce a sequence of tokens from a source input (see [`Token`]);
|
||||
//! 2. Perform no implicit copying of source buffer data (zero-copy); and
|
||||
//! 3. Attempt recovery to continue parsing in the event of an error.
|
||||
//!
|
||||
//! Recovery allows the parser to find and report more errors at once,
|
||||
//! rather than requiring a developer to correct and recompile one error
|
||||
//! at a time.
|
||||
//! Recovery further makes parsers suitable for static analysis in
|
||||
//! situations where correctness is non-critical,
|
||||
//! such as for linting; checkstyle; and language servers.
|
||||
//!
|
||||
//! Parsers are expected to be scannerless
|
||||
//! (that is, not require a separate scanning/lexing process),
|
||||
//! or to at least encapsulate lexing.
|
||||
//!
|
||||
//! *TODO*: Mention IR and guide reader to the next steps in the pipeline.
|
||||
|
||||
mod parser;
|
||||
mod xml;
|
||||
|
||||
pub use parser::{
|
||||
ClosedByteInterval, FrontendError, FrontendEvent, FrontendParser,
|
||||
FrontendResult, Token,
|
||||
};
|
||||
|
||||
pub use xml::XmlFrontendParser;
|
|
@ -1,215 +0,0 @@
|
|||
// TAME frontend parser
|
||||
//
|
||||
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Recovering, zero-copy, scannerless parsers for TAME frontends.
|
||||
//!
|
||||
//! See the [parent module](super) for more information.
|
||||
|
||||
use std::{borrow::Cow, fmt::Display};
|
||||
|
||||
/// Recovering, zero-copy, scannerless parser.
|
||||
///
|
||||
/// Note that the lifetime exists on this trait due to the lack of GATs,
|
||||
/// which prevents us from having a lifetime tied to `parse_next`;
|
||||
/// this is the same problem that we have with `Iterator`.
|
||||
/// An alternative would be to forgo a trait altogether for parsers,
|
||||
/// but that only pushes the problem up the chain.
|
||||
/// Remember that the parser produces short-lived tokens that are intended
|
||||
/// to be immediately lowered,
|
||||
/// and this problem doesn't exist at lower levels where data are owned
|
||||
/// by a given IR.
|
||||
pub trait FrontendParser<'l, T, E> {
|
||||
/// Human-readable short description of parser.
|
||||
///
|
||||
/// TAME consists of a number of source languages, so this should be
|
||||
/// sufficient to state what parser was chosen for a given source
|
||||
/// file.
|
||||
fn desc() -> &'static str;
|
||||
|
||||
/// Attempt to parse the next token.
|
||||
///
|
||||
/// A [`FrontendEvent::Token`] contains common information about the
|
||||
/// encountered lexeme and source byte interval,
|
||||
/// but the token kind is frontend-specific.
|
||||
///
|
||||
/// When a parsing error occurs,
|
||||
/// frontends are encouraged to self-correct if possible.
|
||||
/// If this is able to happen,
|
||||
/// [`FrontendEvent::RecoverableError`] will be emitted with zero or
|
||||
/// more tokens that may be used in place of the erroneous input to
|
||||
/// possibly continue parsing in a useful way;
|
||||
/// this can be used for further static analysis or error
|
||||
/// checking.
|
||||
///
|
||||
/// If the end of the file
|
||||
/// (or end of the parsable region of a file)
|
||||
/// has been reached,
|
||||
/// [`FrontendEvent::Eof`] will be emitted,
|
||||
/// unless a [`FrontendEvent::RecoverableError`] has been previously
|
||||
/// emitted,
|
||||
/// in which case [`FrontendError::EofWithRecoverableErrors`]
|
||||
/// indicates that the caller should take special care in
|
||||
/// determining whether parsing should be considered to be a
|
||||
/// failure.
|
||||
fn parse_next(&'l mut self) -> FrontendResult<FrontendEvent<'l, T, E>, E>;
|
||||
}
|
||||
|
||||
/// Raw input string associated with a token.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Lexeme<'a>(Cow<'a, [u8]>);
|
||||
|
||||
/// An inclusive range of source byte offsets associated with a token.
///
/// The interval is _closed_: both endpoints belong to it.
/// Field `0` holds the starting offset and field `1` the ending offset.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct ClosedByteInterval<T: Copy = usize>(pub T, pub T);

impl<T: Copy> From<(T, T)> for ClosedByteInterval<T> {
    /// Build an interval from a `(start, end)` pair,
    ///   keeping the two endpoints in the order given.
    fn from((start, end): (T, T)) -> Self {
        Self(start, end)
    }
}
|
||||
|
||||
/// A lexeme combined with a type (kind) and location.
|
||||
///
|
||||
/// The `interval` represents the starting and ending offset, inclusive, of
|
||||
/// the lexeme used to produce this token.
|
||||
/// The `kind` is the token type,
|
||||
/// specific to each individual frontend.
|
||||
///
|
||||
/// Tokens are intended to be short-lived and lowered into another
|
||||
/// intermediate representation (IR) for further processing and analysis.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Token<'l, T> {
|
||||
/// Token type and associated data.
|
||||
///
|
||||
/// The token kind represents the parsed information and should always
|
||||
/// be used in place of the lexeme (which may not be available),
|
||||
/// unless referring back to the source input.
|
||||
pub kind: T,
|
||||
|
||||
/// Raw input from which the token was generated.
|
||||
///
|
||||
/// A lexeme may not be available if a token was generated by the
|
||||
/// compiler in a manner that is not associated with any source
|
||||
/// input.
|
||||
///
|
||||
/// Since frontend parsers are zero-copy by default,
|
||||
/// a lexeme may be available only immediately after a token is
|
||||
/// emitted,
|
||||
/// unless the caller wishes to copy its value.
|
||||
pub lexeme: Option<Lexeme<'l>>,
|
||||
|
||||
/// Starting and ending offset of the lexeme, inclusive.
|
||||
///
|
||||
/// An interval may not be available if a token was generated by the
|
||||
/// compiler in a manner that is not associated with any source
|
||||
/// input.
|
||||
///
|
||||
/// A note on terminology: we use "interval" instead of "span" here,
|
||||
/// because the latter is intended to hold slightly different data as
|
||||
/// part of a lower-level IR.
|
||||
pub interval: Option<ClosedByteInterval>,
|
||||
}
|
||||
|
||||
/// Result of attempting to parse input for the next token.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum FrontendEvent<'l, T, E> {
|
||||
/// Successfully parsed token.
|
||||
Token(Token<'l, T>),
|
||||
|
||||
/// An error occurred,
|
||||
/// but one or more tokens are provided in an attempt to self-correct
|
||||
/// so parsing may continue.
|
||||
///
|
||||
/// The provided interval represents all source bytes consumed for all
|
||||
/// recovery tokens;
|
||||
/// parsing will continue at the next byte after the end of that
|
||||
/// interval.
|
||||
/// The recovery token may very well be nonsense;
|
||||
/// the goal is to continue parsing to find more errors,
|
||||
/// not to infer a correct program.
|
||||
RecoverableError {
|
||||
/// Source error.
|
||||
source: E,
|
||||
|
||||
/// Starting and ending offset of all bytes associated with this
|
||||
/// error, inclusive.
|
||||
///
|
||||
/// Note that recovery tokens may not have interval information if
|
||||
/// their source input is not sensible.
|
||||
interval: ClosedByteInterval,
|
||||
|
||||
/// Zero or more tokens that may be substituted in place of the
|
||||
/// erroneous input in an attempt to continue parsing.
|
||||
///
|
||||
/// These recovery tokens are not guaranteed to be successful,
|
||||
/// nor can they be used to confidently repair a program with
|
||||
/// parse errors.
|
||||
recovery_tokens: Vec<Token<'l, T>>,
|
||||
},
|
||||
|
||||
/// End of the file has been reached with no recoverable errors.
|
||||
///
|
||||
/// See also [`FrontendError::EofWithRecoverableErrors`].
|
||||
Eof,
|
||||
}
|
||||
|
||||
/// Error attempting to parse input for the next token.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum FrontendError<E> {
|
||||
/// An error occurred during parsing and the parser was either unable to
|
||||
/// determine how to recover or did not attempt recovery.
|
||||
UnrecoverableError {
|
||||
/// Source error.
|
||||
source: E,
|
||||
|
||||
/// Starting and ending byte offsets of source input that produced
|
||||
/// the error.
|
||||
interval: ClosedByteInterval,
|
||||
},
|
||||
|
||||
/// EOF reached with recoverable errors.
|
||||
///
|
||||
/// This error indicates that the end of the file has been reached,
|
||||
/// but recoverable errors have been previously emitted,
|
||||
/// and so parsing should fail.
|
||||
/// If the caller chooses to ignore this error and accept the recovery
|
||||
/// tokens,
|
||||
/// the emitted tokens may not represent a valid program.
|
||||
/// However,
|
||||
/// if parsing was performed for syntax checking or static analysis,
|
||||
/// then this error might be able to be safely ignored.
|
||||
///
|
||||
/// See also [`FrontendEvent::Eof`].
|
||||
EofWithRecoverableErrors,
|
||||
}
|
||||
|
||||
impl<E> Display for FrontendError<E> {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(fmt, "TODO fmt")
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: std::fmt::Debug> std::error::Error for FrontendError<E> {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub type FrontendResult<T, E> = Result<T, FrontendError<E>>;
|
|
@ -1,158 +0,0 @@
|
|||
// XML frontend
|
||||
//
|
||||
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! XML frontend for the TAME programming language.
|
||||
|
||||
use super::{
|
||||
ClosedByteInterval, FrontendError, FrontendEvent, FrontendParser,
|
||||
FrontendResult, Token,
|
||||
};
|
||||
use crate::tpwrap::quick_xml::Error as XmlError;
|
||||
use quick_xml::events::Event as XmlEvent;
|
||||
use quick_xml::Reader as XmlReader;
|
||||
use std::fmt::Display;
|
||||
use std::io::BufRead;
|
||||
|
||||
/// Parser for XML-based sources.
|
||||
pub struct XmlFrontendParser<B>
|
||||
where
|
||||
B: BufRead,
|
||||
{
|
||||
/// XML parser.
|
||||
reader: XmlReader<B>,
|
||||
|
||||
/// Buffer for all XML data besides namespaces.
|
||||
buf: Vec<u8>,
|
||||
|
||||
/// Buffer for namespace data.
|
||||
nsbuf: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<B> XmlFrontendParser<B>
|
||||
where
|
||||
B: BufRead,
|
||||
{
|
||||
pub fn new(buf_read: B) -> Self {
|
||||
let reader = XmlReader::from_reader(buf_read);
|
||||
|
||||
Self {
|
||||
reader,
|
||||
buf: Vec::new(),
|
||||
nsbuf: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the closed byte interval representing the bytes associated
|
||||
/// with a given [`XmlEvent`].
|
||||
fn calc_interval(
|
||||
pos_start: usize,
|
||||
pos_cur: usize,
|
||||
ev: &XmlEvent,
|
||||
) -> ClosedByteInterval {
|
||||
match ev {
|
||||
XmlEvent::Empty(_) => ClosedByteInterval(pos_start, pos_cur - 1),
|
||||
|
||||
_ => ClosedByteInterval(pos_start, pos_start),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'l, B> FrontendParser<'l, XmlToken<'l>, XmlFrontendError>
|
||||
for XmlFrontendParser<B>
|
||||
where
|
||||
B: BufRead,
|
||||
{
|
||||
fn desc() -> &'static str {
|
||||
"XML-based package specification language"
|
||||
}
|
||||
|
||||
fn parse_next(&'l mut self) -> XmlFrontendResult<XmlFrontendEvent<'l>> {
|
||||
let reader = &mut self.reader;
|
||||
let pos_start = reader.buffer_position();
|
||||
|
||||
reader
|
||||
.read_namespaced_event(&mut self.buf, &mut self.nsbuf)
|
||||
.map(|(ns, ev)| match ev {
|
||||
XmlEvent::Eof => FrontendEvent::Eof,
|
||||
_ => {
|
||||
let interval = Some(Self::calc_interval(
|
||||
pos_start,
|
||||
reader.buffer_position(),
|
||||
&ev,
|
||||
));
|
||||
|
||||
FrontendEvent::Token(Token {
|
||||
kind: XmlToken::RawXmlEvent((ns, ev)),
|
||||
lexeme: None,
|
||||
interval,
|
||||
})
|
||||
}
|
||||
})
|
||||
.map_err(|e| FrontendError::UnrecoverableError {
|
||||
source: XmlFrontendError::XmlError(e.into()),
|
||||
interval: ClosedByteInterval(
|
||||
pos_start,
|
||||
reader.buffer_position(),
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub type XmlFrontendEvent<'l> =
|
||||
FrontendEvent<'l, XmlToken<'l>, XmlFrontendError>;
|
||||
|
||||
type Namespace<'a> = &'a [u8];
|
||||
type NamespacedXmlEvent<'a> = (Option<Namespace<'a>>, XmlEvent<'a>);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum XmlToken<'l> {
|
||||
RawXmlEvent(NamespacedXmlEvent<'l>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum XmlFrontendError {
|
||||
XmlError(XmlError),
|
||||
}
|
||||
|
||||
impl Display for XmlFrontendError {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::XmlError(e) => e.fmt(fmt),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for XmlFrontendError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
Self::XmlError(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Into<XmlError>> From<E> for XmlFrontendError {
|
||||
fn from(err: E) -> Self {
|
||||
Self::XmlError(err.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub type XmlFrontendResult<T> = FrontendResult<T, XmlFrontendError>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
|
@ -1,103 +0,0 @@
|
|||
// Tests for XML frontend
|
||||
//
|
||||
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// NB: Due to the complexity and verbosity of mocking XML events,
|
||||
// these tests are coupled with the XML parser.
|
||||
// Care should be taken to try to mitigate minor changes to the library's
|
||||
// output so as not to make these tests overly fragile.
|
||||
|
||||
use super::*;
|
||||
|
||||
type Sut<B> = XmlFrontendParser<B>;
|
||||
|
||||
// TODO: This exists only for initial testing.
//   An empty file should not ultimately be considered valid,
//     since it gives the parser no information about what type of file
//     it is.
#[test]
fn emits_eof_for_empty_file() {
    let empty_input: &[u8] = b"";
    let mut sut = Sut::new(empty_input);

    assert!(matches!(sut.parse_next(), Ok(FrontendEvent::Eof)));
}
|
||||
|
||||
// Raw tokens are needed until the parser is complete so that they can be
//   echoed back out.
#[test]
fn produces_raw_xml_events_as_tokens() -> Result<(), Box<dyn std::error::Error>>
{
    let stub_data: &[u8] = r#"<valid-xml xmlns="foons" />"#.as_bytes();
    let mut sut = Sut::new(stub_data);

    // A closed interval ends on the final byte itself,
    //   not on the byte after it.
    let last_offset = stub_data.len() - 1;

    loop {
        let (ns, ev, interval) = match sut.parse_next()? {
            FrontendEvent::Token(Token {
                kind: XmlToken::RawXmlEvent((ns, ev)),
                interval,
                ..
            }) => (ns, ev, interval),

            x => panic!("Unexpected: {:?}", x),
        };

        // Only namespaced events are of interest here.
        if ns.is_none() {
            continue;
        }

        assert!(matches!(
            interval,
            Some(ClosedByteInterval(0, hi)) if hi == last_offset
        ));

        if let XmlEvent::Empty(start) = ev {
            assert_eq!(start.name(), b"valid-xml");
            break;
        }
    }

    Ok(())
}
|
||||
|
||||
// An unterminated comment following a valid element must surface as an
//   unrecoverable XML error whose interval begins where the comment does
//   (offset 6).
#[test]
fn produces_error_on_xml_parse_failure() {
    let stub_data: &[u8] = b"<ok /><!-- EOF in comment";
    let mut sut = Sut::new(stub_data);

    loop {
        match sut.parse_next() {
            Ok(FrontendEvent::Eof) => panic!("Expected error"),

            // Tokens preceding the failure are skipped.
            Ok(_) => continue,

            Err(FrontendError::UnrecoverableError {
                source: XmlFrontendError::XmlError(_),
                interval: ClosedByteInterval(x, y),
            }) if x == 6 && y >= x => break,

            Err(e) => panic!("Error mismatch: {:?}", e),
        }
    }
}
|
|
@ -76,9 +76,6 @@ extern crate static_assertions;
|
|||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
#[cfg(feature = "wip-frontends")]
|
||||
pub mod frontend;
|
||||
|
||||
#[macro_use]
|
||||
pub mod xir;
|
||||
|
||||
|
|
Loading…
Reference in New Issue