tamer: xir::parse: Generalize input token type

This adds a `Token` type to `ParseState`.  Everything uses `xir::Token`
currently, but `XmloReader` will use `xir::flat::Object`.

Now that this has been generalized beyond XIR, the parser ought to be
hoisted up a level.

DEV-10863
main
Mike Gerwitz 2022-03-18 15:26:05 -04:00
parent 150b3b9aa4
commit 0360226caa
6 changed files with 109 additions and 74 deletions

View File

@ -21,7 +21,7 @@
use crate::sym::SymbolId;
use crate::tpwrap::quick_xml::{Error as XmlError, InnerXmlError};
use crate::xir::{parse::ParseError, tree::StackError};
use crate::xir::{parse::ParseError, tree::StackError, Token};
use std::fmt::Display;
/// Error during `xmlo` processing.
@ -38,7 +38,7 @@ pub enum XmloError {
/// XML parsing error (legacy, quick-xml).
XmlError(XmlError),
/// XIR parsing error.
XirtError(ParseError<StackError>),
XirtError(ParseError<Token, StackError>),
/// The root node was not an `lv:package`.
UnexpectedRoot,
/// A `preproc:sym` node was found, but is missing `@name`.
@ -70,8 +70,8 @@ impl From<InnerXmlError> for XmloError {
}
}
impl From<ParseError<StackError>> for XmloError {
fn from(e: ParseError<StackError>) -> Self {
impl From<ParseError<Token, StackError>> for XmloError {
fn from(e: ParseError<Token, StackError>) -> Self {
Self::XirtError(e)
}
}

View File

@ -567,11 +567,11 @@ impl Display for Token {
}
}
impl Token {
impl parse::Token for Token {
/// Retrieve the [`Span`] associated with a given [`Token`].
///
/// Every token has an associated span.
pub fn span(&self) -> Span {
fn span(&self) -> Span {
use Token::*;
match self {
@ -588,12 +588,6 @@ impl Token {
}
}
impl From<Token> for Span {
fn from(tok: Token) -> Self {
tok.span()
}
}
#[cfg(test)]
mod test {
use super::*;

View File

@ -23,7 +23,7 @@ use crate::{
span::Span,
xir::{
parse::{ParseState, Transition, TransitionResult},
QName, Token,
QName, Token as XirToken,
},
};
use std::{error::Error, fmt::Display};
@ -45,20 +45,21 @@ pub enum AttrParseState {
}
impl ParseState for AttrParseState {
type Token = XirToken;
type Object = Attr;
type Error = AttrParseError;
fn parse_token(self, tok: Token) -> TransitionResult<Self> {
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
use AttrParseState::{Empty, Name};
match (self, tok) {
(Empty, Token::AttrName(name, span)) => {
(Empty, XirToken::AttrName(name, span)) => {
Transition(Name(name, span)).incomplete()
}
(Empty, invalid) => Transition(Empty).dead(invalid),
(Name(name, nspan), Token::AttrValue(value, vspan)) => {
(Name(name, nspan), XirToken::AttrValue(value, vspan)) => {
Transition(Empty).with(Attr::new(name, value, (nspan, vspan)))
}
@ -87,10 +88,10 @@ impl Default for AttrParseState {
#[derive(Debug, PartialEq, Eq)]
pub enum AttrParseError {
/// [`XirToken::AttrName`] was expected.
AttrNameExpected(Token),
AttrNameExpected(XirToken),
/// [`XirToken::AttrValue`] was expected.
AttrValueExpected(QName, Span, Token),
AttrValueExpected(QName, Span, XirToken),
}
impl Display for AttrParseError {
@ -131,7 +132,7 @@ mod test {
#[test]
fn dead_if_first_token_is_non_attr() {
let tok = Token::Open("foo".unwrap_into(), S);
let tok = XirToken::Open("foo".unwrap_into(), S);
let sut = AttrParseState::default();
@ -153,8 +154,8 @@ mod test {
let attr = "attr".unwrap_into();
let val = "val".intern();
let toks =
[Token::AttrName(attr, S), Token::AttrValue(val, S2)].into_iter();
let toks = [XirToken::AttrName(attr, S), XirToken::AttrValue(val, S2)]
.into_iter();
let sut = AttrParseState::parse(toks);
@ -176,17 +177,18 @@ mod test {
// This token indicates that we're expecting a value to come next in
// the token stream.
let (Transition(sut), result) =
sut.parse_token(Token::AttrName(attr, S));
sut.parse_token(XirToken::AttrName(attr, S));
assert_eq!(result, Ok(ParseStatus::Incomplete));
// But we provide something else unexpected.
let (Transition(sut), result) = sut.parse_token(Token::Close(None, S2));
let (Transition(sut), result) =
sut.parse_token(XirToken::Close(None, S2));
assert_eq!(
result,
Err(AttrParseError::AttrValueExpected(
attr,
S,
Token::Close(None, S2)
XirToken::Close(None, S2)
))
);
@ -201,7 +203,7 @@ mod test {
// let's actually attempt a recovery.
let recover = "value".intern();
let (Transition(sut), result) =
sut.parse_token(Token::AttrValue(recover, S2));
sut.parse_token(XirToken::AttrValue(recover, S2));
assert_eq!(
result,
Ok(ParseStatus::Object(Attr::new(attr, recover, (S, S2)))),

View File

@ -41,7 +41,7 @@
use super::{
attr::{Attr, AttrParseError, AttrParseState},
parse::{ParseState, ParseStatus, ParsedResult, TransitionResult},
QName, Token, TokenStream, Whitespace,
QName, Token, Token as XirToken, TokenStream, Whitespace,
};
use crate::{span::Span, sym::SymbolId, xir::parse::Transition};
use arrayvec::ArrayVec;
@ -109,7 +109,7 @@ pub enum Object {
}
/// XIRF-compatible attribute parser.
pub trait FlatAttrParseState = ParseState<Object = Attr>
pub trait FlatAttrParseState = ParseState<Token = XirToken, Object = Attr>
where
<Self as ParseState>::Error: Into<StateError>;
@ -146,6 +146,7 @@ impl<const MAX_DEPTH: usize, SA> ParseState for State<MAX_DEPTH, SA>
where
SA: FlatAttrParseState,
{
type Token = XirToken;
type Object = Object;
type Error = StateError;

View File

@ -19,7 +19,6 @@
//! Basic streaming parsing framework for XIR lowering operations.
use super::{Token, TokenStream};
use crate::span::Span;
use std::fmt::Debug;
use std::mem::take;
@ -31,7 +30,34 @@ pub type ParsedResult<S> = ParseResult<S, Parsed<<S as ParseState>::Object>>;
/// Result of some non-parsing operation on a [`Parser`],
/// with any error having been wrapped in a [`ParseError`].
pub type ParseResult<S, T> = Result<T, ParseError<<S as ParseState>::Error>>;
pub type ParseResult<S, T> =
Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;
/// A single datum from a streaming IR with an associated [`Span`].
///
/// A token may be a lexeme with associated data,
/// or a more structured object having been lowered from other IRs.
///
/// This is the input type consumed by a [`ParseState`]
/// (see [`ParseState::Token`]).
/// Tokens may also be carried by [`ParseError::UnexpectedToken`] and
/// [`ParseStatus::Dead`],
/// both of which derive [`Debug`], [`PartialEq`], and [`Eq`].
pub trait Token: Display + Debug + PartialEq + Eq {
/// Retrieve the [`Span`] representing the source location of the token.
///
/// Any [`Token`] can also be converted into a [`Span`] using [`From`],
/// which delegates to this method.
fn span(&self) -> Span;
}
impl<T: Token> From<T> for Span {
fn from(tok: T) -> Self {
tok.span()
}
}
/// An infallible [`Token`] stream.
///
/// If the token stream originates from an operation that could potentially
/// fail and ought to be propagated,
/// use [`TokenResultStream`].
///
/// The name "stream" in place of "iterator" is intended to convey that this
/// type is expected to be processed in real-time as a stream,
/// not read into memory.
///
/// This is a trait alias for any [`Iterator`] whose items are tokens of
/// type `T`;
/// [`Parser`] and [`ParseState::parse`] are bounded by it using the
/// parser's own [`ParseState::Token`].
pub trait TokenStream<T: Token> = Iterator<Item = T>;
/// A deterministic parsing automaton.
///
@ -54,6 +80,9 @@ pub type ParseResult<S, T> = Result<T, ParseError<<S as ParseState>::Error>>;
/// [`TokenStream`] at the current position for a given parser
/// composition.
pub trait ParseState: Default + PartialEq + Eq + Debug {
/// Input tokens to the parser.
type Token: Token;
/// Objects produced by a parser utilizing these states.
type Object;
@ -64,7 +93,7 @@ pub trait ParseState: Default + PartialEq + Eq + Debug {
///
/// Whether this method is helpful or provides any clarity depends on
/// the context and the types that are able to be inferred.
fn parse<I: TokenStream>(toks: I) -> Parser<Self, I> {
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I> {
Parser::from(toks)
}
@ -85,7 +114,7 @@ pub trait ParseState: Default + PartialEq + Eq + Debug {
/// which in turn makes it easier to compose parsers
/// (which conceptually involves stitching together state
/// machines).
fn parse_token(self, tok: Token) -> TransitionResult<Self>;
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self>;
/// Whether the current state represents an accepting state.
///
@ -109,8 +138,10 @@ pub trait ParseState: Default + PartialEq + Eq + Debug {
///
/// This is used by [`ParseState::parse_token`];
/// see that function for rationale.
pub type ParseStateResult<S> =
Result<ParseStatus<<S as ParseState>::Object>, <S as ParseState>::Error>;
pub type ParseStateResult<S> = Result<
ParseStatus<<S as ParseState>::Token, <S as ParseState>::Object>,
<S as ParseState>::Error,
>;
/// Denotes a state transition.
///
@ -145,7 +176,7 @@ impl<S: ParseState> Transition<S> {
/// This corresponds to [`ParseStatus::Dead`],
/// and a calling parser should use the provided [`Token`] as
/// lookahead.
pub fn dead(self, tok: Token) -> (Self, ParseStateResult<S>) {
pub fn dead(self, tok: S::Token) -> (Self, ParseStateResult<S>) {
(self, Ok(ParseStatus::Dead(tok)))
}
@ -180,13 +211,13 @@ pub type TransitionResult<S> = (Transition<S>, ParseStateResult<S>);
/// call [`finalize`](Parser::finalize) to ensure that parsing has
/// completed in an accepting state.
#[derive(Debug, PartialEq, Eq)]
pub struct Parser<S: ParseState, I: TokenStream> {
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
toks: I,
state: S,
last_span: Option<Span>,
}
impl<S: ParseState, I: TokenStream> Parser<S, I> {
impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Indicate that no further parsing will take place using this parser,
/// and [`drop`] it.
///
@ -197,7 +228,9 @@ impl<S: ParseState, I: TokenStream> Parser<S, I> {
/// Consequently,
/// the caller should expect [`ParseError::UnexpectedEof`] if the
/// parser is not in an accepting state.
pub fn finalize(self) -> Result<(), (Self, ParseError<S::Error>)> {
pub fn finalize(
self,
) -> Result<(), (Self, ParseError<S::Token, S::Error>)> {
if self.state.is_accepting() {
Ok(())
} else {
@ -207,7 +240,7 @@ impl<S: ParseState, I: TokenStream> Parser<S, I> {
}
}
impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> {
impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
type Item = ParsedResult<S>;
/// Parse a single [`Token`] according to the current
@ -276,7 +309,7 @@ impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> {
/// Parsers may return their own unique errors via the
/// [`StateError`][ParseError::StateError] variant.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseError<E: Error + PartialEq + Eq> {
pub enum ParseError<T: Token, E: Error + PartialEq + Eq> {
/// Token stream ended unexpectedly.
///
/// This error means that the parser was expecting more input before
@ -304,15 +337,15 @@ pub enum ParseError<E: Error + PartialEq + Eq> {
/// it may be desirable to be able to query [`ParseState`] for what
/// tokens are acceptable at this point,
/// to provide better error messages.
UnexpectedToken(Token),
UnexpectedToken(T),
/// A parser-specific error associated with an inner
/// [`ParseState`].
StateError(E),
}
impl<EA: Error + PartialEq + Eq> ParseError<EA> {
pub fn inner_into<EB: Error + PartialEq + Eq>(self) -> ParseError<EB>
impl<T: Token, EA: Error + PartialEq + Eq> ParseError<T, EA> {
pub fn inner_into<EB: Error + PartialEq + Eq>(self) -> ParseError<T, EB>
where
EA: Into<EB>,
{
@ -325,13 +358,13 @@ impl<EA: Error + PartialEq + Eq> ParseError<EA> {
}
}
impl<E: Error + PartialEq + Eq> From<E> for ParseError<E> {
impl<T: Token, E: Error + PartialEq + Eq> From<E> for ParseError<T, E> {
fn from(e: E) -> Self {
Self::StateError(e)
}
}
impl<E: Error + PartialEq + Eq> Display for ParseError<E> {
impl<T: Token, E: Error + PartialEq + Eq> Display for ParseError<T, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnexpectedEof(ospan) => {
@ -350,7 +383,7 @@ impl<E: Error + PartialEq + Eq> Display for ParseError<E> {
}
}
impl<E: Error + PartialEq + Eq + 'static> Error for ParseError<E> {
impl<T: Token, E: Error + PartialEq + Eq + 'static> Error for ParseError<T, E> {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::StateError(e) => Some(e),
@ -359,7 +392,7 @@ impl<E: Error + PartialEq + Eq + 'static> Error for ParseError<E> {
}
}
impl<S: ParseState, I: TokenStream> From<I> for Parser<S, I> {
impl<S: ParseState, I: TokenStream<S::Token>> From<I> for Parser<S, I> {
fn from(toks: I) -> Self {
Self {
toks,
@ -371,7 +404,7 @@ impl<S: ParseState, I: TokenStream> From<I> for Parser<S, I> {
/// Result of a parsing operation.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseStatus<T> {
pub enum ParseStatus<T, O> {
/// Additional tokens are needed to complete parsing of the next object.
Incomplete,
@ -379,7 +412,7 @@ pub enum ParseStatus<T> {
///
/// This does not indicate that the parser is complete,
/// as more objects may be able to be emitted.
Object(T),
Object(O),
/// Parser encountered a dead state relative to the given token.
///
@ -404,7 +437,7 @@ pub enum ParseStatus<T> {
///
/// If there is no parent context to handle the token,
/// [`Parser`] must yield an error.
Dead(Token),
Dead(T),
}
/// Result of a parsing operation.
@ -413,7 +446,7 @@ pub enum ParseStatus<T> {
/// operation,
/// this type is public-facing and used by [`Parser`].
#[derive(Debug, PartialEq, Eq)]
pub enum Parsed<T> {
pub enum Parsed<O> {
/// Additional tokens are needed to complete parsing of the next object.
Incomplete,
@ -421,11 +454,11 @@ pub enum Parsed<T> {
///
/// This does not indicate that the parser is complete,
/// as more objects may be able to be emitted.
Object(T),
Object(O),
}
impl<T> From<ParseStatus<T>> for Parsed<T> {
fn from(status: ParseStatus<T>) -> Self {
impl<T: Token, O> From<ParseStatus<T, O>> for Parsed<O> {
fn from(status: ParseStatus<T, O>) -> Self {
match status {
ParseStatus::Incomplete => Parsed::Incomplete,
ParseStatus::Object(x) => Parsed::Object(x),
@ -440,6 +473,7 @@ impl<T> From<ParseStatus<T>> for Parsed<T> {
pub mod test {
use std::{assert_matches::assert_matches, iter::once};
use super::super::Token as XirToken;
use super::*;
use crate::{span::DUMMY_SPAN as DS, sym::GlobalSymbolIntern};
@ -456,16 +490,17 @@ pub mod test {
}
impl ParseState for EchoState {
type Object = Token;
type Token = XirToken;
type Object = XirToken;
type Error = EchoStateError;
fn parse_token(self, tok: Token) -> TransitionResult<Self> {
fn parse_token(self, tok: XirToken) -> TransitionResult<Self> {
match tok {
Token::Comment(..) => Transition(Self::Done).with(tok),
Token::Close(..) => {
XirToken::Comment(..) => Transition(Self::Done).with(tok),
XirToken::Close(..) => {
Transition(self).err(EchoStateError::InnerError(tok))
}
Token::Text(..) => Transition(self).dead(tok),
XirToken::Text(..) => Transition(self).dead(tok),
_ => Transition(self).with(tok),
}
}
@ -477,7 +512,7 @@ pub mod test {
#[derive(Debug, PartialEq, Eq)]
enum EchoStateError {
InnerError(Token),
InnerError(XirToken),
}
impl Display for EchoStateError {
@ -497,7 +532,7 @@ pub mod test {
#[test]
fn successful_parse_in_accepting_state_with_spans() {
// EchoState is placed into a Done state given Comment.
let tok = Token::Comment("foo".into(), DS);
let tok = XirToken::Comment("foo".into(), DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);
@ -518,7 +553,7 @@ pub mod test {
#[test]
fn fails_on_end_of_stream_when_not_in_accepting_state() {
let span = Span::new(10, 20, "ctx".intern());
let mut toks = [Token::Close(None, span)].into_iter();
let mut toks = [XirToken::Close(None, span)].into_iter();
let mut sut = Sut::from(&mut toks);
@ -538,8 +573,8 @@ pub mod test {
#[test]
fn returns_state_specific_error() {
// Token::Close causes EchoState to produce an error.
let errtok = Token::Close(None, DS);
// XirToken::Close causes EchoState to produce an error.
let errtok = XirToken::Close(None, DS);
let mut toks = [errtok.clone()].into_iter();
let mut sut = Sut::from(&mut toks);
@ -564,10 +599,10 @@ pub mod test {
// Set up so that we have a single token that we can use for
// recovery as part of the same iterator.
let recovery = Token::Comment("recov".into(), DS);
let recovery = XirToken::Comment("recov".into(), DS);
let mut toks = [
// Used purely to populate a Span.
Token::Close(None, span),
XirToken::Close(None, span),
// Recovery token here:
recovery.clone(),
]
@ -605,7 +640,7 @@ pub mod test {
#[test]
fn unhandled_dead_state_results_in_error() {
// A Text will cause our parser to return Dead.
let tok = Token::Text("dead".into(), DS);
let tok = XirToken::Text("dead".into(), DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);

View File

@ -173,14 +173,15 @@
//!
//! [state machine]: https://en.wikipedia.org/wiki/Finite-state_machine
use self::super::{
use super::{
attr::{Attr, AttrList, AttrParseError, AttrParseState},
parse::{ParseError, ParseResult, ParseState, ParseStatus, ParsedResult},
parse::{
ParseError, ParseResult, ParseState, ParseStatus, ParsedResult,
TransitionResult,
},
QName, Token, Token as XirToken, TokenResultStream, TokenStream,
};
use super::{
parse::TransitionResult, QName, Token, TokenResultStream, TokenStream,
};
use crate::{span::Span, sym::SymbolId, xir::parse::Transition};
use std::{error::Error, fmt::Display, result};
@ -496,7 +497,7 @@ where
Done,
}
pub trait StackAttrParseState = ParseState<Object = Attr>
pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr>
where
<Self as ParseState>::Error: Into<StackError>;
@ -507,10 +508,11 @@ impl<SA: StackAttrParseState> Default for Stack<SA> {
}
impl<SA: StackAttrParseState> ParseState for Stack<SA> {
type Token = XirToken;
type Object = Tree;
type Error = StackError;
fn parse_token(self, tok: Token) -> TransitionResult<Self> {
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
use Stack::*;
match (self, tok) {
@ -768,7 +770,8 @@ pub fn parser_from(
#[inline]
pub fn attr_parser_from<'a>(
toks: impl TokenStream,
) -> impl Iterator<Item = result::Result<Attr, ParseError<StackError>>> {
) -> impl Iterator<Item = result::Result<Attr, ParseError<XirToken, StackError>>>
{
use super::parse::Parsed;
AttrParseState::parse(toks).filter_map(|parsed| match parsed {