tamer: xir::tree: Integrate AttrParserState into Stack
Note that AttrParse{r=>}State needs renaming, and Stack will get a better name down the line too. This commit message is accurate, but confusing. This performs the long-awaited task of trying to observe, concretely, how to combine two automata. This has the effect of stitching together the state machines, such that the union of the two is equivalent to the original monolith. The next step will be to abstract this away. There are some important things to note here. First, this introduces a new "dead" state concept, where a dead state is defined here as an _accepting_ state that has no state transitions for the given input token. This is more strict than a dead state as defined in, for example, the Dragon Book, where backtracking may occur. The reason I chose for a Dead state to be accepting is simple: it represents a lookahead situation. It says, "I don't know what this token is, but I've done my job, so it may be useful in a parent context". The "I've done my job" part is only applicable in an accepting state. If the parser is _not_ in an accepting state, then an unknown token is simply an error; we should _not_ try to backtrack or anything of the sort, because we want only a single token of lookahead. This was done because it's otherwise difficult to compose the two parsers without requiring that AttrEnd exist in every XIR stream; this has always been an awkward delimiter that was introduced to make the parser LL(0), but I tried to compromise by saying that it was optional. Of course, I knew that decision caused awkward inconsistencies; I had just hoped that those inconsistencies wouldn't manifest in practical issues. Well, now they did, and the benefits of AttrEnd that we had in the previous construction do not exist in this one. Consequently, it makes more sense to simply go from LL(0) to LL(1), which makes AttrEnd unnecessary, and a future commit will remove it entirely.
All of this information will be documented, but I want to get further in the implementation first to make sure I don't change course again and therefore waste my time on docs. DEV-11268
main
parent
0c7f04e092
commit
61f7a12975
|
@ -252,7 +252,7 @@ fn test_writes_deps() -> TestResult {
|
|||
|
||||
p_syms.enumerate().for_each(|(i, ele)| {
|
||||
let ident = &objs[i];
|
||||
let attrs = ele.attrs().unwrap();
|
||||
let attrs = ele.attrs();
|
||||
|
||||
assert_eq!(attrs.find(QN_NAME).map(|a| a.value()), Some(ident.name()),);
|
||||
|
||||
|
@ -410,7 +410,6 @@ fn test_writes_map_froms() -> TestResult {
|
|||
from.as_element()
|
||||
.unwrap()
|
||||
.attrs()
|
||||
.unwrap()
|
||||
.find(QN_NAME)
|
||||
.expect("expecting @name")
|
||||
.value(),
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
use crate::sym::SymbolId;
|
||||
use crate::tpwrap::quick_xml::{Error as XmlError, InnerXmlError};
|
||||
use crate::xir::tree::StackError as XirtParseError;
|
||||
use crate::xir::tree::{parse::ParseError, StackError};
|
||||
use std::fmt::Display;
|
||||
|
||||
/// Error during `xmlo` processing.
|
||||
|
@ -38,7 +38,7 @@ pub enum XmloError {
|
|||
/// XML parsing error (legacy, quick-xml).
|
||||
XmlError(XmlError),
|
||||
/// XIR parsing error.
|
||||
XirtError(XirtParseError),
|
||||
XirtError(ParseError<StackError>),
|
||||
/// The root node was not an `lv:package`.
|
||||
UnexpectedRoot,
|
||||
/// A `preproc:sym` node was found, but is missing `@name`.
|
||||
|
@ -70,8 +70,8 @@ impl From<InnerXmlError> for XmloError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<XirtParseError> for XmloError {
|
||||
fn from(e: XirtParseError) -> Self {
|
||||
impl From<ParseError<StackError>> for XmloError {
|
||||
fn from(e: ParseError<StackError>) -> Self {
|
||||
Self::XirtError(e)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -174,21 +174,23 @@
|
|||
//! [state machine]: https://en.wikipedia.org/wiki/Finite-state_machine
|
||||
|
||||
mod attr;
|
||||
mod parse;
|
||||
pub mod parse;
|
||||
|
||||
use self::{
|
||||
attr::AttrParserState,
|
||||
parse::{ParseResult, ParseState, ParseStateResult, ParsedResult},
|
||||
attr::{AttrParseError, AttrParserState},
|
||||
parse::{
|
||||
ParseError, ParseResult, ParseState, ParseStateResult, ParseStatus,
|
||||
ParsedResult,
|
||||
},
|
||||
};
|
||||
|
||||
use super::{QName, Token, TokenResultStream, TokenStream};
|
||||
use crate::{span::Span, sym::SymbolId};
|
||||
use std::{error::Error, fmt::Display, mem::take};
|
||||
use std::{error::Error, fmt::Display, mem::take, result};
|
||||
|
||||
pub use attr::{Attr, AttrList};
|
||||
|
||||
type Parsed = parse::Parsed<Tree>;
|
||||
type ParseStatus = parse::ParseStatus<Tree>;
|
||||
|
||||
/// A XIR tree (XIRT).
|
||||
///
|
||||
|
@ -294,7 +296,7 @@ impl Tree {
|
|||
pub struct Element {
|
||||
name: QName,
|
||||
/// Zero or more attributes.
|
||||
attrs: Option<AttrList>,
|
||||
attrs: AttrList,
|
||||
/// Zero or more child nodes.
|
||||
children: Vec<Tree>,
|
||||
/// Spans for opening and closing tags respectively.
|
||||
|
@ -316,8 +318,8 @@ impl Element {
|
|||
|
||||
/// Attributes of this element.
|
||||
#[inline]
|
||||
pub fn attrs(&self) -> Option<&AttrList> {
|
||||
self.attrs.as_ref()
|
||||
pub fn attrs(&self) -> &AttrList {
|
||||
&self.attrs
|
||||
}
|
||||
|
||||
/// Opens an element for incremental construction.
|
||||
|
@ -330,7 +332,7 @@ impl Element {
|
|||
fn open(name: QName, span: Span) -> Self {
|
||||
Self {
|
||||
name,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (span, span), // We do not yet know where the span will end
|
||||
}
|
||||
|
@ -417,7 +419,7 @@ impl ElementStack {
|
|||
/// then the returned [`Stack`] will represent the state of the stack
|
||||
/// prior to the child element being opened,
|
||||
/// as stored with [`ElementStack::store`].
|
||||
fn consume_child_or_complete(self) -> Stack {
|
||||
fn consume_child_or_complete<SA: StackAttrParseState>(self) -> Stack<SA> {
|
||||
match self.pstack {
|
||||
Some(parent_stack) => Stack::BuddingElement(
|
||||
parent_stack.consume_element(self.element),
|
||||
|
@ -437,7 +439,7 @@ impl ElementStack {
|
|||
/// Push the provided [`Attr`] onto the attribute list of the inner
|
||||
/// [`Element`].
|
||||
fn consume_attrs(mut self, attr_list: AttrList) -> Self {
|
||||
self.element.attrs.replace(attr_list);
|
||||
self.element.attrs = attr_list;
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -466,7 +468,10 @@ impl ElementStack {
|
|||
/// For more information,
|
||||
/// see the [module-level documentation](self).
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub enum Stack {
|
||||
pub enum Stack<SA = AttrParserState>
|
||||
where
|
||||
SA: StackAttrParseState,
|
||||
{
|
||||
/// Empty stack.
|
||||
Empty,
|
||||
|
||||
|
@ -484,9 +489,8 @@ pub enum Stack {
|
|||
/// An [`AttrList`] that is still under construction.
|
||||
BuddingAttrList(ElementStack, AttrList),
|
||||
|
||||
/// An attribute is awaiting its value,
|
||||
/// after which it will be attached to an element.
|
||||
AttrName(ElementStack, AttrList, QName, Span),
|
||||
/// Parsing has been ceded to `SA` for attribute parsing.
|
||||
AttrState(ElementStack, AttrList, SA),
|
||||
|
||||
/// Parsing has completed relative to the initial context.
|
||||
///
|
||||
|
@ -498,35 +502,64 @@ pub enum Stack {
|
|||
Done,
|
||||
}
|
||||
|
||||
impl Default for Stack {
|
||||
pub trait StackAttrParseState = ParseState<Object = Attr>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<StackError>;
|
||||
|
||||
impl<SA: StackAttrParseState> Default for Stack<SA> {
|
||||
fn default() -> Self {
|
||||
Self::Empty
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseState for Stack {
|
||||
impl<SA: StackAttrParseState> ParseState for Stack<SA> {
|
||||
type Object = Tree;
|
||||
type Error = StackError;
|
||||
|
||||
fn parse_token(&mut self, tok: Token) -> ParseStateResult<Self> {
|
||||
let stack = take(self);
|
||||
|
||||
// This demonstrates how parsers can be combined.
|
||||
// The next step will be to abstract this away.
|
||||
if let Stack::AttrState(estack, attrs, mut sa) = stack {
|
||||
use ParseStatus::*;
|
||||
return match sa.parse_token(tok) {
|
||||
Ok(Incomplete) => {
|
||||
*self = Self::AttrState(estack, attrs, sa);
|
||||
Ok(Incomplete)
|
||||
}
|
||||
Ok(Object(attr)) => {
|
||||
let attrs = attrs.push(attr);
|
||||
*self = Self::AttrState(estack, attrs, sa);
|
||||
Ok(Incomplete)
|
||||
}
|
||||
// This will likely go away with AttrEnd.
|
||||
Ok(Done) => {
|
||||
*self = Self::BuddingElement(estack.consume_attrs(attrs));
|
||||
Ok(Incomplete)
|
||||
}
|
||||
Ok(Dead(lookahead)) => {
|
||||
*self = Self::BuddingElement(estack.consume_attrs(attrs));
|
||||
self.parse_token(lookahead)
|
||||
}
|
||||
Err(x) => Err(x.into()),
|
||||
};
|
||||
}
|
||||
|
||||
match tok {
|
||||
Token::Open(name, span) => stack.open_element(name, span),
|
||||
Token::Close(name, span) => stack.close_element(name, span),
|
||||
Token::AttrName(name, span) => stack.open_attr(name, span),
|
||||
Token::AttrValue(value, span) => stack.close_attr(value, span),
|
||||
Token::AttrEnd(_) => stack.end_attrs(),
|
||||
Token::Text(value, span) => stack.text(value, span),
|
||||
|
||||
// This parse is being rewritten, so we'll address this with a
|
||||
// proper error then.
|
||||
Token::AttrValueFragment(..) => {
|
||||
panic!("AttrValueFragment is not parsable")
|
||||
}
|
||||
|
||||
Token::Comment(..) | Token::CData(..) | Token::Whitespace(..) => {
|
||||
Err(StackError::Todo(tok, stack))
|
||||
_ if self.is_accepting() => return Ok(ParseStatus::Dead(tok)),
|
||||
_ => {
|
||||
todo!(
|
||||
"TODO: `{:?}` unrecognized. The parser is not yet \
|
||||
complete, so this could represent either a missing \
|
||||
feature or a semantic error. Stack: `{:?}`.",
|
||||
tok,
|
||||
stack
|
||||
)
|
||||
}
|
||||
}
|
||||
.map(|new_stack| self.store_or_emit(new_stack))
|
||||
|
@ -537,7 +570,7 @@ impl ParseState for Stack {
|
|||
}
|
||||
}
|
||||
|
||||
impl Stack {
|
||||
impl<SA: StackAttrParseState> Stack<SA> {
|
||||
/// Attempt to open a new element.
|
||||
///
|
||||
/// If the stack is [`Self::Empty`],
|
||||
|
@ -552,25 +585,29 @@ impl Stack {
|
|||
fn open_element(self, name: QName, span: Span) -> Result<Self> {
|
||||
let element = Element::open(name, span);
|
||||
|
||||
Ok(Self::BuddingElement(ElementStack {
|
||||
element,
|
||||
pstack: match self {
|
||||
// Opening a root element (or lack of context).
|
||||
Self::Empty => None,
|
||||
Ok(Self::AttrState(
|
||||
ElementStack {
|
||||
element,
|
||||
pstack: match self {
|
||||
// Opening a root element (or lack of context).
|
||||
Self::Empty => None,
|
||||
|
||||
// Open a child element.
|
||||
Self::BuddingElement(pstack) => Some(pstack.store()),
|
||||
// Open a child element.
|
||||
Self::BuddingElement(pstack) => Some(pstack.store()),
|
||||
|
||||
// Opening a child element in attribute parsing context.
|
||||
// Automatically close the attributes despite a missing
|
||||
// AttrEnd to accommodate non-reader XIR.
|
||||
Self::BuddingAttrList(pstack, attr_list) => {
|
||||
Some(pstack.consume_attrs(attr_list).store())
|
||||
}
|
||||
// Opening a child element in attribute parsing context.
|
||||
// Automatically close the attributes despite a missing
|
||||
// AttrEnd to accommodate non-reader XIR.
|
||||
Self::BuddingAttrList(pstack, attr_list) => {
|
||||
Some(pstack.consume_attrs(attr_list).store())
|
||||
}
|
||||
|
||||
_ => todo! {},
|
||||
_ => todo! {},
|
||||
},
|
||||
},
|
||||
}))
|
||||
Default::default(),
|
||||
SA::default(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Attempt to close an element.
|
||||
|
@ -602,57 +639,6 @@ impl Stack {
|
|||
}
|
||||
}
|
||||
|
||||
/// Begin an attribute on an element.
|
||||
///
|
||||
/// An attribute begins with a [`QName`] representing its name.
|
||||
/// It will be attached to a parent element after being closed with a
|
||||
/// value via [`Stack::close_attr`].
|
||||
fn open_attr(self, name: QName, span: Span) -> Result<Self> {
|
||||
Ok(match self {
|
||||
// Begin construction of an attribute list on a new element.
|
||||
Self::BuddingElement(ele_stack) => {
|
||||
Self::AttrName(ele_stack, Default::default(), name, span)
|
||||
}
|
||||
|
||||
// Continuation of attribute list.
|
||||
Self::BuddingAttrList(ele_stack, attr_list) => {
|
||||
Self::AttrName(ele_stack, attr_list, name, span)
|
||||
}
|
||||
|
||||
_ => todo!("open_attr in state {:?}", self),
|
||||
})
|
||||
}
|
||||
|
||||
/// Assigns a value to an opened attribute and attaches to the parent
|
||||
/// element.
|
||||
fn close_attr(self, value: SymbolId, span: Span) -> Result<Self> {
|
||||
Ok(match self {
|
||||
Self::AttrName(ele_stack, attr_list, name, open_span) => {
|
||||
Self::BuddingAttrList(
|
||||
ele_stack,
|
||||
attr_list.push(Attr::new(name, value, (open_span, span))),
|
||||
)
|
||||
}
|
||||
|
||||
_ => todo! {},
|
||||
})
|
||||
}
|
||||
|
||||
/// End attribute parsing.
|
||||
///
|
||||
/// If parsing occurs within an element context,
|
||||
/// the accumulated [`AttrList`] will be attached to the budding
|
||||
/// [`Element`].
|
||||
fn end_attrs(self) -> Result<Self> {
|
||||
Ok(match self {
|
||||
Self::BuddingAttrList(ele_stack, attr_list) => {
|
||||
Self::BuddingElement(ele_stack.consume_attrs(attr_list))
|
||||
}
|
||||
|
||||
_ => todo!("attr error"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Appends a text node as a child of an element.
|
||||
///
|
||||
/// This is valid only for a [`Stack::BuddingElement`].
|
||||
|
@ -668,7 +654,7 @@ impl Stack {
|
|||
}
|
||||
|
||||
/// Emit a completed object or store the current stack for further processing.
|
||||
fn store_or_emit(&mut self, new_stack: Stack) -> ParseStatus {
|
||||
fn store_or_emit(&mut self, new_stack: Self) -> ParseStatus<Tree> {
|
||||
match new_stack {
|
||||
Stack::ClosedElement(ele) => {
|
||||
ParseStatus::Object(Tree::Element(ele))
|
||||
|
@ -699,14 +685,13 @@ pub enum StackError {
|
|||
close: (QName, Span),
|
||||
},
|
||||
|
||||
AttrError(AttrParseError),
|
||||
|
||||
/// An attribute was expected as the next [`Token`].
|
||||
AttrNameExpected(Token),
|
||||
|
||||
/// Token stream ended before attribute parsing was complete.
|
||||
UnexpectedAttrEof,
|
||||
|
||||
/// Not yet implemented.
|
||||
Todo(Token, Stack),
|
||||
}
|
||||
|
||||
impl Display for StackError {
|
||||
|
@ -725,6 +710,8 @@ impl Display for StackError {
|
|||
)
|
||||
}
|
||||
|
||||
Self::AttrError(e) => Display::fmt(e, f),
|
||||
|
||||
Self::AttrNameExpected(tok) => {
|
||||
write!(f, "attribute name expected, found {}", tok)
|
||||
}
|
||||
|
@ -736,23 +723,16 @@ impl Display for StackError {
|
|||
"unexpected end of input during isolated attribute parsing",
|
||||
)
|
||||
}
|
||||
|
||||
Self::Todo(tok, stack) => {
|
||||
write!(
|
||||
f,
|
||||
"TODO: `{:?}` unrecognized. The parser is not yet \
|
||||
complete, so this could represent either a missing \
|
||||
feature or a semantic error. Stack: `{:?}`.",
|
||||
tok, stack
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for StackError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
None
|
||||
match self {
|
||||
Self::AttrError(e) => Some(e),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -779,7 +759,7 @@ impl Error for StackError {
|
|||
pub fn parse(
|
||||
toks: impl TokenStream,
|
||||
) -> impl Iterator<Item = ParsedResult<Stack>> {
|
||||
Stack::parse(toks)
|
||||
Stack::<AttrParserState>::parse(toks)
|
||||
}
|
||||
|
||||
/// Produce a lazy parser from a given [`TokenStream`],
|
||||
|
@ -808,7 +788,7 @@ pub fn parse(
|
|||
pub fn parser_from(
|
||||
toks: impl TokenStream,
|
||||
) -> impl Iterator<Item = ParseResult<Stack, Tree>> {
|
||||
Stack::parse(toks).filter_map(|parsed| match parsed {
|
||||
Stack::<AttrParserState>::parse(toks).filter_map(|parsed| match parsed {
|
||||
Ok(Parsed::Object(tree)) => Some(Ok(tree)),
|
||||
Ok(Parsed::Incomplete) => None,
|
||||
Err(x) => Some(Err(x)),
|
||||
|
@ -837,31 +817,23 @@ pub fn parser_from(
|
|||
/// see the [module-level documentation](self).
|
||||
#[inline]
|
||||
pub fn attr_parser_from<'a>(
|
||||
toks: &'a mut impl TokenStream,
|
||||
) -> impl Iterator<Item = Result<Attr>> + 'a {
|
||||
toks: impl TokenStream,
|
||||
) -> impl Iterator<Item = result::Result<Attr, ParseError<StackError>>> {
|
||||
use parse::Parsed;
|
||||
|
||||
AttrParserState::parse(toks).filter_map(|parsed| match parsed {
|
||||
Ok(Parsed::Object(attr)) => Some(Ok(attr)),
|
||||
Ok(Parsed::Incomplete) => None,
|
||||
Err(x) => Some(Err(x.into())),
|
||||
Err(ParseError::StateError(e)) => {
|
||||
Some(Err(ParseError::StateError(StackError::AttrError(e))))
|
||||
}
|
||||
Err(e) => Some(Err(e.inner_into())),
|
||||
})
|
||||
}
|
||||
|
||||
// Transitional; this will go away, or at least be refined.
|
||||
impl From<parse::ParseError<attr::AttrParseError>> for StackError {
|
||||
fn from(e: parse::ParseError<attr::AttrParseError>) -> Self {
|
||||
match e {
|
||||
parse::ParseError::UnexpectedEof(_) => Self::UnexpectedAttrEof,
|
||||
|
||||
parse::ParseError::StateError(
|
||||
attr::AttrParseError::AttrNameExpected(tok),
|
||||
) => Self::AttrNameExpected(tok),
|
||||
|
||||
parse::ParseError::StateError(
|
||||
attr::AttrParseError::AttrValueExpected(..),
|
||||
) => Self::UnexpectedAttrEof,
|
||||
}
|
||||
impl From<AttrParseError> for StackError {
|
||||
fn from(e: AttrParseError) -> Self {
|
||||
StackError::AttrError(e)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ impl ParseState for AttrParserState {
|
|||
Ok(ParseStatus::Incomplete)
|
||||
}
|
||||
|
||||
(Empty, invalid) => Err(AttrParseError::AttrNameExpected(invalid)),
|
||||
(Empty, invalid) => return Ok(ParseStatus::Dead(invalid)),
|
||||
|
||||
(Name(name, nspan), Token::AttrValue(value, vspan)) => {
|
||||
Ok(ParseStatus::Object(Attr::new(name, value, (nspan, vspan))))
|
||||
|
@ -86,7 +86,7 @@ impl Default for AttrParserState {
|
|||
}
|
||||
|
||||
/// Attribute parsing error.
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum AttrParseError {
|
||||
/// [`Token::AttrName`] was expected.
|
||||
AttrNameExpected(Token),
|
||||
|
@ -135,16 +135,14 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn fails_if_first_token_is_non_attr() {
|
||||
fn dead_if_first_token_is_non_attr() {
|
||||
let tok = Token::Open("foo".unwrap_into(), *S);
|
||||
|
||||
let mut sut = AttrParserState::default();
|
||||
|
||||
// Fail immediately.
|
||||
assert_eq!(
|
||||
Err(AttrParseError::AttrNameExpected(tok.clone())),
|
||||
sut.parse_token(tok)
|
||||
);
|
||||
// There is no state that we can transition to,
|
||||
// and we're in an empty accepting state.
|
||||
assert_eq!(Ok(ParseStatus::Dead(tok.clone())), sut.parse_token(tok));
|
||||
|
||||
// Let's just make sure we're in the same state we started in so
|
||||
// that we know we can accommodate recovery token(s).
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
use super::super::{Token, TokenStream};
|
||||
use crate::span::Span;
|
||||
use std::fmt::Debug;
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
/// Result of applying a [`Token`] to a [`ParseState`],
|
||||
|
@ -51,12 +52,12 @@ pub type ParseResult<S, T> = Result<T, ParseError<<S as ParseState>::Error>>;
|
|||
/// this does in fact represent the current state of the entire
|
||||
/// [`TokenStream`] at the current position for a given parser
|
||||
/// composition.
|
||||
pub trait ParseState: Default {
|
||||
pub trait ParseState: Default + PartialEq + Eq + Debug {
|
||||
/// Objects produced by a parser utilizing these states.
|
||||
type Object;
|
||||
|
||||
/// Errors specific to this set of states.
|
||||
type Error: Error + PartialEq;
|
||||
type Error: Error + PartialEq + Eq;
|
||||
|
||||
/// Construct a parser.
|
||||
///
|
||||
|
@ -166,9 +167,21 @@ impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> {
|
|||
// reporting in case we encounter an EOF.
|
||||
self.last_span = Some(tok.span());
|
||||
|
||||
use ParseStatus::*;
|
||||
match self.state.parse_token(tok) {
|
||||
Ok(ParseStatus::Done) => None,
|
||||
Ok(parsed) => Some(Ok(parsed.into())),
|
||||
Ok(Done) => None,
|
||||
|
||||
// Nothing handled this dead state,
|
||||
// and we cannot discard a lookahead token,
|
||||
// so we have no choice but to produce an error.
|
||||
Ok(Dead(invalid)) => {
|
||||
Some(Err(ParseError::UnexpectedToken(invalid)))
|
||||
}
|
||||
|
||||
Ok(parsed @ (Incomplete | Object(..))) => {
|
||||
Some(Ok(parsed.into()))
|
||||
}
|
||||
|
||||
Err(e) => Some(Err(e.into())),
|
||||
}
|
||||
}
|
||||
|
@ -187,8 +200,8 @@ impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> {
|
|||
///
|
||||
/// Parsers may return their own unique errors via the
|
||||
/// [`StateError`][ParseError::StateError] variant.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ParseError<E: Error + PartialEq> {
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum ParseError<E: Error + PartialEq + Eq> {
|
||||
/// Token stream ended unexpectedly.
|
||||
///
|
||||
/// This error means that the parser was expecting more input before
|
||||
|
@ -205,18 +218,45 @@ pub enum ParseError<E: Error + PartialEq> {
|
|||
/// whatever span preceded this invocation.
|
||||
UnexpectedEof(Option<Span>),
|
||||
|
||||
/// The parser reached an unhandled dead state.
|
||||
///
|
||||
/// Once a parser returns [`ParseStatus::Dead`],
|
||||
/// a parent context must use that provided token as a lookahead.
|
||||
/// If that does not occur,
|
||||
/// [`Parser`] produces this error.
|
||||
///
|
||||
/// In the future,
|
||||
/// it may be desirable to be able to query [`ParseState`] for what
|
||||
/// tokens are acceptable at this point,
|
||||
/// to provide better error messages.
|
||||
UnexpectedToken(Token),
|
||||
|
||||
/// A parser-specific error associated with an inner
|
||||
/// [`ParseState`].
|
||||
StateError(E),
|
||||
}
|
||||
|
||||
impl<E: Error + PartialEq> From<E> for ParseError<E> {
|
||||
impl<EA: Error + PartialEq + Eq> ParseError<EA> {
|
||||
pub fn inner_into<EB: Error + PartialEq + Eq>(self) -> ParseError<EB>
|
||||
where
|
||||
EA: Into<EB>,
|
||||
{
|
||||
use ParseError::*;
|
||||
match self {
|
||||
UnexpectedEof(x) => UnexpectedEof(x),
|
||||
UnexpectedToken(x) => UnexpectedToken(x),
|
||||
StateError(e) => StateError(e.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Error + PartialEq + Eq> From<E> for ParseError<E> {
|
||||
fn from(e: E) -> Self {
|
||||
Self::StateError(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Error + PartialEq> Display for ParseError<E> {
|
||||
impl<E: Error + PartialEq + Eq> Display for ParseError<E> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::UnexpectedEof(ospan) => {
|
||||
|
@ -227,12 +267,15 @@ impl<E: Error + PartialEq> Display for ParseError<E> {
|
|||
Some(span) => write!(f, "{}", span),
|
||||
}
|
||||
}
|
||||
Self::UnexpectedToken(tok) => {
|
||||
write!(f, "unexpected {}", tok)
|
||||
}
|
||||
Self::StateError(e) => Display::fmt(e, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Error + PartialEq + 'static> Error for ParseError<E> {
|
||||
impl<E: Error + PartialEq + Eq + 'static> Error for ParseError<E> {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
Self::StateError(e) => Some(e),
|
||||
|
@ -264,6 +307,31 @@ pub enum ParseStatus<T> {
|
|||
/// see [`ParseStatus::Done`].
|
||||
Object(T),
|
||||
|
||||
/// Parser encountered a dead state relative to the given token.
|
||||
///
|
||||
/// A dead state is an empty accepting state that has no state
|
||||
/// transition for the given token.
|
||||
/// A state is empty if a [`ParseStatus::Object`] will not be lost if
|
||||
/// parsing ends at this point
|
||||
/// (that is---there is no partially-built object).
|
||||
/// This could simply mean that the parser has completed its job and
|
||||
/// that control must be returned to a parent context.
|
||||
///
|
||||
/// If a parser is _not_ in an accepting state,
|
||||
/// then an error ought to occur rather than a dead state;
|
||||
/// the difference between the two is that the token associated with
|
||||
/// a dead state can be used as a lookahead token in order to
|
||||
/// produce a state transition at a higher level,
|
||||
/// whereas an error indicates that parsing has failed.
|
||||
/// Intuitively,
|
||||
/// this means that a [`ParseStatus::Object`] had just been emitted
|
||||
/// and that the token following it isn't something that can be
|
||||
/// parsed.
|
||||
///
|
||||
/// If there is no parent context to handle the token,
|
||||
/// [`Parser`] must yield an error.
|
||||
Dead(Token),
|
||||
|
||||
/// Parsing is complete.
|
||||
///
|
||||
/// This should cause an iterator to yield [`None`].
|
||||
|
@ -294,8 +362,8 @@ impl<T> From<ParseStatus<T>> for Parsed<T> {
|
|||
match status {
|
||||
ParseStatus::Incomplete => Parsed::Incomplete,
|
||||
ParseStatus::Object(x) => Parsed::Object(x),
|
||||
ParseStatus::Done => {
|
||||
unreachable!("Done status must be filtered by Parser")
|
||||
ParseStatus::Dead(_) | ParseStatus::Done => {
|
||||
unreachable!("Dead/Done status must be filtered by Parser")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -303,7 +371,7 @@ impl<T> From<ParseStatus<T>> for Parsed<T> {
|
|||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::{assert_matches::assert_matches, iter::once};
|
||||
|
||||
use super::*;
|
||||
use crate::span::DUMMY_SPAN as DS;
|
||||
|
@ -332,6 +400,7 @@ pub mod test {
|
|||
Token::Close(..) => {
|
||||
return Err(EchoStateError::InnerError(tok))
|
||||
}
|
||||
Token::Comment(..) => return Ok(ParseStatus::Dead(tok)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
|
@ -343,7 +412,7 @@ pub mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum EchoStateError {
|
||||
InnerError(Token),
|
||||
}
|
||||
|
@ -461,4 +530,19 @@ pub mod test {
|
|||
// able to finalize.
|
||||
assert!(sut.finalize().is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unhandled_dead_state_results_in_error() {
|
||||
// A comment will cause our parser to return Dead.
|
||||
let tok = Token::Comment("dead".into(), DS);
|
||||
let mut toks = once(tok.clone());
|
||||
|
||||
let mut sut = Sut::from(&mut toks);
|
||||
|
||||
// Our parser returns a Dead status,
|
||||
// which is unhandled by any parent context
|
||||
// (since we're not composing parsers),
|
||||
// which causes an error due to an unhandled Dead state.
|
||||
assert_eq!(sut.next(), Some(Err(ParseError::UnexpectedToken(tok))),);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ mod tree {
|
|||
fn element_from_tree() {
|
||||
let ele = Element {
|
||||
name: "foo".unwrap_into(),
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
};
|
||||
|
@ -90,12 +90,12 @@ fn empty_element_self_close_from_toks() {
|
|||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
};
|
||||
|
||||
let mut sut = Stack::parse(toks);
|
||||
let mut sut = parse(toks);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
|
||||
assert_eq!(
|
||||
|
@ -116,12 +116,12 @@ fn empty_element_balanced_close_from_toks() {
|
|||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
};
|
||||
|
||||
let mut sut = Stack::parse(toks);
|
||||
let mut sut = parse(toks);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
|
||||
assert_eq!(
|
||||
|
@ -144,7 +144,7 @@ fn empty_element_unbalanced_close_from_toks() {
|
|||
]
|
||||
.into_iter();
|
||||
|
||||
let mut sut = Stack::parse(toks);
|
||||
let mut sut = parse(toks);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
|
||||
assert_eq!(
|
||||
|
@ -179,15 +179,15 @@ fn empty_element_with_attrs_from_toks() {
|
|||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: Some(AttrList::from(vec![
|
||||
attrs: AttrList::from(vec![
|
||||
Attr::new(attr1, val1, (*S, *S2)),
|
||||
Attr::new(attr2, val2, (*S, *S3)),
|
||||
])),
|
||||
]),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
};
|
||||
|
||||
let mut sut = Stack::parse(toks);
|
||||
let mut sut = parse(toks);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
|
||||
|
@ -226,17 +226,17 @@ fn child_element_after_attrs() {
|
|||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: Some(AttrList::from(vec![Attr::new(attr, val, (*S, *S2))])),
|
||||
attrs: AttrList::from(vec![Attr::new(attr, val, (*S, *S2))]),
|
||||
children: vec![Tree::Element(Element {
|
||||
name: child,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
})],
|
||||
span: (*S, *S3),
|
||||
};
|
||||
|
||||
let mut sut = Stack::parse(toks);
|
||||
let mut sut = parse(toks);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
|
||||
|
@ -268,17 +268,17 @@ fn element_with_empty_sibling_children() {
|
|||
|
||||
let expected = Element {
|
||||
name: parent,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![
|
||||
Tree::Element(Element {
|
||||
name: childa,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
}),
|
||||
Tree::Element(Element {
|
||||
name: childb,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
}),
|
||||
|
@ -312,10 +312,10 @@ fn element_with_child_with_attributes() {
|
|||
|
||||
let expected = Element {
|
||||
name: parent,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![Tree::Element(Element {
|
||||
name: child,
|
||||
attrs: Some(AttrList::from([Attr::new(attr, value, (*S, *S2))])),
|
||||
attrs: AttrList::from([Attr::new(attr, value, (*S, *S2))]),
|
||||
children: vec![],
|
||||
span: (*S, *S3),
|
||||
})],
|
||||
|
@ -342,7 +342,7 @@ fn element_with_text() {
|
|||
|
||||
let expected = Element {
|
||||
name: parent,
|
||||
attrs: None,
|
||||
attrs: AttrList::new(),
|
||||
children: vec![Tree::Text(text, *S2)],
|
||||
span: (*S, *S3),
|
||||
};
|
||||
|
@ -369,7 +369,7 @@ fn parser_from_filters_incomplete() {
|
|||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: Some(AttrList::from([Attr::new(attr, val, (*S, *S2))])),
|
||||
attrs: AttrList::from([Attr::new(attr, val, (*S, *S2))]),
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
};
|
||||
|
@ -392,7 +392,7 @@ fn attr_parser_with_non_attr_token() {
|
|||
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Err(StackError::AttrNameExpected(Token::Open(name, *S))))
|
||||
Some(Err(ParseError::UnexpectedToken(Token::Open(name, *S))))
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue