diff --git a/tamer/src/xir/tree/attr.rs b/tamer/src/xir/attr.rs similarity index 100% rename from tamer/src/xir/tree/attr.rs rename to tamer/src/xir/attr.rs diff --git a/tamer/src/xir/tree/attr/parse.rs b/tamer/src/xir/attr/parse.rs similarity index 64% rename from tamer/src/xir/tree/attr/parse.rs rename to tamer/src/xir/attr/parse.rs index a5c30184..aa57816a 100644 --- a/tamer/src/xir/tree/attr/parse.rs +++ b/tamer/src/xir/attr/parse.rs @@ -22,11 +22,11 @@ use crate::{ span::Span, xir::{ - parse::{ParseState, ParseStateResult, ParseStatus}, + parse::{ParseState, Transition, TransitionResult}, QName, Token, }, }; -use std::{error::Error, fmt::Display, mem::take}; +use std::{error::Error, fmt::Display}; use super::Attr; @@ -48,25 +48,25 @@ impl ParseState for AttrParseState { type Object = Attr; type Error = AttrParseError; - fn parse_token(&mut self, tok: Token) -> ParseStateResult { - use AttrParseState::*; + fn parse_token(self, tok: Token) -> TransitionResult { + use AttrParseState::{Empty, Name}; - match (take(self), tok) { + match (self, tok) { (Empty, Token::AttrName(name, span)) => { - *self = Name(name, span); - Ok(ParseStatus::Incomplete) + Transition(Name(name, span)).incomplete() } - (Empty, invalid) => return Ok(ParseStatus::Dead(invalid)), + (Empty, invalid) => Transition(Empty).dead(invalid), (Name(name, nspan), Token::AttrValue(value, vspan)) => { - Ok(ParseStatus::Object(Attr::new(name, value, (nspan, vspan)))) + Transition(Empty).with(Attr::new(name, value, (nspan, vspan))) } (Name(name, nspan), invalid) => { // Restore state for error recovery. - *self = Name(name, nspan); - Err(AttrParseError::AttrValueExpected(name, nspan, invalid)) + Transition(Name(name, nspan)).err( + AttrParseError::AttrValueExpected(name, nspan, invalid), + ) } } } @@ -120,31 +120,32 @@ impl Error for AttrParseError { #[cfg(test)] mod test { use super::*; - use crate::{convert::ExpectInto, sym::GlobalSymbolIntern}; + use crate::{ + convert::ExpectInto, + sym::GlobalSymbolIntern, + xir::parse::{ParseStatus, Parsed}, + }; - // TODO: Just make these const - lazy_static! { - static ref S: Span = - Span::from_byte_interval((0, 0), "test case, 1".intern()); - static ref S2: Span = - Span::from_byte_interval((0, 0), "test case, 2".intern()); - static ref S3: Span = - Span::from_byte_interval((0, 0), "test case, 3".intern()); - } + const S: Span = crate::span::DUMMY_SPAN; + const S2: Span = S.offset_add(1).unwrap(); #[test] fn dead_if_first_token_is_non_attr() { - let tok = Token::Open("foo".unwrap_into(), *S); + let tok = Token::Open("foo".unwrap_into(), S); - let mut sut = AttrParseState::default(); + let sut = AttrParseState::default(); // There is no state that we can transition to, // and we're in an empty accepting state. - assert_eq!(Ok(ParseStatus::Dead(tok.clone())), sut.parse_token(tok)); - - // Let's just make sure we're in the same state we started in so - // that we know we can accommodate recovery token(s). - assert_eq!(sut, AttrParseState::default()); + assert_eq!( + ( + // Make sure we're in the same state we started in so that + // we know we can accommodate recovery token(s). + Transition(AttrParseState::default()), + Ok(ParseStatus::Dead(tok.clone())) + ), + sut.parse_token(tok) + ); } #[test] @@ -152,21 +153,17 @@ mod test { let attr = "attr".unwrap_into(); let val = "val".intern(); - let mut sut = AttrParseState::default(); - let expected = Attr::new(attr, val, (*S, *S2)); + let toks = + [Token::AttrName(attr, S), Token::AttrValue(val, S2)].into_iter(); - // First token represents the name, - // and so we are awaiting a value. - assert_eq!( - sut.parse_token(Token::AttrName(attr, *S)), - Ok(ParseStatus::Incomplete) - ); + let sut = AttrParseState::parse(toks); - // Once we have a value, - // an Attr can be emitted. assert_eq!( - sut.parse_token(Token::AttrValue(val, *S2)), - Ok(ParseStatus::Object(expected)) + Ok(vec![ + Parsed::Incomplete, + Parsed::Object(Attr::new(attr, val, (S, S2))), + ]), + sut.collect() ); } @@ -174,22 +171,22 @@ mod test { fn parse_fails_when_attribute_value_missing_but_can_recover() { let attr = "bad".unwrap_into(); - let mut sut = AttrParseState::default(); + let sut = AttrParseState::default(); // This token indicates that we're expecting a value to come next in // the token stream. - assert_eq!( - sut.parse_token(Token::AttrName(attr, *S)), - Ok(ParseStatus::Incomplete) - ); + let (Transition(sut), result) = + sut.parse_token(Token::AttrName(attr, S)); + assert_eq!(result, Ok(ParseStatus::Incomplete)); // But we provide something else unexpected. + let (Transition(sut), result) = sut.parse_token(Token::Close(None, S2)); assert_eq!( - sut.parse_token(Token::Close(None, *S2)), + result, Err(AttrParseError::AttrValueExpected( attr, - *S, - Token::Close(None, *S2) + S, + Token::Close(None, S2) )) ); @@ -203,10 +200,11 @@ mod test { // Rather than checking for that state, // let's actually attempt a recovery. let recover = "value".intern(); - let expected = Attr::new(attr, recover, (*S, *S2)); + let (Transition(sut), result) = + sut.parse_token(Token::AttrValue(recover, S2)); assert_eq!( - sut.parse_token(Token::AttrValue(recover, *S2)), - Ok(ParseStatus::Object(expected)) + result, + Ok(ParseStatus::Object(Attr::new(attr, recover, (S, S2)))), ); // Finally, we should now be in an accepting state. diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs index 67cf2467..5710a075 100644 --- a/tamer/src/xir/flat.rs +++ b/tamer/src/xir/flat.rs @@ -37,16 +37,19 @@ //! of the caller. use super::{ - parse::{ParseState, ParseStateResult, ParseStatus, ParsedResult}, + parse::{ + ParseState, ParseStateResult, ParseStatus, ParsedResult, + TransitionResult, + }, tree::{ attr::{AttrParseError, AttrParseState}, Attr, }, QName, Token, TokenStream, Whitespace, }; -use crate::{span::Span, sym::SymbolId}; +use crate::{span::Span, sym::SymbolId, xir::parse::Transition}; use arrayvec::ArrayVec; -use std::{error::Error, fmt::Display, mem::replace}; +use std::{error::Error, fmt::Display}; /// Tag nesting depth /// (`0` represents the root). @@ -138,10 +141,6 @@ where /// Delegating to attribute parser. AttrExpected(ElementStack, SA), - - /// Temporary state used to catch missing explicit state transitions in - /// `parse_token`. - Invalid, } impl Default for State { @@ -150,11 +149,6 @@ impl Default for State { } } -/// Denotes a state transition. -/// -/// This newtype was created to produce clear, self-documenting code. -struct Transition(T); - impl ParseState for State where SA: FlatAttrParseState, @@ -162,39 +156,31 @@ where type Object = Object; type Error = StateError; - fn parse_token(&mut self, tok: Token) -> ParseStateResult { + fn parse_token(self, tok: Token) -> TransitionResult { use ParseStatus::{Dead, Incomplete, Object as Obj}; - use State::{AttrExpected, Invalid, NodeExpected}; - - let result; + use State::{AttrExpected, NodeExpected}; // This awkward-looking take-reassign forces us to be explicit // about state transitions in every case, // ensuring that we always have documented proof of what state // the system winds up in. // The `Invalid` state prevents using `return`. - (Transition(*self), result) = match (replace(self, Invalid), tok) { + match (self, tok) { (NodeExpected(stack), tok) => Self::parse_node(stack, tok), - (AttrExpected(stack, mut sa), tok) => match sa.parse_token(tok) { - Ok(Incomplete) => { - (Transition(AttrExpected(stack, sa)), Ok(Incomplete)) + (AttrExpected(stack, sa), tok) => match sa.parse_token(tok) { + (Transition(sa), Ok(Incomplete)) => { + Transition(AttrExpected(stack, sa)).incomplete() + } + (Transition(sa), Ok(Obj(attr))) => { + Transition(AttrExpected(stack, sa)).with(Object::Attr(attr)) + } + (_, Ok(Dead(lookahead))) => Self::parse_node(stack, lookahead), + (Transition(sa), Err(x)) => { + Transition(AttrExpected(stack, sa)).err(x) } - Ok(Obj(attr)) => ( - Transition(AttrExpected(stack, sa)), - Ok(Obj(Object::Attr(attr))), - ), - Ok(Dead(lookahead)) => Self::parse_node(stack, lookahead), - Err(x) => (Transition(AttrExpected(stack, sa)), Err(x.into())), }, - - // See comment at the top of this function. - (Invalid, _) => { - unreachable!("XIRF parser reached invalid state") - } - }; - - result + } } /// Whether all elements have been closed. @@ -220,81 +206,71 @@ where mut stack: ElementStack, tok: Token, ) -> (Transition, ParseStateResult) { - use ParseStatus::Object as Obj; + use Object::*; use State::{AttrExpected, NodeExpected}; match tok { - Token::Open(qname, span) if stack.len() == MAX_DEPTH => ( - Transition(NodeExpected(stack)), - Err(StateError::MaxDepthExceeded { - open: (qname, span), - max: Depth(MAX_DEPTH), - }), - ), + Token::Open(qname, span) if stack.len() == MAX_DEPTH => Transition( + NodeExpected(stack), + ) + .err(StateError::MaxDepthExceeded { + open: (qname, span), + max: Depth(MAX_DEPTH), + }), Token::Open(qname, span) => { let depth = stack.len(); stack.push((qname, span)); // Delegate to the attribute parser until it is complete. - ( - Transition(AttrExpected(stack, SA::default())), - Ok(Obj(Object::Open(qname, span, Depth(depth)))), - ) + Transition(AttrExpected(stack, SA::default())).with(Open( + qname, + span, + Depth(depth), + )) } Token::Close(close_oqname, close_span) => { match (close_oqname, stack.pop()) { - (_, None) => ( - Transition(NodeExpected(stack)), - Err(StateError::ExtraClosingTag( - close_oqname, - close_span, - )), + (_, None) => Transition(NodeExpected(stack)).err( + StateError::ExtraClosingTag(close_oqname, close_span), ), (Some(qname), Some((open_qname, open_span))) if qname != open_qname => { - ( - Transition(NodeExpected(stack)), - Err(StateError::UnbalancedTag { + Transition(NodeExpected(stack)).err( + StateError::UnbalancedTag { open: (open_qname, open_span), close: (qname, close_span), - }), + }, ) } (..) => { let depth = stack.len(); - ( - Transition(NodeExpected(stack)), - Ok(Obj(Object::Close( - close_oqname, - close_span, - Depth(depth), - ))), - ) + + Transition(NodeExpected(stack)).with(Close( + close_oqname, + close_span, + Depth(depth), + )) } } } - Token::Comment(sym, span) => ( - Transition(NodeExpected(stack)), - Ok(Obj(Object::Comment(sym, span))), - ), - Token::Text(sym, span) => ( - Transition(NodeExpected(stack)), - Ok(Obj(Object::Text(sym, span))), - ), - Token::CData(sym, span) => ( - Transition(NodeExpected(stack)), - Ok(Obj(Object::CData(sym, span))), - ), - Token::Whitespace(ws, span) => ( - Transition(NodeExpected(stack)), - Ok(Obj(Object::Whitespace(ws, span))), - ), + Token::Comment(sym, span) => { + Transition(NodeExpected(stack)).with(Comment(sym, span)) + } + Token::Text(sym, span) => { + Transition(NodeExpected(stack)).with(Text(sym, span)) + } + Token::CData(sym, span) => { + Transition(NodeExpected(stack)).with(CData(sym, span)) + } + Token::Whitespace(ws, span) => { + Transition(NodeExpected(stack)).with(Whitespace(ws, span)) + } // We should transition to `State::Attr` before encountering any // of these tokens. diff --git a/tamer/src/xir/parse.rs b/tamer/src/xir/parse.rs index c819f90b..3e7cf6e4 100644 --- a/tamer/src/xir/parse.rs +++ b/tamer/src/xir/parse.rs @@ -22,6 +22,7 @@ use super::{Token, TokenStream}; use crate::span::Span; use std::fmt::Debug; +use std::mem::take; use std::{error::Error, fmt::Display}; /// Result of applying a [`Token`] to a [`ParseState`], @@ -69,12 +70,22 @@ pub trait ParseState: Default + PartialEq + Eq + Debug { /// Parse a single [`Token`] and optionally perform a state transition. /// - /// The current state is represented by `self`, - /// which is mutable to allow for a state transition. - /// The result of a parsing operation is either an object or an - /// indication that additional tokens of input are needed; - /// see [`Parsed`] for more information. - fn parse_token(&mut self, tok: Token) -> ParseStateResult; + /// The current state is represented by `self`. + /// The result of a parsing operation is a state transition with + /// associated [`ParseStatus`] data. + /// + /// Note that `self` is owned, + /// for a couple primary reasons: + /// + /// 1. This forces the parser to explicitly consider and document all + /// state transitions, + /// rather than potentially missing unintended behavior through + /// implicit behavior; and + /// 2. It allows for more natural functional composition of state, + /// which in turn makes it easier to compose parsers + /// (which conceptually involves stitching together state + /// machines). + fn parse_token(self, tok: Token) -> TransitionResult; /// Whether the current state represents an accepting state. /// @@ -95,9 +106,65 @@ pub trait ParseState: Default + PartialEq + Eq + Debug { } /// Result of applying a [`Token`] to a [`ParseState`]. +/// +/// This is used by [`ParseState::parse_token`]; +/// see that function for rationale. pub type ParseStateResult = Result::Object>, ::Error>; +/// Denotes a state transition. +/// +/// This newtype was created to produce clear, self-documenting code; +/// parsers can get confusing to read with all of the types involved, +/// so this provides a mental synchronization point. +/// +/// This also provides some convenience methods to help remote boilerplate +/// and further improve code clarity. +#[derive(Debug, PartialEq, Eq)] +pub struct Transition(pub S); + +impl Transition { + /// A state transition with corresponding data. + /// + /// This allows [`ParseState::parse_token`] to emit a parsed object and + /// corresponds to [`ParseStatus::Object`]. + pub fn with(self, obj: S::Object) -> (Self, ParseStateResult) { + (self, Ok(ParseStatus::Object(obj))) + } + + /// A state transition indicating that more data is needed before an + /// object can be emitted. + /// + /// This corresponds to [`ParseStatus::Incomplete`]. + pub fn incomplete(self) -> (Self, ParseStateResult) { + (self, Ok(ParseStatus::Incomplete)) + } + + /// A dead state transition. + /// + /// This corresponds to [`ParseStatus::Dead`], + /// and a calling parser should use the provided [`Token`] as + /// lookahead. + pub fn dead(self, tok: Token) -> (Self, ParseStateResult) { + (self, Ok(ParseStatus::Dead(tok))) + } + + /// A transition with corresponding error. + /// + /// This indicates a parsing failure. + /// The state ought to be suitable for error recovery. + pub fn err>(self, err: E) -> (Self, ParseStateResult) { + (self, Err(err.into())) + } +} + +/// A state transition with associated data. +/// +/// Conceptually, +/// imagine the act of a state transition producing data. +/// See [`Transition`] for convenience methods for producing this tuple. +pub type TransitionResult = (Transition, ParseStateResult); + /// A streaming parser defined by a [`ParseState`] with exclusive /// mutable access to an underlying [`TokenStream`]. /// @@ -167,8 +234,12 @@ impl Iterator for Parser { // reporting in case we encounter an EOF. self.last_span = Some(tok.span()); + let result; + (Transition(self.state), result) = + take(&mut self.state).parse_token(tok); + use ParseStatus::*; - match self.state.parse_token(tok) { + match result { // Nothing handled this dead state, // and we cannot discard a lookahead token, // so we have no choice but to produce an error. @@ -382,19 +453,15 @@ pub mod test { type Object = Token; type Error = EchoStateError; - fn parse_token(&mut self, tok: Token) -> ParseStateResult { + fn parse_token(self, tok: Token) -> TransitionResult { match tok { - Token::Comment(..) => { - *self = Self::Done; - } + Token::Comment(..) => Transition(Self::Done).with(tok), Token::Close(..) => { - return Err(EchoStateError::InnerError(tok)) + Transition(self).err(EchoStateError::InnerError(tok)) } - Token::Text(..) => return Ok(ParseStatus::Dead(tok)), - _ => {} + Token::Text(..) => Transition(self).dead(tok), + _ => Transition(self).with(tok), } - - Ok(ParseStatus::Object(tok)) } fn is_accepting(&self) -> bool { diff --git a/tamer/src/xir/tree.rs b/tamer/src/xir/tree.rs index f24275ae..7feaca27 100644 --- a/tamer/src/xir/tree.rs +++ b/tamer/src/xir/tree.rs @@ -177,15 +177,16 @@ pub mod attr; use self::{ super::parse::{ - ParseError, ParseResult, ParseState, ParseStateResult, ParseStatus, - ParsedResult, + ParseError, ParseResult, ParseState, ParseStatus, ParsedResult, }, attr::{AttrParseError, AttrParseState}, }; -use super::{QName, Token, TokenResultStream, TokenStream}; -use crate::{span::Span, sym::SymbolId}; -use std::{error::Error, fmt::Display, mem::take, result}; +use super::{ + parse::TransitionResult, QName, Token, TokenResultStream, TokenStream, +}; +use crate::{span::Span, sym::SymbolId, xir::parse::Transition}; +use std::{error::Error, fmt::Display, result}; pub use attr::{Attr, AttrList}; @@ -515,65 +516,82 @@ impl ParseState for Stack { type Object = Tree; type Error = StackError; - fn parse_token(&mut self, tok: Token) -> ParseStateResult { + fn parse_token(self, tok: Token) -> TransitionResult { use Stack::*; - match (take(self), tok) { + match (self, tok) { // Open a root element (or lack of context). (Empty, Token::Open(name, span)) => { - Ok(Self::begin_attrs(name, span, None)) + Self::begin_attrs(name, span, None) } // Open a child element. (BuddingElement(pstack), Token::Open(name, span)) => { - Ok(Self::begin_attrs(name, span, Some(pstack.store()))) + Self::begin_attrs(name, span, Some(pstack.store())) } // Open a child element in attribute parsing context. (BuddingAttrList(pstack, attr_list), Token::Open(name, span)) => { - Ok(Self::begin_attrs( + Self::begin_attrs( name, span, Some(pstack.consume_attrs(attr_list).store()), - )) + ) } // Attribute parsing. - (AttrState(estack, attrs, mut sa), tok) => { + (AttrState(estack, attrs, sa), tok) => { use ParseStatus::*; match sa.parse_token(tok) { - Ok(Incomplete) => Ok(AttrState(estack, attrs, sa)), - Ok(Object(attr)) => { - Ok(AttrState(estack, attrs.push(attr), sa)) + (Transition(sa), Ok(Incomplete)) => { + Transition(AttrState(estack, attrs, sa)).incomplete() } - Ok(Dead(lookahead)) => { - *self = BuddingElement(estack.consume_attrs(attrs)); - return self.parse_token(lookahead); + (Transition(sa), Ok(Object(attr))) => { + Transition(AttrState(estack, attrs.push(attr), sa)) + .incomplete() + } + (_, Ok(Dead(lookahead))) => { + BuddingElement(estack.consume_attrs(attrs)) + .parse_token(lookahead) + } + (Transition(sa), Err(x)) => { + Transition(AttrState(estack, attrs, sa)).err(x.into()) } - Err(x) => Err(x.into()), } } (BuddingElement(stack), Token::Close(name, span)) => stack .try_close(name, span) - .map(ElementStack::consume_child_or_complete), + .map(ElementStack::consume_child_or_complete) + .map(|new_stack| match new_stack { + Stack::ClosedElement(ele) => { + Transition(Empty).with(Tree::Element(ele)) + } + _ => Transition(new_stack).incomplete(), + }) + .unwrap_or_else(|err| Transition(Empty).err(err)), (BuddingAttrList(stack, attr_list), Token::Close(name, span)) => { stack .consume_attrs(attr_list) .try_close(name, span) .map(ElementStack::consume_child_or_complete) + .map(|new_stack| match new_stack { + Stack::ClosedElement(ele) => { + Transition(Empty).with(Tree::Element(ele)) + } + _ => Transition(new_stack).incomplete(), + }) + .unwrap_or_else(|err| Transition(Empty).err(err)) } (BuddingElement(mut ele), Token::Text(value, span)) => { ele.element.children.push(Tree::Text(value, span)); - Ok(Self::BuddingElement(ele)) + Transition(BuddingElement(ele)).incomplete() } - (_, tok) if self.is_accepting() => { - return Ok(ParseStatus::Dead(tok)) - } + (st, tok) if st.is_accepting() => Transition(st).dead(tok), (stack, tok) => { todo!( @@ -585,7 +603,6 @@ impl ParseState for Stack { ) } } - .map(|new_stack| self.store_or_emit(new_stack)) } fn is_accepting(&self) -> bool { @@ -598,29 +615,16 @@ impl Stack { name: QName, span: Span, pstack: Option>, - ) -> Self { - Self::AttrState( + ) -> TransitionResult { + Transition(Self::AttrState( ElementStack { element: Element::open(name, span), pstack, }, Default::default(), SA::default(), - ) - } - - /// Emit a completed object or store the current stack for further processing. - fn store_or_emit(&mut self, new_stack: Self) -> ParseStatus { - match new_stack { - Stack::ClosedElement(ele) => { - ParseStatus::Object(Tree::Element(ele)) - } - - _ => { - *self = new_stack; - ParseStatus::Incomplete - } - } + )) + .incomplete() } }