tamer: xir::tree::ParserState: Remove and expose Stack directly

This removes the layer of encapsulation that was hiding Stack, which is the
actual parser.  The new layer of encapsulation is parse::Parser, which will
be introduced here soon.  Baby steps, so it's clear how this evolves.

DEV-11268
main
Mike Gerwitz 2021-12-13 15:00:34 -05:00
parent 24e9b94b37
commit faed32af7e
2 changed files with 54 additions and 113 deletions
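In caller terms, the change is just a different seed for the Iterator::scan-based parser. A minimal sketch of the before and after, mirroring the doctest updated in the diff below (the empty token stream and the wrapper function are for illustration only):

    use tamer::xir::tree::{parse, Stack};
    use tamer::xir::Token;

    fn build_parser() {
        // An empty stream, purely to illustrate the wiring.
        let token_stream: std::vec::IntoIter<Token> = vec![].into_iter();

        // Before this commit: token_stream.scan(ParserState::new(), parse)
        // After it: the Stack itself is the parser state.
        let _parser = token_stream.scan(Stack::default(), parse);
    }

parser_from remains the friendlier entry point; as the diff shows, it performs the same scan internally and filters out Parsed::Incomplete.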


@ -22,12 +22,12 @@
//! **This is a work-in-progress implementation.**
//! It will be augmented only as needed.
//!
-//! Parsing is handled by [`ParserState::parse_token`].
+//! Parsing is handled by [`Stack::parse_token`].
//! An [`Iterator::scan`]-based parser can be constructed using
//! [`parser_from`] or [`parse`][parse()].
//!
//! ```
-//! use tamer::xir::tree::{ParserState, parse, parser_from};
+//! use tamer::xir::tree::{Stack, parse, parser_from};
//!# use tamer::xir::Token;
//!
//!# let token_stream: std::vec::IntoIter<Token> = vec![].into_iter();
@ -38,7 +38,7 @@
//!# let token_stream: std::vec::IntoIter<Token> = vec![].into_iter();
//! // Consume a single token at a time, yielding either an incomplete state
//! // or the next parsed object.
-//! let parser = token_stream.scan(ParserState::new(), parse);
+//! let parser = token_stream.scan(Stack::default(), parse);
//! ```
//!
//! `parser_from` Or `parse`?
@ -122,7 +122,7 @@
//! however,
//! perform _[semantic analysis]_ on the token stream.
//! Given that,
-//! [`ParserState::parse_token`] returns a [`Result`],
+//! [`Stack::parse_token`] returns a [`Result`],
//! with parsing errors represented by this module's [`ParseError`].
//!
//! As an example,
@ -137,8 +137,7 @@
//! Parser Implementation
//! =====================
//! The parser that lowers the XIR [`Token`] stream into a [`Tree`]
-//! is implemented on [`ParserState`],
-//! which exists to encapsulate the [`Stack`].
+//! is implemented on [`Stack`].
//!
//! This parser is a [stack machine],
//! where the stack represents the [`Tree`] that is under construction.
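
To make the stack-machine framing concrete, here is a toy, self-contained sketch of the idea with hypothetical Tok and Tree types (not the ones from this module): frames for elements still under construction live on a stack, and closing an element pops its frame and attaches the completed subtree to its parent.

    #[derive(Debug)]
    enum Tree {
        Element(String, Vec<Tree>),
    }

    enum Tok {
        Open(String),
        Close,
    }

    fn build(toks: impl IntoIterator<Item = Tok>) -> Option<Tree> {
        // Each frame is an element whose children are still being parsed.
        let mut stack: Vec<(String, Vec<Tree>)> = Vec::new();

        for tok in toks {
            match tok {
                Tok::Open(name) => stack.push((name, Vec::new())),
                Tok::Close => {
                    // `?`: a Close with no matching Open aborts the parse.
                    let (name, children) = stack.pop()?;
                    let tree = Tree::Element(name, children);

                    match stack.last_mut() {
                        // Attach the completed subtree to its parent...
                        Some((_, siblings)) => siblings.push(tree),
                        // ...or, with no parent, the tree is complete.
                        None => return Some(tree),
                    }
                }
            }
        }

        None // Tokens ran out with elements still open.
    }

The parser here takes a different route, encoding the stack in the Stack variants themselves rather than in a Vec (as the removed ParserState docs note further down), but the rhythm is the same: Open pushes a frame, Close pops it and hands the finished subtree to its parent.
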
@ -177,7 +176,7 @@
mod attr;
mod parse;
-use self::attr::AttrParserState;
+use self::{attr::AttrParserState, parse::ParseStateResult};
use super::{QName, Token, TokenResultStream, TokenStream};
use crate::{span::Span, sym::SymbolId, xir::tree::parse::ParseState};
@ -515,6 +514,39 @@ impl Default for Stack {
}
}
+impl ParseState for Stack {
+type Object = Object;
+type Error = ParseError;
+fn parse_token(&mut self, tok: Token) -> ParseStateResult<Self> {
+let stack = take(self);
+match tok {
+Token::Open(name, span) => stack.open_element(name, span),
+Token::Close(name, span) => stack.close_element(name, span),
+Token::AttrName(name, span) => stack.open_attr(name, span),
+Token::AttrValue(value, span) => stack.close_attr(value, span),
+Token::AttrEnd(_) => stack.end_attrs(),
+Token::Text(value, span) => stack.text(value, span),
+// This parse is being rewritten, so we'll address this with a
+// proper error then.
+Token::AttrValueFragment(..) => {
+panic!("AttrValueFragment is not parsable")
+}
+Token::Comment(..) | Token::CData(..) | Token::Whitespace(..) => {
+Err(ParseError::Todo(tok, stack))
+}
+}
+.map(|new_stack| self.store_or_emit(new_stack))
+}
+fn is_accepting(&self) -> bool {
+*self == Self::Empty || *self == Self::Done
+}
+}
impl Stack {
/// Attempt to open a new element.
///
@ -673,97 +705,6 @@ impl Stack {
_ => todo! {},
})
}
}
-/// State while parsing a XIR token stream into a tree.
-///
-/// [`ParserState`] is responsible only for dispatch and bookkeeping;
-/// state transitions and stack manipulation are handled by the various
-/// methods on [`Stack`].
-///
-/// This is a stack machine with the interface of a state machine.
-/// The stack is encoded into the variants themselves
-/// (which Rust will allocate on the stack),
-/// which is sensible given that we always know exactly what and how
-/// many arguments we need.
-/// This gives us both the stack we want and type safety,
-/// and has compile-time guarantees to ensure that we cannot produce a
-/// stack that is not suitable for the computation at hand.
-///
-/// Note that this cannot be reasoned about in terms of a pushdown automaton
-/// because there is no set language for the stack---it
-/// contains arbitrary data holding the state of the current computation,
-/// which is (theoretically, but not practically) unbounded by the
-/// recursive nature of [`Element`].
-///
-/// This is very similar to the [XmlWriter](super::writer::XmlWriter),
-/// except that a stack is needed to accumulate tokens until we can begin
-/// emitting a tree.
-#[derive(Debug, Default)]
-pub struct ParserState {
-stack: Stack,
-}
-impl ParserState {
-/// Create state of a new parser that has not yet seen any input
-/// tokens.
-///
-/// _Consider using [`parser_from`] instead._
-///
-/// Parsers using this state are suitable only for valid starting
-/// contexts,
-/// as defined in the [module-level documentation](self).
-pub fn new() -> Self {
-Self {
-stack: Default::default(),
-}
-}
-/// Initialize the state of the parser with the given [`Stack`].
-fn with(stack: Stack) -> Self {
-Self { stack }
-}
-/// Consume a single XIR [`Token`] and attempt to parse it within the
-/// context of the current [`Stack`].
-///
-/// Each call to this method represents a [state transition].
-/// Invalid state transitions represent either a semantic error
-/// (e.g. unbalanced tags)
-/// or unimplemented features that will be added as needed.
-///
-/// This parser is not responsible for validating _syntax_,
-/// since valid syntax is already implied by the existence of
-/// [`Token`].
-/// But it does perform semantic analysis on that token stream.
-///
-/// All heavy lifting is done by the various methods on [`Stack`].
-///
-/// See the [module-level documentation](self) for more information on
-/// the implementation of the parser.
-pub fn parse_token(&mut self, tok: Token) -> Result<ParseStatus> {
-let stack = take(&mut self.stack);
-match tok {
-Token::Open(name, span) => stack.open_element(name, span),
-Token::Close(name, span) => stack.close_element(name, span),
-Token::AttrName(name, span) => stack.open_attr(name, span),
-Token::AttrValue(value, span) => stack.close_attr(value, span),
-Token::AttrEnd(_) => stack.end_attrs(),
-Token::Text(value, span) => stack.text(value, span),
-// This parse is being rewritten, so we'll address this with a
-// proper error then.
-Token::AttrValueFragment(..) => {
-panic!("AttrValueFragment is not parsable")
-}
-Token::Comment(..) | Token::CData(..) | Token::Whitespace(..) => {
-Err(ParseError::Todo(tok, stack))
-}
-}
-.map(|new_stack| self.store_or_emit(new_stack))
-}
/// Emit a completed object or store the current stack for further processing.
fn store_or_emit(&mut self, new_stack: Stack) -> ParseStatus {
@ -776,7 +717,7 @@ impl ParserState {
}
Stack::IsolatedAttr(attr) => {
-self.stack = Stack::IsolatedAttrEmpty;
+*self = Stack::IsolatedAttrEmpty;
ParseStatus::Object(Object::Attr(attr))
}
@ -785,7 +726,7 @@ impl ParserState {
Stack::Done => ParseStatus::Done,
_ => {
-self.stack = new_stack;
+*self = new_stack;
ParseStatus::Incomplete
}
}
@ -795,7 +736,7 @@ impl ParserState {
/// Result of a XIR tree parsing operation.
pub type Result<T> = std::result::Result<T, ParseError>;
-/// Parsing error from [`ParserState`].
+/// Parsing error from [`Stack`].
#[derive(Debug, Eq, PartialEq)]
pub enum ParseError {
/// The closing tag does not match the opening tag at the same level of
@ -911,7 +852,7 @@ pub enum Object {
Attr(Attr),
}
-/// Wrap [`ParserState::parse_token`] result in [`Some`],
+/// Wrap [`Stack::parse_token`] result in [`Some`],
/// suitable for use with [`Iterator::scan`].
///
/// If you do not require a single-step [`Iterator::next`] and simply want
@ -925,15 +866,15 @@ pub enum Object {
/// iterator it scans returns [`None`].
///
/// ```
-/// use tamer::xir::tree::{ParserState, parse};
+/// use tamer::xir::tree::{Stack, parse};
///# use tamer::xir::Token;
///
///# let token_stream: std::vec::IntoIter<Token> = vec![].into_iter();
/// // The above is equivalent to:
-/// let parser = token_stream.scan(ParserState::new(), parse);
+/// let parser = token_stream.scan(Stack::default(), parse);
/// ```
-pub fn parse(state: &mut ParserState, tok: Token) -> Option<Result<Parsed>> {
-Some(ParserState::parse_token(state, tok).map(Into::into))
+pub fn parse(state: &mut Stack, tok: Token) -> Option<Result<Parsed>> {
+Some(Stack::parse_token(state, tok).map(Into::into))
}
/// Produce a lazy parser from a given [`TokenStream`],
@ -963,7 +904,7 @@ pub fn parse(state: &mut ParserState, tok: Token) -> Option<Result<Parsed>> {
pub fn parser_from(
toks: impl TokenStream,
) -> impl Iterator<Item = Result<Tree>> {
-toks.scan(ParserState::new(), parse)
+toks.scan(Stack::default(), parse)
.filter_map(|parsed| match parsed {
Ok(Parsed::Object(Object::Tree(tree))) => Some(Ok(tree)),
Ok(Parsed::Incomplete) => None,
@ -997,7 +938,7 @@ pub fn parse_attrs<'a>(
toks: &mut impl TokenStream,
dest: AttrList,
) -> Result<AttrList> {
-let mut state = ParserState::with(Stack::BuddingAttrList(None, dest));
+let mut state = Stack::BuddingAttrList(None, dest);
loop {
match toks.next().and_then(|tok| parse(&mut state, tok)) {
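
The new impl ParseState for Stack above targets the ParseState trait from the parse submodule, which this diff only imports. A rough, self-contained sketch of the shape that impl appears to assume; Token, ParseStatus, and ParseStateResult here are simplified stand-ins rather than the real definitions in xir::tree::parse:

    /// Simplified stand-in for xir::Token.
    pub struct Token;

    /// Outcome of one parsing step (mirrors this module's ParseStatus).
    pub enum ParseStatus<O> {
        /// More tokens are needed before an object can be emitted.
        Incomplete,
        /// A complete object was parsed.
        Object(O),
        /// No further tokens are expected.
        Done,
    }

    /// One step yields either a status or a parser-specific error.
    pub type ParseStateResult<S: ParseState> =
        Result<ParseStatus<S::Object>, S::Error>;

    pub trait ParseState {
        /// Object successfully produced by parsing.
        type Object;

        /// Error produced when parsing fails.
        type Error;

        /// Consume a single token, transitioning the state.
        fn parse_token(&mut self, tok: Token) -> ParseStateResult<Self>;

        /// Whether the token stream may validly end at this state.
        fn is_accepting(&self) -> bool;
    }

Under that reading, Stack::parse_token dispatches on the token and delegates to the existing open_element/close_attr/etc. methods, and is_accepting marks Empty and Done as the only states at which the stream may validly end.
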


@ -94,7 +94,7 @@ fn empty_element_self_close_from_toks() {
span: (*S, *S2),
};
-let mut sut = toks.scan(ParserState::new(), parse);
+let mut sut = toks.scan(Default::default(), parse);
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
assert_eq!(
@ -120,7 +120,7 @@ fn empty_element_balanced_close_from_toks() {
span: (*S, *S2),
};
-let mut sut = toks.scan(ParserState::new(), parse);
+let mut sut = toks.scan(Default::default(), parse);
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
assert_eq!(
@ -143,7 +143,7 @@ fn empty_element_unbalanced_close_from_toks() {
]
.into_iter();
-let mut sut = toks.scan(ParserState::new(), parse);
+let mut sut = toks.scan(Default::default(), parse);
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
assert_eq!(
@ -186,7 +186,7 @@ fn empty_element_with_attrs_from_toks() {
span: (*S, *S2),
};
-let mut sut = toks.scan(ParserState::new(), parse);
+let mut sut = toks.scan(Default::default(), parse);
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
@ -235,7 +235,7 @@ fn child_element_after_attrs() {
span: (*S, *S3),
};
-let mut sut = toks.scan(ParserState::new(), parse);
+let mut sut = toks.scan(Default::default(), parse);
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName