tamer: {xir::=>}parse: Move parser out of XIR

The parsing framework originally created for XIR is now more general and
useful for other things.  We'll see how this evolves.

This needs additional documentation, but I'd like to see how it changes as
I implement XmloReader and then some of the source readers first.

DEV-10863
main
Mike Gerwitz 2022-03-18 16:24:53 -04:00
parent 0360226caa
commit 14638a612f
8 changed files with 72 additions and 37 deletions

View File

@ -77,6 +77,7 @@ pub mod fs;
pub mod iter;
pub mod ld;
pub mod obj;
pub mod parse;
pub mod span;
pub mod sym;
pub mod tpwrap;

View File

@ -19,9 +19,10 @@
//! Errors while processing `xmlo` object files.
use crate::parse::ParseError;
use crate::sym::SymbolId;
use crate::tpwrap::quick_xml::{Error as XmlError, InnerXmlError};
use crate::xir::{parse::ParseError, tree::StackError, Token};
use crate::xir::{tree::StackError, Token};
use std::fmt::Display;
/// Error during `xmlo` processing.

View File

@ -1,4 +1,4 @@
// Basic parsing framework for XIR into XIRT
// Basic streaming parsing framework
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
@ -17,7 +17,9 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Basic streaming parsing framework for XIR lowering operations.
//! Basic streaming parser framework for lowering operations.
//!
//! _TODO: Some proper docs and examples!_
use crate::span::Span;
use std::fmt::Debug;
@ -59,6 +61,12 @@ impl<T: Token> From<T> for Span {
/// not read into memory.
pub trait TokenStream<T: Token> = Iterator<Item = T>;
/// A [`Token`] stream that may encounter errors during parsing.
///
/// Each item yielded by the underlying [`Iterator`] is a [`Result`] holding
/// either a token of type `T` or an error of type `E`.
///
/// If the stream cannot fail,
/// consider using [`TokenStream`].
pub trait TokenResultStream<T: Token, E: Error> = Iterator<Item = Result<T, E>>;
/// A deterministic parsing automaton.
///
/// These states are utilized by a [`Parser`].
@ -473,10 +481,31 @@ impl<T: Token, O> From<ParseStatus<T, O>> for Parsed<O> {
pub mod test {
use std::{assert_matches::assert_matches, iter::once};
use super::super::Token as XirToken;
use super::*;
use crate::{span::DUMMY_SPAN as DS, sym::GlobalSymbolIntern};
/// Minimal [`Token`] used to exercise the parser in this test module.
///
/// Every variant carries nothing but the [`Span`] reported by
/// [`Token::span`];
/// the variant itself only selects which transition `EchoState` takes.
#[derive(Debug, PartialEq, Eq, Clone)]
enum TestToken {
/// Causes `EchoState` to yield an `EchoStateError::InnerError`.
Close(Span),
/// Moves `EchoState` into its `Done` state and is echoed back.
Comment(Span),
/// Causes `EchoState` to produce a dead-state transition.
Text(Span),
}
impl Display for TestToken {
    /// Placeholder formatter for the test token.
    ///
    /// No textual representation is provided;
    ///   invoking this panics via [`unimplemented!`].
    fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        unimplemented!("fmt::Display")
    }
}
impl Token for TestToken {
fn span(&self) -> Span {
use TestToken::*;
match self {
Close(span) | Comment(span) | Text(span) => *span,
}
}
}
#[derive(Debug, PartialEq, Eq)]
enum EchoState {
Empty,
@ -490,18 +519,17 @@ pub mod test {
}
impl ParseState for EchoState {
type Token = XirToken;
type Object = XirToken;
type Token = TestToken;
type Object = TestToken;
type Error = EchoStateError;
fn parse_token(self, tok: XirToken) -> TransitionResult<Self> {
fn parse_token(self, tok: TestToken) -> TransitionResult<Self> {
match tok {
XirToken::Comment(..) => Transition(Self::Done).with(tok),
XirToken::Close(..) => {
TestToken::Comment(..) => Transition(Self::Done).with(tok),
TestToken::Close(..) => {
Transition(self).err(EchoStateError::InnerError(tok))
}
XirToken::Text(..) => Transition(self).dead(tok),
_ => Transition(self).with(tok),
TestToken::Text(..) => Transition(self).dead(tok),
}
}
@ -512,7 +540,7 @@ pub mod test {
#[derive(Debug, PartialEq, Eq)]
enum EchoStateError {
InnerError(XirToken),
InnerError(TestToken),
}
impl Display for EchoStateError {
@ -532,7 +560,7 @@ pub mod test {
#[test]
fn successful_parse_in_accepting_state_with_spans() {
// EchoState is placed into a Done state given Comment.
let tok = XirToken::Comment("foo".into(), DS);
let tok = TestToken::Comment(DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);
@ -553,7 +581,7 @@ pub mod test {
#[test]
fn fails_on_end_of_stream_when_not_in_accepting_state() {
let span = Span::new(10, 20, "ctx".intern());
let mut toks = [XirToken::Close(None, span)].into_iter();
let mut toks = [TestToken::Close(span)].into_iter();
let mut sut = Sut::from(&mut toks);
@ -573,8 +601,8 @@ pub mod test {
#[test]
fn returns_state_specific_error() {
// XirToken::Close causes EchoState to produce an error.
let errtok = XirToken::Close(None, DS);
// TestToken::Close causes EchoState to produce an error.
let errtok = TestToken::Close(DS);
let mut toks = [errtok.clone()].into_iter();
let mut sut = Sut::from(&mut toks);
@ -599,10 +627,10 @@ pub mod test {
// Set up so that we have a single token that we can use for
// recovery as part of the same iterator.
let recovery = XirToken::Comment("recov".into(), DS);
let recovery = TestToken::Comment(DS);
let mut toks = [
// Used purely to populate a Span.
XirToken::Close(None, span),
TestToken::Close(span),
// Recovery token here:
recovery.clone(),
]
@ -640,7 +668,7 @@ pub mod test {
#[test]
fn unhandled_dead_state_results_in_error() {
// A Text will cause our parser to return Dead.
let tok = XirToken::Text("dead".into(), DS);
let tok = TestToken::Text(DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);

View File

@ -53,6 +53,7 @@
//! To parse an entire XML document,
//! see [`reader`].
use crate::parse;
use crate::span::Span;
use crate::sym::{
st_as_sym, CIdentStaticSymbolId, GlobalSymbolIntern,
@ -72,7 +73,6 @@ pub use escape::{DefaultEscaper, Escaper};
pub mod attr;
pub mod flat;
pub mod iter;
pub mod parse;
pub mod pred;
pub mod reader;
pub mod tree;

View File

@ -20,11 +20,9 @@
//! Parse XIR attribute [`TokenStream`][super::super::TokenStream]s.
use crate::{
parse::{ParseState, Transition, TransitionResult},
span::Span,
xir::{
parse::{ParseState, Transition, TransitionResult},
QName, Token as XirToken,
},
xir::{QName, Token as XirToken},
};
use std::{error::Error, fmt::Display};
@ -123,8 +121,8 @@ mod test {
use super::*;
use crate::{
convert::ExpectInto,
parse::{ParseStatus, Parsed},
sym::GlobalSymbolIntern,
xir::parse::{ParseStatus, Parsed},
};
const S: Span = crate::span::DUMMY_SPAN;

View File

@ -40,10 +40,15 @@
use super::{
attr::{Attr, AttrParseError, AttrParseState},
parse::{ParseState, ParseStatus, ParsedResult, TransitionResult},
QName, Token, Token as XirToken, TokenStream, Whitespace,
};
use crate::{span::Span, sym::SymbolId, xir::parse::Transition};
use crate::{
parse::{
ParseState, ParseStatus, ParsedResult, Transition, TransitionResult,
},
span::Span,
sym::SymbolId,
};
use arrayvec::ArrayVec;
use std::{error::Error, fmt::Display};

View File

@ -22,12 +22,11 @@
//! These tests take place within the context of the XIR parsing framework,
//! so they are one layer of abstraction away from unit tests.
use super::super::parse::ParseError;
use super::*;
use crate::convert::ExpectInto;
use crate::parse::{ParseError, Parsed};
use crate::span::DUMMY_SPAN;
use crate::sym::GlobalSymbolIntern;
use crate::xir::parse::Parsed;
const S: Span = DUMMY_SPAN;
const S2: Span = S.offset_add(1).unwrap();

View File

@ -175,17 +175,20 @@
use super::{
attr::{Attr, AttrList, AttrParseError, AttrParseState},
parse::{
ParseError, ParseResult, ParseState, ParseStatus, ParsedResult,
TransitionResult,
},
QName, Token, Token as XirToken, TokenResultStream, TokenStream,
QName, Token, Token as XirToken, TokenStream,
};
use crate::{span::Span, sym::SymbolId, xir::parse::Transition};
use crate::{
parse::{
ParseError, ParseResult, ParseState, ParseStatus, ParsedResult,
Transition, TransitionResult,
},
span::Span,
sym::SymbolId,
};
use std::{error::Error, fmt::Display, result};
type Parsed = super::parse::Parsed<Tree>;
type Parsed = crate::parse::Parsed<Tree>;
/// A XIR tree (XIRT).
///
@ -772,7 +775,7 @@ pub fn attr_parser_from<'a>(
toks: impl TokenStream,
) -> impl Iterator<Item = result::Result<Attr, ParseError<XirToken, StackError>>>
{
use super::parse::Parsed;
use crate::parse::Parsed;
AttrParseState::parse(toks).filter_map(|parsed| match parsed {
Ok(Parsed::Object(attr)) => Some(Ok(attr)),