tamer: xir: Complete parse type migration

A previous commit moved the parser.  This updates the types so that they can
actually be utilized in that context.

DEV-10863
main
Mike Gerwitz 2022-03-21 13:40:54 -04:00
parent 14638a612f
commit ceb00c4df5
6 changed files with 138 additions and 83 deletions

View File

@ -38,6 +38,8 @@ impl PartialEq for Error {
}
}
impl Eq for Error {}
impl From<InnerXmlError> for Error {
fn from(e: InnerXmlError) -> Self {
Self(e)

View File

@ -273,7 +273,7 @@ impl Display for LocalPart {
///
/// Whitespace here is expected to consist of `[ \n\t\r]`
/// (where the first character in that class is a space).
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Whitespace(SymbolId);
impl Deref for Whitespace {

View File

@ -26,7 +26,7 @@
mod parse;
use super::QName;
use crate::{span::Span, sym::SymbolId};
use crate::{parse::Token, span::Span, sym::SymbolId};
use std::fmt::Display;
pub use parse::{AttrParseError, AttrParseState};
@ -64,6 +64,15 @@ impl Attr {
}
}
impl Token for Attr {
    /// Span reported for this attribute when it is treated as a parser
    /// token.
    ///
    /// The attribute stores more than one span in its `span` field;
    /// this yields the element at index `1` of that tuple.
    fn span(&self) -> Span {
        // TODO: The chosen span is not guaranteed to be the one that is
        //   meaningful relative to whichever parser is consuming this
        //   token,
        //     so a context argument to bias the choice may be warranted.
        let Self { span: spans, .. } = self;
        spans.1
    }
}
impl Display for Attr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "`@{}=\"{}\"` at {}", self.name, self.value, self.span.0)

View File

@ -85,10 +85,10 @@ impl Default for AttrParseState {
/// Attribute parsing error.
#[derive(Debug, PartialEq, Eq)]
pub enum AttrParseError {
/// [`Token::AttrName`] was expected.
/// [`XirToken::AttrName`] was expected.
AttrNameExpected(XirToken),
/// [`Token::AttrValue`] was expected.
/// [`XirToken::AttrValue`] was expected.
AttrValueExpected(QName, Span, XirToken),
}

View File

@ -40,11 +40,12 @@
use super::{
attr::{Attr, AttrParseError, AttrParseState},
QName, Token, Token as XirToken, TokenStream, Whitespace,
QName, Token as XirToken, TokenStream, Whitespace,
};
use crate::{
parse::{
ParseState, ParseStatus, ParsedResult, Transition, TransitionResult,
ParseState, ParseStatus, ParsedResult, Token, Transition,
TransitionResult,
},
span::Span,
sym::SymbolId,
@ -113,6 +114,47 @@ pub enum Object {
Whitespace(Whitespace, Span),
}
impl Token for Object {
fn span(&self) -> Span {
use Object::*;
match self {
Open(_, span, _)
| Close(_, span, _)
| Comment(_, span)
| Text(_, span)
| CData(_, span)
| Whitespace(_, span) => *span,
Attr(attr) => attr.span(),
}
}
}
impl Display for Object {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Object::*;
match self {
Open(qname, span, _) => {
Display::fmt(&XirToken::Open(*qname, *span), f)
}
Close(oqname, span, _) => {
Display::fmt(&XirToken::Close(*oqname, *span), f)
}
Attr(attr) => Display::fmt(&attr, f),
Comment(sym, span) => {
Display::fmt(&XirToken::Comment(*sym, *span), f)
}
Text(sym, span) => Display::fmt(&XirToken::Text(*sym, *span), f),
CData(sym, span) => Display::fmt(&XirToken::CData(*sym, *span), f),
Whitespace(ws, span) => {
Display::fmt(&XirToken::Whitespace(*ws, *span), f)
}
}
}
}
/// XIRF-compatible attribute parser.
pub trait FlatAttrParseState = ParseState<Token = XirToken, Object = Attr>
where
@ -155,17 +197,17 @@ where
type Object = Object;
type Error = StateError;
fn parse_token(self, tok: Token) -> TransitionResult<Self> {
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
use ParseStatus::{Dead, Incomplete, Object as Obj};
use State::{AttrExpected, Done, NodeExpected, PreRoot};
match (self, tok) {
// Comments are permitted before and after the first root element.
(st @ (PreRoot | Done), Token::Comment(sym, span)) => {
(st @ (PreRoot | Done), XirToken::Comment(sym, span)) => {
Transition(st).with(Object::Comment(sym, span))
}
(PreRoot, tok @ Token::Open(..)) => {
(PreRoot, tok @ XirToken::Open(..)) => {
Self::parse_node(Default::default(), tok)
}
@ -213,21 +255,22 @@ where
/// Parse a token while in a state expecting a node.
fn parse_node(
mut stack: ElementStack<MAX_DEPTH>,
tok: Token,
tok: <Self as ParseState>::Token,
) -> TransitionResult<Self> {
use Object::*;
use State::{AttrExpected, Done, NodeExpected};
match tok {
Token::Open(qname, span) if stack.len() == MAX_DEPTH => Transition(
NodeExpected(stack),
)
.err(StateError::MaxDepthExceeded {
open: (qname, span),
max: Depth(MAX_DEPTH),
}),
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
Transition(NodeExpected(stack)).err(
StateError::MaxDepthExceeded {
open: (qname, span),
max: Depth(MAX_DEPTH),
},
)
}
Token::Open(qname, span) => {
XirToken::Open(qname, span) => {
let depth = stack.len();
stack.push((qname, span));
@ -239,7 +282,7 @@ where
))
}
Token::Close(close_oqname, close_span) => {
XirToken::Close(close_oqname, close_span) => {
match (close_oqname, stack.pop()) {
(_, None) => unreachable!("parser should be in Done state"),
@ -273,24 +316,24 @@ where
}
}
Token::Comment(sym, span) => {
XirToken::Comment(sym, span) => {
Transition(NodeExpected(stack)).with(Comment(sym, span))
}
Token::Text(sym, span) => {
XirToken::Text(sym, span) => {
Transition(NodeExpected(stack)).with(Text(sym, span))
}
Token::CData(sym, span) => {
XirToken::CData(sym, span) => {
Transition(NodeExpected(stack)).with(CData(sym, span))
}
Token::Whitespace(ws, span) => {
XirToken::Whitespace(ws, span) => {
Transition(NodeExpected(stack)).with(Whitespace(ws, span))
}
// We should transition to `State::Attr` before encountering any
// of these tokens.
Token::AttrName(..)
| Token::AttrValue(..)
| Token::AttrValueFragment(..) => {
XirToken::AttrName(..)
| XirToken::AttrValue(..)
| XirToken::AttrValueFragment(..) => {
unreachable!("attribute token in NodeExpected state: {tok:?}")
}
}
@ -309,7 +352,7 @@ pub fn parse<const MAX_DEPTH: usize>(
#[derive(Debug, Eq, PartialEq)]
pub enum StateError {
/// Opening root element tag was expected.
RootOpenExpected(Token),
RootOpenExpected(XirToken),
/// Opening tag exceeds the maximum nesting depth for this parser.
MaxDepthExceeded { open: (QName, Span), max: Depth },

View File

@ -37,7 +37,7 @@ const S4: Span = S3.offset_add(1).unwrap();
fn empty_element_self_close() {
let name = ("ns", "elem").unwrap_into();
let toks = [Token::Open(name, S), Token::Close(None, S2)].into_iter();
let toks = [XirToken::Open(name, S), XirToken::Close(None, S2)].into_iter();
let sut = parse::<1>(toks);
@ -56,7 +56,8 @@ fn empty_element_self_close() {
fn empty_element_balanced_close() {
let name = ("ns", "openclose").unwrap_into();
let toks = [Token::Open(name, S), Token::Close(Some(name), S2)].into_iter();
let toks =
[XirToken::Open(name, S), XirToken::Close(Some(name), S2)].into_iter();
let sut = parse::<1>(toks);
@ -78,16 +79,16 @@ fn extra_closing_tag() {
let name = ("ns", "openclose").unwrap_into();
let toks = [
// We need an opening tag to actually begin document parsing.
Token::Open(name, S),
Token::Close(Some(name), S2),
Token::Close(Some(name), S3),
XirToken::Open(name, S),
XirToken::Close(Some(name), S2),
XirToken::Close(Some(name), S3),
]
.into_iter();
let sut = parse::<1>(toks);
assert_eq!(
Err(ParseError::UnexpectedToken(Token::Close(Some(name), S3),)),
Err(ParseError::UnexpectedToken(XirToken::Close(Some(name), S3),)),
sut.collect::<Result<Vec<Parsed<Object>>, _>>()
);
}
@ -100,16 +101,16 @@ fn extra_self_closing_tag() {
let name = ("ns", "openclose").unwrap_into();
let toks = [
// We need an opening tag to actually begin document parsing.
Token::Open(name, S),
Token::Close(None, S2),
Token::Close(None, S3),
XirToken::Open(name, S),
XirToken::Close(None, S2),
XirToken::Close(None, S3),
]
.into_iter();
let sut = parse::<1>(toks);
assert_eq!(
Err(ParseError::UnexpectedToken(Token::Close(None, S3),)),
Err(ParseError::UnexpectedToken(XirToken::Close(None, S3),)),
sut.collect::<Result<Vec<Parsed<Object>>, _>>()
);
}
@ -122,8 +123,8 @@ fn empty_element_unbalanced_close() {
let close_name = "unbalanced_name".unwrap_into();
let toks = [
Token::Open(open_name, S),
Token::Close(Some(close_name), S2),
XirToken::Open(open_name, S),
XirToken::Close(Some(close_name), S2),
]
.into_iter();
@ -149,10 +150,10 @@ fn single_empty_child() {
let child = "child".unwrap_into();
let toks = [
Token::Open(name, S),
Token::Open(child, S2),
Token::Close(None, S3),
Token::Close(Some(name), S4),
XirToken::Open(name, S),
XirToken::Open(child, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(name), S4),
]
.into_iter();
@ -175,9 +176,9 @@ fn depth_exceeded() {
let exceed = "exceed".unwrap_into();
let toks = [
Token::Open(name, S),
XirToken::Open(name, S),
// This one exceeds the max depth, ...
Token::Open(exceed, S2),
XirToken::Open(exceed, S2),
]
.into_iter();
@ -206,12 +207,12 @@ fn empty_element_with_attrs() {
let val2 = "val2".intern();
let toks = [
Token::Open(name, S),
Token::AttrName(attr1, S2),
Token::AttrValue(val1, S3),
Token::AttrName(attr2, S3),
Token::AttrValue(val2, S4),
Token::Close(None, S4),
XirToken::Open(name, S),
XirToken::AttrName(attr1, S2),
XirToken::AttrValue(val1, S3),
XirToken::AttrName(attr2, S3),
XirToken::AttrValue(val2, S4),
XirToken::Close(None, S4),
]
.into_iter();
@ -238,12 +239,12 @@ fn child_element_after_attrs() {
let val = "val".intern();
let toks = [
Token::Open(name, S),
Token::AttrName(attr, S),
Token::AttrValue(val, S2),
Token::Open(child, S),
Token::Close(None, S2),
Token::Close(Some(name), S3),
XirToken::Open(name, S),
XirToken::AttrName(attr, S),
XirToken::AttrValue(val, S2),
XirToken::Open(child, S),
XirToken::Close(None, S2),
XirToken::Close(Some(name), S3),
]
.into_iter();
@ -269,12 +270,12 @@ fn element_with_empty_sibling_children() {
let childb = "childb".unwrap_into();
let toks = [
Token::Open(parent, S),
Token::Open(childa, S2),
Token::Close(None, S3),
Token::Open(childb, S2),
Token::Close(None, S3),
Token::Close(Some(parent), S2),
XirToken::Open(parent, S),
XirToken::Open(childa, S2),
XirToken::Close(None, S3),
XirToken::Open(childb, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(parent), S2),
]
.into_iter();
@ -302,12 +303,12 @@ fn element_with_child_with_attributes() {
let value = "attr value".intern();
let toks = [
Token::Open(parent, S),
Token::Open(child, S),
Token::AttrName(attr, S),
Token::AttrValue(value, S2),
Token::Close(None, S3),
Token::Close(Some(parent), S3),
XirToken::Open(parent, S),
XirToken::Open(child, S),
XirToken::AttrName(attr, S),
XirToken::AttrValue(value, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(parent), S3),
]
.into_iter();
@ -332,9 +333,9 @@ fn element_with_text() {
let text = "inner text".into();
let toks = [
Token::Open(parent, S),
Token::Text(text, S2),
Token::Close(Some(parent), S3),
XirToken::Open(parent, S),
XirToken::Text(text, S2),
XirToken::Close(Some(parent), S3),
]
.into_iter();
@ -353,7 +354,7 @@ fn element_with_text() {
#[test]
fn not_accepting_state_if_element_open() {
let name = "unclosed".unwrap_into();
let toks = [Token::Open(name, S)].into_iter();
let toks = [XirToken::Open(name, S)].into_iter();
let mut sut = parse::<1>(toks);
@ -374,10 +375,10 @@ fn comment_before_or_after_root_ok() {
let cend = "end comment".intern();
let toks = [
Token::Comment(cstart, S),
Token::Open(name, S2),
Token::Close(None, S3),
Token::Comment(cend, S4),
XirToken::Comment(cstart, S),
XirToken::Open(name, S2),
XirToken::Close(None, S3),
XirToken::Comment(cend, S4),
]
.into_iter();
@ -407,10 +408,10 @@ fn content_after_root_close_error() {
let name = "root".unwrap_into();
let toks = [
Token::Open(name, S),
Token::Close(None, S2),
XirToken::Open(name, S),
XirToken::Close(None, S2),
// Document ends here
Token::Open(name, S3),
XirToken::Open(name, S3),
]
.into_iter();
@ -418,7 +419,7 @@ fn content_after_root_close_error() {
assert_eq!(
Result::<Vec<Parsed<Object>>, _>::Err(ParseError::UnexpectedToken(
Token::Open(name, S3)
XirToken::Open(name, S3)
)),
sut.collect()
);
@ -429,13 +430,13 @@ fn content_after_root_close_error() {
fn content_before_root_open_error() {
let text = "foo".intern();
let toks = [Token::Text(text, S)].into_iter();
let toks = [XirToken::Text(text, S)].into_iter();
let sut = parse::<1>(toks);
assert_eq!(
Result::<Vec<Parsed<Object>>, _>::Err(ParseError::StateError(
StateError::RootOpenExpected(Token::Text(text, S))
StateError::RootOpenExpected(XirToken::Text(text, S))
)),
sut.collect()
);