2022-03-17 12:20:20 -04:00
|
|
|
// XIR flat (XIRF)
|
|
|
|
//
|
|
|
|
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
|
|
|
//
|
|
|
|
// This file is part of TAME.
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
//! Lightly-parsed XIR as a flat stream (XIRF).
|
|
|
|
//!
|
|
|
|
//! XIRF lightly parses a raw XIR [`TokenStream`] into a stream of
|
|
|
|
//! [`Object`]s that are,
|
|
|
|
//! like a [`TokenStream`],
|
|
|
|
//! flat in structure.
|
|
|
|
//! It provides the following features over raw XIR:
|
|
|
|
//!
|
|
|
|
//! 1. All closing tags must correspond to a matching opening tag at the
|
|
|
|
//! same depth;
|
|
|
|
//! 2. [`Object`] exposes the [`Depth`] of each opening/closing tag;
|
|
|
|
//! 3. Attribute tokens are parsed into [`Attr`] objects; and
|
|
|
|
//! 4. Parsing will fail if input ends before all elements have been
|
|
|
|
//! closed.
|
|
|
|
//!
|
|
|
|
//! XIRF lowering does not perform any dynamic memory allocation;
|
|
|
|
//! maximum element nesting depth is set statically depending on the needs
|
|
|
|
//! of the caller.
|
|
|
|
|
|
|
|
use super::{
|
2022-03-17 16:10:56 -04:00
|
|
|
attr::{Attr, AttrParseError, AttrParseState},
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
parse::{
|
|
|
|
ParseState, ParseStateResult, ParseStatus, ParsedResult,
|
|
|
|
TransitionResult,
|
|
|
|
},
|
2022-03-17 12:20:20 -04:00
|
|
|
QName, Token, TokenStream, Whitespace,
|
|
|
|
};
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
use crate::{span::Span, sym::SymbolId, xir::parse::Transition};
|
2022-03-17 12:20:20 -04:00
|
|
|
use arrayvec::ArrayVec;
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
use std::{error::Error, fmt::Display};
|
2022-03-17 12:20:20 -04:00
|
|
|
|
|
|
|
/// Tag nesting depth
|
|
|
|
/// (`0` represents the root).
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
|
|
pub struct Depth(usize);
|
|
|
|
|
|
|
|
impl Display for Depth {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
Display::fmt(&self.0, f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A lightly-parsed XIRF object.
|
|
|
|
///
|
|
|
|
/// Certain XIR [`Token`]s are formed into a single object,
|
|
|
|
/// such as an [`Attr`].
|
|
|
|
/// Other objects retain the same format as their underlying token,
|
|
|
|
/// but are still validated to ensure that they are well-formed and that
|
|
|
|
/// the XML is well-structured.
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
|
|
pub enum Object {
|
|
|
|
/// Opening tag of an element.
|
|
|
|
Open(QName, Span, Depth),
|
|
|
|
|
|
|
|
/// Closing tag of an element.
|
|
|
|
///
|
|
|
|
/// If the name is [`None`],
|
|
|
|
/// then the tag is self-closing.
|
|
|
|
/// If the name is [`Some`],
|
|
|
|
/// then the tag is guaranteed to be balanced
|
|
|
|
/// (matching the depth of its opening tag).
|
|
|
|
Close(Option<QName>, Span, Depth),
|
|
|
|
|
|
|
|
/// An attribute and its value.
|
|
|
|
///
|
|
|
|
/// The associated [`Span`]s can be found on the enclosed [`Attr`]
|
|
|
|
/// object.
|
|
|
|
Attr(Attr),
|
|
|
|
|
|
|
|
/// Comment node.
|
|
|
|
Comment(SymbolId, Span),
|
|
|
|
|
|
|
|
/// Character data as part of an element.
|
|
|
|
///
|
|
|
|
/// See also [`CData`](Object::CData) variant.
|
|
|
|
Text(SymbolId, Span),
|
|
|
|
|
|
|
|
/// CData node (`<![CDATA[...]]>`).
|
|
|
|
///
|
|
|
|
/// _Warning: It is up to the caller to ensure that the string `]]>` is
|
|
|
|
/// not present in the text!_
|
|
|
|
/// This is intended for reading existing XML data where CData is
|
|
|
|
/// already present,
|
|
|
|
/// not for producing new CData safely!
|
|
|
|
CData(SymbolId, Span),
|
|
|
|
|
|
|
|
/// Similar to `Text`,
|
|
|
|
/// but intended for use where only whitespace is allowed,
|
|
|
|
/// such as alignment of attributes.
|
|
|
|
Whitespace(Whitespace, Span),
|
|
|
|
}
|
|
|
|
|
|
|
|
/// XIRF-compatible attribute parser.
|
|
|
|
pub trait FlatAttrParseState = ParseState<Object = Attr>
|
|
|
|
where
|
|
|
|
<Self as ParseState>::Error: Into<StateError>;
|
|
|
|
|
|
|
|
/// Stack of element [`QName`] and [`Span`] pairs,
|
|
|
|
/// representing the current level of nesting.
|
|
|
|
///
|
|
|
|
/// This storage is statically allocated,
|
|
|
|
/// allowing XIRF's parser to avoid memory allocation entirely.
|
|
|
|
type ElementStack<const MAX_DEPTH: usize> = ArrayVec<(QName, Span), MAX_DEPTH>;
|
|
|
|
|
|
|
|
/// XIRF parser state.
|
|
|
|
///
|
|
|
|
/// This parser is a pushdown automaton.
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
|
|
pub enum State<const MAX_DEPTH: usize, SA = AttrParseState>
|
|
|
|
where
|
|
|
|
SA: FlatAttrParseState,
|
|
|
|
{
|
|
|
|
// TODO: Ensure that non-comment nodes are not encountered before the
|
|
|
|
// root,
|
|
|
|
// and that we do not encounter any non-comment nodes after the
|
|
|
|
// root.
|
|
|
|
/// Parsing nodes.
|
|
|
|
NodeExpected(ElementStack<MAX_DEPTH>),
|
|
|
|
|
|
|
|
/// Delegating to attribute parser.
|
|
|
|
AttrExpected(ElementStack<MAX_DEPTH>, SA),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<const MD: usize, SA: FlatAttrParseState> Default for State<MD, SA> {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::NodeExpected(Default::default())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<const MAX_DEPTH: usize, SA> ParseState for State<MAX_DEPTH, SA>
|
|
|
|
where
|
|
|
|
SA: FlatAttrParseState,
|
|
|
|
{
|
|
|
|
type Object = Object;
|
|
|
|
type Error = StateError;
|
|
|
|
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
fn parse_token(self, tok: Token) -> TransitionResult<Self> {
|
2022-03-17 12:20:20 -04:00
|
|
|
use ParseStatus::{Dead, Incomplete, Object as Obj};
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
use State::{AttrExpected, NodeExpected};
|
2022-03-17 12:20:20 -04:00
|
|
|
|
|
|
|
// This awkward-looking take-reassign forces us to be explicit
|
|
|
|
// about state transitions in every case,
|
|
|
|
// ensuring that we always have documented proof of what state
|
|
|
|
// the system winds up in.
|
|
|
|
// The `Invalid` state prevents using `return`.
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
match (self, tok) {
|
2022-03-17 12:20:20 -04:00
|
|
|
(NodeExpected(stack), tok) => Self::parse_node(stack, tok),
|
|
|
|
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
(AttrExpected(stack, sa), tok) => match sa.parse_token(tok) {
|
|
|
|
(Transition(sa), Ok(Incomplete)) => {
|
|
|
|
Transition(AttrExpected(stack, sa)).incomplete()
|
|
|
|
}
|
|
|
|
(Transition(sa), Ok(Obj(attr))) => {
|
|
|
|
Transition(AttrExpected(stack, sa)).with(Object::Attr(attr))
|
|
|
|
}
|
|
|
|
(_, Ok(Dead(lookahead))) => Self::parse_node(stack, lookahead),
|
|
|
|
(Transition(sa), Err(x)) => {
|
|
|
|
Transition(AttrExpected(stack, sa)).err(x)
|
2022-03-17 12:20:20 -04:00
|
|
|
}
|
|
|
|
},
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
}
|
2022-03-17 12:20:20 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Whether all elements have been closed.
|
|
|
|
///
|
|
|
|
/// Parsing will fail if there are any open elements.
|
|
|
|
/// Intuitively,
|
|
|
|
/// this means that the parser must have encountered the closing tag
|
|
|
|
/// for the root element.
|
|
|
|
fn is_accepting(&self) -> bool {
|
|
|
|
// TODO: It'd be nice if we could also return additional context to
|
|
|
|
// aid the user in diagnosing the problem,
|
|
|
|
// e.g. what element(s) still need closing.
|
|
|
|
matches!(self, Self::NodeExpected(stack) if stack.len() == 0)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<const MAX_DEPTH: usize, SA> State<MAX_DEPTH, SA>
|
|
|
|
where
|
|
|
|
SA: FlatAttrParseState,
|
|
|
|
{
|
|
|
|
/// Parse a token while in a state expecting a node.
|
|
|
|
fn parse_node(
|
|
|
|
mut stack: ElementStack<MAX_DEPTH>,
|
|
|
|
tok: Token,
|
|
|
|
) -> (Transition<Self>, ParseStateResult<Self>) {
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
use Object::*;
|
2022-03-17 12:20:20 -04:00
|
|
|
use State::{AttrExpected, NodeExpected};
|
|
|
|
|
|
|
|
match tok {
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
Token::Open(qname, span) if stack.len() == MAX_DEPTH => Transition(
|
|
|
|
NodeExpected(stack),
|
|
|
|
)
|
|
|
|
.err(StateError::MaxDepthExceeded {
|
|
|
|
open: (qname, span),
|
|
|
|
max: Depth(MAX_DEPTH),
|
|
|
|
}),
|
2022-03-17 12:20:20 -04:00
|
|
|
|
|
|
|
Token::Open(qname, span) => {
|
|
|
|
let depth = stack.len();
|
|
|
|
stack.push((qname, span));
|
|
|
|
|
|
|
|
// Delegate to the attribute parser until it is complete.
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
Transition(AttrExpected(stack, SA::default())).with(Open(
|
|
|
|
qname,
|
|
|
|
span,
|
|
|
|
Depth(depth),
|
|
|
|
))
|
2022-03-17 12:20:20 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
Token::Close(close_oqname, close_span) => {
|
|
|
|
match (close_oqname, stack.pop()) {
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
(_, None) => Transition(NodeExpected(stack)).err(
|
|
|
|
StateError::ExtraClosingTag(close_oqname, close_span),
|
2022-03-17 12:20:20 -04:00
|
|
|
),
|
|
|
|
|
|
|
|
(Some(qname), Some((open_qname, open_span)))
|
|
|
|
if qname != open_qname =>
|
|
|
|
{
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
Transition(NodeExpected(stack)).err(
|
|
|
|
StateError::UnbalancedTag {
|
2022-03-17 12:20:20 -04:00
|
|
|
open: (open_qname, open_span),
|
|
|
|
close: (qname, close_span),
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
},
|
2022-03-17 12:20:20 -04:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
(..) => {
|
|
|
|
let depth = stack.len();
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
|
|
|
|
Transition(NodeExpected(stack)).with(Close(
|
|
|
|
close_oqname,
|
|
|
|
close_span,
|
|
|
|
Depth(depth),
|
|
|
|
))
|
2022-03-17 12:20:20 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
tamer: xir::parse::Transition: Generalize flat::Transition
XIRF introduced the concept of `Transition` to help document code and
provide mental synchronization points that make it easier to reason about
the system. I decided to hoist this into XIR's parser itself, and have
`parse_token` accept an owned state and require a new state to be returned,
utilizing `Transition`.
Together with the convenience methods introduced on `Transition` itself,
this produces much clearer code, as is evidenced by tree::Stack (XIRT's
parser). Passing an owned state is something that I had wanted to do
originally, but I thought it'd lead to more concise code to use a mutable
reference. Unfortunately, that concision lead to code that was much more
difficult than necessary to understand, and ended up having a net negative
benefit by leading to some more boilerplate for the nested types (granted,
that could have been alleviated in other ways).
This also opens up the possibility to do something that I wasn't able to
before, which was continue to abstract away parser composition by stitching
their state machines together. I don't know if this'll be done immediately,
but because the actual parsing operations are now able to compose
functionally without mutability getting the way, the previous state coupling
issues with the parent parser go away.
DEV-10863
2022-03-17 15:50:35 -04:00
|
|
|
Token::Comment(sym, span) => {
|
|
|
|
Transition(NodeExpected(stack)).with(Comment(sym, span))
|
|
|
|
}
|
|
|
|
Token::Text(sym, span) => {
|
|
|
|
Transition(NodeExpected(stack)).with(Text(sym, span))
|
|
|
|
}
|
|
|
|
Token::CData(sym, span) => {
|
|
|
|
Transition(NodeExpected(stack)).with(CData(sym, span))
|
|
|
|
}
|
|
|
|
Token::Whitespace(ws, span) => {
|
|
|
|
Transition(NodeExpected(stack)).with(Whitespace(ws, span))
|
|
|
|
}
|
2022-03-17 12:20:20 -04:00
|
|
|
|
|
|
|
// We should transition to `State::Attr` before encountering any
|
|
|
|
// of these tokens.
|
|
|
|
Token::AttrName(..)
|
|
|
|
| Token::AttrValue(..)
|
|
|
|
| Token::AttrValueFragment(..) => {
|
|
|
|
unreachable!("attribute token in NodeExpected state: {tok:?}")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Produce a streaming parser lowering a XIR [`TokenStream`] into a XIRF
|
|
|
|
/// stream.
|
|
|
|
pub fn parse<const MAX_DEPTH: usize>(
|
|
|
|
toks: impl TokenStream,
|
|
|
|
) -> impl Iterator<Item = ParsedResult<State<MAX_DEPTH>>> {
|
|
|
|
State::<MAX_DEPTH>::parse(toks)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parsing error from [`State`].
|
|
|
|
#[derive(Debug, Eq, PartialEq)]
|
|
|
|
pub enum StateError {
|
|
|
|
/// Opening tag exceeds the maximum nesting depth for this parser.
|
|
|
|
MaxDepthExceeded { open: (QName, Span), max: Depth },
|
|
|
|
|
|
|
|
/// The closing tag does not match the opening tag at the same level of
|
|
|
|
/// nesting.
|
|
|
|
UnbalancedTag {
|
|
|
|
open: (QName, Span),
|
|
|
|
close: (QName, Span),
|
|
|
|
},
|
|
|
|
|
|
|
|
/// Attempt to close a tag with no corresponding opening tag
|
|
|
|
/// (which would result in a negative depth).
|
|
|
|
ExtraClosingTag(Option<QName>, Span),
|
|
|
|
|
|
|
|
/// Error from the attribute parser.
|
|
|
|
AttrError(AttrParseError),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Display for StateError {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
use StateError::*;
|
|
|
|
|
|
|
|
match self {
|
|
|
|
MaxDepthExceeded {
|
|
|
|
open: (name, span),
|
|
|
|
max,
|
|
|
|
} => {
|
|
|
|
write!(
|
|
|
|
f,
|
|
|
|
"maximum element nesting depth of {max} exceeded \
|
|
|
|
by `{name}` at {span}"
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
UnbalancedTag {
|
|
|
|
open: (open_name, open_span),
|
|
|
|
close: (close_name, close_span),
|
|
|
|
} => {
|
|
|
|
write!(
|
|
|
|
f,
|
|
|
|
"expected closing tag `{open_name}`, \
|
|
|
|
but found `{close_name}` at {close_span} \
|
|
|
|
(opening tag at {open_span})",
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
ExtraClosingTag(Some(name), span) => {
|
|
|
|
write!(f, "closing tag `{name}` at {span} has no opening tag",)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this occurs, its likely that something generated invalid
|
|
|
|
// XIR;
|
|
|
|
// it should be a parsing error on read and no generator
|
|
|
|
// should ever produce this.
|
|
|
|
ExtraClosingTag(None, span) => {
|
|
|
|
write!(f, "self-closing tag at {span} has no opening tag")
|
|
|
|
}
|
|
|
|
|
|
|
|
AttrError(e) => Display::fmt(e, f),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Error for StateError {
|
|
|
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
|
|
|
todo!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<AttrParseError> for StateError {
|
|
|
|
fn from(e: AttrParseError) -> Self {
|
|
|
|
Self::AttrError(e)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod test;
|