(
local_sym: &T,
) -> Self {
Self(None, LocalPart(NCName(st_as_sym(local_sym))))
}
}
impl TryFrom<(P, L)> for QName
where
P: TryInto,
L: TryInto,
{
type Error = P::Error;
fn try_from(value: (P, L)) -> Result {
Ok(Self(Some(value.0.try_into()?), value.1.try_into()?))
}
}
impl TryFrom<(Option
, L)> for QName
where
P: TryInto,
L: TryInto,
{
type Error = P::Error;
fn try_from(value: (Option, L)) -> Result {
let ns = match value.0 {
None => None,
Some(ns) => Some(ns.try_into()?),
};
Ok(Self(ns, value.1.try_into()?))
}
}
impl TryFrom<&str> for QName {
type Error = SpanlessError;
fn try_from(value: &str) -> Result {
Ok(QName(None, value.try_into()?))
}
}
impl TryFrom<&[u8]> for QName {
type Error = SpanlessError;
/// Attempt to parse a byte slice into a [`QName`].
///
/// The byte slice must represent a valid QName in UTF-8.
/// If a colon is present,
/// it delimits the namespace [`Prefix`] and [`LocalPart`],
/// and therefore must not be in the first or last byte position.
fn try_from(name: &[u8]) -> Result {
match memchr(b':', name) {
// Leading colon means we're missing a prefix, trailing means
// that we have no local part.
Some(pos) if pos == 0 || pos == name.len() - 1 => {
Err(SpanlessError::InvalidQName(name.intern_utf8()?))
}
// There is _at least_ one colon in the string.
Some(pos) => {
// The prefix is before the first colon,
// and so itself must not contain a colon and is therefore
// a valid NCName.
let prefix = NCName(name[..pos].intern_utf8()?);
// But there could be a _second_ colon,
// so the local part requires validation.
let local = NCName::try_from(&name[(pos + 1)..])?;
Ok(Self::new(Some(prefix.into()), local.into()))
}
// There are no colons in the string, so the entire string is
// both a local part and a valid NCName.
None => Ok(Self::new(None, NCName(name.intern_utf8()?).into())),
}
}
}
impl Display for QName {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
QName(Some(local), suffix) => write!(f, "{}:{}", local, suffix),
QName(None, suffix) => suffix.fmt(f),
}
}
}
/// Lightly-structured XML tokens with associated [`Span`]s.
///
/// This is a streamable IR for XML.
/// A writer requires knowledge only of a previous state,
/// such as whether a node is open,
/// and so this IR can be processed by a simple state machine
/// (see [`writer::WriterState`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
/// Opening tag of an element.
Open(QName, Span),
/// Closing tag of an element.
///
/// If the name is [`None`],
/// then the tag is self-closing.
/// This is intended primarily as a safety measure:
/// It allows writers to act as simple state machines without having
/// to ensure balancing by indicating that a node was intended to
/// self-close.
/// Otherwise,
/// we wouldn't know whether to self-close or to close and then
/// create a new closing tag;
/// if we blindly did the former,
/// we risk losing a closing tag when it wasn't intended.
/// Instead of losing tags,
/// writers can error,
/// indicating a bug in the stream.
///
/// The reason for using an option here rather than a variant is to
/// simplify pattern matching,
/// given especially that bindings after `@` in patterns have not
/// yet been stabalized at the time of writing (but are very
/// close!).
Close(Option, Span),
/// Element attribute name.
AttrName(QName, Span),
/// Element attribute value.
AttrValue(SymbolId, Span),
/// A portion of an element attribute value.
///
/// This allows for concatenating values into an attribute value without
/// having to copy values.
/// The last fragment must be a [`Token::AttrValue`].
///
/// Since each fragment contains a span,
/// this also potentially gives higher resolution for the origin of
/// components of generated attribute values.
///
/// _This should be used only for writing._
/// These will never be encountered during reading,
/// and so to keep the parsers and IRs simple,
/// there is no support for fragments beyond XIR.
/// (There was in the past,
/// but it was removed.)
AttrValueFragment(SymbolId, Span),
/// Comment node.
Comment(SymbolId, Span),
/// Character data as part of an element.
///
/// See also [`CData`](Token::CData) variant.
Text(SymbolId, Span),
/// CData node (``).
///
/// _Warning: It is up to the caller to ensure that the string `]]>` is
/// not present in the text!_
/// This is intended for reading existing XML data where CData is
/// already present,
/// not for producing new CData safely!
CData(SymbolId, Span),
/// Similar to `Text`,
/// but intended for use where only whitespace is allowed,
/// such as alignment of attributes.
Whitespace(Whitespace, Span),
}
impl Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// _Do not_ render large amounts of text here;
// this is not only a risk depending on what is output,
// but the diagnostic system also quote source lines to provide
// the necessary context.
match self {
Self::Open(qname, _) => write!(f, "`<{}>`", qname),
Self::Close(Some(qname), _) => write!(f, "`{}>`", qname),
// Its context is contained within the Open,
// and hopefully any user-visible errors will display that instead.
Self::Close(None, _) => {
write!(f, "`/>`")
}
Self::AttrName(qname, _) => {
write!(f, "`@{}`", qname)
}
Self::AttrValue(attr_val, _) => {
write!(f, "attribute value `{}`", attr_val)
}
Self::AttrValueFragment(attr_val, _) => {
write!(f, "attribute value fragment `{}`", attr_val)
}
Self::Comment(..) => write!(f, "comment"),
Self::Text(..) => write!(f, "text"),
Self::CData(..) => write!(f, "CDATA"),
Self::Whitespace(..) => write!(f, "whitespace"),
}
}
}
impl parse::Token for Token {
/// Retrieve the [`Span`] associated with a given [`Token`].
///
/// Every token has an associated span.
fn span(&self) -> Span {
use Token::*;
match self {
Open(_, span)
| Close(_, span)
| AttrName(_, span)
| AttrValue(_, span)
| AttrValueFragment(_, span)
| Comment(_, span)
| Text(_, span)
| CData(_, span)
| Whitespace(_, span) => *span,
}
}
}
impl parse::Object for Token {}
#[cfg(test)]
mod test {
use super::*;
use crate::sym::GlobalSymbolIntern;
use std::convert::TryInto;
type TestResult = Result<(), Box>;
lazy_static! {
static ref S: Span =
Span::from_byte_interval((0, 0), "test case".intern());
}
mod name {
use super::*;
#[test]
fn ncname_comparable_to_sym() {
let foo = "foo".intern();
assert_eq!(NCName(foo), foo);
}
#[test]
fn ncname_try_into_from_str_no_colon() -> TestResult {
let name: NCName = "no-colon".try_into()?;
assert_eq!(name, "no-colon".intern());
Ok(())
}
#[test]
fn ncname_try_into_from_str_fails_with_colon() {
assert_eq!(
NCName::try_from("look:a-colon"),
Err(SpanlessError::NCColon("look:a-colon".into()))
);
}
#[test]
fn ncname_from_byte_slice() -> TestResult {
let name: NCName = (b"no-colon" as &[u8]).try_into()?;
assert_eq!(name, "no-colon".intern());
Ok(())
}
#[test]
fn ncname_from_byte_slice_fails_with_colon() {
assert_eq!(
NCName::try_from(b"a:colon" as &[u8]),
Err(SpanlessError::NCColon("a:colon".into()))
);
}
#[test]
fn local_name_from_local_part_only() -> TestResult {
let name = QName::new_local("foo".try_into()?);
assert_eq!(name.local_name(), "foo".try_into()?);
assert_eq!(None, name.prefix());
Ok(())
}
#[test]
fn local_name_from_option_tuple() -> TestResult {
let name: QName = (Option::<&str>::None, "foo").try_into()?;
assert_eq!(name.local_name(), "foo".try_into()?);
assert_eq!(None, name.prefix());
Ok(())
}
#[test]
fn fully_qualified_name() -> TestResult {
let name: QName = ("foons", "foo").try_into()?;
assert_eq!(name.prefix(), Some("foons".try_into()?));
assert_eq!(name.local_name(), "foo".try_into()?);
Ok(())
}
}
#[test]
fn whitespace() -> TestResult {
assert_eq!(Whitespace::try_from(" ")?, " ".try_into()?);
assert_eq!(Whitespace::try_from(" \t ")?, " \t ".try_into()?);
assert_eq!(
Whitespace::try_from("not ws!"),
Err(SpanlessError::NotWhitespace("not ws!".into(),))
);
Ok(())
}
#[test]
fn whitespace_as_text() -> TestResult {
assert_eq!(" ".intern(), Whitespace::try_from(" ")?.into(),);
Ok(())
}
}