tamer: xir::reader: Correct empty element whitespace handling

This both adds clarifying tests and corrects the case of `<foo/>`, where the
offset was erroneously off by one---the reader saw that there were no
attributes and added a byte, thinking it would include `>`, as in `<foo>`.

DEV-7145
main
Mike Gerwitz 2022-06-22 10:28:44 -04:00
parent adc45d90df
commit e5c8a218c3
2 changed files with 53 additions and 10 deletions

View File

@ -137,6 +137,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
ele,
prev_pos,
ctx,
true,
)
.and_then(|open| {
let new_pos = self.reader.buffer_position();
@ -160,6 +161,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
ele,
prev_pos,
ctx,
false,
)),
QuickXmlEvent::End(ele) => Some({
@ -297,6 +299,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
ele: BytesStart,
pos: usize,
ctx: Context,
empty_tag: bool,
) -> Result<Token> {
// Starts after the opening tag `<`, so adjust.
let addr = ele.as_ptr() as usize - 1;
@ -340,13 +343,13 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
.map_err(Error::from_with_span(ctx.span_or_zz(pos + 1, len)))
.and_then(|qname| {
let has_attrs = ele.attributes_raw().len() > 0;
let noattr_add: usize = (!has_attrs).into();
let noattr_add: usize = (!has_attrs && !empty_tag).into();
// <tag ... />
// [--] name + '<'
// <tag ... /> <tag/>
// [--] name + '<' [--] `noattr_add` must be 0
//
// <tag>..</tag>
// [---] name + '<' + '>'
// <tag>...</tag> <tag ...>...</tag>
// [---] name + '<' + '>' [--] name + '<'
let span = ctx.span_or_zz(pos, len + 1 + noattr_add);
if has_attrs {

View File

@ -94,14 +94,36 @@ macro_rules! new_sut {
}
#[test]
fn empty_node_without_prefix_or_attributes() {
new_sut!(sut = "<empty-node />");
// [---------] []
fn empty_node_without_prefix_or_attributes_or_whitespace() {
new_sut!(sut = "<empty-node/>");
// [---------][]
// 0 10
// A B
// A B
let a = DC.span(0, 11);
let b = DC.span(12, 2);
let b = DC.span(11, 2);
assert_eq!(
Ok(vec![
O(Token::Open("empty-node".unwrap_into(), a)),
O(Token::Close(None, b)),
]),
sut.collect(),
);
}
#[test]
fn empty_node_without_prefix_or_attributes() {
new_sut!(sut = "<empty-node />");
// [---------] []
// 0 10 14
// A B
//
// (extra WS intentional to test
// how it accommodates with spans)
let a = DC.span(0, 11);
let b = DC.span(14, 2);
assert_eq!(
Ok(vec![
@ -808,3 +830,21 @@ fn empty_element_qname_with_space_with_attr() {
sut.collect::<SutResultCollect>()
);
}
// Counterpart to the test above, but without any attributes.
// The goal is simply to confirm that a QName can never begin with
//   whitespace.
#[test]
fn space_before_element_name() {
    new_sut!(sut = r#"< foo />"#);
    //                 |
    //                 1

    // quick-xml interprets the space as a "" QName,
    //   so the expected error carries a zero-length span at offset 1.
    let empty_qname_span = DC.span(1, 0);
    let expected: SutResultCollect =
        Err(PE(Error::InvalidQName("".intern(), empty_qname_span)));

    let actual = sut.collect::<SutResultCollect>();

    assert_eq!(expected, actual);
}