tame/tamer/src/xir/parse/ele/test.rs

// XIR element parser generator tests
//
//  Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Element parser generator tests.
//!
//! It is expected to be understood for these tests that `ele_parse`
//!   directly invokes `attr_parse` to perform all attribute parsing,
//!     and so testing of that parsing is not duplicated here.
//! A brief visual inspection of the implementation of `ele_parse`
//!   should suffice to verify this claim.

use crate::{
    convert::ExpectInto,
    parse::{Object, ParseError, ParseState, Parsed},
    span::{Span, DUMMY_SPAN},
    sym::SymbolId,
    xir::{
        attr::{Attr, AttrSpan},
        flat::{Depth, XirfToken},
        st::qname::*,
        CloseSpan, EleNameLen, EleSpan, OpenSpan,
    },
};

const S1: Span = DUMMY_SPAN;
const S2: Span = S1.offset_add(1).unwrap();
const S3: Span = S2.offset_add(1).unwrap();
const S4: Span = S3.offset_add(1).unwrap();
const S5: Span = S4.offset_add(1).unwrap();
const S6: Span = S5.offset_add(1).unwrap();

// Some number (value does not matter).
const N: EleNameLen = 10;

#[test]
fn empty_element_no_attrs_no_close() {
    #[derive(Debug, PartialEq, Eq)]
    struct Foo;
    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo,
        }
    }

    let toks = vec![
        // Length (second argument) here is arbitrary.
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        XirfToken::Close(None, CloseSpan::empty(S2), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,  // [Sut]  Open
            Parsed::Object(Foo), // [Sut@] Close (>LA)
            Parsed::Incomplete,  // [Sut]  Close (<LA)
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}

// Same as above,
//   but with an object emitted on Close rather than Incomplete.
#[test]
fn empty_element_no_attrs_with_close() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Attr,
        Close,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::Attr,
            / => Foo::Close,
        }
    }

    let toks = vec![
        // Length (second argument) here is arbitrary.
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        XirfToken::Close(None, CloseSpan::empty(S2), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,         // [Sut]  Open
            Parsed::Object(Foo::Attr),  // [Sut@] Close (>LA)
            Parsed::Object(Foo::Close), // [Sut]  Close (<LA)
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}

#[test]
fn empty_element_with_attr_bindings() {
    #[derive(Debug, PartialEq, Eq)]
    struct Foo(SymbolId, SymbolId, (Span, Span));
    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        // In practice we wouldn't actually use Attr
        //   (we'd use an appropriate newtype),
        //     but for the sake of this test we'll keep things simple.
        Sut := QN_PACKAGE {
            @ {
                name: (QN_NAME) => Attr,
                value: (QN_VALUE) => Attr,
            } => Foo(
                name.value(),
                value.value(),
                (name.attr_span().value_span(), value.attr_span().value_span())
            ),
        }
    }

    let name_val = "bar".into();
    let value_val = "baz".into();

    let toks = vec![
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        // Purposefully out of order just to demonstrate that order does
        //   not matter.
        XirfToken::Attr(Attr(QN_VALUE, value_val, AttrSpan(S2, S3))),
        XirfToken::Attr(Attr(QN_NAME, name_val, AttrSpan(S4, S5))),
        XirfToken::Close(None, CloseSpan::empty(S6), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,                                 // Open
            Parsed::Incomplete,                                 // Attr
            Parsed::Incomplete,                                 // Attr
            Parsed::Object(Foo(name_val, value_val, (S5, S3))), // Close
            Parsed::Incomplete,                                 // Close (LA)
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}

// An unexpected element produces an error for the offending token and
//   then employs a recovery strategy so that parsing may continue.
#[test]
fn unexpected_element() {
    ele_parse! {
        type Object = ();

        Sut := QN_PACKAGE {
            // symbol soup
            @ {} => (),
        }
    }

    let unexpected = "unexpected".unwrap_into();
    let span = OpenSpan(S1, 3);

    // Note that the depth is >0 just to ensure that we don't
    //   hard-code some assumption that `0` means "root".
    const DEPTH_ROOT: Depth = Depth(5);
    const DEPTH_CHILD: Depth = Depth(6);

    // Implied here is that we have valid XIRF.
    // This means that,
    //   in the context of the larger real-world system
    //     (not these test cases),
    //   even as we discard tokens,
    //   XIRF is still doing its job before feeding them to us,
    //     meaning that we get XIRF's parsing even though we've chosen
    //     to ignore further input for this element.
    // In other words---our
    //   decision to skip tokens does not skip the validations that XIRF
    //   performs,
    //     such as ensuring proper nesting.
    let toks = vec![
        // Any name besides `QN_PACKAGE`
        XirfToken::Open(unexpected, span, DEPTH_ROOT),
        // From this point on we are in a recovery state,
        //   and will not emit tokens
        //     (or further errors)
        //   for these inputs.
        XirfToken::Attr(Attr(QN_VALUE, "ignored".into(), AttrSpan(S2, S3))),
        XirfToken::Open(QN_NAME, OpenSpan(S4, N), DEPTH_CHILD),
        // This ensures that closing at a different depth will not count
        //   as the closing node for recovery.
        XirfToken::Close(None, CloseSpan::empty(S5), DEPTH_CHILD),
        // This final token closes the element that caused the error,
        //   and so brings us into an accepting state.
        XirfToken::Close(Some(unexpected), CloseSpan(S6, 3), DEPTH_ROOT),
    ];

    let mut sut = Sut::parse(toks.into_iter());

    // The first token of input is the unexpected element,
    //   and so should result an error.
    // The referenced span should be the _name_ of the element,
    //   not the tag,
    //   since the error is referring not to the fact that an element
    //     was encountered
    //       (which was expected),
    //       but to the fact that the name was not the one expected.
    assert_eq!(
        // TODO: This references generated identifiers.
        Some(Err(ParseError::StateError(SutError_::UnexpectedEle_(
            unexpected,
            span.name_span()
        )))),
        sut.next(),
    );

    // We should have now entered a recovery mode whereby we discard
    //   input until we close the element that introduced the error.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Attr
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Open  (C)
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close (C)

    // The recovery state must not be in an accepting state,
    //   because we didn't close at the root depth yet.
    let (mut sut, _) =
        sut.finalize().expect_err("recovery must not be accepting");

    // The next token should close the element that is in error,
    //   and bring us into an accepting state.
    // But since we are not emitting tokens,
    //   we'll still be marked as incomplete.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close (R)
    sut.finalize()
        .expect("recovery must complete in an accepting state");
}

#[test]
fn single_child_element() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        RootAttr,
        ChildAttr,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::RootAttr,

            Child,
        }

        Child := QN_CLASSIFY {
            @ {} => Foo::ChildAttr,
        }
    }

    let toks = vec![
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        XirfToken::Open(QN_CLASSIFY, OpenSpan(S2, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S4, N), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,             // [Sut]    Root Open
            Parsed::Object(Foo::RootAttr),  // [Sut@]   Child Open (>LA)
            Parsed::Incomplete,             // [Child]  Child Open (<LA)
            Parsed::Object(Foo::ChildAttr), // [Child@] Child Close (>LA)
            Parsed::Incomplete,             // [Child]  Child Close (<LA)
            Parsed::Incomplete,             // [Sut]    Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}

/// Expands off of [`single_child_element`],
///   but the former provides a clear indication of whether a single state
///   is properly recognized without having to worry about how nonterminals'
///   states transition to one-another in sequence.
#[test]
fn multiple_child_elements_sequential() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        RootOpen,
        ChildAOpen,
        ChildAClose,
        ChildBOpen,
        ChildBClose,
        RootClose,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::RootOpen,
            / => Foo::RootClose,

            // Order matters here.
            ChildA,
            ChildB,
        }

        ChildA := QN_CLASSIFY {
            @ {} => Foo::ChildAOpen,
            / => Foo::ChildAClose,
        }

        ChildB := QN_EXPORT {
            @ {} => Foo::ChildBOpen,
            / => Foo::ChildBClose,
        }
    }

    let toks = vec![
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        // ChildA
        XirfToken::Open(QN_CLASSIFY, OpenSpan(S2, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // Child B
        XirfToken::Open(QN_EXPORT, OpenSpan(S3, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S5, N), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,               // [Sut]     Root Open
            Parsed::Object(Foo::RootOpen),    // [Sut@]    ChildA Open (>LA)
            Parsed::Incomplete,               // [ChildA]  ChildA Open (<LA)
            Parsed::Object(Foo::ChildAOpen),  // [ChildA@] ChildA Close (>LA)
            Parsed::Object(Foo::ChildAClose), // [ChildA]  ChildA Close (<LA)
            Parsed::Incomplete,               // [ChildB]  ChildB Open
            Parsed::Object(Foo::ChildBOpen),  // [ChildB@] ChildB Close (>LA)
            Parsed::Object(Foo::ChildBClose), // [ChildB]  ChildB Close (<LA)
            Parsed::Object(Foo::RootClose),   // [Sut]     Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}

// TODO: This error recovery seems to be undesirable,
//     both consuming an element and skipping the requirement;
//       it is beneficial only in showing that recovery is possible and
//       accounted for.
//  Let's revisit once we're further along and have concrete examples to
//    determine if there is a proper umbrella recovery strategy,
//      or if it needs to be configurable depending on context.
#[test]
fn child_error_and_recovery() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        ChildABad, // Will not yield this one.
        ChildB,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::Root,

            // This is what we're expecting,
            //   but not what we will provide.
            ChildA,

            // But we _will_ provide this expected value,
            //   after error recovery ignores the above.
            ChildB,
        }

        ChildA := QN_CLASSIFY {
            @ {} => Foo::ChildABad,
        }

        ChildB := QN_EXPORT {
            @ {} => Foo::ChildB,
        }
    }

    let unexpected = "unexpected".unwrap_into();
    let span = OpenSpan(S2, N);

    let toks = vec![
        // The first token is the expected root.
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        // --> But this one is unexpected (name).
        XirfToken::Open(unexpected, span, Depth(1)),
        // And so we should ignore it up to this point.
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // At this point,
        //   having encountered the closing tag,
        //   the next token should result in a dead state,
        //     which should then result in a transition away from the state
        //     for `ChildA`,
        //       which means that we expect `ChildB`.
        // Parsing continues normally.
        XirfToken::Open(QN_EXPORT, OpenSpan(S4, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S4, N), Depth(0)),
    ];

    let mut sut = Sut::parse(toks.into_iter());

    // The first token is expected,
    //   and we enter attribute parsing for `Sut`.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut] Open 0

    // The second token _will_ be unexpected,
    //   but we're parsing attributes for `Sut`,
    //   so we don't know that yet.
    // Instead,
    //   the `Open` ends attribute parsing and yields a token of lookahead.
    assert_eq!(
        Some(Ok(Parsed::Object(Foo::Root))), // [Sut@] Open 1 (>LA)
        sut.next()
    );

    // The token of lookahead (`Open`) is unexpected for `ChildA`,
    //   which must throw an error and enter a recovery state.
    // The token should be consumed and returned in the error,
    //   _not_ produced as a token of lookahead,
    //   since we do not want to reprocess bad input.
    assert_eq!(
        // TODO: This references generated identifiers.
        Some(Err(ParseError::StateError(SutError_::ChildA(
            ChildAError_::UnexpectedEle_(unexpected, span.name_span())
        )))),
        sut.next(),
    );

    // The next token is the self-closing `Close` for the unexpected opening
    //   tag.
    // Since we are in recovery,
    //   it should be ignored.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [ChildA!] Close 1

    // Having recovered from the error,
    //   we should happily accept the remaining tokens starting with
    //   `ChildB`.
    // An intelligent system ought to accept `ChildA` if it didn't produce
    //   any output for the erroneous input,
    //     but that's not what we're doing yet.
    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,          // [ChildB]  Open 1
            Parsed::Object(Foo::ChildB), // [ChildB@] Close 1 (>LA)
            Parsed::Incomplete,          // [ChildB]  Close 1 (<LA)
            Parsed::Incomplete,          // [Sut]     Close 0
        ]),
        sut.collect()
    );
}