tamer: obj::xmle::xir: Write l:map-from

This contains some awkward coupling for opening and closing tags to reduce
the complexity of the `Iterator` types that must be manually
specified.  That may be addressed shortly.
main
Mike Gerwitz 2021-10-05 16:13:47 -04:00
parent d54ef62a0d
commit 7f5064c665
5 changed files with 156 additions and 19 deletions

View File

@ -27,6 +27,7 @@
use super::IdentObjectData;
use crate::sym::SymbolId;
use fxhash::FxHashSet;
use std::collections::hash_set;
use std::iter::Chain;
use std::slice::Iter;
@ -196,7 +197,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
///
/// Multiple mappings may reference the same source field,
/// which would produce duplicate values if they are not filtered.
pub fn iter_map_froms_uniq(&self) -> impl Iterator<Item = SymbolId> {
pub fn iter_map_froms_uniq(&self) -> hash_set::IntoIter<SymbolId> {
self.iter_map()
.filter_map(|ident| {
ident.src().expect("internal error: missing map src").from

View File

@ -31,8 +31,8 @@
use crate::span::Span;
use crate::sym::{
CIdentStaticSymbolId, GlobalSymbolIntern, StaticSymbolId, SymbolId,
UriStaticSymbolId,
st_as_sym, CIdentStaticSymbolId, GlobalSymbolIntern, StaticSymbolId,
SymbolId, TameIdentStaticSymbolId, UriStaticSymbolId,
};
use std::convert::{TryFrom, TryInto};
use std::fmt::Display;
@ -44,10 +44,11 @@ pub mod writer;
pub trait QNameCompatibleStaticSymbolId: StaticSymbolId {}
impl QNameCompatibleStaticSymbolId for CIdentStaticSymbolId {}
impl QNameCompatibleStaticSymbolId for TameIdentStaticSymbolId {}
macro_rules! qname_const_inner {
($name:ident = :$local:ident) => {
const $name: QName = QName::st_cid_local($local);
const $name: QName = QName::st_cid_local(&$local);
};
($name:ident = $prefix:ident:$local:ident) => {
@ -284,21 +285,23 @@ impl QName {
}
/// Construct a constant QName from static C-style symbols.
pub const fn st_cid<T: QNameCompatibleStaticSymbolId>(
prefix_sym: &T,
local_sym: &T,
) -> Self {
use crate::sym;
pub const fn st_cid<T, U>(prefix_sym: &T, local_sym: &U) -> Self
where
T: QNameCompatibleStaticSymbolId,
U: QNameCompatibleStaticSymbolId,
{
Self(
Some(Prefix(NCName(sym::st_as_sym(prefix_sym)))),
LocalPart(NCName(sym::st_as_sym(local_sym))),
Some(Prefix(NCName(st_as_sym(prefix_sym)))),
LocalPart(NCName(st_as_sym(local_sym))),
)
}
/// Construct a constant QName with a local name only from a static
/// C-style symbol.
pub const fn st_cid_local(local_sym: CIdentStaticSymbolId) -> Self {
Self(None, LocalPart(NCName(local_sym.as_sym())))
pub const fn st_cid_local<T: QNameCompatibleStaticSymbolId>(
local_sym: &T,
) -> Self {
Self(None, LocalPart(NCName(st_as_sym(local_sym))))
}
}
@ -376,7 +379,7 @@ pub enum Text {
/// we know that the generated text could not possibly require escaping.
/// This does, however, put the onus on the caller to ensure that they got
/// the escaping status correct.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AttrValue {
/// Value that requires escaping.
///

View File

@ -30,8 +30,8 @@ use crate::{
sym::{st::*, SymbolId},
};
use arrayvec::ArrayVec;
use std::array;
use std::iter::Chain;
use std::{array, collections::hash_set};
qname_const! {
QN_DESC: :L_DESC,
@ -52,6 +52,8 @@ qname_const! {
QN_XMLNS_L: L_XMLNS:L_L,
QN_XMLNS_PREPROC: L_XMLNS:L_PREPROC,
QN_YIELDS: :L_YIELDS,
QN_L_MAP_FROM: L_L:L_MAP_FROM,
QN_L_FROM: L_L:L_FROM,
}
const HEADER_SIZE: usize = 16;
@ -177,7 +179,7 @@ impl<'a, T: IdentObjectData> DepListIter<'a, T> {
/// _The provided `value` must be escaped;_
/// it is blindly wrapped in [`AttrValue::Escaped`]!
///
/// Like [`refill_toks`](DepsListIter::refill_toks),
/// Like [`refill_toks`](DepListIter::refill_toks),
/// we push in reverse.
#[inline]
fn toks_push_attr(&mut self, name: QName, value: Option<SymbolId>) {
@ -235,26 +237,78 @@ impl<'a, T: IdentObjectData> Iterator for DepListIter<'a, T> {
}
}
// Maximum size of token buffer.
//
// [`MapFromsIter::new`] buffers two tokens and
// [`MapFromsIter::refill_toks`] buffers three tokens per `l:from`
// (the fourth is returned directly rather than buffered).
// Since a refill only happens once the buffer has been drained,
// the buffer never holds more than three tokens at a time.
const MAP_FROM_TOK_SIZE: usize = 3;
/// Generate `l:map-from` section.
///
/// Tokens are staged in a small fixed-size buffer and yielded by popping
/// from its end,
/// so they are always pushed in the reverse of the order in which they
/// are to be emitted.
struct MapFromsIter {
/// Source data to lower into `l:map-from`
/// (the unique `from` symbols of the map section).
iter: hash_set::IntoIter<SymbolId>,
/// Token buffer.
toks: ArrayVec<Token, MAP_FROM_TOK_SIZE>,
}
impl MapFromsIter {
    /// Create an iterator producing the `l:map-from` section from the
    /// unique map sources of `sections`.
    ///
    /// The token buffer is primed with the tokens that must be emitted
    /// before the first `l:from`:
    /// the closing tag for `l:dep` followed by the opening tag for
    /// `l:map-from`.
    /// Closing `l:dep` here couples this iterator to whatever precedes
    /// it in the token stream,
    /// and the matching close for `l:map-from` is _not_ produced by this
    /// iterator.
    fn new<'a, T: IdentObjectData>(sections: &'a Sections<T>) -> Self {
        let mut this = Self {
            iter: sections.iter_map_froms_uniq(),
            // Each `from` lowers into four tokens,
            // but the first of them is returned directly by
            // `refill_toks`,
            // so at most three are ever buffered
            // (see `MAP_FROM_TOK_SIZE`).
            toks: ArrayVec::new(),
        };

        // The buffer is popped from the end,
        // so tokens are pushed in reverse of their output order:
        // the `l:dep` close is emitted first, then the `l:map-from` open.
        this.toks.push(Token::Open(QN_L_MAP_FROM, LSPAN));
        this.toks.push(Token::Close(Some(QN_L_DEP), LSPAN));

        this
    }

    /// Lower the next `from` symbol (if any) into an `l:from` element.
    ///
    /// The opening tag is returned directly and the remaining tokens
    /// (attribute name, attribute value, and self-closing tag) are
    /// buffered in reverse so that they are popped in document order.
    /// The symbol is blindly wrapped in [`AttrValue::Escaped`].
    ///
    /// Returns [`None`] once the source iterator is exhausted,
    /// leaving the buffer untouched.
    fn refill_toks(&mut self) -> Option<Token> {
        self.iter.next().map(|from| {
            self.toks.push(Token::Close(None, LSPAN));
            self.toks
                .push(Token::AttrValue(AttrValue::Escaped(from), LSPAN));
            self.toks.push(Token::AttrName(QN_NAME, LSPAN));

            Token::Open(QN_L_FROM, LSPAN)
        })
    }
}
impl Iterator for MapFromsIter {
    type Item = Token;

    /// Yield the most recently buffered token,
    /// refilling from the next `from` symbol once the buffer is empty.
    fn next(&mut self) -> Option<Self::Item> {
        match self.toks.pop() {
            buffered @ Some(_) => buffered,
            None => self.refill_toks(),
        }
    }
}
const FOOTER_SIZE: usize = 2;
type FooterIter = array::IntoIter<Token, FOOTER_SIZE>;
#[inline]
fn footer() -> FooterIter {
[
Token::Close(Some(QN_L_DEP), LSPAN),
Token::Close(Some(QN_L_MAP_FROM), LSPAN),
Token::Close(Some(QN_PACKAGE), LSPAN),
]
.into_iter()
}
/// Iterator that lazily lowers `xmle` object files into Xir.
/// Iterator that lazily lowers `xmle` object files into XIR.
///
/// This serves primarily to encapsulate the nasty iterator type without
/// having to resort to dynamic dispatch,
/// since this iterator will receive over a million calls on larger
/// programs (and hundreds of thousands on smaller).
pub struct LowerIter<'a, T: IdentObjectData>(
Chain<Chain<HeaderIter, DepListIter<'a, T>>, FooterIter>,
Chain<
Chain<Chain<HeaderIter, DepListIter<'a, T>>, MapFromsIter>,
FooterIter,
>,
);
impl<'a, T: IdentObjectData> Iterator for LowerIter<'a, T> {
@ -280,6 +334,7 @@ pub fn lower_iter<'a, T: IdentObjectData>(
LowerIter(
header(pkg_name, relroot)
.chain(DepListIter::new(sections, relroot))
.chain(MapFromsIter::new(sections))
.chain(footer()),
)
}

View File

@ -28,6 +28,7 @@ use crate::ir::{
},
};
use crate::sym::{GlobalSymbolIntern, GlobalSymbolResolve};
use std::collections::HashSet;
type TestResult = Result<(), Box<dyn std::error::Error>>;
@ -337,3 +338,71 @@ fn test_writes_deps() -> TestResult {
Ok(())
}
/// `l:map-from` must contain exactly one `l:from` per _unique_ map
/// source,
/// even when multiple map identifiers share the same source.
#[test]
fn test_writes_map_froms() -> TestResult {
    let mut sections = Sections::new();
    let relroot = "relroot-deps".intern();

    let a = IdentObject::Ident(
        "a".intern(),
        IdentKind::Map,
        Source {
            from: Some("froma".intern()),
            ..Default::default()
        },
    );

    let b = IdentObject::Ident(
        "a".intern(),
        IdentKind::Map,
        Source {
            from: Some("fromb".intern()),
            ..Default::default()
        },
    );

    // Add a duplicate just to ensure that we're using the right method on
    // `Sections` for uniqueness.
    sections.map.push_body(&a);
    sections.map.push_body(&a);
    sections.map.push_body(&b);

    let mut iter = parser_from(
        lower_iter(&sections, "pkg".intern(), relroot)
            .skip_while(not(open(QN_L_MAP_FROM))),
    );

    let given = iter
        .next()
        .expect("tree object expected")
        .unwrap() // Tree
        .into_element()
        .expect("element expected");

    // Sanity check to ensure we have the element we're expecting.
    assert_eq!(QN_L_MAP_FROM, given.name());

    let froms = given.children();
    let mut found = HashSet::new();

    for from in froms.iter() {
        assert_eq!(QN_L_FROM, from.as_element().unwrap().name());

        // `insert` yields `false` if the value was already present,
        // which is the only way a duplicate `l:from` can be observed
        // once the values have been collected into a set.
        let is_new = found.insert(
            from.as_element()
                .unwrap()
                .attrs()
                .find(QN_NAME)
                .expect("expecting @name")
                .value_atom()
                .unwrap(),
        );

        assert!(is_new, "duplicate l:from emitted");
    }

    assert!(found.contains(&AttrValue::Escaped("froma".intern())));
    assert!(found.contains(&AttrValue::Escaped("fromb".intern())));

    // Nothing other than the two expected sources should be present.
    assert_eq!(2, found.len());

    Ok(())
}

View File

@ -295,6 +295,13 @@ static_symbol_newtypes! {
/// its string value is incidental and should not be relied upon.
mark: MarkStaticSymbolId<global::ProgSymSize>,
/// A symbol suitable as a TAME identifier.
///
/// This is [`CIdentStaticSymbolId`] with `-` added:
/// `[a-zA-Z_-][a-zA-Z0-9_-]*`.
/// This is also suitable as an XML node or attribute name.
tid: TameIdentStaticSymbolId<global::ProgSymSize>,
/// Symbol representing a URI.
///
/// This is intended for use primarily as an XML namespace.
@ -392,12 +399,14 @@ pub mod st {
L_FALSE: cid "false",
L_FLOAT: cid "float",
L_FUNC: cid "func",
L_FROM: cid "from",
L_GEN: cid "gen",
L_GENERATED: cid "generated",
L_INTEGER: cid "integer",
L_L: cid "l",
L_LPARAM: cid "lparam",
L_MAP: cid "map",
L_MAP_FROM: tid "map-from",
L_MAP_HEAD: qname "map:head",
L_MAP_TAIL: qname "map:tail",
L_META: cid "meta",