tame: Introduce __pkguniq and preproc:pkg-generate-id to replace generate-id
This modifies the XSLT-based compiler to generate ids that are expected to be unique across packages. No such guarantee exists today; `generate-id()` relies on the position of the node within a tree, which could easily be the same across multiple compiler invocations for separate packages. This situation seldom occurs, but has happened with increased frequency lately in a system with >1000 packages. It is more likely to occur in packages that are very similar to one another or where the beginning of the package is similar (such as packages used as configuration for taxes for each individual state). This derives a SHA-256 hash from the canonical package name (well, not canonical according to TAMER, but close: without the leading slash), truncating it to 32 bits. I used a birthday attack to estimate what the size of this value ought to be: sqrt(2^32) = 65536, which is way more packages than the poor XSLT-based compiler is going to handle. If ever it needs to be increased due to conflicts, that is simple enough. DEV-14965 main
parent
bdd98a5d92
commit
418bd34005
|
@ -26,7 +26,9 @@
|
|||
template that is intended for use with dslc should include this.
|
||||
-->
|
||||
<stylesheet version="2.0"
|
||||
xmlns="http://www.w3.org/1999/XSL/Transform">
|
||||
xmlns="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:preproc="http://www.lovullo.com/rater/preproc">
|
||||
|
||||
<!--
|
||||
Absolute path to root of TAME
|
||||
|
@ -80,6 +82,26 @@
|
|||
-->
|
||||
<param name="__rseed" />
|
||||
|
||||
<!--
|
||||
A package-unique string
|
||||
|
||||
You should use `preproc:pkg-generate-id` instead of this value directly.
|
||||
|
||||
This value is deterministic, derived from `__srcpkg`, and so will not
|
||||
change between runs; it can be used to generate identifier names that are
|
||||
unique across packages, which is not something that we can rely on
|
||||
`generate-id()` for on its own.
|
||||
|
||||
In practice, this can be concatenated with other generated strings,
|
||||
including `generate-id()`-derived strings.
|
||||
|
||||
_There is no guarantee that this string will begin with a letter_, so you
|
||||
should generate your identifiers accordingly.
|
||||
|
||||
See `DslCompiler.java` for implementation.
|
||||
-->
|
||||
<param name="__pkguniq" as="xs:string" />
|
||||
|
||||
|
||||
<!--
|
||||
Root node of template on which stylesheet was invoked
|
||||
|
@ -114,4 +136,12 @@
|
|||
</choose>
|
||||
</template>
|
||||
|
||||
<!--
  Generate an identifier for REFNODE that is expected to be unique across
  packages.

  The result has the form `_pu<PKGUNIQ>_<ID>`, where PKGUNIQ is the value
  of the `__pkguniq` stylesheet parameter and ID is the result of
  `generate-id()` on REFNODE.  The leading `_pu` means that the result
  always begins with an underscore, regardless of what `__pkguniq`
  contains.
-->
<function name="preproc:pkg-generate-id" as="xs:string">
  <param name="refnode" as="node()" />

  <sequence select="string-join(
                      ( '_pu',
                        $__pkguniq,
                        '_',
                        generate-id( $refnode ) ),
                      '' )" />
</function>
|
||||
|
||||
</stylesheet>
|
||||
|
|
|
@ -166,7 +166,7 @@
|
|||
<template match="c:let[ not( @name ) ]" mode="preproc:expand" priority="5">
|
||||
<copy>
|
||||
<sequence select="@*" />
|
||||
<attribute name="name" select="generate-id(.)" />
|
||||
<attribute name="name" select="preproc:pkg-generate-id(.)" />
|
||||
|
||||
<apply-templates select="*" mode="preproc:expand" />
|
||||
</copy>
|
||||
|
|
|
@ -415,11 +415,7 @@
|
|||
<template match="lv:any|lv:all" mode="preproc:class-groupgen" priority="5">
|
||||
<param name="legacy-classify" as="xs:boolean" tunnel="yes" />
|
||||
|
||||
<!-- this needs to be unique enough that there is unlikely to be a conflict
|
||||
between generated ids in various packages; generate-id is not enough for
|
||||
cross-package guarantees (indeed, I did witness conflicts), so there is
|
||||
a random seed passed into the stylesheet externally -->
|
||||
<variable name="id" select="concat( $__rseed, generate-id(.) )" />
|
||||
<variable name="id" select="preproc:pkg-generate-id(.)" />
|
||||
|
||||
<variable name="parent-name" select="ancestor::lv:classify/@as" />
|
||||
<variable name="yields" select="concat( 'is', $id )" />
|
||||
|
|
|
@ -411,7 +411,7 @@
|
|||
with an optional looping construct
|
||||
-->
|
||||
<template match="lv:inline-template" mode="preproc:macros" priority="5">
|
||||
<variable name="name" select="concat( '___i', generate-id(.), '___' )" />
|
||||
<variable name="name" select="concat( '___i', preproc:pkg-generate-id(.), '___' )" />
|
||||
<variable name="inline" select="." />
|
||||
|
||||
<!-- generate template -->
|
||||
|
@ -1155,7 +1155,7 @@
|
|||
tunnel="yes" />
|
||||
|
||||
<value-of select="." />
|
||||
<value-of select="generate-id( $apply )" />
|
||||
<value-of select="preproc:pkg-generate-id( $apply )" />
|
||||
</template>
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ package com.lovullo.dslc;
|
|||
import java.io.*;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.security.MessageDigest;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import javax.xml.transform.Source;
|
||||
|
@ -163,6 +164,10 @@ public class DslCompiler
|
|||
new QName( "__rseed" ),
|
||||
XdmValue.makeValue( (int)( Math.random() * 10e6 ) )
|
||||
);
|
||||
t.setParameter(
|
||||
new QName( "__pkguniq" ),
|
||||
XdmValue.makeValue( _createPkgUniq( srcpkg ) )
|
||||
);
|
||||
|
||||
_setTemplateParams( t, params );
|
||||
|
||||
|
@ -173,6 +178,51 @@ public class DslCompiler
|
|||
}
|
||||
|
||||
|
||||
// Generate an identifier that is expected to be unique given a
|
||||
// canonical package name.
|
||||
//
|
||||
// This produces a string that is expected to
|
||||
// - Be usable as a non-leading component of a C-style identifier;
|
||||
// - Provide enough entropy so as to be unlikely to cause
|
||||
// conflicts between thousands of packages; and
|
||||
// - Is reasonably short so as not to bloat generated identifier
|
||||
// sizes too greatly.
|
||||
private String _createPkgUniq(String srcpkg) throws Exception {
|
||||
// This is used only for uniqueness, not security. The choice
|
||||
// of hash function is not particularly important so long as it
|
||||
// provides a good distribution. If collisions are encountered
|
||||
// between packages with a good algorithm, increase the
|
||||
// truncation length.
|
||||
MessageDigest md = MessageDigest.getInstance( "SHA-256" );
|
||||
byte[] digest = md.digest( srcpkg.getBytes( "UTF-8" ) );
|
||||
|
||||
// Ensure we received a string of the expected length, otherwise
|
||||
// we risk sending a non-unique id off to the XSLT-based
|
||||
// compiler.
|
||||
int len_expected = 32; // 256 bits
|
||||
int len = digest.length;
|
||||
if ( len != len_expected ) {
|
||||
throw new Exception(
|
||||
String.format(
|
||||
"Unexpected pkguniq length (expected %d): %d",
|
||||
len_expected,
|
||||
len
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// 32 bits = 4 bytes = 8 hex chars.
|
||||
// Birthday attack: sqrt(2^32) = 65536 packages
|
||||
StringBuilder hex = new StringBuilder();
|
||||
int use_len = 4;
|
||||
for ( int i = 0; i < use_len; i++ ) {
|
||||
hex.append( String.format( "%02x", digest[i] ) );
|
||||
}
|
||||
|
||||
return hex.toString();
|
||||
}
|
||||
|
||||
|
||||
private void _setTemplateParams(
|
||||
XsltTransformer t,
|
||||
HashMap<String,String> params
|
||||
|
|
Loading…
Reference in New Issue