tame: Introduce __pkguniq and preproc:pkg-generate-id to replace generate-id
This modifies the XSLT-based compiler to generate ids that are expected to be unique across packages. No such guarantee exists today; `generate-id()` relies on the position of the node within a tree, which could easily be the same across multiple compiler invocations for separate packages. This situation seldom occurs, but has happened with increased frequency lately in a system with >1000 packages. It is more likely to occur in packages that are very similar to one another or where the beginning of the package is similar (such as packages used as configuration for taxes for each individual state). This derives a SHA-256 hash from the canonical package name (well, not canonical according to TAMER, but close: without the leading slash), truncating it to 32 bits. I used the birthday bound to estimate what the size of this value ought to be: collisions only become likely at around sqrt(2^32) = 65536 packages, which is way more packages than the poor XSLT-based compiler is going to handle. If ever it needs to be increased due to conflicts, that is simple enough. DEV-14965
main
parent
bdd98a5d92
commit
418bd34005
|
@ -26,7 +26,9 @@
|
||||||
template that is intended for use with dslc should include this.
|
template that is intended for use with dslc should include this.
|
||||||
-->
|
-->
|
||||||
<stylesheet version="2.0"
|
<stylesheet version="2.0"
|
||||||
xmlns="http://www.w3.org/1999/XSL/Transform">
|
xmlns="http://www.w3.org/1999/XSL/Transform"
|
||||||
|
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||||
|
xmlns:preproc="http://www.lovullo.com/rater/preproc">
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Absolute path to root of TAME
|
Absolute path to root of TAME
|
||||||
|
@ -80,6 +82,26 @@
|
||||||
-->
|
-->
|
||||||
<param name="__rseed" />
|
<param name="__rseed" />
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A package-unique string
|
||||||
|
|
||||||
|
You should use `preproc:pkg-generate-id` instead of this value directly.
|
||||||
|
|
||||||
|
This value is deterministic, derived from `__srcpkg`, and so will not
|
||||||
|
change between runs; it can be used to generate identifier names that are
|
||||||
|
unique across packages, which is not something that we can rely on
|
||||||
|
`generate-id()` for on its own.
|
||||||
|
|
||||||
|
In practice, this can be concatenated with other generated strings,
|
||||||
|
including `generate-id()`-derived strings.
|
||||||
|
|
||||||
|
_There is no guarantee that this string will begin with a letter_, so you
|
||||||
|
should generate your identifiers accordingly.
|
||||||
|
|
||||||
|
See `DslCompiler.java` for implementation.
|
||||||
|
-->
|
||||||
|
<param name="__pkguniq" as="xs:string" />
|
||||||
|
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Root node of template on which stylesheet was invoked
|
Root node of template on which stylesheet was invoked
|
||||||
|
@ -114,4 +136,12 @@
|
||||||
</choose>
|
</choose>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
|
<function name="preproc:pkg-generate-id" as="xs:string">
|
||||||
|
<param name="refnode" as="node()" />
|
||||||
|
|
||||||
|
<sequence select="concat(
|
||||||
|
'_pu', $__pkguniq, '_',
|
||||||
|
generate-id( $refnode ) )" />
|
||||||
|
</function>
|
||||||
|
|
||||||
</stylesheet>
|
</stylesheet>
|
||||||
|
|
|
@ -166,7 +166,7 @@
|
||||||
<template match="c:let[ not( @name ) ]" mode="preproc:expand" priority="5">
|
<template match="c:let[ not( @name ) ]" mode="preproc:expand" priority="5">
|
||||||
<copy>
|
<copy>
|
||||||
<sequence select="@*" />
|
<sequence select="@*" />
|
||||||
<attribute name="name" select="generate-id(.)" />
|
<attribute name="name" select="preproc:pkg-generate-id(.)" />
|
||||||
|
|
||||||
<apply-templates select="*" mode="preproc:expand" />
|
<apply-templates select="*" mode="preproc:expand" />
|
||||||
</copy>
|
</copy>
|
||||||
|
|
|
@ -415,11 +415,7 @@
|
||||||
<template match="lv:any|lv:all" mode="preproc:class-groupgen" priority="5">
|
<template match="lv:any|lv:all" mode="preproc:class-groupgen" priority="5">
|
||||||
<param name="legacy-classify" as="xs:boolean" tunnel="yes" />
|
<param name="legacy-classify" as="xs:boolean" tunnel="yes" />
|
||||||
|
|
||||||
<!-- this needs to be unique enough that there is unlikely to be a conflict
|
<variable name="id" select="preproc:pkg-generate-id(.)" />
|
||||||
between generated ids in various packages; generate-id is not enough for
|
|
||||||
cross-package guarantees (indeed, I did witness conflicts), so there is
|
|
||||||
a random seed passed into the stylesheet externally -->
|
|
||||||
<variable name="id" select="concat( $__rseed, generate-id(.) )" />
|
|
||||||
|
|
||||||
<variable name="parent-name" select="ancestor::lv:classify/@as" />
|
<variable name="parent-name" select="ancestor::lv:classify/@as" />
|
||||||
<variable name="yields" select="concat( 'is', $id )" />
|
<variable name="yields" select="concat( 'is', $id )" />
|
||||||
|
|
|
@ -411,7 +411,7 @@
|
||||||
with an optional looping construct
|
with an optional looping construct
|
||||||
-->
|
-->
|
||||||
<template match="lv:inline-template" mode="preproc:macros" priority="5">
|
<template match="lv:inline-template" mode="preproc:macros" priority="5">
|
||||||
<variable name="name" select="concat( '___i', generate-id(.), '___' )" />
|
<variable name="name" select="concat( '___i', preproc:pkg-generate-id(.), '___' )" />
|
||||||
<variable name="inline" select="." />
|
<variable name="inline" select="." />
|
||||||
|
|
||||||
<!-- generate template -->
|
<!-- generate template -->
|
||||||
|
@ -1155,7 +1155,7 @@
|
||||||
tunnel="yes" />
|
tunnel="yes" />
|
||||||
|
|
||||||
<value-of select="." />
|
<value-of select="." />
|
||||||
<value-of select="generate-id( $apply )" />
|
<value-of select="preproc:pkg-generate-id( $apply )" />
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ package com.lovullo.dslc;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
import java.security.MessageDigest;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import javax.xml.transform.Source;
|
import javax.xml.transform.Source;
|
||||||
|
@ -163,6 +164,10 @@ public class DslCompiler
|
||||||
new QName( "__rseed" ),
|
new QName( "__rseed" ),
|
||||||
XdmValue.makeValue( (int)( Math.random() * 10e6 ) )
|
XdmValue.makeValue( (int)( Math.random() * 10e6 ) )
|
||||||
);
|
);
|
||||||
|
t.setParameter(
|
||||||
|
new QName( "__pkguniq" ),
|
||||||
|
XdmValue.makeValue( _createPkgUniq( srcpkg ) )
|
||||||
|
);
|
||||||
|
|
||||||
_setTemplateParams( t, params );
|
_setTemplateParams( t, params );
|
||||||
|
|
||||||
|
@ -173,6 +178,51 @@ public class DslCompiler
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Generate an identifier that is expected to be unique given a
|
||||||
|
// canonical package name.
|
||||||
|
//
|
||||||
|
// This produces a string that is expected to
|
||||||
|
// - Be usable as a non-leading component of a C-style identifier;
|
||||||
|
// - Provide enough entropy so as to be unlikely to cause
|
||||||
|
// conflicts between thousands of packages; and
|
||||||
|
// - Be reasonably short so as not to bloat generated identifier
|
||||||
|
// sizes too greatly.
|
||||||
|
private String _createPkgUniq(String srcpkg) throws Exception {
|
||||||
|
// This is used only for uniqueness, not security. The choice
|
||||||
|
// of hash function is not particularly important so long as it
|
||||||
|
// provides a good distribution. If collisions are encountered
|
||||||
|
// between packages with a good algorithm, increase the
|
||||||
|
// truncation length.
|
||||||
|
MessageDigest md = MessageDigest.getInstance( "SHA-256" );
|
||||||
|
byte[] digest = md.digest( srcpkg.getBytes( "UTF-8" ) );
|
||||||
|
|
||||||
|
// Ensure we received a digest of the expected length, otherwise
|
||||||
|
// we risk sending a non-unique id off to the XSLT-based
|
||||||
|
// compiler.
|
||||||
|
int len_expected = 32; // 256 bits
|
||||||
|
int len = digest.length;
|
||||||
|
if ( len != len_expected ) {
|
||||||
|
throw new Exception(
|
||||||
|
String.format(
|
||||||
|
"Unexpected pkguniq length (expected %d): %d",
|
||||||
|
len_expected,
|
||||||
|
len
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 32 bits = 4 bytes = 8 hex chars.
|
||||||
|
// Birthday bound: collisions become likely near sqrt(2^32) = 65536 packages
|
||||||
|
StringBuilder hex = new StringBuilder();
|
||||||
|
int use_len = 4;
|
||||||
|
for ( int i = 0; i < use_len; i++ ) {
|
||||||
|
hex.append( String.format( "%02x", digest[i] ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
return hex.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private void _setTemplateParams(
|
private void _setTemplateParams(
|
||||||
XsltTransformer t,
|
XsltTransformer t,
|
||||||
HashMap<String,String> params
|
HashMap<String,String> params
|
||||||
|
|
Loading…
Reference in New Issue