From 418bd34005045b098312a3a5eaffa0a28590e7c5 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Tue, 19 Sep 2023 16:18:04 -0400 Subject: [PATCH] tame: Introduce __pkguniq and preproc:pkg-generate-id to replace generate-id This modifies the XSLT-based compiler to generate ids that are expected to be unique across packages. No such guarantee exists today; `generate-id()` relies on the position of the node within a tree, which could easily be the same across multiple compiler invocations for separate packages. This situation seldom occurs, but has happened with increased frequency lately in a system with >1000 packages. It is more likely to occur in packages that are very similar to one another or where the beginning of the package is similar (such as packages used as configuration for taxes for each individual state). This derives a SHA-256 hash from the canonical package name (well, not canonical according to TAMER, but close: without the leading slash), truncating it to 32 bits. I used a birthday attack to estimate what the size of this value ought to be: sqrt(2^32) = 65536, which is way more packages than the poor XSLT-based compiler is going to handle. If ever it needs to be increased due to conflicts, that is simple enough. DEV-14965 --- src/current/include/dslc-base.xsl | 32 +++++++++++- src/current/include/preproc/expand.xsl | 2 +- src/current/include/preproc/macros.xsl | 6 +-- src/current/include/preproc/template.xsl | 4 +- .../src/com/lovullo/dslc/DslCompiler.java | 50 +++++++++++++++++++ 5 files changed, 85 insertions(+), 9 deletions(-) diff --git a/src/current/include/dslc-base.xsl b/src/current/include/dslc-base.xsl index 7950b8ab..c232ea04 100644 --- a/src/current/include/dslc-base.xsl +++ b/src/current/include/dslc-base.xsl @@ -26,7 +26,9 @@ template that is intended for use with dslc should include this. 
--> + xmlns="http://www.w3.org/1999/XSL/Transform" + xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:preproc="http://www.lovullo.com/rater/preproc"> + + + - + diff --git a/src/current/include/preproc/template.xsl b/src/current/include/preproc/template.xsl index dcb46f82..295d1cf4 100644 --- a/src/current/include/preproc/template.xsl +++ b/src/current/include/preproc/template.xsl @@ -411,7 +411,7 @@ with an optional looping construct --> diff --git a/src/current/src/com/lovullo/dslc/DslCompiler.java b/src/current/src/com/lovullo/dslc/DslCompiler.java index 87de3ddc..f3d2db57 100644 --- a/src/current/src/com/lovullo/dslc/DslCompiler.java +++ b/src/current/src/com/lovullo/dslc/DslCompiler.java @@ -31,6 +31,7 @@ package com.lovullo.dslc; import java.io.*; import java.nio.file.Path; import java.nio.file.Paths; +import java.security.MessageDigest; import java.util.HashMap; import java.util.Map; import javax.xml.transform.Source; @@ -163,6 +164,10 @@ public class DslCompiler new QName( "__rseed" ), XdmValue.makeValue( (int)( Math.random() * 10e6 ) ) ); + t.setParameter( + new QName( "__pkguniq" ), + XdmValue.makeValue( _createPkgUniq( srcpkg ) ) + ); _setTemplateParams( t, params ); @@ -173,6 +178,51 @@ public class DslCompiler } + // Generate an identifier that is expected to be unique given a + // canonical package name. + // + // This produces a string that is expected to + // - Be usable as a non-leading component of a C-style identifier; + // - Provide enough entropy so as to be unlikely to cause + // conflicts between thousands of packages; and + // - Is reasonably short so as not to bloat generated identifier + // sizes too greatly. + private String _createPkgUniq(String srcpkg) throws Exception { + // This is used only for uniqueness, not security. The choice + // of hash function is not particularly important so long as it + // provides a good distribution. 
If collisions are encountered + // between packages with a good algorithm, increase the + // truncation length. + MessageDigest md = MessageDigest.getInstance( "SHA-256" ); + byte[] digest = md.digest( srcpkg.getBytes( "UTF-8" ) ); + + // Ensure we received a string of the expected length, otherwise + // we risk sending a non-unique id off to the XSLT-based + // compiler. + int len_expected = 32; // 256 bits + int len = digest.length; + if ( len != len_expected ) { + throw new Exception( + String.format( + "Unexpected pkguniq length (expected %d): %d", + len_expected, + len + ) + ); + } + + // 32 bits = 4 bytes = 8 hex chars. + // Birthday attack: sqrt(2^32) = 65536 packages + StringBuilder hex = new StringBuilder(); + int use_len = 4; + for ( int i = 0; i < use_len; i++ ) { + hex.append( String.format( "%02x", digest[i] ) ); + } + + return hex.toString(); + } + + private void _setTemplateParams( XsltTransformer t, HashMap params