Improve symbol table processing time

preproc:symtable-process-symbols is run on each pass (e.g. during initial
processing and after each template expansion) to introduce new symbols into
the symbol table from imports and newly discovered symbols.

This processing was previously optimized a bit using maps to reduce the cost
of symbol table lookups, but the processing was still inefficient, relying
on XSLT1-style processing (as originally written) for deduplication.  This
now uses `for-each-group` and `perform-sort` to offload the expensive
computation onto Saxon, which is much more efficient.

Symbol table processing has long been a culprit, but I hadn't attempted to
optimize further in recent months because of TAMER work.  Since TAMER has
been on pause for a few months with other things needing my attention, I
needed to provide a short-term performance improvement to keep up with
increasing build times.

DEV-11716
main
Mike Gerwitz 2022-02-17 12:30:25 -05:00
parent 1796753940
commit ce0da76ccf
3 changed files with 32 additions and 104 deletions

View File

@ -27,6 +27,9 @@ Compiler
- `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a - `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a
single file as jobs are completed. See `tamed --help` for more single file as jobs are completed. See `tamed --help` for more
information and examples. information and examples.
- Improved symbol table processing performance.
- For packages/maps with thousands of dependencies, this may improve
processing time by a minute or more.
Documentation Documentation
------------- -------------

View File

@ -88,7 +88,7 @@
<template match="preproc:symtable" mode="preproc:depgen" priority="9"> <template match="preproc:symtable" mode="preproc:depgen" priority="9">
<variable name="symtable" select="." /> <variable name="symtable" select="." as="element( preproc:symtable )" />
<variable name="symtable-map" as="map( xs:string, element( preproc:sym ) )" <variable name="symtable-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge( for $sym in $symtable/preproc:sym select="map:merge( for $sym in $symtable/preproc:sym

View File

@ -243,12 +243,6 @@
</call-template> </call-template>
</variable> </variable>
<!-- remove duplicates (if any) -->
<for-each-group select="$extresults/preproc:sym"
group-by="@name">
<sequence select="current-group()[ 1 ]" />
</for-each-group>
<sequence select="$extresults//preproc:error" /> <sequence select="$extresults//preproc:error" />
<!-- process symbols (except imported externs) --> <!-- process symbols (except imported externs) -->
@ -260,48 +254,9 @@
</call-template> </call-template>
</variable> </variable>
<!-- contains duplicates -->
<variable name="new-seq-map" as="map( xs:string, element( preproc:sym )+ )"
select="map:merge(
for $sym in $newresult/preproc:sym
return map{ string( $sym/@name ) : $sym },
map{ 'duplicates' : 'combine' } )" />
<variable name="new-typed-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge(
for $sym in $newresult/preproc:sym[ @type ]
return map{ string( $sym/@name ) : $sym } )" />
<variable name="nonlocals-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge(
for $sym in $newresult/preproc:sym[ not( @local = 'true' ) ]
return map{ string( $sym/@name ) : $sym } )" />
<!-- TODO: revisit this logic -->
<variable name="dedup" as="element( preproc:sym )*"
select="$newresult/preproc:sym[
not(
(
@pollute='true'
and not( @type )
and (
(
( count( $new-seq-map( @name ) ) gt 1 )
and @name=preceding-sibling::preproc:sym/@name
)
or exists( $new-typed-map( @name ) )
)
)
or (
@local = 'true'
and exists( $nonlocals-map( @name ) )
)
)
]" />
<apply-templates mode="preproc:symtable-complete" <apply-templates mode="preproc:symtable-complete"
select="$dedup"> select="$newresult/preproc:sym">
<with-param name="syms" select="$dedup" /> <with-param name="syms" select="$newresult/preproc:sym" />
</apply-templates> </apply-templates>
</preproc:symtable> </preproc:symtable>
@ -444,7 +399,6 @@
</function> </function>
<!-- TODO: revisit this mess -->
<template name="preproc:symtable-process-symbols"> <template name="preproc:symtable-process-symbols">
<param name="extresults" as="element( preproc:syms )" /> <param name="extresults" as="element( preproc:syms )" />
<param name="new" as="element( preproc:syms )" /> <param name="new" as="element( preproc:syms )" />
@ -454,90 +408,61 @@
select="preproc:symtable/preproc:sym[ select="preproc:symtable/preproc:sym[
not( @held = 'true' ) ]" /> not( @held = 'true' ) ]" />
<variable name="cursym-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge(
for $sym in $cursym
return map{ string( $sym/@name ) : $sym } )" />
<variable name="extresults-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge(
for $sym in $extresults/preproc:sym
return map{ string( $sym/@name ) : $sym } )" />
<!-- contains duplicates -->
<variable name="new-seq-map" as="map( xs:string, element( preproc:sym )+ )"
select="map:merge(
for $sym in $new/preproc:sym
return map{ string( $sym/@name ) : $sym },
map{ 'duplicates' : 'combine' } )" />
<variable name="new-overrides-map" as="map( xs:string, element( preproc:sym ) )" <variable name="new-overrides-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge( select="map:merge(
for $sym in $new/preproc:sym[ @override = 'true' ] for $sym in $new/preproc:sym[ @override = 'true' ]
return map{ string( $sym/@name ) : $sym } )" /> return map{ string( $sym/@name ) : $sym } )" />
<preproc:syms> <preproc:syms>
<sequence select="$cursym" /> <for-each-group select="$cursym,
$extresults/preproc:sym,
$new/preproc:sym[ not( @extern='true' and @src ) ]"
group-by="@name">
<!-- Unfortunately, <sort> in this context does not sort the resulting
groups, so we must do so separately -->
<variable name="sorted" as="element( preproc:sym )*">
<perform-sort select="current-group()">
<sort select="@local" />
<sort select="@held" />
</perform-sort>
</variable>
<for-each select="$new/preproc:sym[ not( @extern='true' and @src ) ]"> <!-- first symbol of this name with non-local taking precedence -->
<variable name="name" select="@name" /> <variable name="first" select="$sorted[ 1 ]" />
<variable name="src" select="@src" />
<variable name="dupall" as="element( preproc:sym )*" <variable name="name" select="current-grouping-key()" />
select="$cursym-map( $name ), <variable name="src" select="$first/@src" />
$extresults-map( $name ),
if ( count( $new-seq-map( $name ) ) gt 1 ) then
preceding-sibling::preproc:sym[ @name = $name ]
else
()" />
<variable name="override" as="element( preproc:sym )?" <variable name="override" as="element( preproc:sym )?"
select="$new-overrides-map( @name )" /> select="$new-overrides-map( $name )" />
<choose> <choose>
<when test="@pollute='true' and not( @type )"> <when test="$first/@pollute='true' and not( $first/@type )">
<!-- we'll strip these out later --> <!-- we'll strip these out later -->
<sequence select="." /> <sequence select="$first" />
</when>
<when test="exists( $override ) and not( $override is . )">
<!-- overridden; we're obsolete :( -->
</when> </when>
<!-- if we've gotten this far, then the override is good; clear it <!-- if we've gotten this far, then the override is good; clear it
so as not to trigger override errors --> so as not to trigger override errors -->
<when test="@override='true'"> <when test="$override">
<copy> <preproc:sym>
<sequence select="@*[ not( name()='override' ) ]" /> <sequence select="$override/@*[ not( name()='override' ) ]" />
<!-- mark this as having been overridden for the linker (see <!-- mark this as having been overridden for the linker (see
TAMER; we'll hopefully be getting rid of overrides in the TAMER; we'll hopefully be getting rid of overrides in the
future) --> future) -->
<attribute name="isoverride" select="'true'" /> <attribute name="isoverride" select="'true'" />
<sequence select="*" /> <sequence select="$override/*" />
</copy> </preproc:sym>
</when>
<!-- if we have already imported the symbol as local, but this one
is non-local (exportable), then this one takes precedence -->
<when test="not( @local = 'true' )
and $dupall[ @local = 'true' ]
and not( $dupall[ not( @local = 'true' ) ] )">
<sequence select="." />
</when>
<when test="$dupall[ @type ]">
<!-- there is already a symbol of this name from the same package;
let's not add duplicates -->
</when> </when>
<otherwise> <otherwise>
<!-- this symbol is good; use it --> <!-- this symbol is good; use it -->
<sequence select="." /> <sequence select="$first" />
</otherwise> </otherwise>
</choose> </choose>
</for-each> </for-each-group>
</preproc:syms> </preproc:syms>
</template> </template>