diff --git a/tools/csv2xml b/tools/csv2xml new file mode 100755 index 00000000..60bd7451 --- /dev/null +++ b/tools/csv2xml @@ -0,0 +1,126 @@ +#!/usr/bin/awk -f +# +# Compiles the given CSV into a table definition +# +# Copyright (C) 2016 LoVullo Associates, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +## + + +function columngen( header ) +{ + # output a field constant for each field in the header + i = 0 + while ( field = header[ ++i ] ) + { + printf " \n", + field, + ( i - 1 ), + ( seq[ i ] ) ? 
"true" : "false" + } +} + + +function seqchk( last ) +{ + # if there's no last row, then do not bother + i = 0 + while ( i++ < NF ) + { + if ( seq[ i ] == "" ) seq[ i ] = 1 + + # this field is sequential if it is greater than or equal to the last field + # (we don't check for descending [yet]); note that on the first check, last + # will be empty and therefore this check will succeed (properly + # initializing seq[i] to 1) + seq[ i ] = seq[ i ] && ( $(i) >= last[ i ] ) + } +} + + +# header +BEGIN { + rootpath = "../../../" + file = ARGV[1] + + # grab only the filename (remove all preceding directories and the file ext) + name = gensub( /^.*\/|\.[^.]+$/, "", "g", file ) + + + # output package header + printf \ + "\n" \ + "\n\n" \ + " \n\n" \ + " \n" \ + " \n\n", \ + rootpath, name + + # the first row of the CSV is the header representing the column identifiers + getline + split( $0, header, /,/ ) + + # table constant identifier + tconst = toupper( gensub( /-/, "_", "g", name ) ) "_RATE_TABLE" + + # generate the header for the table constant + printf " \n", name + + printf "%s", " 1 ) ? "," : "" ) $(i) + } + + print ";" + + seqchk( last ) + split( $0, last ) +} + + +# footer +END { + # end of table-rows node + print "\" />" + + # columns can't be generated until after we know which ones represent + # sequential data + columngen( header ) + + print " " + print "" +} diff --git a/tools/csvi b/tools/csvi new file mode 100755 index 00000000..6b9af78c --- /dev/null +++ b/tools/csvi @@ -0,0 +1,138 @@ +#!/usr/bin/awk -f +# +# Performs interpolation for columns in a CSV and outputs the result +# +# Copyright (C) 2016 LoVullo Associates, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Configurable values (use -vname=value from command line): +# step - use a predetermined step instead of calculating it from the first two rows +## + +function storeline() +{ + for ( i = 1; i <= hlen; i++ ) { + prev[i] = $i + } +} + +function clearline() +{ + for ( i = 1; i <= hlen; i++ ) { + prev[i] = 0 + } +} + +function getprev() +{ + for ( i = 1; i <= hlen; i++ ) { + $i = prev[i] + } +} + + +function interpolate() +{ + lastval = prev[1] + + curval = $1 + diff = curval - lastval + + # does this value fall in line with the requested step? + if ( diff == step ) + { + storeline() + + # we're good; continue + print + next + } + + # if we do not yet have a value large enough to reach our step, then continue + # until we do (do not store this line) + n = int( diff / step ) + if ( n <= 0 ) { + next + } + + # determine interpolation values + for ( i = 2; i <= hlen; i++ ) { + ival[i] = ( ( $i - prev[i] ) / n ) + } + + getprev() + + # let us interpolate values that are divisible by the step + do + { + # increase the last value by our step + $1 += step + + # interpolate each column value (notice that we skip the first column, which + # was handled directly above) + for ( i = 2; i <= hlen; i++ ) { + $i += ival[i] + } + + # print the new line + print + } while ( ( diff -= step ) > 0 ) + + # anything remaining does not fit into our step and will be ignored; we'll + # continue with our next step at the next line + + # consider this to be our last line + storeline() +} + + +BEGIN { + # the first row of the CSV is the header representing the column identifiers + getline + hlen = split( $0, header, /,/ ) + + # output the 
header + print $0 + + # delimit fields by commas (the field separator for CSVs); note that this + # won't work properly if strings contain commas + FS = OFS = "," + + clearline() + getline + + # if no step was provided, then calculate one based on the first two rows + if ( step == 0 ) { + # output the first row, which does not need to be interpolated + print + + # compute the step + vala = $1 + getline + valb = $1 + step = valb - vala + + # since the second line is used to determine the step, then it must match the + # step and therefore is good to output + print + + # begin. + storeline() + } +} + + +# for each row +{ interpolate() } diff --git a/tools/csvm2csv b/tools/csvm2csv new file mode 100755 index 00000000..3bcbc7b4 --- /dev/null +++ b/tools/csvm2csv @@ -0,0 +1,128 @@ +#!/usr/bin/awk -f +# +# Compiles a "magic" CSV file into a normal CSV +# +# Copyright (C) 2016 LoVullo Associates, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# "Magic" CSVs simply exist to make life easier: they permit comments, blank +# lines, variables, sub-delimiter expansion, and any number of ranges per line. +# Ranges will be expanded in every combination, making rate tables highly +# maintainable. +# +# Variables are also supported when defined using :var=val. Variables may +# expand into ranges, 'cause they're awesome. Multiple variables may be +# delimited by semi-colons, as may multiple values. 
+# +# For example: +# :foo=1--3 +# $foo;7;9--10:$foo, 5--10 +# +# Would generate: +# 1, 5 +# 1, 6 +# ... +# 5, 10 +# 2, 5 +# ... +# 9, 5 +# ... +# 1, 5 +# 1, 6 +# ... +## + + +function rangeout( i, m, j, me, orig ) +{ + if ( i > NF ) + { + print + return + } + + orig = $i + + # check first for delimiters + if ( match( $i, /^([^;]+);(.*)$/, m ) ) + { + # give it a shot with the first value + $i = m[1] + rangeout( i ) + + # strip off the first value and process with following value(s) + $i = m[2] + rangeout( i ) + + # we've delegated; we're done + $i = orig + return + } + + # attempt to parse variable (may expand into a range) + if ( match( $i, /^\$([a-zA-Z_-]+)$/, m ) ) + { + $i = vars[ m[1] ]; + } + + # parse range + if ( match( $i, /^([0-9]+)--([0-9]+)$/, m ) ) + { + j = m[1] + me = m[2] + do + { + $i = j + rangeout( i + 1 ) + } while ( j++ < me ) + } + else + { + rangeout( i + 1 ); + } + + # restore to original value + $i = orig +} + + +BEGIN { + # we're parsing CSVs + FS = " *, *" + OFS = "," +} + + +# skip all lines that begin with `#', which denotes a comment, or are empty +/^#|^$/ { next; } + +# lines that begin with a colon are variable definitions +/^:/ { + match( $0, /^:([a-zA-Z_-]+)=(.*?)$/, m ) + vars[ m[1] ] = m[2] + next +} + +# lines containing ranges (denoted by `--', the en dash, which is a typesetting +# convention for ranges), sub-delimiters, or variables must be expanded +/--|;|\$[a-zA-Z_-]/ { rangeout( 1 ); next; } + +# all other lines are normal; simply output them verbatim +{ + # this assignment will ensure that awk processes the output, ensuring that + # extra spaces between commas are stripped + $1=$1 + print +} diff --git a/tools/gen-make b/tools/gen-make new file mode 100755 index 00000000..48f53bad --- /dev/null +++ b/tools/gen-make @@ -0,0 +1,111 @@ +#!/bin/bash +# +# Generates Makefile containing dependencies for each package +# +# Copyright (C) 2016 LoVullo Associates, Inc. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +## + +# windows machines may not have the tools to resolve a path, so let's do so +# ourselves (TODO: there are better (and more performant) ways of doing this than +# repeated string replacements); TODO: ./ +resolv-path() +{ + # no need to do anything if the string does not contain a parent dir reference + # (we use this convoluted string replacement check for woe32/64 to prevent + # additional spawns (e.g. sed) that would slow us down and because =~ is not + # properly supported in msys); NOTE(review): only the right-hand side carries the trailing `a', so this test appears to be always true and the early return unreachable -- confirm + [[ "$1" != "${1/..\//}"a ]] || { + echo "$1" + return + } + + local path= + while read name; do + if [ "$name" == .. ]; then + [ -n "$path" ] || { + echo "warning: will not resolve $1" >&2 + return 5 + } + + path="${path%/*}" + continue + fi + + path="$path/$name" + done <<< "${1//\//$'\n'}" + + # echo path without leading / + echo -n "${path:1}" +} + + +# rule for building +[ -z "$GEN_MAKE" ] && { + echo "%.xmlo:: %.tmp" + echo -e "\t@rm -f \$@ \$<" + [ -n "$xmlo_cmd" ] \ + && echo -e "\t$xmlo_cmd" \ + || echo -e "\ttouch \$@" + + echo "%.xmlo:: %.xml | prexmlo" + [ -n "$xmlo_cmd" ] \ + && echo -e "\t$xmlo_cmd" \ + || echo -e "\ttouch \$@" + + export GEN_MAKE="$( pwd )/$0" + exec "$GEN_MAKE" "$@" +} + +until [ $# -eq 0 ]; do ( + path="${1%%/}" + echo "[gen-make] scanning $path" >&2 + + cd "$( basename $path )/" || exit $? 
+ + deps=$( find -maxdepth 1 -iname '*.dep' ) + for dpath in $deps; do + # equivalent to basename command; use this since spawning processes on + # windoze is slow as shit (originally we did find -exec bashename) + d="${dpath##*/}" + + echo "[gen-make] found $path/$d" >&2 + echo -n "$path/${d%.*}.xmlo:" + + # output deps + while read dep; do + # if the first character is a slash, then it's relative to the project + # root---the resolution has already been done for us! + if [ "${dep:0:1}" == '/' ]; then + echo -n " ${dep:1}.xmlo" + continue + fi + + echo -n ' ' + resolv-path "$path/$dep.xmlo" + done < "$d" + + echo + done + + # recurse on every subdirectory + for p in */; do + [ "$p" == ./ -o "$p" == ../ ] && continue + [ ! -d "$p" ] || "$GEN_MAKE" "$path/$p" || { + echo "fatal: failed to recurse on $( pwd )/$path/$p" >&2 + exit 1 + } + done +); shift; done diff --git a/tools/lib/zipre.php b/tools/lib/zipre.php new file mode 100644 index 00000000..d0197946 --- /dev/null +++ b/tools/lib/zipre.php @@ -0,0 +1,142 @@ +. + */ + + +function gen_re_quick( $data ) +{ + $re = ( '^' . 
gen_re( $data, 0 ) ); + + // attempt to simplify the regex (we're not going to put a lot of effort into + // this) + return re_simplify( $re ); +} + + +function gen_re( $data, $offset ) +{ + // if we've reached the end of the zip length, or if there's no more zips to + // look at, then stop + if ( ( count( $data ) === 0 ) + || ( $offset === 5 ) + ) + { + return ''; + } + + $out = '('; + + // loop through each digit at the current offset + $last = ''; + foreach ( $data as $zip ) + { + if ( !( isset( $zip[ $offset ] ) ) ) + { + continue; + } + + $digit = $zip[ $offset ]; + + // if we've already seen this digit in the current position, then + // continue + if ( $digit === $last ) + { + continue; + } + + // we're going to recurse now, delimiting allowable digits with pipes + // (for 'OR'); we'll recurse on a sublist that matches the zip up to + // (and including) the current digit (to do this, note that we only need + // to check the current digit, since our current list is already a + // sublist of the parent list up to the current point) + $prefix = substr( $zip, 0, $offset + 1 ); + + $out .= ( $last === '' ) ? '' : '|'; + $out .= $digit . gen_re( + filter_data( $data, $digit, $offset ), + ( $offset + 1 ) + ); + + $last = $digit; + } + + return $out . ')'; +} + +function filter_data( $data, $chr, $offset ) +{ + $ret = array(); + + foreach ( $data as $val ) + { + if ( $val[ $offset] === $chr ) + { + $ret[] = $val; + } + } + + return $ret; +} + +function re_simplify( $re ) +{ + // the only simplification we currently do is joining sequential digit ORs + // into a character range (e.g. 
(1|2|3|4) becomes [1-4]) + return preg_replace_callback( '/\([0-9](\|[0-9])*\)/', function( $results ) + { + $match = $results[ 0 ]; + $digits = explode( '|', str_replace( array( '(', ')' ), '', $match ) ); + + // are the digits sequential (we will only perform this optimization if + // there's more than 3 digits, since otherwise the replacement would + // result in a string of equal or greater length)? + if ( ( count( $digits ) > 3 ) && is_seq( $digits ) ) + { + return sprintf( '[%d-%d]', + $digits[ 0 ], + $digits[ count( $digits ) - 1 ] + ); + } + elseif ( count( $digits ) === 1 ) + { + // if there's only one digit, then that's all we need to return + return $digits[ 0 ]; + } + + return '[' . implode( '', $digits ) . ']'; + }, $re ); +} + +function is_seq( $digits, $last = '' ) +{ + // stop recursing once we're out of digits + if ( count( $digits ) === 0 ) + { + return true; + } + + // grab the current digit and remove it from the list (this has the effect + // of both cons and cdr) + $digit = (int)( array_shift( $digits ) ); + + // consider this a sequence if this digit is one more than the last (or if + // there is no last digit) and if the following digit is sequential + return ( ( $last === '' ) || ( $digit === ( $last + 1) ) ) + && is_seq( $digits, $digit ); +} diff --git a/tools/tdat2xml b/tools/tdat2xml new file mode 100755 index 00000000..e08ca7c0 --- /dev/null +++ b/tools/tdat2xml @@ -0,0 +1,294 @@ +#!/usr/bin/env php +. + */ +?> + +' . "\n"; + } + + return sprintf( + '' . + "\n%s" . + " %s\n" . + "\n\n", + $name, + gen_identifier( $id ), + $desc, + $yields, + $prev_value, + gen_any_block( $queue, $or ) + ); +} + + +function gen_any_block( $queue, $or ) +{ + $any = gen_zip_re( $queue ) . + gen_on_class( $or ); + + return ( $any ) + ? '' . $any . 
'' + : ''; +} + + +function gen_zip_re( $data ) +{ + if ( count( $data ) === 0 ) + { + return ''; + } + + return sprintf( + '', + gen_re_quick( $data ) + ); +} + +function gen_on_class( $data ) +{ + if ( count( $data ) === 0 ) + { + return ''; + } + + $cur = array_shift( $data ); + + return sprintf( + '%s', + $cur, + gen_on_class( $data ) + ); +} + +function gen_identifier( $id ) +{ + return is_numeric( $id ) + ? $id + : '-' . strtolower( $id ); +} + +function gen_identifier_value( $id ) +{ + // for non-numeric identifiers, return ascii value + // of character to represent our value + return is_numeric( $id ) + ? $id + : ord( $id ); +} + +$file = $argv[ 1 ]; +$fdat = explode( '.', basename( $file ) ); +$name = $fdat[ 0 ]; + +$cur = ''; +$queue = array(); +$or = array(); + +$fh = fopen( $file, 'r' ); + +echo 'name="rates/territories/', $name, '" ', "\n", + 'desc="', ucfirst( $name ), ' territory classifications">' . "\n\n"; + +echo "\n\n"; + +$ids = array(); +$params = array(); +$imports = array(); +$prev_yields = ''; +$prev_yields_all = array(); +$classes = ''; + +$param_type = 'terrType' . ucfirst( $name ); + +while ( true ) +{ + // read the line within the loop so that we do not terminate until after we + // treat eof as an empty line + $line = str_replace( array( "\n", "\r" ), '', fgets( $fh ) ); + + if ( !$cur ) + { + if ( substr( $line, 0, 12 ) === '@import-pkg ' ) + { + $imports[] = substr( $line, 12 ); + continue; + } + + // we expect this line to be a territory descriptor + try + { + list ( $id, $desc ) = parse_tdesc( $line ); + } + catch ( Exception $e ) + { + fwrite( STDERR, 'Invalid territory descriptor: ' . 
$line ); + exit( 1 ); + } + + $ids[] = $id; + $cur = $id; + } + elseif ( ( $line === '' ) || feof( $fh ) ) + { + // generate param for typedef + $params[ $id ] = $desc; + + // if there's nothing in the queue, then treat this as an 'ROS' (this + // should appear as the *last* territory, or it will not function as + // expected) + if ( count( $queue ) === 0 ) + { + $prev = $prev_yields_all; + } + else + { + $prev = array( $prev_yields ); + } + + // generate the classification + $classes .= gen_classification( $id, $name, $desc, $prev, $queue, $or ); + + // this accomplishes two things: (1) avoids regexes if there's a + // previous match and (2) ensures that we cannot possibly match multiple + // territories + $prev_yields = gen_yields( $id, $name ); + $prev_yields_all[] = $prev_yields; + + $cur = ''; + $queue = array(); + $or = array(); + + if ( feof( $fh ) ) + { + break; + } + } + elseif ( $line[0] === '=' ) + { + // =foo means match on classification @yields "foo" + $or[] = substr( $line, 1 ); + } + else + { + $queue[] = $line; + } +} + +$param_name = 'territory_' . $name; +?> + + + + + + + + + + + + + + + + + $desc ) { ?> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/zipre b/tools/zipre new file mode 100755 index 00000000..e16a5e9a --- /dev/null +++ b/tools/zipre @@ -0,0 +1,37 @@ +#!/usr/bin/env php +. + * + * I wanted to write this in Scheme (it's a perfect recursive application), but + * I figured that other developers may get annoyed having to find a Scheme impl + * that works for them...so...PHP it is... + * + * THIS SCRIPT EXPECTS THE DATA TO BE SORTED! This can be easily accomplished by + * doing the following: + * sort -d zipfile | ./zipre + */ + +include 'lib/zipre.php'; + +// grab input from stdin (must be sorted!) +$data = explode( "\n", file_get_contents( 'php://stdin' ) ); + +// build and output +echo gen_re_quick( $data );