Initial implementation of cat and friends

This is intended to be a very basic subset (for now) of cat that will be
more efficient for general I/O (mainly pipelines between shell functions)
than spawning a process. Benchmarks do show that it is definitely not always
worth the trade-off, but those situations are less likely to occur (large
inputs) and, if they do, the author can be aware of it and use a function
that will prevent the builtin from being used (I'll provide that as well,
instead of `command cat`).

I'll be writing an article on this with benchmarks to rationalize and
explain in depth my approach.
cat
Mike Gerwitz 2014-06-17 23:48:13 -04:00
parent 77d4c14e5e
commit 7573a827a0
3 changed files with 320 additions and 0 deletions

View File

@ -0,0 +1,97 @@
#!/bin/bash
# Bash alternative to external cat call
#
# Copyright (C) 2014 Mike Gerwitz
#
# This file is part of pkgsh.
#
# pkgsh is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# N.B. While this shell implementation may be faster for most use cases, the
# external binary will likely be much faster for large streams.
#
# This also contains some convenience functions that are not part of
# GNU coreutils.
##
# Include guard: sourcing this file more than once is a no-op.  The
# expansion is quoted and defaulted so that the test is well-formed when the
# flag is unset (even under `set -u`), and the early return is explicitly
# successful so that a repeated `source` does not report failure.
[[ -z "${__PKGSH_INC_COREUTILS_CAT:-}" ]] || return 0
__PKGSH_INC_COREUTILS_CAT=1
##
# Echo characters from a file (stdin by default) up to, but not including,
# the provided delimiter
#
# Only the first character of the delimiter is significant (see `read -d`).
# If the input ends before the delimiter is seen, everything that was read
# is still echoed and a non-zero status is returned.
#
# $1 - terminating delimiter (required)
# $2 - file to read from (optional; defaults to /dev/stdin)
#
# Returns the status of the read: 0 if the delimiter was found, non-zero
# otherwise (including when the file could not be opened).
cat-until()
{
  local -r delim="${1?Missing terminating delimiter}"
  local -r file="${2:-/dev/stdin}"

  # A local buffer leaves the caller's $REPLY untouched and guarantees that
  # nothing stale is echoed if the redirection fails; the empty IFS keeps
  # `read` from trimming surrounding whitespace.
  local buf
  IFS= read -rd "$delim" buf < "$file"
  local -ri result=$?

  # printf rather than echo: data such as `-n` or `-e` must be output
  # verbatim, not interpreted as options
  printf '%s' "$buf"

  return $result
}
##
# Echo characters from a file (stdin by default) up to and including the
# provided delimiter
#
# If the delimiter is never encountered, whatever `cat-until` echoed is left
# as-is, no delimiter is appended, and the non-zero status of `cat-until`
# is returned.
#
# $1 - terminating delimiter
# $2 - file to read from (optional)
cat-until-incl()
{
  # `cat-until` will validate
  local -r delim="$1"

  cat-until "$@" || return

  # `read -d` honors only the first character of the delimiter, so that is
  # all that was consumed from the input and all that should be restored;
  # printf keeps an option-like delimiter from being eaten by echo
  printf '%s' "${delim:0:1}"
}
##
# Proxies to either the shell implementation of cat or the system binary,
# depending on support
#
# Only the first argument is inspected: if it looks like a short option (a
# dash followed by anything other than a dash or a space), every argument is
# handed to the system binary; otherwise they all go to `quickcat`.  Note
# that an argument beginning with `--` is therefore *not* deferred.
#
# Returns the status of whichever implementation was run.
cat()
{
  # A real conditional rather than `a && b || c`: the latter would also run
  # quickcat, with the option still among its arguments, whenever the binary
  # exited non-zero.
  if [[ "$1" =~ ^-[^-\ ] ]]; then
    command cat "$@"
  else
    quickcat "$@"
  fi
}
##
# Limited implementation of `cat` for performance
#
# Each operand is copied to stdout, in order, using only shell builtins.  No
# operands at all, an empty operand, or `-` mean /dev/stdin.  A final line
# lacking a trailing newline is reproduced without one.  Lines are output
# verbatim, including ones that look like echo options (e.g. `-n`).
#
# NOTE: shell variables cannot hold NUL bytes, so binary input is not
# expected to survive intact.
#
# Returns 0 if every operand was copied; 1 if any could not be opened or
# written (the remaining operands are still processed).
#
# TODO: The proper research has not yet gone into optimizing this; this is
# just an initial implementation to get things going. I will be addressing
# this shortly.
quickcat()
{
  local src line
  local -i rc=0

  # no operands means stdin
  [ $# -gt 0 ] || set -- -

  for src in "$@"; do
    if [ -z "$src" ] || [ "$src" == - ]; then
      src=/dev/stdin
    fi

    # The redirection applies to the whole group so that, if the operand
    # cannot be opened, nothing inside runs (in particular no stale $line
    # is echoed) and the failure is recorded.
    {
      while IFS= read -r line; do
        printf '%s\n' "$line"
      done

      # whatever is left is an unterminated final line (or nothing at all)
      printf '%s' "$line"
    } < "$src" || rc=1
  done

  return $rc
}

View File

@ -0,0 +1,92 @@
#!/bin/bash
# Tests cat-until{,-incl}
#
# Copyright (C) 2014 Mike Gerwitz
#
# This file is part of pkgsh.
#
# pkgsh is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##
# load the functions under test (the path is relative to the working
# directory the tests are run from)
source src/coreutils/cat.sh
##
# The delimiter itself must not be part of the output
test-cat-until-no-delim()
{
  local out

  if ! out=$( cat-until X <<< fooXbar ); then
    assert -z "non-zero exit"
  fi

  assert "$out" == foo
}
##
# The inclusive variant must end its output with the delimiter
test-cat-until-incl-has-delim()
{
  local out

  if ! out=$( cat-until-incl X <<< fooXbar ); then
    assert -z "non-zero exit"
  fi

  assert "$out" == fooX
}
##
# Input may come from a file given as the second argument instead of stdin
test-cat-until-from-file()
{
  local out

  if ! out=$( cat-until X <( echo fooXbar ) ); then
    assert -z "non-zero exit"
  fi

  assert "$out" == foo
}
##
# The inclusive variant accepts a file as its second argument too
test-cat-until-incl-from-file()
{
  local out

  if ! out=$( cat-until-incl X <( echo fooXbar ) ); then
    assert -z "non-zero exit"
  fi

  assert "$out" == fooX
}
##
# Without a delimiter in the input, everything is echoed and the exit status
# is 1
test-cat-until-echoes-all-on-no-delim()
{
  local out

  # NB: this must remain a short-circuit list (not an `if`), because the
  # assertion that follows inspects the $? it leaves behind
  out=$( cat-until X <<< foobar ) \
    && assert -z "returned successfully on missing delim"
  assert $? -eq 1

  assert "$out" == foobar
}
##
# Without a delimiter in the input, the inclusive variant echoes everything
# but must not append a delimiter of its own
test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim()
{
  local out

  if out=$( cat-until-incl X <<< foobar ); then
    assert -z "returned successfully on missing delim"
  fi

  assert "$out" == foobar
}
# run every test case, in the order listed
for testcase in \
  test-cat-until-no-delim \
  test-cat-until-incl-has-delim \
  test-cat-until-from-file \
  test-cat-until-incl-from-file \
  test-cat-until-echoes-all-on-no-delim \
  test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim
do
  "$testcase"
done
unset testcase

View File

@ -0,0 +1,131 @@
#!/bin/bash
# Tests bash implementation of cat
#
# Copyright (C) 2014 Mike Gerwitz
#
# This file is part of pkgsh.
#
# pkgsh is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##
# load the functions under test (the path is relative to the working
# directory the tests are run from)
source src/coreutils/cat.sh
# functrace: let the RETURN trap below be inherited by shell functions,
# command substitutions and subshells
set -T
# marker byte that _callset writes to stdout and _chk looks for
declare -r qcalled=$'\001'
# invoke _callset each time a shell function finishes executing
trap _callset RETURN
##
# RETURN trap handler: write the $qcalled marker to stdout when an outermost
# `quickcat` invocation returns
_callset()
{
  # only a return from quickcat is of interest...
  [[ "${FUNCNAME[1]}" == quickcat ]] || return 0

  # ...and quickcat recurses, so ignore calls made by quickcat itself
  [[ "${FUNCNAME[2]}" != quickcat ]] || return 0

  # can't set a var because we're likely in a subshell
  echo -n "$qcalled"
}
##
# Assert that the captured output ends with the $qcalled marker and that,
# once the marker is removed, it equals the expected output
#
# $1 - captured output, including the trailing marker
# $2 - expected output, without the marker
_chk()
{
  local actual="$1"
  local -r wanted="$2"

  # the marker must be the very last thing in the captured output
  if ! [[ "$actual" =~ "$qcalled"$ ]]; then
    assert -z "quickcat not called: $actual"
  fi

  # drop the marker before comparing
  actual="${actual%$qcalled}"
  assert "$actual" == "$wanted"
}
##
# Read all of stdin, verbatim, into the named variable
#
# $1 - name of the variable to assign (required)
#
# Always returns 0: with an empty delimiter `read` consumes input up to a NUL
# byte or EOF, and reaching EOF makes it report failure even though the data
# has been assigned.
_readall()
{
  : "${1?Missing destination variable name}"

  # $1 is used directly rather than through a local: `read` assigns by name,
  # so a local declared here would shadow a caller's variable of the same
  # name and the data would never reach the caller
  IFS= read -rd '' "$1" || true
}
##
# Leading and trailing whitespace, be it spaces, newlines, or otherwise,
# should be retained (something that the shell does not normally like doing)
test-retains-leading-trailing-whitespace()
{
  # keep the fixtures local so that nothing leaks into other test cases
  local val expected given

  _readall val < <( echo -e "\n foo \n" )
  _readall expected < <( command cat <( echo -n "$val" ) )
  _readall given < <( cat <( echo -n "$val" ) )

  _chk "$given" "$expected"
}
##
# If there is no trailing newline, one should not be added
test-does-not-add-trailing-whitespace()
{
  # keep the fixtures local so that nothing leaks into other test cases
  local expected given

  _readall expected < <( command cat <( echo -n foo ) )
  _readall given < <( cat <( echo -n foo ) )

  _chk "$given" "$expected"
}
##
# It is, after all, what `cat` is good at
test-concatenates-multiple-files()
{
  # keep the fixtures local so that nothing leaks into other test cases
  local expected given

  _readall expected < <( command cat <( echo -n foo ) <( echo "Bar" ) )
  _readall given < <( cat <( echo -n foo ) <( echo "Bar" ) )

  _chk "$given" "$expected"
}
##
# As by convention, `-` means `stdin` and can appear anywhere in the file
# list
test-can-read-stdin-via-dash()
{
  # keep the fixtures local so that nothing leaks into other test cases
  local expected given

  _readall expected < <( command cat <( echo -n foo ) - <<< "Baz" )
  _readall given < <( cat <( echo -n foo ) - <<< "Baz" )

  _chk "$given" "$expected"
}
##
# If no arguments are provided, input is accepted from `stdin`
test-defaults-to-stdin()
{
  # keep the fixtures local so that nothing leaks into other test cases
  local expected given

  _readall expected < <( command cat <<< "foo" )
  _readall given < <( cat <<< "foo" )

  _chk "$given" "$expected"
}
##
# We do not currently handle any options; defer to system binary
test-any-option-defers-to-binary()
{
  # keep the fixture local so that nothing leaks into other test cases
  local given

  _readall given < <( cat -E <<< "foo" )

  # note that this assertion will implicitly ensure that quickcat was not
  # called; see _callset
  assert "$given" == 'foo$'$'\n'
}
# run every test case, in the order listed
for testcase in \
  test-concatenates-multiple-files \
  test-can-read-stdin-via-dash \
  test-defaults-to-stdin \
  test-retains-leading-trailing-whitespace \
  test-does-not-add-trailing-whitespace \
  test-any-option-defers-to-binary
do
  "$testcase"
done
unset testcase