Initial implementation of cat and friends
This is intended to be a very basic subset (for now) of cat that will be more efficient for general I/O (mainly pipelines between shell functions) than spawning a process. Benchmarks do show that it is definitely not always worth the trade-off, but those situations are less likely to occur (large inputs) and, if they do, the author can be aware of it and use a function that will prevent the builtin from being used (I'll provide that as well, instead of `command cat`). I'll be writing an article on this with benchmarks to rationalize and explain in depth my approach.cat
parent
77d4c14e5e
commit
7573a827a0
|
@ -0,0 +1,97 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Bash alternative to external cat call
|
||||||
|
#
|
||||||
|
# Copyright (C) 2014 Mike Gerwitz
|
||||||
|
#
|
||||||
|
# This file is part of pkgsh.
|
||||||
|
#
|
||||||
|
# pkgsh is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# N.B. While this shell implementation may be faster for most use cases, the
|
||||||
|
# external binary will likely be much faster for large streams.
|
||||||
|
#
|
||||||
|
# This also contains some convenience functions that are not part of
|
||||||
|
# GNU coreutils.
|
||||||
|
##
|
||||||
|
|
||||||
|
# Include guard: sourcing this file a second time is a successful no-op.
# The expansion is quoted (and defaulted, for the benefit of `set -u`) so
# that the test does not depend on word splitting, and the explicit status
# keeps a repeated `source` from reporting failure.
[ -z "${__PKGSH_INC_COREUTILS_CAT:-}" ] || return 0
__PKGSH_INC_COREUTILS_CAT=1
|
||||||
|
|
||||||
|
|
||||||
|
##
# Echo characters from a file (stdin by default) up to (but not including)
# the provided delimiter
#
# Only the first character of the delimiter is significant (see `read -d`).
# If the delimiter is never encountered, everything that was read is still
# echoed and the non-zero status of `read` is returned.
#
#   $1  terminating delimiter (required)
#   $2  file to read from (default: /dev/stdin)
cat-until()
{
  local -r delim="${1?Missing terminating delimiter}"
  local -r file="${2:-/dev/stdin}"

  # start from an empty, function-local REPLY: if the redirection below
  # fails, `read` never runs and a stale global REPLY would be echoed
  local REPLY=

  # with no variable name, `read` stores the input unmodified in REPLY
  read -rd "$delim" < "$file"
  local -ri result=$?

  # printf rather than `echo -n`, which would interpret data such as `-n`
  # or `-e` as an option and swallow it
  printf '%s' "$REPLY"
  return $result
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Echo characters from a file (stdin by default) up to and including the
# provided delimiter
#
# Arguments are those of `cat-until`.  If the delimiter is never
# encountered, the input is echoed without a trailing delimiter and the
# non-zero status of `cat-until` is returned.
cat-until-incl()
{
  # `cat-until` will validate
  local -r delim="$1"

  # `read -d` honours only the first character of the delimiter, so that is
  # all that was consumed from the input and all that may be echoed back;
  # printf avoids `echo` treating a delimiter such as `-n` as an option
  cat-until "$@" \
    && printf '%s' "${delim:0:1}"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Proxies to either the shell implementation of cat or the system binary,
# depending on support
#
# Only the first argument is inspected: if it is a short option (a dash
# followed by a character other than a dash or a space), all arguments are
# handed to the system binary; otherwise they are handed to `quickcat`.
#
# Returns the exit status of whichever implementation was invoked.
cat()
{
  # an explicit if/else rather than `a && b || c`: with the latter, a
  # failure of the system binary would additionally run quickcat on the
  # very same arguments and discard the binary's exit status
  if [[ "$1" =~ ^-[^-\ ] ]]; then
    command cat "$@"
  else
    quickcat "$@"
  fi
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Limited implementation of `cat` for performance
#
# Each operand is read in order and written to stdout.  `-` or an empty
# operand denotes stdin, as does the absence of any operands.  A final line
# lacking a trailing newline is written without one.
#
# Returns non-zero if any operand could not be opened for reading; the
# remaining operands are still processed.
#
# TODO: The proper research has not yet gone into optimizing this; this is
# just an initial implementation to get things going. I will be addressing
# this shortly.
quickcat()
{
  local arg in
  local -i status=0

  # no operands: behave as though stdin had been requested
  [ $# -gt 0 ] || set -- -

  for arg in "$@"; do
    in="${arg:-/dev/stdin}"
    [ "$in" == - ] && in=/dev/stdin

    while true; do
      # `read` fails at EOF; whatever it gathered before that point is an
      # unterminated final line and must be written without a newline
      IFS= read -r || {
        printf '%s' "$REPLY"
        break
      }

      # printf rather than echo, which would interpret lines such as `-n`
      # or `-e` as options and swallow them
      printf '%s\n' "$REPLY"
    done < "$in" || status=1
  done

  return $status
}
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Tests cat-until{,-incl}
|
||||||
|
#
|
||||||
|
# Copyright (C) 2014 Mike Gerwitz
|
||||||
|
#
|
||||||
|
# This file is part of pkgsh.
|
||||||
|
#
|
||||||
|
# pkgsh is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
##
|
||||||
|
|
||||||
|
source src/coreutils/cat.sh
|
||||||
|
|
||||||
|
|
||||||
|
##
# Output stops before the delimiter, which is itself not echoed
test-cat-until-no-delim()
{
  local output

  if ! output="$( cat-until X <<< fooXbar )"; then
    assert -z "non-zero exit"
  fi

  assert "$output" == foo
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# The inclusive variant echoes the delimiter after the preceding input
test-cat-until-incl-has-delim()
{
  local output

  if ! output="$( cat-until-incl X <<< fooXbar )"; then
    assert -z "non-zero exit"
  fi

  assert "$output" == fooX
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Input may be taken from a path given as the second argument rather than
# from stdin
test-cat-until-from-file()
{
  local output

  if ! output="$( cat-until X <( echo fooXbar ) )"; then
    assert -z "non-zero exit"
  fi

  assert "$output" == foo
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# The inclusive variant also accepts a path as its second argument
test-cat-until-incl-from-file()
{
  local output

  if ! output="$( cat-until-incl X <( echo fooXbar ) )"; then
    assert -z "non-zero exit"
  fi

  assert "$output" == fooX
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# When the delimiter never appears, all input is echoed and the exit
# status is 1
test-cat-until-echoes-all-on-no-delim()
{
  local output

  output="$( cat-until X <<< "foobar" )" \
    && assert -z "returned successfully on missing delim"

  # status of the list above, captured before anything else can replace it
  local -ri status=$?

  assert "$status" -eq 1
  assert "$output" == foobar
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# When the delimiter never appears, the inclusive variant echoes all input
# but must not append a delimiter that was not there
test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim()
{
  local output

  if output="$( cat-until-incl X <<< "foobar" )"; then
    assert -z "returned successfully on missing delim"
  fi

  assert "$output" == foobar
}
|
||||||
|
|
||||||
|
|
||||||
|
# Run every test case defined above, in order of definition
test-cat-until-no-delim
test-cat-until-incl-has-delim
test-cat-until-from-file
test-cat-until-incl-from-file
test-cat-until-echoes-all-on-no-delim
test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Tests bash implementation of cat
|
||||||
|
#
|
||||||
|
# Copyright (C) 2014 Mike Gerwitz
|
||||||
|
#
|
||||||
|
# This file is part of pkgsh.
|
||||||
|
#
|
||||||
|
# pkgsh is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
##
|
||||||
|
|
||||||
|
source src/coreutils/cat.sh
|
||||||
|
|
||||||
|
# functrace (`set -T`): the RETURN trap installed below is inherited by
# shell functions, command substitutions and subshells
set -o functrace

# marker written to stdout by _callset when quickcat returns
readonly qcalled=$'\001'

trap _callset RETURN
|
||||||
|
|
||||||
|
##
# RETURN trap handler: emits the marker on stdout when the function that
# is returning is quickcat
#
# A variable cannot be used to record the call because we are likely in a
# subshell.  Returns from a quickcat that was itself invoked by quickcat
# are ignored, so that a recursing quickcat yields a single marker.
_callset()
{
  if [[ "${FUNCNAME[1]}" == quickcat && "${FUNCNAME[2]}" != quickcat ]]; then
    echo -n "$qcalled"
  fi
}
|
||||||
|
|
||||||
|
##
# Asserts that the given output ends with the quickcat marker and that,
# with the marker removed, it equals the expected output
#
#   $1  output of the implementation under test
#   $2  expected output
_chk()
{
  local actual="$1" wanted="$2"

  if ! [[ "$actual" =~ "$qcalled"$ ]]; then
    assert -z "quickcat not called: $actual"
  fi

  # compare without the trailing marker
  assert "${actual%$qcalled}" == "$wanted"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Reads all of stdin, verbatim, into the variable named by the first
# argument
#
# With an empty delimiter `read` stops only at a NUL byte or EOF; hitting
# EOF makes it report failure, which is expected and therefore discarded.
_readall()
{
  local -r var="$1"

  IFS= read -r -d '' "$var" || :
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Leading and trailing whitespace, be it spaces, newlines, or otherwise,
# must survive untouched (the shell is normally eager to discard it)
test-retains-leading-trailing-whitespace()
{
  # input surrounded by newlines and spaces
  _readall val < <( echo -e "\n foo \n" )

  # reference output from the system binary
  _readall expected < <( command cat <( echo -n "$val" ) )

  # output of the implementation under test
  _readall given < <( cat <( echo -n "$val" ) )

  _chk "$given" "$expected"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Input lacking a trailing newline must not gain one
test-does-not-add-trailing-whitespace()
{
  # reference output from the system binary
  _readall expected < <( command cat <( echo -n foo ) )

  # output of the implementation under test
  _readall given < <( cat <( echo -n foo ) )

  _chk "$given" "$expected"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Several operands are output one after another; it is, after all, what
# `cat` is good at
test-concatenates-multiple-files()
{
  # reference output from the system binary
  _readall expected < <( command cat <( echo -n foo ) <( echo "Bar" ) )

  # output of the implementation under test
  _readall given < <( cat <( echo -n foo ) <( echo "Bar" ) )

  _chk "$given" "$expected"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# By convention, `-` stands for stdin and may appear anywhere in the list
# of operands
test-can-read-stdin-via-dash()
{
  # reference output from the system binary
  _readall expected < <( command cat <( echo -n foo ) - <<< "Baz" )

  # output of the implementation under test
  _readall given < <( cat <( echo -n foo ) - <<< "Baz" )

  _chk "$given" "$expected"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# With no operands at all, input is taken from stdin
test-defaults-to-stdin()
{
  # reference output from the system binary
  _readall expected < <( command cat <<< "foo" )

  # output of the implementation under test
  _readall given < <( cat <<< "foo" )

  _chk "$given" "$expected"
}
|
||||||
|
|
||||||
|
|
||||||
|
##
# Options are not currently handled by the shell implementation; they are
# left to the system binary
test-any-option-defers-to-binary()
{
  _readall given < <( cat -E <<< "foo" )

  # this comparison also establishes that quickcat was not called, since
  # its marker would otherwise trail the output; see _callset
  assert "$given" == 'foo$'$'\n'
}
|
||||||
|
|
||||||
|
|
||||||
|
# Run every test case defined above
test-concatenates-multiple-files
test-can-read-stdin-via-dash
test-defaults-to-stdin
test-retains-leading-trailing-whitespace
test-does-not-add-trailing-whitespace
test-any-option-defers-to-binary
|
||||||
|
|
Loading…
Reference in New Issue