From 7573a827a0993fbb795c3351140eb5427ac5692a Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Tue, 17 Jun 2014 23:48:13 -0400 Subject: [PATCH] Initial implementation of cat and friends This is intended to be a very basic subset (for now) of cat that will be more efficient for general I/O (mainly pipelines between shell functions) than spawning a process. Benchmarks do show that it is definitely not always worth the trade off, but those situations are less likely to occur (large inputs) and, if they do, the author can be aware of it and use a function that will prevent the builtin from being used (I'll provide that as well, instead of `command cat`). I'll be writing an article on this with benchmarks to rationalize and explain in depth my approach. --- src/coreutils/cat.sh | 97 +++++++++++++++++++++++ test/coreutils/test-cat-until.sh | 92 ++++++++++++++++++++++ test/coreutils/test-cat.sh | 131 +++++++++++++++++++++++++++++++ 3 files changed, 320 insertions(+) create mode 100644 src/coreutils/cat.sh create mode 100644 test/coreutils/test-cat-until.sh create mode 100644 test/coreutils/test-cat.sh diff --git a/src/coreutils/cat.sh b/src/coreutils/cat.sh new file mode 100644 index 0000000..52c3a93 --- /dev/null +++ b/src/coreutils/cat.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Bash alternative to external cat call +# +# Copyright (C) 2014 Mike Gerwitz +# +# This file is part of pkgsh. +# +# pkgsh is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>.
+#
+# N.B. While this shell implementation may be faster for most use cases, the
+# external binary will likely be much faster for large streams.
+#
+# This also contains some convenience functions that are not part of
+# GNU coreutils.
+##
+
+[ -z "${__PKGSH_INC_COREUTILS_CAT:-}" ] || return 0  # already sourced: no-op
+__PKGSH_INC_COREUTILS_CAT=1
+
+
+##
+# Echo characters from stdin (or from the file $2) up to, but not including,
+# the delimiter $1; returns non-zero if the delimiter was never reached
+cat-until()
+{
+    local -r delim="${1?Missing terminating delimiter}"
+    local -r file="${2:-/dev/stdin}"
+    local REPLY=  # never echo a stale REPLY if "$file" cannot be opened
+    read -rd "$delim" < "$file"
+    local -ri result=$?
+    # printf, not echo: data such as `-n` or `-e` must not be taken as options
+    printf '%s' "$REPLY"
+    return $result
+}
+
+
+##
+# Echo characters from stdin up to and including the provided delimiter
+cat-until-incl()
+{
+    # `cat-until` will validate; the delimiter is appended only on its success
+    local -r delim="$1"
+    cat-until "$@" \
+        && printf '%s' "$delim"
+}
+
+
+##
+# Proxies to the system binary when $1 looks like a short option (`-x`, but
+# not `--long`), otherwise to quickcat; only one runs and sets the status
+cat()
+{
+    # not `A && B || C`: a failing system cat must not also run quickcat
+    [[ "$1" =~ ^-[^-\ ] ]] || { quickcat "$@"; return; }
+    command cat "$@"
+}
+
+
+##
+# Limited implementation of `cat` for performance; each operand is copied to
+# stdout in order, and `-` (or no operand at all) reads stdin
+#
+# TODO: The proper research has not yet gone into optimizing this; this is
+# just an initial implementation. I will be addressing this shortly.
+#
+# TODO: Exit status.
+quickcat()
+{
+    local in="${1:-/dev/stdin}"
+    [ "$in" == - ] && in=/dev/stdin
+    readonly in
+    # printf rather than echo: lines such as `-n` or `-e` are data, not options
+    while true; do
+        IFS= read -r || {
+            printf '%s' "$REPLY"  # EOF: flush any unterminated final line
+            break
+        }
+
+        printf '%s\n' "$REPLY"
+    done < "$in"
+
+    if shift && [ $# -ne 0 ]; then  # operands remain: recurse on the rest
+        quickcat "$@"
+    fi
+}
+
diff --git a/test/coreutils/test-cat-until.sh b/test/coreutils/test-cat-until.sh
new file mode 100644
index 0000000..c1ce282
--- /dev/null
+++ b/test/coreutils/test-cat-until.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# Tests cat-until{,-incl}
+#
+# Copyright (C) 2014 Mike Gerwitz
+#
+# This file is part of pkgsh.
+#
+# pkgsh is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##
+
+source src/coreutils/cat.sh
+
+# The delimiter ends the output and is not itself echoed
+test-cat-until-no-delim()
+{
+    local result
+    result="$( cat-until X <<< fooXbar )" \
+        || assert -z "non-zero exit"
+
+    assert "$result" == foo
+}
+
+# The -incl variant echoes the delimiter as well
+test-cat-until-incl-has-delim()
+{
+    local result
+    result="$( cat-until-incl X <<< fooXbar )" \
+        || assert -z "non-zero exit"
+
+    assert "$result" == fooX
+}
+
+# Input may also come from a path given as $2 (a process substitution here)
+test-cat-until-from-file()
+{
+    local result
+    result="$( cat-until X <( echo fooXbar ) )" \
+        || assert -z "non-zero exit"
+
+    assert "$result" == foo
+}
+
+# Reading from a path given as $2 works for the -incl variant too
+test-cat-until-incl-from-file()
+{
+    local result
+    result="$( cat-until-incl X <( echo fooXbar ) )" \
+        || assert -z "non-zero exit"
+
+    assert "$result" == fooX
+}
+
+# Without a delimiter in the input, everything is echoed and the status is 1
+test-cat-until-echoes-all-on-no-delim()
+{
+    local result
+    result="$( cat-until X <<< "foobar" )" \
+        && assert -z "returned successfully on missing delim"
+    # when cat-until fails the && list stops there, so $? is still its status
+    assert $? -eq 1
+    assert "$result" == foobar
+}
+
+# Without a delimiter, the -incl variant must not append one to the output
+test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim()
+{
+    local result
+    result="$( cat-until-incl X <<< "foobar" )" \
+        && assert -z "returned successfully on missing delim"
+
+    assert "$result" == foobar
+}
+
+
+test-cat-until-no-delim
+test-cat-until-incl-has-delim
+test-cat-until-from-file
+test-cat-until-incl-from-file
+test-cat-until-echoes-all-on-no-delim
+test-cat-until-incl-echoes-all-on-no-delim-without-trailing-delim
+
diff --git a/test/coreutils/test-cat.sh b/test/coreutils/test-cat.sh
new file mode 100644
index 0000000..ff443c1
--- /dev/null
+++ b/test/coreutils/test-cat.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# Tests bash implementation of cat
+#
+# Copyright (C) 2014 Mike Gerwitz
+#
+# This file is part of pkgsh.
+#
+# pkgsh is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+##
+
+source src/coreutils/cat.sh
+
+set -T  # functrace: functions and subshells inherit the RETURN trap below
+
+declare -r qcalled=$'\001'  # marker byte _callset emits once quickcat has run
+trap _callset RETURN
+
+_callset()
+{
+    # emit a marker rather than set a var: we're likely in a subshell
+    if [ "${FUNCNAME[1]}" == quickcat ]; then
+        # quickcat recurses; report only when the outermost call returns
+        if [ "${FUNCNAME[2]}" != quickcat ]; then
+            echo -n "$qcalled"
+        fi
+    fi
+}
+
+_chk()
+{
+    local given="$1" expected="$2"
+    # the quickcat marker must be the very last byte of the output
+    [[ "$given" =~ "$qcalled"$ ]] \
+        || assert -z "quickcat not called: $given"
+    # drop the marker before comparing with the expected output
+    given="${given%$qcalled}"
+
+    assert "$given" == "$expected"
+}
+
+# Read all of stdin (up to a NUL byte, if any) into the variable named by $1
+_readall()
+{
+    local -r var="$1"
+    IFS= read -rd '' "$var" || true  # EOF makes read fail; that is expected
+}
+
+
+##
+# Leading and trailing whitespace, be it spaces, newlines, or otherwise,
+# should be retained (something that the shell does not normally like doing)
+test-retains-leading-trailing-whitespace()
+{
+    _readall val < <( echo -e "\n foo \n" )
+    _readall expected < <( command cat <( echo -n "$val" ) )
+    _readall given < <( cat <( echo -n "$val" ) )
+    _chk "$given" "$expected"
+}
+
+
+##
+# If there is no trailing newline, one should not be added
+test-does-not-add-trailing-whitespace()
+{
+    _readall expected < <( command cat <( echo -n foo ) )
+    _readall given < <( cat <( echo -n foo ) )
+    _chk "$given" "$expected"
+}
+
+
+##
+# It is, after all, what `cat` is good at
+test-concatenates-multiple-files()
+{
+    _readall expected < <( command cat <( echo -n foo ) <( echo "Bar" ) )
+    _readall given < <( cat <( echo -n foo ) <( echo "Bar" ) )
+    _chk "$given" "$expected"
+}
+
+
+##
+# By convention, `-` means `stdin` and can appear anywhere in the file
+# list
+test-can-read-stdin-via-dash()
+{
+    _readall expected < <( command cat <( echo -n foo ) - <<< "Baz" )
+    _readall given < <( cat <( echo -n foo ) - <<< "Baz" )
+    _chk "$given" "$expected"
+}
+
+
+##
+# If no arguments are provided, input is accepted from `stdin`
+test-defaults-to-stdin()
+{
+    _readall expected < <( command cat <<< "foo" )
+    _readall given < <( cat <<< "foo" )
+    _chk "$given" "$expected"
+}
+
+
+##
+# We do not currently handle any options; defer to system binary
+test-any-option-defers-to-binary()
+{
+    _readall given < <( cat -E <<< "foo" )
+
+    # note that this assertion will implicitly ensure that quickcat was not
+    # called: _callset would have appended its marker byte to $given
+    assert "$given" == 'foo$'$'\n'
+}
+
+
+test-concatenates-multiple-files
+test-can-read-stdin-via-dash
+test-defaults-to-stdin
+test-retains-leading-trailing-whitespace
+test-does-not-add-trailing-whitespace
+test-any-option-defers-to-binary
+