diff --git a/regex/bitwise.sed b/regex/bitwise.sed index a18b0b2..05c4c59 100644 --- a/regex/bitwise.sed +++ b/regex/bitwise.sed @@ -35,6 +35,14 @@ # # will perform a logical left shift to produce `11111110'. # +# Equality is a special operator: we first XOR the two values and then check +# to see if all bits are unset. If so, then the values are identical, and +# all bits are set; otherwise, all bits are cleared. For example: +# +# = 11100011 11100011 +# +# will result in `11111111'. A non-match results in `00000000'. +# # The regexes below all follow common patterns. To make that pattern clear, # some regexes may do useless things (e.g. `.\{0\}') so that they are # well-aligned. @@ -68,31 +76,31 @@ s/^\(& .\{5\}\)1\(.\{8\}0\)/\1.\2/ s/^\(& .\{6\}\)1\(.\{8\}0\)/\1.\2/ s/^\(& .\{7\}\)1\(.\{8\}0\)/\1.\2/ -# Bitwise OR (|) or XOR (^). This logic is shared for both operations (see -# XOR below). If the bit in A is already set, then we need not do anything, -# because the result will always be set. Otherwise, we need only set the bit -# if the respective bit in B is set. -s/^\([|^] .\{0\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{1\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{2\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{3\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{4\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{5\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{6\}\)0\(.\{8\}1\)/\1:\2/ -s/^\([|^] .\{7\}\)0\(.\{8\}1\)/\1:\2/ +# Bitwise OR (|), XOR (^), equality (=). This logic is shared for each +# operation (see XOR and equality below). If the bit in A is already set, +# then we need not do anything, because the result will always be set. +# Otherwise, we need only set the bit if the respective bit in B is set. 
+s/^\([=|^] .\{0\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{1\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{2\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{3\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{4\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{5\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{6\}\)0\(.\{8\}1\)/\1:\2/
+s/^\([=|^] .\{7\}\)0\(.\{8\}1\)/\1:\2/
 
-# Bitwise XOR (^). We must perform two steps: first, if a bit in A is clear,
-# then it should be set if the respective bit in B is set; this logic
-# is handled above in OR. Otherwise, if A is set, then it should be cleared
-# if the respective bit in B is also set.
-s/^\(\^ .\{0\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{1\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{2\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{3\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{4\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{5\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{6\}\)1\(.\{8\}1\)/\1.\2/
-s/^\(\^ .\{7\}\)1\(.\{8\}1\)/\1.\2/
+# Bitwise XOR (^), equality (=). We must perform two steps: first, if a bit
+# in A is clear, then it should be set if the respective bit in B is set;
+# this logic is handled above in OR. Otherwise, if A is set, then it should
+# be cleared if the respective bit in B is also set.
+s/^\([=^] .\{0\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{1\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{2\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{3\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{4\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{5\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{6\}\)1\(.\{8\}1\)/\1.\2/
+s/^\([=^] .\{7\}\)1\(.\{8\}1\)/\1.\2/
 
 # Bitwise NOT (~). This is a unary operation. A bit in A is set if it is
 # clear and vice-versa.
@@ -124,8 +132,20 @@ s/^a \(.\)\(.\{6\}\)/a \1\1\2/
 s/^r \(.\)\(.\{7\}\)/r \2\1/
 s/^R \(.\{7\}\)\(.\)/R \2\1/
 
-
-# Prepare the final output by discarding the command and second byte, and
-# then replacing the temporary values `:' and `.' with their respective bits.
-s/^. \(.\{8\}\).*/\1/
+# Replace the intermediate values `:' and `.' with their respective
+# bits. We must do this _before_ the equality check.
 s/:/1/g; s/\./0/g
+
+# Equality check (=). Since we already replaced the intermediate values
+# above, we now have the final result of an XOR. If all bits are _clear_
+# (A^B=0), then A=B: we set all bits and swap the operator for the visible
+# placeholder `_', so the second rule cannot also fire. Otherwise `=' is
+# still there and we clear all bits. Both keep the "OP A B" layout intact.
+s/^= 0\{8\}/_ 11111111/
+s/^= .\{8\}/= 00000000/
+
+# Prepare the final output by discarding the command and second byte.
+s/^. \(.\{8\}\).*/\1/
+
+# Exit with code 1 so that the animate script knows we're done.
+q1
diff --git a/regex/cmp.sed b/regex/cmp.sed
new file mode 100644
index 0000000..bd86f5c
--- /dev/null
+++ b/regex/cmp.sed
@@ -0,0 +1,105 @@
+# Single step in case-sensitive comparison of two ASCII-subset strings
+#
+# Copyright (C) 2018 Mike Gerwitz
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see https://www.gnu.org/licenses/.
+#
+# This script compares the first character of two strings. Since this is
+# just for simple illustration, we limit ourselves to [A-Za-z_-]. Applied
+# recursively, this compares entire strings one character at a time.
+#
+# Input must be limited to the aforementioned ASCII subset and must be
+# space-delimited. For example:
+#
+#   foo_bar foobar
+#
+# will yield, in succession:
+#
+#   oo_bar oobar
+#   o_bar obar
+#   _bar bar
+#   non-match
+#
+# whereas two identical strings ``foo'' will yield:
+#
+#   foo foo
+#   oo oo
+#   o o
+#   match
+#
+# Using this method, we must have one regex per character. That is not all
+# that bad if we limit ourselves to the printable ASCII range (though in
+# that case we'd have to use a non-printable character rather than `!' for
+# non-matches below). If we wanted to compare Unicode, though, then we'd
+# have to do so byte-by-byte rather than character-by-character.
+#
+# There are other methods to test for equivalency; this is just one
+# intuitive way of doing so. Another option, for example, is to convert
+# them to binary and check that A^B=0 (see `bitwise.sed').
+#
+# If all possible strings are known ahead of time, we could also make such
+# comparisons directly. This would require O(1) steps rather than O(n).
+##
+
+# Quit if we do not have two space-delimited values to compare. Note that
+# this will also be the case if we found a match or have determined that we
+# have a non-match (the output of a previous run).
+/^[^ ]\+$/q1
+
+# Check first character of both space-delimited strings, [A-Za-z_-],
+# replacing the pattern with a `!' in the case of a non-match. `!' was
+# chosen as a marker for non-matches rather than a non-printable character
+# because it's easily visualized; see comments above.
+s/^A.* [^A]/!/; s/^a.* [^a]/!/
+s/^B.* [^B]/!/; s/^b.* [^b]/!/
+s/^C.* [^C]/!/; s/^c.* [^c]/!/
+s/^D.* [^D]/!/; s/^d.* [^d]/!/
+s/^E.* [^E]/!/; s/^e.* [^e]/!/
+s/^F.* [^F]/!/; s/^f.* [^f]/!/
+s/^G.* [^G]/!/; s/^g.* [^g]/!/
+s/^H.* [^H]/!/; s/^h.* [^h]/!/
+s/^I.* [^I]/!/; s/^i.* [^i]/!/
+s/^J.* [^J]/!/; s/^j.* [^j]/!/
+s/^K.* [^K]/!/; s/^k.* [^k]/!/
+s/^L.* [^L]/!/; s/^l.* [^l]/!/
+s/^M.* [^M]/!/; s/^m.* [^m]/!/
+s/^N.* [^N]/!/; s/^n.* [^n]/!/
+s/^O.* [^O]/!/; s/^o.* [^o]/!/
+s/^P.* [^P]/!/; s/^p.* [^p]/!/
+s/^Q.* [^Q]/!/; s/^q.* [^q]/!/
+s/^R.* [^R]/!/; s/^r.* [^r]/!/
+s/^S.* [^S]/!/; s/^s.* [^s]/!/
+s/^T.* [^T]/!/; s/^t.* [^t]/!/
+s/^U.* [^U]/!/; s/^u.* [^u]/!/
+s/^V.* [^V]/!/; s/^v.* [^v]/!/
+s/^W.* [^W]/!/; s/^w.* [^w]/!/
+s/^X.* [^X]/!/; s/^x.* [^x]/!/
+s/^Y.* [^Y]/!/; s/^y.* [^y]/!/
+s/^Z.* [^Z]/!/; s/^z.* [^z]/!/
+s/^_.* [^_]/!/; s/^-.* [^-]/!/
+
+# If any of the above produced the non-match marker, replace the entire
+# output with ``non-match''.
+s/^!.*/non-match/
+
+# Otherwise, we're done comparing the first character of each string, so
+# discard them. We are then left with the remainder of each string (still
+# space-delimited), setting us up for comparing the next character. (Note
+# that this will only match if we still have a space, which won't be the
+# case if the match failed above.)
+s/^.\(.*\) ./\1 /
+
+# A lone space means both strings ran out together: that is a match. If
+# only one side ran out (``fo foo'' or ``foo fo''), the lengths differ.
+s/^ $/match/; s/^ .\+$/non-match/; s/^[^ ]\+ $/non-match/