regex: Add equality scripts
Continuing to build up a useful set of primitive examples. This type of thing is useful if we do not know all possible strings for comparison ahead of time.
* regex/bitwise.sed: Add equality operator. Modify existing OR and XOR regexes to accept this new operator.
* cmp.sed: New script.
master
parent
3f5c397e83
commit
3031364e14
|
@ -35,6 +35,14 @@
|
|||
#
|
||||
# will perform a logical left shift to produce `11111110'.
|
||||
#
|
||||
# Equality is a special operator: we first XOR the two values and then check
|
||||
# to see if all bits are unset. If so, then the values are identical, and
|
||||
# all bits are set; otherwise, all bits are cleared. For example:
|
||||
#
|
||||
# = 11100011 11100011
|
||||
#
|
||||
# will result in `11111111'. A non-match results in `00000000'.
|
||||
#
|
||||
# The regexes below all follow common patterns. To make that pattern clear,
|
||||
# some regexes may do useless things (e.g. `.\{0\}') so that they are
|
||||
# well-aligned.
|
||||
|
@ -68,31 +76,31 @@ s/^\(& .\{5\}\)1\(.\{8\}0\)/\1.\2/
|
|||
s/^\(& .\{6\}\)1\(.\{8\}0\)/\1.\2/
|
||||
s/^\(& .\{7\}\)1\(.\{8\}0\)/\1.\2/
|
||||
|
||||
# Bitwise OR (|) or XOR (^). This logic is shared for both operations (see
|
||||
# XOR below). If the bit in A is already set, then we need not do anything,
|
||||
# because the result will always be set. Otherwise, we need only set the bit
|
||||
# if the respective bit in B is set.
|
||||
s/^\([|^] .\{0\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{1\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{2\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{3\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{4\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{5\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{6\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([|^] .\{7\}\)0\(.\{8\}1\)/\1:\2/
|
||||
# Bitwise OR (|), XOR (^), equality (=). This logic is shared for each
|
||||
# operation (see XOR and equality below). If the bit in A is already set,
|
||||
# then we need not do anything here: for OR the result will always be set,
# and XOR and equality handle that case separately below.
|
||||
# Otherwise, we need only set the bit if the respective bit in B is set.
|
||||
s/^\([=|^] .\{0\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{1\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{2\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{3\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{4\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{5\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{6\}\)0\(.\{8\}1\)/\1:\2/
|
||||
s/^\([=|^] .\{7\}\)0\(.\{8\}1\)/\1:\2/
|
||||
|
||||
# Bitwise XOR (^). We must perform two steps: first, if a bit in A is clear,
|
||||
# then it should be set if the respective bit in B is set; this logic
|
||||
# is handled above in OR. Otherwise, if A is set, then it should be cleared
|
||||
# if the respective bit in B is also set.
|
||||
s/^\(\^ .\{0\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{1\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{2\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{3\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{4\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{5\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{6\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\(\^ .\{7\}\)1\(.\{8\}1\)/\1.\2/
|
||||
# Bitwise XOR (^), equality (=). We must perform two steps: first, if a bit
|
||||
# in A is clear, then it should be set if the respective bit in B is set;
|
||||
# this logic is handled above in OR. Otherwise, if A is set, then it should
|
||||
# be cleared if the respective bit in B is also set.
|
||||
s/^\([=^] .\{0\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{1\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{2\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{3\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{4\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{5\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{6\}\)1\(.\{8\}1\)/\1.\2/
|
||||
s/^\([=^] .\{7\}\)1\(.\{8\}1\)/\1.\2/
|
||||
|
||||
# Bitwise NOT (~). This is a unary operation. A bit in A is set if it is
|
||||
# clear and vice-versa.
|
||||
|
@ -124,8 +132,20 @@ s/^a \(.\)\(.\{6\}\)/a \1\1\2/
|
|||
s/^r \(.\)\(.\{7\}\)/r \2\1/
|
||||
s/^R \(.\{7\}\)\(.\)/R \2\1/
|
||||
|
||||
|
||||
# Prepare the final output by discarding the command and second byte, and
|
||||
# then replacing the temporary values `:' and `.' with their respective bits.
|
||||
s/^. \(.\{8\}\).*/\1/
|
||||
# Replace the intermediate values `:' and `.' with their respective
|
||||
# bits. We must do this _before_ the equality check.
|
||||
s/:/1/g; s/\./0/g
|
||||
|
||||
# Equality check (=). Since we already replaced the intermediate values
# above, we now have the final result of an XOR. If all bits are _clear_
# (A^B=0), that means A=B. Otherwise, they differ. If A=B, then we mark
# every bit of A with the intermediate `set' value `:'. Any A that is still
# made up solely of `0' and `1' was therefore not a match, and so we clear
# all of its bits. Finally, the `:' markers become set bits.
#
# The `= ' prefix is deliberately left intact throughout: the final output
# step expects a one-character command followed by a single space, and
# would not be able to strip the command and second byte otherwise.
s/^= 0\{8\}/= ::::::::/
s/^= [01]\{8\}/= 00000000/
s/:/1/g
|
||||
|
||||
# Prepare the final output by discarding the command and second byte.
|
||||
s/^. \(.\{8\}\).*/\1/
|
||||
|
||||
# Exit with code 1 so that the animate script knows we're done.
|
||||
q1
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
# Single step in case-sensitive comparison of two ASCII-subset strings
|
||||
#
|
||||
# Copyright (C) 2018 Mike Gerwitz
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# This script compares the first character of two strings. Since this is
|
||||
# just for simple illustration, we limit ourselves to [A-Za-z_-]. Applied
|
||||
# recursively, this compares entire strings one character at a time.
|
||||
#
|
||||
# Input must be limited to the aforementioned ASCII subset and must be
|
||||
# space-delimited. For example:
|
||||
#
|
||||
# foo_bar foobar
|
||||
#
|
||||
# will yield, in succession:
|
||||
#
|
||||
# oo_bar oobar
|
||||
# o_bar obar
|
||||
# _bar bar
|
||||
# non-match
|
||||
#
|
||||
# whereas two identical strings ``foo'' will yield:
|
||||
#
|
||||
# foo foo
|
||||
# oo oo
|
||||
# o o
|
||||
# match
|
||||
#
|
||||
# Using this method, we must have one regex per character. That is not all
|
||||
# that bad if we limit ourselves to the printable ASCII range (though in
|
||||
# that case we'd have to use a non-printable character rather than `!' for
|
||||
# non-matches below). If we wanted to compare Unicode, though, then we'd
|
||||
# have to do so byte-by-byte rather than character-by-character.
|
||||
#
|
||||
# There are other methods to test for equivalency; this is just one
|
||||
# intuitive way of doing so. Another option, for example, is to convert
|
||||
# them to binary and check that A^B=0 (see `bitwise.sed').
|
||||
#
|
||||
# If all possible strings are known ahead of time, we could also make such
|
||||
# comparisons directly. This would require O(1) steps rather than O(n).
|
||||
##
|
||||
|
||||
# Quit if we do not have two space-delimited values to compare. Note that
# this will also be the case if we found a match or have determined that we
# have a non-match (the output of a previous run). An empty line has no
# values at all, so it must quit too; otherwise nothing below would ever
# change it and this step could never signal completion.
/^[^ ]*$/q1
|
||||
|
||||
# Check first character of both space-delimited strings, [A-Za-z_-],
|
||||
# replacing the pattern with a `!' in the case of a non-match. `!' was
|
||||
# chosen as a marker for non-matches rather than a non-printable character
|
||||
# because it's easily visualized; see comments above.
|
||||
s/^A.* [^A]/!/; s/^a.* [^a]/!/
|
||||
s/^B.* [^B]/!/; s/^b.* [^b]/!/
|
||||
s/^C.* [^C]/!/; s/^c.* [^c]/!/
|
||||
s/^D.* [^D]/!/; s/^d.* [^d]/!/
|
||||
s/^E.* [^E]/!/; s/^e.* [^e]/!/
|
||||
s/^F.* [^F]/!/; s/^f.* [^f]/!/
|
||||
s/^G.* [^G]/!/; s/^g.* [^g]/!/
|
||||
s/^H.* [^H]/!/; s/^h.* [^h]/!/
|
||||
s/^I.* [^I]/!/; s/^i.* [^i]/!/
|
||||
s/^J.* [^J]/!/; s/^j.* [^j]/!/
|
||||
s/^K.* [^K]/!/; s/^k.* [^k]/!/
|
||||
s/^L.* [^L]/!/; s/^l.* [^l]/!/
|
||||
s/^M.* [^M]/!/; s/^m.* [^m]/!/
|
||||
s/^N.* [^N]/!/; s/^n.* [^n]/!/
|
||||
s/^O.* [^O]/!/; s/^o.* [^o]/!/
|
||||
s/^P.* [^P]/!/; s/^p.* [^p]/!/
|
||||
s/^Q.* [^Q]/!/; s/^q.* [^q]/!/
|
||||
s/^R.* [^R]/!/; s/^r.* [^r]/!/
|
||||
s/^S.* [^S]/!/; s/^s.* [^s]/!/
|
||||
s/^T.* [^T]/!/; s/^t.* [^t]/!/
|
||||
s/^U.* [^U]/!/; s/^u.* [^u]/!/
|
||||
s/^V.* [^V]/!/; s/^v.* [^v]/!/
|
||||
s/^W.* [^W]/!/; s/^w.* [^w]/!/
|
||||
s/^X.* [^X]/!/; s/^x.* [^x]/!/
|
||||
s/^Y.* [^Y]/!/; s/^y.* [^y]/!/
|
||||
s/^Z.* [^Z]/!/; s/^z.* [^z]/!/
|
||||
s/^_.* [^_]/!/; s/^-.* [^-]/!/
|
||||
|
||||
# If any of the above produced the non-match marker, replace the entire
|
||||
# output with ``non-match''.
|
||||
s/^!.*/non-match/
|
||||
|
||||
# Otherwise, we're done comparing the first character of each string, so
|
||||
# discard them. We are then left with the remainder of each string (still
|
||||
# space-delimited), setting us up for comparing the next character. (Note
|
||||
# that this will only match if we still have a space, which won't be the
|
||||
# case if the match failed above.)
|
||||
s/^.\(.*\) ./\1 /
|
||||
|
||||
# If all we are left with at this point is a single space, then both
# strings were exhausted together: all characters have been compared and a
# match has been found. The pattern is anchored with `$' so that leftover
# characters on either side are never mistaken for a match.
s/^ $/match/

# If exactly one side has been exhausted, the strings differ in length and
# cannot be equal. A leading space means the first string ran out while the
# second still has characters (e.g. `fo foo'); a trailing space means the
# second ran out first (e.g. `foo fo'). Without this, the latter would be
# left as `o ' indefinitely, since no other rule applies to it.
s/^ .\+$/non-match/
s/^.\+ $/non-match/
|
Loading…
Reference in New Issue