night/regex/cmp.sed

# Single step in case-sensitive comparison of two ASCII-subset strings
#
# Copyright (C) 2018 Mike Gerwitz
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This script compares the first character of two strings.  Since this is
# just for simple illustration, we limit ourselves to [A-Za-z_-].  Applied
# recursively, this compares entire strings one character at a time.
#
# Input must be limited to the aforementioned ASCII subset and must be
# space-delimited.  For example:
#
#   foo_bar foobar
#
# will yield, in succession:
#
#   oo_bar oobar
#   o_bar obar
#   _bar bar
#   non-match
#
# whereas two identical strings ``foo'' will yield:
#
#   foo foo
#   oo oo
#   o o
#   match
#
# Using this method, we must have one regex per character.  That is not all
# that bad if we limit ourselves to the printable ASCII range (though in
# that case we'd have to use a non-printable character rather than `!' for
# non-matches below).  If we wanted to compare Unicode, though, then we'd
# have to do so byte-by-byte rather than character-by-character.
#
# There are other methods to test for equivalency; this is just one
# intuitive way of doing so.  Another option, for example, is to convert
# them to binary and check that A^B=0 (see `bitwise.sed').
#
# If all possible strings are known ahead of time, we could also make such
# comparisons directly.  This would require O(1) steps rather than O(n).
#
# To observe this comparison used as part of a larger program, see
# `env-dyn.sed'.
##

# Quit with exit status 1 if the line does not hold two space-delimited
# values, i.e. if it contains no space at all.  That is the case for the
# terminal outputs of a previous run (``match'' or ``non-match''), and also
# for an empty line: `*' rather than `\+' is used so that empty input stops
# here too instead of passing through unchanged with a zero exit status.
# `q' prints the pattern space before exiting; the exit-code argument is a
# GNU extension.
/^[^ ]*$/q1

# Compare the leading character of the left-hand string with the leading
# character of the right-hand string.  There is one command per permitted
# character ([A-Za-z_-]): if the line begins with that character and the
# character following the delimiting space is anything else, the matched
# text is collapsed into a `!' marker.  `!' is used instead of a
# non-printable character purely because it is easy to see; it is safe
# only because `!' is outside the permitted input alphabet.
#
# At most one command can fire for a given line: each is anchored on a
# distinct first character, and once `!' leads the line none of them match.

# Uppercase letters.
s/^A.* [^A]/!/
s/^B.* [^B]/!/
s/^C.* [^C]/!/
s/^D.* [^D]/!/
s/^E.* [^E]/!/
s/^F.* [^F]/!/
s/^G.* [^G]/!/
s/^H.* [^H]/!/
s/^I.* [^I]/!/
s/^J.* [^J]/!/
s/^K.* [^K]/!/
s/^L.* [^L]/!/
s/^M.* [^M]/!/
s/^N.* [^N]/!/
s/^O.* [^O]/!/
s/^P.* [^P]/!/
s/^Q.* [^Q]/!/
s/^R.* [^R]/!/
s/^S.* [^S]/!/
s/^T.* [^T]/!/
s/^U.* [^U]/!/
s/^V.* [^V]/!/
s/^W.* [^W]/!/
s/^X.* [^X]/!/
s/^Y.* [^Y]/!/
s/^Z.* [^Z]/!/

# Lowercase letters.
s/^a.* [^a]/!/
s/^b.* [^b]/!/
s/^c.* [^c]/!/
s/^d.* [^d]/!/
s/^e.* [^e]/!/
s/^f.* [^f]/!/
s/^g.* [^g]/!/
s/^h.* [^h]/!/
s/^i.* [^i]/!/
s/^j.* [^j]/!/
s/^k.* [^k]/!/
s/^l.* [^l]/!/
s/^m.* [^m]/!/
s/^n.* [^n]/!/
s/^o.* [^o]/!/
s/^p.* [^p]/!/
s/^q.* [^q]/!/
s/^r.* [^r]/!/
s/^s.* [^s]/!/
s/^t.* [^t]/!/
s/^u.* [^u]/!/
s/^v.* [^v]/!/
s/^w.* [^w]/!/
s/^x.* [^x]/!/
s/^y.* [^y]/!/
s/^z.* [^z]/!/

# Underscore and hyphen.
s/^_.* [^_]/!/
s/^-.* [^-]/!/

# A leading `!' means one of the commands above detected differing first
# characters; replace the whole line with the final verdict ``non-match''.
s/^!.*/non-match/

# Otherwise, the first characters of both strings are equal, so drop one
# character from the front of each.  What remains is the tail of each
# string, still space-delimited, ready for the next step.  This requires a
# space followed by at least one character, so it leaves a ``non-match''
# verdict (which contains no space) untouched.
s/^.\(.*\) ./\1 /

# If nothing but the delimiter is left, both strings ran out at the same
# time and every character compared equal: report a match.  The pattern is
# anchored at both ends so that a line such as ` o' (left string exhausted,
# right string not) is NOT mistaken for a match.
s/^ $/match/

# If exactly one side has run out, the strings differ in length and so
# cannot be equal.  Without these rules, ``fo foo'' would be reported as a
# match and ``foo fo'' would stall at `o ' without ever reaching a verdict.
# Neither rule can re-match a verdict: ``match'' and ``non-match'' contain
# no space.
s/^ .\+$/non-match/
s/^[^ ]\+ $/non-match/