| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Usage:
 | 
| 4 | #   ./ere-char-class-literals.sh <function name>
 | 
| 5 | 
 | 
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails.  errexit is left disabled.
set -o nounset -o pipefail
#set -o errexit

# Path (relative to the current directory) of the file of sample lines that
# setup writes and the survey functions read.
FILE=_tmp/ere-test.txt
readonly FILE
 | 
| 11 | 
 | 
# Write the file of sample lines that the ERE surveys are matched against,
# then dump it with od so the raw bytes (including 0xff) are visible.
#
# Globals:  FILE (read) - path of the sample file to (re)create
# Outputs:  the `od -c` dump of the sample file on stdout
setup() {
  # The redirect below fails if the parent directory (e.g. _tmp/) does not
  # exist yet, so create it first.
  mkdir -p "$(dirname "$FILE")"

  {
    cat <<'EOF'
aaa
b-b
ccc
^ caret
\ backslash
[ left bracket
] right bracket
EOF

    # embedded NUL
    # OSH Python bindings don't like this!  gah!
    #echo -e 'NUL \x00 NUL'

    # A line that starts with the single raw byte 0xff.
    printf '\xff 0xff\n'
  } > "$FILE"

  od -c "$FILE"
}
 | 
| 33 | 
 | 
# Print every line of $FILE that matches the given ERE, using the running
# shell's own [[ =~ ]] operator.
#
# Globals:    FILE (read) - file whose lines are tested
# Arguments:  $1 - the extended regular expression
# Outputs:    matching lines, verbatim, on stdout
survey-shell() {
  local ere=$1
  local line

  # IFS= preserves leading/trailing whitespace; -r keeps backslashes literal.
  while IFS= read -r line; do
    # $ere stays unquoted on the right of =~ so it is treated as a regex
    # rather than a literal string.
    if [[ $line =~ $ere ]]; then
      # Quoted printf: the line is not word-split, glob-expanded, or
      # mistaken for an echo option.
      printf '%s\n' "$line"
    fi
  done < "$FILE"
}
 | 
| 43 | 
 | 
# Run one ERE against $FILE with several engines and print the lines each
# one matches, under a heading per engine.
#
# Globals:    FILE (read) - file of sample lines to search
# Arguments:  $1 - the extended regular expression to try
# Outputs:    a banner with the pattern, then per-engine matches on stdout
survey() {
  local ere=$1

  echo ====
  echo "$ere"
  echo ====

  # The pattern is spliced into the awk program text as a regex literal on
  # purpose (that syntax is what is being surveyed).  It is double-quoted so
  # the shell does not word-split or glob-expand it first.

  # Supports \ escapes
  echo '    GAWK'
  gawk 'match($0, /'"$ere"'/, m) { print $0 }' "$FILE"

  # Supports \ escapes
  echo '    MAWK'
  mawk '$0 ~ /'"$ere"'/ { print $0 }' "$FILE"

  # grep -E is the non-obsolescent spelling of egrep; -e keeps a pattern
  # that begins with '-' from being parsed as an option.
  echo '    EGREP'
  grep -E -e "$ere" "$FILE"

  echo '    BASH'
  survey-shell "$ere"

  # Re-run this script under OSH so survey-shell uses OSH's regex matching.
  echo '    OSH'
  bin/osh "$0" survey-shell "$ere"
}
 | 
| 68 | 
 | 
# Run survey over a series of bracket-expression patterns that contain
# characters which are awkward inside [...] ( - ^ \ ] [ and a raw 0xff byte),
# the last one under both the inherited locale and LANG=C.
#
# Globals:  LANG (read, then set to C for the final survey)
# Outputs:  the output of each survey call, plus the LANG values, on stdout
test-ere() {

  survey '[-]'

  #survey '^'  # beginning of line
  #survey '[^]'  # invalid

  # OK this seems to work, and doesn't include \
  survey '\^'

  # searches for backslash AND caret, except for gawk
  survey '[\^]'

  survey '[]]'
  survey '[[]'

  # are hex escapes supported?  GAWK only!
  survey '[\x2d]'

  # gawk has problems because of extension!!!  Must escape
  #survey '[\]'

  survey '[\\]'

  # single byte matching doesn't work with en_US.UTF-8 !
  # ${LANG-} instead of $LANG: the script runs with nounset, and LANG may be
  # unset in the calling environment.
  echo "LANG=${LANG-}"
  survey "[$(echo -e '\xff')]"

  # it DOES work with LANG=C
  LANG=C
  echo "LANG=${LANG-}"
  declare -p LANG  # it's exported!
  survey "[$(echo -e '\xff')]"

  #survey "$(echo -e '\xff')"
}
 | 
| 105 | 
 | 
| 106 | 
 | 
# Forward every argument, unchanged, to the spec/bin/argv.py helper script
# (the path is relative to the current directory).
argv() {
  local -r helper=spec/bin/argv.py
  "$helper" "$@"
}
 | 
| 110 | 
 | 
# Show how bash numbers capture groups: match the fixed string abc123
# against several nested-group patterns and pass the resulting BASH_REMATCH
# array to argv for display.
#
# Outputs:  whatever argv prints for each pattern, in order
ere-capture-order() {
  # It feels like it's a depth first walk
  # It basically goes in order of the (

  # Locals, so neither the pattern nor the list leaks into the caller's scope.
  local pat
  local -a patterns=(
    '([a-z]+)([0-9]+)'
    '(([a-z])([a-z])[a-z])([0-9]+)'
    # Extra parens DOES create a new capture
    '((([a-z]))([a-z])[a-z])([0-9]+)'
    '(([a-z][a-z])[a-z])([0-9]+)'
    '(([a-z]([a-z]))[a-z])(([0-9]+))'
  )

  for pat in "${patterns[@]}"; do
    # $pat is unquoted so =~ treats it as a regex.
    [[ abc123 =~ $pat ]]; argv "${BASH_REMATCH[@]}"
  done
}
 | 
| 131 | 
 | 
# Run the script's arguments as a command: per the Usage header, the first
# argument is the name of a function defined above and the rest are passed
# to it.  With no arguments this expands to nothing and nothing is run.
"$@"
 | 
| 133 | 
 |