#!/usr/bin/env bash
#
# demo/ere-char-class-literals.sh (Oils)
#
# Compare how gawk, mawk, egrep (grep -E), bash and OSH match literal
# characters (- ^ \ [ ] and the byte 0xff) with ERE patterns, mostly inside
# bracket expressions.
#
# Usage:
#   ./ere-char-class-literals.sh <function name>

set -o nounset
set -o pipefail
# NOTE(review): errexit is left disabled here; presumably because "no match"
# exit codes from the surveyed tools are expected -- confirm before enabling.
#set -o errexit

# Fixture file written by setup() and read by the survey functions.
# The path is relative to the current working directory.
readonly FILE=_tmp/ere-test.txt
11
# Create the fixture file $FILE and dump it with od -c.
#
# Globals:  FILE (read) - path of the fixture file to (re)create
# Outputs:  the od -c dump of the fixture on stdout
# Returns:  non-zero if the parent directory cannot be created; otherwise
#           the status of od
setup() {
  # The redirect below fails when the parent directory does not exist yet.
  mkdir -p -- "$(dirname -- "$FILE")" || return

  {
    cat <<'EOF'
aaa
b-b
ccc
^ caret
\ backslash
[ left bracket
] right bracket
EOF

    # embedded NUL
    # OSH Python bindings don't like this!  gah!
    #echo -e 'NUL \x00 NUL'

    # A lone 0xff byte followed by a label; printf is used rather than
    # echo -e because its escape handling is well defined.
    printf '\xff 0xff\n'

  } > "$FILE"

  od -c -- "$FILE"
}
33
# Print every line of $FILE that matches an ERE, using the running shell's
# own [[ =~ ]] operator.  survey() calls this directly and also re-runs it
# under bin/osh.
#
# Globals:    FILE (read) - file whose lines are tested
# Arguments:  $1 - the ERE, used unquoted on the right of =~ so that it is
#                  treated as a regex
# Outputs:    matching lines on stdout, unmodified
survey-shell() {
  local ere=$1
  local line

  # IFS= keeps leading/trailing whitespace and -r keeps backslashes, so each
  # line is tested exactly as stored.  The || clause also processes a final
  # line that has no trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ $line =~ $ere ]]; then
      # Quoted printf: no word splitting, no globbing, no echo option parsing.
      printf '%s\n' "$line"
    fi
  done < "$FILE"
}
43
# Run one ERE against $FILE with gawk, mawk, grep -E, the current shell and
# OSH, printing a labelled section for each engine.
#
# Globals:    FILE (read) - file to search
# Arguments:  $1 - the ERE to try
# Outputs:    a banner with the pattern, then each engine's matching lines
# Returns:    the exit status of the final bin/osh invocation
survey() {
  local ere=$1

  echo ====
  printf '%s\n' "$ere"
  echo ====

  # The pattern is spliced into the awk program text on purpose: the point is
  # to see how each awk parses it as a /regex literal/.  It is quoted so that
  # whitespace or glob characters in the pattern cannot split or expand the
  # program argument.

  # Supports \ escapes
  echo ' GAWK'
  gawk 'match($0, /'"$ere"'/, m) { print $0 }' "$FILE"

  # Supports \ escapes
  echo ' MAWK'
  mawk '$0 ~ /'"$ere"'/ { print $0 }' "$FILE"

  # grep -E is the non-obsolescent spelling of egrep; -- protects patterns
  # that begin with a dash.
  echo ' EGREP'
  grep -E -- "$ere" "$FILE"

  echo ' BASH'
  survey-shell "$ere"

  # Re-run this script's survey-shell function under OSH.
  echo ' OSH'
  bin/osh "$0" survey-shell "$ere"
}
68
# Drive survey() over a series of patterns that put special characters
# (- ^ \ [ ] and the byte 0xff) inside or next to bracket expressions.
# The 0xff pattern is surveyed twice: once with the inherited LANG and once
# with LANG=C.
#
# Globals:  LANG (read, then set to C and exported for the rest of the run)
# Outputs:  whatever survey prints, plus the LANG values in effect
test-ere() {
  # A single raw 0xff byte, used to build the last two patterns.
  local high_byte=$'\xff'

  survey '[-]'

  #survey '^'  # beginning of line
  #survey '[^]'  # invalid

  # OK this seems to work, and doesn't include \
  survey '\^'

  # searches for backslash AND caret, except for gawk
  survey '[\^]'

  survey '[]]'
  survey '[[]'

  # are hex escapes supported?  GAWK only!
  survey '[\x2d]'

  # gawk has problems because of extension!!!  Must escape
  #survey '[\]'

  survey '[\\]'

  # single byte matching doesn't work with en_US.UTF-8 !
  # ${LANG-} keeps this from aborting under nounset when LANG is not set.
  echo "LANG=${LANG-}"
  survey "[$high_byte]"

  # it DOES work with LANG=C
  # Exported explicitly so the external tools see it even when LANG was not
  # already in the environment.
  # NOTE(review): LC_ALL and LC_CTYPE take precedence over LANG, so this only
  # changes matching when those are unset.
  export LANG=C
  echo "LANG=$LANG"
  declare -p LANG  # it's exported!
  survey "[$high_byte]"

  #survey "$(echo -e '\xff')"
}
105
106
# Forward all arguments, unchanged, to the spec/bin/argv.py helper.
# The helper path is relative to the current working directory.
argv() { spec/bin/argv.py "$@"; }
110
# Show the order in which bash numbers ERE capture groups by matching the
# fixed string abc123 against several nested-group patterns and passing
# BASH_REMATCH to argv after each match.
#
# Outputs:  whatever argv prints for each pattern's BASH_REMATCH array
ere-capture-order() {
  # It feels like it's a depth first walk
  # It basically goes in order of the (

  # pat is local so the function no longer leaks a global variable.
  local pat

  # The third pattern shows that extra parens DO create a new capture.
  local -a patterns=(
    '([a-z]+)([0-9]+)'
    '(([a-z])([a-z])[a-z])([0-9]+)'
    '((([a-z]))([a-z])[a-z])([0-9]+)'
    '(([a-z][a-z])[a-z])([0-9]+)'
    '(([a-z]([a-z]))[a-z])(([0-9]+))'
  )

  for pat in "${patterns[@]}"; do
    # Only report captures from a successful match; on failure BASH_REMATCH
    # does not describe this pattern.
    if [[ abc123 =~ $pat ]]; then
      argv "${BASH_REMATCH[@]}"
    else
      printf 'no match: %s\n' "$pat" >&2
    fi
  done
}
131
# Dispatch: run the function named by the first command-line argument,
# passing any remaining arguments to it.
"$@"