1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # ./ere-char-class-literals.sh <function name>
|
5 |
|
# Shell options: a failing stage fails the whole pipeline, and expanding an
# unset variable is an error.
set -o pipefail
set -o nounset
#set -o errexit

# Fixture file written by setup and read by the surveys.  The path is
# relative to the current directory.
FILE=_tmp/ere-test.txt
readonly FILE
|
11 |
|
# Create the fixture file read by the surveys, then dump it with 'od -c'.
#
# Globals:
#   FILE (read) - path of the fixture file; it is overwritten
# Outputs:
#   'od -c' dump of the fixture on stdout
# Returns:
#   non-zero if the directory cannot be created, otherwise od's exit status
setup() {
  # The redirect below fails when the parent directory is missing (e.g. a
  # fresh checkout), so create it first.
  local dir
  dir=$(dirname -- "$FILE")
  mkdir -p -- "$dir" || return

  {
    cat <<'EOF'
aaa
b-b
ccc
^ caret
\ backslash
[ left bracket
] right bracket
EOF

    # embedded NUL
    # OSH Python bindings don't like this!  gah!
    #echo -e 'NUL \x00 NUL'

    # A line that starts with the raw byte 0xff.
    printf '\xff 0xff\n'
  } > "$FILE"

  od -c -- "$FILE"
}
|
33 |
|
# Print every line of $FILE that matches an ERE, using the running shell's
# own [[ =~ ]] operator.
#
# Arguments:
#   $1 - extended regular expression
# Globals:
#   FILE (read) - file whose lines are tested
# Outputs:
#   matching lines, byte-for-byte, one per line on stdout
survey-shell() {
  local ere=$1
  local line

  # IFS= keeps leading/trailing whitespace; the || clause also processes a
  # final line that has no trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    # $ere must stay unquoted so that it is treated as a regex, not a string.
    if [[ $line =~ $ere ]]; then
      # Quoted printf: no word splitting, no globbing, no echo option parsing.
      printf '%s\n' "$line"
    fi
  done < "$FILE"
}
|
43 |
|
# Run one ERE against $FILE with several regex engines and print what each
# one matches, under a heading per engine.
#
# Arguments:
#   $1 - extended regular expression to try
# Globals:
#   FILE (read) - fixture file searched by every engine
# Outputs:
#   the pattern between '====' rules, then each engine's matching lines
# Returns:
#   exit status of the final (OSH) run
survey() {
  local ere=$1

  echo ====
  printf '%s\n' "$ere"
  echo ====

  # The pattern is spliced into a /.../ regex literal.  It is quoted so the
  # shell neither word-splits nor glob-expands it on the way to awk.
  # NOTE(review): presumably kept as a literal (not awk -v) because the point
  # is to see how each awk parses the pattern text itself.

  # Supports \ escapes
  echo ' GAWK'
  gawk 'match($0, /'"$ere"'/, m) { print $0 }' "$FILE"

  # Supports \ escapes
  echo ' MAWK'
  mawk '$0 ~ /'"$ere"'/ { print $0 }' "$FILE"

  # grep -E replaces the obsolescent egrep; -e protects patterns that begin
  # with '-'.
  echo ' EGREP'
  grep -E -e "$ere" -- "$FILE"

  echo ' BASH'
  survey-shell "$ere"

  # Re-run this script under OSH so that its [[ =~ ]] is the one tested.
  echo ' OSH'
  bin/osh "$0" survey-shell "$ere"
}
|
68 |
|
# Run the full set of character-class surveys, ending with a raw 0xff byte
# tried first under the inherited locale and then under LANG=C.
#
# Globals:
#   LANG (read, then set to C and exported; the change persists after return)
# Outputs:
#   everything printed by each survey call, plus the LANG values in effect
test-ere() {
  survey '[-]'

  #survey '^'  # beginning of line
  #survey '[^]'  # invalid

  # OK this seems to work, and doesn't include \
  survey '\^'

  # searches for backslash AND caret, except for gawk
  survey '[\^]'

  survey '[]]'
  survey '[[]'

  # are hex escapes supported?  GAWK only!
  survey '[\x2d]'

  # gawk has problems because of extension!!!  Must escape
  #survey '[\]'

  survey '[\\]'

  # The single raw byte 0xff, used as the only member of a bracket expression.
  local high_byte=$'\xff'

  # single byte matching doesn't work with en_US.UTF-8 !
  # ${LANG-} so that an unset LANG does not abort the run under nounset.
  echo "LANG=${LANG-}"
  survey "[$high_byte]"

  # it DOES work with LANG=C
  # Exported explicitly so the external tools see it even when LANG was not
  # already in the environment.
  # NOTE(review): a set LC_ALL or LC_CTYPE would still take precedence.
  export LANG=C
  echo "LANG=$LANG"
  declare -p LANG  # it's exported!
  survey "[$high_byte]"

  #survey "$(echo -e '\xff')"
}
|
105 |
|
106 |
|
# Forward all arguments, unchanged, to the spec/bin/argv.py helper.  The
# helper path is relative to the current directory.
argv() {
  local helper=spec/bin/argv.py
  "$helper" "$@"
}
|
110 |
|
# Show how bash numbers capture groups in BASH_REMATCH by matching 'abc123'
# against a series of nested-group patterns and printing each result through
# argv.
#
# Outputs:
#   one argv call per pattern: whole match first, then the captures
# Returns:
#   exit status of the last argv call
ere-capture-order() {
  # It feels like it's a depth first walk
  # It basically goes in order of the (

  local pat
  local -a patterns=(
    '([a-z]+)([0-9]+)'
    '(([a-z])([a-z])[a-z])([0-9]+)'
    # Extra parens DOES create a new capture
    '((([a-z]))([a-z])[a-z])([0-9]+)'
    '(([a-z][a-z])[a-z])([0-9]+)'
    '(([a-z]([a-z]))[a-z])(([0-9]+))'
  )

  for pat in "${patterns[@]}"; do
    # $pat stays unquoted so that it is treated as a regex.
    [[ abc123 =~ $pat ]]
    argv "${BASH_REMATCH[@]}"
  done
}
|
131 |
|
132 | "$@"
|
133 |
|