OILS / spec / ysh-regex.test.sh View on Github | oilshell.org

711 lines, 409 significant
1## oils_failures_allowed: 1
2
3#### /^.$/
# The literal form /^.$/ and the named form /%start dot %end/ are both expected
# to print as ^.$, which rejects the empty string and accepts one character.
4shopt -s ysh:all
5var pat = ''
6
7setvar pat = /^.$/
8echo pat=$pat
9
10setvar pat = /%start dot %end/
11echo pat=$pat
12
13if ('' ~ pat) { # ERE syntax
14 echo yes
15} else {
16 echo no
17}
18# $pat is same as pat
19if ('f' ~ pat) { # ERE syntax
20 echo yes
21} else {
22 echo no
23}
24
25## STDOUT:
26pat=^.$
27pat=^.$
28no
29yes
30## END
31
32
33#### /.+/
# /.+/ is expected to print as .+ and to need at least one character:
# 'foo' matches, the empty string does not.
34shopt -s ysh:all
35
36var pat = /.+/
37echo $pat
38
39var s = 'foo'
40if (s ~ pat) { # ERE syntax
41 echo yes
42}
43var empty = ''
44if (empty ~ pat) { echo yes } else { echo no }
45## STDOUT:
46.+
47yes
48no
49## END
50
51#### Repeat {1,3} etc.
# Each brace repetition on `d` ({n}, {m,n}, {m,}, {,n}) is expected to be
# carried over unchanged onto [[:digit:]] in the printed pattern.
52var pat = null
53
54setvar pat = /d{2}/
55echo $pat
56setvar pat = /d{1,3}/
57echo $pat
58setvar pat = /d{1,}/
59echo $pat
60setvar pat = /d{,3}/
61echo $pat
62
63
64## STDOUT:
65[[:digit:]]{2}
66[[:digit:]]{1,3}
67[[:digit:]]{1,}
68[[:digit:]]{,3}
69## END
70
71
72#### d+ digit+ !d+ !digit+
# `d` and `digit` are expected to print as [[:digit:]], and their negations
# `!d` / `!digit` as [^[:digit:]]. /d+/ matches '42' but not the empty string.
73shopt -s ysh:all
74
75var pat = ''
76
77setvar pat = /d+/
78echo $pat
79if ('42' ~ pat) { echo yes }
80
81var empty = ''
82if (empty ~ pat) { echo yes } else { echo no }
83
84setvar pat = /digit+/
85echo $pat
86setvar pat = /!d+/
87echo $pat
88setvar pat = /!digit+/
89echo $pat
90
91
92## STDOUT:
93[[:digit:]]+
94yes
95no
96[[:digit:]]+
97[^[:digit:]]+
98[^[:digit:]]+
99## END
100
101#### Alternation and sequence
# `|` and `or` are two spellings of alternation: both patterns below are
# expected to print the same text, with `s` and `w` expanded to POSIX classes.
102var pat = ''
103setvar pat = /s d+ | w*/
104echo $pat
105setvar pat = /s d+ or w*/
106echo $pat
107## STDOUT:
108[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
109[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
110## END
111
112#### Char Class Ranges
# Ranges written bare (0-9 a-f) or with quoted endpoints ('0' - '9') are
# expected to print as the same bracket expression, [0-9a-f].
113shopt -s ysh:all
114
115var pat = ''
116setvar pat = /[0-9 a-f]+/
117echo $pat
118# This is equivalent
119setvar pat = /['0' - '9' 'a' - 'f']+/
120echo $pat
121
122if ('0123' ~ pat) { echo yes } else { echo no }
123if ('zzz' ~ pat) { echo yes } else { echo no }
124if ('' ~ pat) { echo yes } else { echo no }
125## STDOUT:
126[0-9a-f]+
127[0-9a-f]+
128yes
129no
130no
131## END
132
133#### Char Class Set
# A quoted string inside [] contributes each of its characters to the set:
# ['abc'] is expected to print as [abc].
134shopt -s ysh:all
135var pat = ''
136
137# This is NOT allowed
138# setvar pat = /[a b c]+/
139
140setvar pat = /['abc']+/
141echo $pat
142
143if ('cbcb' ~ pat) { echo yes } else { echo no }
144if ('0123' ~ pat) { echo yes } else { echo no }
145if ('' ~ pat) { echo yes } else { echo no }
146## STDOUT:
147[abc]+
148yes
149no
150no
151## END
152
153#### Range with escaped characters
# Range endpoints given as \x00 and \x0f are expected to be emitted as raw
# bytes; od shows them as 5b 00 2d 0f 5d followed by echo's newline (0a).
154shopt -s ysh:all
155
156var pat = null
157
158setvar pat = / [ \x00 - \x0f ] /
159echo $pat | od -A n -t x1
160
161## STDOUT:
162 5b 00 2d 0f 5d 0a
163## END
164
165
166#### Group ()
# Parentheses group an alternation. In the printed pattern the ^ belongs only
# to the first alternative, so '-00-' still matches through the two-digit one.
167shopt -s ysh:all
168var pat = ''
169
170setvar pat = /(%start s or d d)/
171echo $pat
172
173if (' foo' ~ pat) { echo yes } else { echo no }
174if ('-00-' ~ pat) { echo yes } else { echo no }
175if ('foo' ~ pat) { echo yes } else { echo no }
176
177## STDOUT:
178(^[[:space:]]|[[:digit:]][[:digit:]])
179yes
180yes
181no
182## END
183
184#### Capture is acceptable as a group
# A <capture ...> term is expected to print as an ordinary parenthesized group.
185shopt -s ysh:all
186var pat = /<capture %start s | d d>/
187echo $pat
188## STDOUT:
189(^[[:space:]]|[[:digit:]][[:digit:]])
190## END
191
192#### literal ''
# Adjacent quoted literals concatenate: 'abc' 'def' is expected to print as
# abcdef, matching 'abcdef' but not the shorter 'abcde'.
193shopt -s ysh:all
194var pat = ''
195
196setvar pat = /'abc' 'def'/
197echo $pat
198
199#setvar pat = /'abc' '^ + * ?'/
200#echo $pat
201
202if ('abcde' ~ pat) { echo yes } else { echo no }
203if ('abcdef' ~ pat) { echo yes } else { echo no }
204
205## STDOUT:
206abcdef
207no
208yes
209## END
210
211#### Single quotes and splicing (do what "foo $x ${x}" used to)
# @x and @y splice string variables into the pattern next to a quoted literal.
# The pattern has no anchors, so 'xxabcxyf' matches while 'xxabcx' (missing the
# trailing y) does not.
212shopt -s ysh:all
213var pat = ''
214
215var x = 'x'
216var y = 'y'
217setvar pat = / @x @x 'abc' @x @y /
218echo $pat
219
220if ('xxabcx' ~ pat) { echo yes } else { echo no }
221if ('xxabcxyf' ~ pat) { echo yes } else { echo no }
222
223## STDOUT:
224xxabcxy
225no
226yes
227## END
228
229#### @splice
# @d splices a previously defined pattern four times; the quoted '.' between
# them is expected to be printed escaped, as \.
230shopt -s ysh:all
231var d = /d+/;
232var ip = / @d '.' @d '.' @d '.' @d /
233echo $ip
234if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
235if ('0.0.0' ~ ip) { echo yes } else { echo no }
236## STDOUT:
237[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
238yes
239no
240## END
241
242#### splice with capital letters
# A capitalized variable name (D) is spliced here without the @ sigil; the
# expected output is the same four-part digit pattern the @d form produces.
243shopt -s ysh:all
244var D = /d+/;
245var ip = / D '.' D '.' D '.' D /
246echo $ip
247if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
248if ('0.0.0' ~ ip) { echo yes } else { echo no }
249## STDOUT:
250[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
251yes
252no
253## END
254
255#### Matching escaped tab character
# [\t] in the pattern is expected to become a literal tab in the printed ERE;
# egrep then selects only the 'aa<TAB>bb' line from the two inputs.
# NOTE(review): a later case in this file has the same title and script.
256shopt -s ysh:all
257
258var lines = :| $'aa\tbb' $'cc\tdd' |
259
260var pat = / ('a' [\t] 'b') /
261write pat=$pat
262write @lines | egrep $pat
263
264## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
265
266#### Match unicode char
# `dot` stands for exactly one character between 'a' and 'b': an ASCII 'x' and
# the two-byte UTF-8 mu are both expected to match, the two characters 'ZZ' not.
267shopt -s ysh:all
268var pat = / 'a' dot 'b' /
269
270if ('axb' ~ pat ) { echo yes } else { echo no }
271
272# mu character
273if ($'a\xce\xbcb' ~ pat ) { echo yes } else { echo no }
274
275if ('aZZb' ~ pat ) { echo yes } else { echo no }
276## STDOUT:
277yes
278yes
279no
280## END
281
282#### Match non-ASCII byte denoted using $'\xff' (TODO: LANG=C)
# A $'\xff' string inside [] is expected to be emitted as the raw byte ff
# (od: 5b ff 5d 0a); the case then expects \xff to match and \xfe not to.

284# NOTE: This pattern doesn't work with en_US.UTF-8. I think the user should
285# set LANG=C or shopt --unset libc_utf8.
286
287shopt -s ysh:all
288var pat = /[ $'\xff' ]/;
289
290echo $pat | od -A n -t x1
291if ($'\xff' ~ pat) { echo yes } else { echo no }
292if ($'\xfe' ~ pat) { echo yes } else { echo no }
293
294## STDOUT:
295 5b ff 5d 0a
296yes
297no
298## END
299
300#### Match non-ASCII byte denoted using \xff
# Only the translation is checked: [ \xff ] is expected to print as the bytes
# 5b ff 5d (plus newline). The match assertions below are commented out.
301shopt -s ysh:all
302var pat = /[ \xff ]/;
303
304# Show what it translates to
305echo $pat | od -A n -t x1
306
307# TODO: This might require LANG=C to work
308#if ($'\xff' ~ pat) { echo yes } else { echo no }
309#if ($'\xfe' ~ pat) { echo yes } else { echo no }
310
311## STDOUT:
312 5b ff 5d 0a
313## END
314
315#### ERE can express Unicode escapes that are in the ASCII range
# \u{...} escapes at or below 7f are expected to print as single bytes.
# Leading zeros do not matter (\u{7f} and \u{0007f} print identically), and
# such escapes can also serve as range endpoints.
316shopt -s ysh:all
317var pat = /[ \u{7f} ]/;
318
319echo $pat | od -A n -t x1
320if ($'\x7f' ~ pat) { echo yes } else { echo no }
321if ($'\x7e' ~ pat) { echo yes } else { echo no }
322
323var pat2 = /[ \u{7f} ]/;
324var pat3 = /[ \u{0007f} ]/;
325test "$pat2" = "$pat3" && echo 'equal'
326
327var range = / [ \u{70} - \u{7f} ] /
328if ($'\x70' ~ range) { echo yes } else { echo no }
329if ($'\x69' ~ range) { echo yes } else { echo no }
330
331## STDOUT:
332 5b 7f 5d 0a
333yes
334no
335equal
336yes
337no
338## END
339
340#### ERE can't express higher Unicode escapes
# A \u{...} escape above the ASCII range cannot be written in ERE, so turning
# the pattern into a string is expected to fail: status 1 and empty stdout.
341shopt -s ysh:all
342var pat2 = /[ \u{00} - \u{ff} ]/;
343
344# This causes an error
345echo $pat2
346
# NOTE(review): the expected stdout is empty, so the statements below are
# presumably never reached after the failing echo above — confirm. They match
# against pat1, the pattern declared in this case.
347# This just prints it
348= pat2
349
350var pat1 = /[ \u{ff} ]/;
351
352echo $pat1 | od -A n -t x1
353if ($'\x7f' ~ pat1) { echo yes } else { echo no }
354if ($'\x7e' ~ pat1) { echo yes } else { echo no }
355
356## status: 1
357## stdout-json: ""
358
359#### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
# Splicing a two-byte string that contains the non-ASCII byte \xff into a
# character class is expected to fail: status 1 and empty stdout.
360var bytes = $'\x7f\xff'
361var pat = / [ @bytes ] /
362echo $pat
363## status: 1
364## stdout-json: ""
365
366#### Matching escaped tab character
# [\t] in the pattern is expected to become a literal tab in the printed ERE;
# egrep then selects only the 'aa<TAB>bb' line from the two inputs.
# NOTE(review): an earlier case in this file has the same title and the same
# script apart from the BUG comment below.
367shopt -s ysh:all
368
369# BUG: need C strings in array literal
370var lines = :| $'aa\tbb' $'cc\tdd' |
371
372var pat = / ('a' [\t] 'b') /
373write pat=$pat
374write @lines | egrep $pat
375
376## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
377
378#### Repeated String Literal With Single Char
# A one-character spliced string may be repeated directly (@literal+ prints as
# f+) or inside a group ((@literal)+ prints as (f)+). The two match checks at
# the end run against the grouped pattern, the last value assigned to pat.
379shopt -s ysh:all
380
381var literal = 'f'
382var pat = null
383
384setvar pat = / %start @literal+ %end /
385echo $pat
386setvar pat = / %start (@literal)+ %end /
387echo $pat
388
389if ('fff' ~ pat) { echo yes }
390if ('foo' !~ pat) { echo no }
391
392## STDOUT:
393^f+$
394^(f)+$
395yes
396no
397## END
398
399#### Error when unparenthesized string of more than one character is repeated
# Same shape as the single-character case, but the spliced string is 'foo'.
# The run is expected to end with status 1 before anything is printed, so the
# unparenthesized @literal+ form is the one being rejected.
400shopt -s ysh:all
401
402var literal = 'foo'
403var pat = null
404
405setvar pat = / %start @literal+ %end /
406echo $pat
407setvar pat = / %start (@literal)+ %end /
408echo $pat
409
410if ('foofoo' ~ pat) { echo yes }
411if ('foof' !~ pat) { echo no }
412
413## status: 1
414## stdout-json: ""
415
416#### Instead of $'foo\\bar' use 'foo' \\ 'bar'
# \\ between two literals stands for one backslash character. It is expected to
# print as the escaped ERE form \\ and to match 'foo\bar' but not 'foo.bar'.
417shopt -s ysh:all
418var pat = /'foo' \\ 'bar'/
419echo $pat
420
421if (r'foo\bar' ~ pat) { echo yes }
422if (r'foo.bar' !~ pat) { echo no }
423## STDOUT:
424foo\\bar
425yes
426no
427## END
428
429#### Negation of Character Class ![a-z]
# ! in front of a whole class is expected to print as a negated bracket
# expression, [^a-z]: '0' matches and 'a' does not.
430shopt -s ysh:all
431
432var pat = / ![ a-z ] /
433echo $pat
434
435if ('0' ~ pat) { echo yes }
436if ('a' !~ pat) { echo no }
437
438## STDOUT:
439[^a-z]
440yes
441no
442## END
443
444#### Posix and Perl class in class literals
# Class names can sit inside [] next to literal characters: `space` and `digit`
# are expected to print as [:space:] and [:digit:] within the bracket
# expression. The negated-member variants are commented out and not exercised.
445shopt -s ysh:all
446
447var pat = null
448
449setvar pat = / [ space 'z' ] /
450echo $pat
451#setvar pat = / [ ~space 'z' ] /
452#echo $pat
453
454# PROBLEM: can't negate individual POSIX classes. They would have to be a Perl
455# class to be \D or \S.
456# [[:space:]z] negates the whole thing!
457# [^[:space:]]
458
459setvar pat = / [ digit 'z' ] /
460echo $pat
461#setvar pat = / [ ~digit 'z' ] /
462#echo $pat
463
464## STDOUT:
465[[:space:]z]
466[[:digit:]z]
467## END
468
469#### [!d] can't be negated because it's a literal character
# Expects exit status 2 and empty stdout for `!d` used as a class member.
# NOTE(review): status 2 (vs. 1 for !digit) presumably means the rejection
# happens at parse time — confirm.
470setvar pat = / [ !d 'z' ] /
471echo $pat
472## status: 2
473## stdout-json: ""
474
475#### [!digit] can't be negated in POSIX ERE (but yes in Perl)
# Expects exit status 1 and empty stdout for `!digit` used as a class member.
476var pat = null
477setvar pat = / [ !digit 'z' ] /
478echo $pat
479## status: 1
480## stdout-json: ""
481
482#### Operator chars in char classes (bash-like)
# Baseline using plain string patterns with [[ =~ ]]: shows how - \ ] [ . and ^
# behave as members of an ERE bracket expression. A FAIL line must never be
# printed. In the last pair, [\^] also matches a backslash, which is why the
# 'no way to have [^]' line is part of the expected output.

484pat='[-]'
485[[ '-' =~ $pat ]] && echo hyphen
486[[ '\' =~ $pat ]] && echo FAIL
487
488pat='[\]'
489[[ '\' =~ $pat ]] && echo backslash
490[[ '-' =~ $pat ]] && echo FAIL
491
492pat='[]]'
493[[ ']' =~ $pat ]] && echo 'right bracket'
494[[ '[' =~ $pat ]] && echo FAIL
495
496pat='[[]'
497[[ '[' =~ $pat ]] && echo 'left bracket'
498[[ ']' =~ $pat ]] && echo FAIL
499
500pat='[.]'
501[[ '.' =~ $pat ]] && echo period
502[[ '\' =~ $pat ]] && echo FAIL
503
504pat='[\^]'
505[[ '^' =~ $pat ]] && echo caret
506[[ '\' =~ $pat ]] && echo 'no way to have [^]'
507
508## STDOUT:
509hyphen
510backslash
511right bracket
512left bracket
513period
514caret
515no way to have [^]
516## END
517
518#### Operator chars in char classes (eggex)
# Eggex counterpart of the bash-like case: the same six checks with the
# patterns written as eggex class literals. The expected output is line for
# line the same, and a FAIL line must never be printed.
519shopt --set ysh:upgrade
520
521var pat = / ['-'] /
522#echo PAT=$pat
523if ('-' ~ pat) { echo hyphen }
524if ($'\\' ~ pat) { echo FAIL }
525
526var pat = / [ \\ ] /
527[[ '\' =~ $pat ]] && echo backslash
528[[ '-' =~ $pat ]] && echo FAIL
529
530var pat = / [ ']' ] /
531[[ ']' =~ $pat ]] && echo 'right bracket'
532[[ '[' =~ $pat ]] && echo FAIL
533
534var pat = / [ '[' ] /
535[[ '[' =~ $pat ]] && echo 'left bracket'
536[[ ']' =~ $pat ]] && echo FAIL
537
538var pat = / [ '.' ] /
539[[ '.' =~ $pat ]] && echo period
540[[ '\' =~ $pat ]] && echo FAIL
541
542var pat = / [ \\ '^' ] /
543[[ '^' =~ $pat ]] && echo caret
544[[ '\' =~ $pat ]] && echo 'no way to have [^]'
545
546
547## STDOUT:
548hyphen
549backslash
550right bracket
551left bracket
552period
553caret
554no way to have [^]
555## END
556
557#### Matching ] and \ and ' and " in character classes
# A class holding ] \ ' and " is expected to print with ] moved to the front:
# []'"\\]. egrep with that pattern keeps every input line except
# 'lbracket [', which contains none of the four characters.
558shopt -s ysh:all
559
560# BUG: need C strings in array literal
561var lines = :|
562 'backslash \'
563 'rbracket ]'
564 'lbracket ['
565 "sq '"
566 'dq ""'
567|
568
569# Weird GNU quirk: ] has to come first!
570# []abc] works. But [abc\]] does NOT work. Stupid rule!
571
572var pat = / [ ']' \\ \' \" ] /
573write pat=$pat
574write @lines | egrep $pat
575
576## STDOUT:
577pat=[]'"\\]
578backslash \
579rbracket ]
580sq '
581dq ""
582## END
583
584#### Matching literal hyphen in character classes
# A spliced '-' is expected to be printed at the end of the bracket expression
# ([ab-]); grep with that pattern keeps 'c-d' and 'ab' and drops 'cd'.
585shopt -s ysh:all
586
587var literal = '-'
588var pat = / [ 'a' 'b' @literal ] /
589write pat=$pat
590write 'c-d' 'ab' 'cd' | grep $pat
591## STDOUT:
592pat=[ab-]
593c-d
594ab
595## END
596
597#### Char class special: ^ - ] \
598
599# See demo/ere-char-class-literals.sh
600#
601# \ is special because of gawk
602
603shopt -s ysh:upgrade
604
605
606# Note: single caret disallowed
# Caret: whether spelled '^', \x5e or \u{5e}, it is expected to be printed
# after the other member ([x^]).
607var caret = / ['^' 'x'] /
608echo caret=$caret
609
610var caret2 = / [ \x5e 'x'] /
611echo caret2=$caret2
612
613var caret3 = / [ \u{5e} 'x'] /
614echo caret3=$caret3
615
616if ('x' ~ caret3) {
617 echo 'match x'
618}
619if ('^' ~ caret3) {
620 echo 'match ^'
621}
622
623echo ---
624
# Hyphen: expected to be printed last ([ab-]) so it is not read as a range.
# 'c' must not match; there is no 'match c' line in the expected output.
625var hyphen = / ['a' '-' 'b'] /
626echo hyphen=$hyphen
627
628var hyphen2 = / ['a' \x2d 'b' ] /
629echo hyphen2=$hyphen2
630
631if ('-' ~ hyphen2) {
632 echo 'match -'
633}
634
635if ('a' ~ hyphen2) {
636 echo 'match a'
637}
638
639if ('c' ~ hyphen2) {
640 echo 'match c'
641}
642
643echo ---
644
# Brackets: ] is expected to be printed first and [ after it, giving [][].
645var rbracket = / [ '[' ']' ] /
646echo rbracket=$rbracket
647
648var rbracket2 = / [ \x5b \x5d ] /
649echo rbracket2=$rbracket2
650
651if ('[' ~ rbracket2) {
652 echo 'match ['
653}
654
655if (']' ~ rbracket2) {
656 echo 'match ]'
657}
658
659echo ---
660
# Backslash: expected to be printed last and doubled ([xn\\]) for all three
# spellings. A newline must not match; the final branch below has no line in
# the expected output.
661var backslash = / [ 'x' \\ 'n' ] /
662echo backslash=$backslash
663
664var backslash2 = / [ 'x' \x5c 'n' ] /
665echo backslash2=$backslash2
666
667var backslash3 = / [ 'x' $'\\' 'n' ] /
668echo backslash3=$backslash3
669
670if ('x' ~ backslash3) {
671 echo 'match x'
672}
673
674if ('n' ~ backslash3) {
675 echo 'match n'
676}
677
678if ($'\\' ~ backslash3) {
679 echo 'match backslash'
680}
681
682if ($'\n' ~ backslash3) {
683 echo 'match nnewline'
684}
685
686
687## STDOUT:
688caret=[x^]
689caret2=[x^]
690caret3=[x^]
691match x
692match ^
693---
694hyphen=[ab-]
695hyphen2=[ab-]
696match -
697match a
698---
699rbracket=[][]
700rbracket2=[][]
701match [
702match ]
703---
704backslash=[xn\\]
705backslash2=[xn\\]
706backslash3=[xn\\]
707match x
708match n
709match backslash
710## END
711