OILS / pgen2 / pgen2-test.sh View on Github | oilshell.org

378 lines, 86 significant
#!/usr/bin/env bash
#
# Proof of concept for pgen2 and Oil syntax.
#
# Usage:
#   ./pgen2-test.sh <function name>

# Strict mode: unset variables are errors, a pipeline fails if any stage
# fails, and an unhandled non-zero status exits the script.
set -o nounset
set -o pipefail
set -o errexit
11
# Print a section banner: a blank line, the title between dashes, and another
# blank line.
#
# Arguments: the words of the title.  They are joined with the first
#            character of IFS (a space by default).
# Outputs:   three lines on stdout.
banner() {
  echo
  # "$*" rather than "$@": the title is a single string, not a word list.
  echo "----- $* -----"
  echo
}
17
# Run ysh/grammar_gen.py with PYTHONPATH set to the current directory.
#
# Arguments: forwarded unchanged to grammar_gen.py.
# Returns:   the exit status of grammar_gen.py.
#
# Both the script path and PYTHONPATH are relative, so this must be called
# from the directory that contains ysh/.
grammar-gen() {
  PYTHONPATH=. ysh/grammar_gen.py "$@"
}
21
# Build the grammar and parse code.  Outside of the Oil binary.
#
# Arguments: forwarded to 'grammar-gen parse'.  Callers in this file pass
#            <grammar file> <start symbol> <code string>.
# Returns:   the exit status of grammar-gen.
parse() {
  grammar-gen parse "$@"
}
26
# Parse a fixed list of Python-like expressions with pgen2/oil.grammar, using
# eval_input as the start symbol.  Each expression is handed to 'parse'.
parse-exprs() {
  # 'local', not 'readonly': inside a function, 'readonly' creates a GLOBAL
  # read-only variable.  That makes a second call fail, and it also makes a
  # later 'local -a exprs' in any other function fail.
  local -a exprs=(
    '1+2'
    '1 + 2 * 3'
    'x | ~y'
    '1 << x'
    'a not in b'
    'a is not b'
    '[x for x in a]'
    '[1, 2]'
    '{myset, a}'
    '{mydict: a, key: b}'
    '{x: dictcomp for x in b}'
    'a[1,2]'
    'a[i:i+1]'
  )
  local expr
  for expr in "${exprs[@]}"; do
    parse pgen2/oil.grammar eval_input "$expr"

    # TODO: switch to Oil
    #parse $OIL_GRAMMAR test_input "$expr"
  done
}
50
# Parse a fixed list of argument lists with pgen2/oil.grammar, using
# arglist_input as the start symbol.  Each one is handed to 'parse'.
parse-arglists() {
  # 'local', not 'readonly': 'readonly' inside a function creates a global
  # read-only variable, so a second call would fail.
  local -a arglists=(
    'a'
    'a,b'
    'a,b=1'
    # Hm this parses, although it's not valid
    'a=1,b'
    'a, *b, **kwargs'

    # Hm how is this valid?

    # Comment:
    # "The reason that keywords are test nodes instead of NAME is that using
    # NAME results in an ambiguity. ast.c makes sure it's a NAME."
    #
    # Hm is the parsing model powerful enough?
    # TODO: change it to NAME and figure out what happens.
    #
    # Python 3.6's grammar has more comments!

    # "test '=' test" is really "keyword '=' test", but we have no such token.
    # These need to be in a single rule to avoid grammar that is ambiguous
    # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
    # we explicitly match '*' here, too, to give it proper precedence.
    # Illegal combinations and orderings are blocked in ast.c:
    # multiple (test comp_for) arguments are blocked; keyword unpackings
    # that precede iterable unpackings are blocked; etc.

    'a+1'
  )

  local expr
  for expr in "${arglists[@]}"; do
    parse pgen2/oil.grammar arglist_input "$expr"
  done
}
86
# NOTE: Unused small demo.
#
# Parse a fixed list of type expressions with pgen2/oil.grammar, using
# type_input as the start symbol.  Each one is handed to 'parse'.
parse-types() {
  # 'local', not 'readonly': 'readonly' inside a function creates a global
  # read-only variable, so a second call would fail.
  local -a types=(
    'int'
    'str'
    'List<str>'
    'Tuple<str, int, int>'
    'Dict<str, int>'
    # aha!  Tokenizer issue
    #'Dict<str, Tuple<int, int>>'

    # Must be like this!  That's funny.  Oil will have lexer modes to solve
    # this problem!
    'Dict<str, Tuple<int, int> >'
  )
  local expr
  for expr in "${types[@]}"; do
    parse pgen2/oil.grammar type_input "$expr"
  done
}
106
# Path of the grammar file used by calc-test, oil-productions and mode-test.
# It is relative, so it is resolved against the current directory.
readonly OIL_GRAMMAR='ysh/grammar.pgen2'
108
# Echo and then parse a fixed list of calculator-style expressions with
# $OIL_GRAMMAR, using eval_input as the start symbol.
#
# Globals: OIL_GRAMMAR (read)
# Outputs: each expression on its own line, followed by whatever 'parse'
#          prints for it.
calc-test() {
  local -a exprs=(
    'a + 2'
    '1 + 2*3/4'  # operator precedence and left assoc

    # Tuple
    'x+1, y+1'
    #'(x+1, y+1)'  # TODO: atom

    # Associative
    '-1+2+3'
    '4*5*6'
    'i % n'
    'i % n / 2'

    # Uses string tokens
    #'"abc" + "def"'

    '2 ^ 3 ^ 4'  # right assoc
    'f(1)'
    'f(1, 2, 3)'

    'f(a[i], 2, 3)'
    'f(a[i, j], 2, 3)'

    'f(x)^3'
    'f(x)[i]^3'

    #'x < 3 and y <= 4'

    # bad token
    #'a * 3&4'
  )

  local e
  for e in "${exprs[@]}"; do
    printf '%s\n' "$e"
    # Quoted so that a grammar path containing spaces stays one argument.
    parse "$OIL_GRAMMAR" eval_input "$e"
  done
}
148
# Parse sample statements against the oil_var and oil_setvar productions of
# $OIL_GRAMMAR.
#
# Globals: OIL_GRAMMAR (read)
oil-productions() {
  # Quoted so that a grammar path containing spaces stays one argument.
  parse "$OIL_GRAMMAR" oil_var 'a = 1;'
  parse "$OIL_GRAMMAR" oil_var 'a Int = 2;'

  # Invalid because += not allowed
  #parse "$OIL_GRAMMAR" oil_var 'a += 1;'

  parse "$OIL_GRAMMAR" oil_setvar 'x = 3;'
  parse "$OIL_GRAMMAR" oil_setvar 'x += 4;'

  # Invalid because type expression isn't allowed (it could conflict)
  #parse "$OIL_GRAMMAR" oil_setvar 'x Int += 4;'
}
162
# Echo and then parse expressions that exercise the lexer mode transitions,
# using $OIL_GRAMMAR with eval_input as the start symbol.
#
# Globals: OIL_GRAMMAR (read)
# Outputs: each expression on its own line, followed by whatever 'parse'
#          prints for it.
#
# Only the first batch of expressions is parsed.  The command-level cases
# appended afterwards, and the 'deferred' list, are recorded here but are
# never passed to 'parse'.
mode-test() {
  # Test all the mode transitions
  local -a exprs=(
    # Expr -> Array
    # TODO: how is OilOuter different than Array
    '@[]'
    'x + @[a b] + y'

    # Expr -> Command
    # Hm empty could be illegal?
    '$[]'
    'x + $[hi there] + y'

    # Expr -> Expr
    '$(x)'
    # NOTE: operator precedence is respected here!
    'x + $(f(y) - 3) * 4'
    # Expr -> Expr even though we saw )
    #'$(f(x, y) + (1 * 3))'

    # Expr -> OilVS
    #'${}'  # syntax error
    '${x}'
    # This will work when we add | to grammar
    #'x + ${p|html} + y'

    # Expr -> Regex
    #'$/ /'
    'x + $/ mypat / + y'  # syntactically valid, semantically invalid

    # Expr -> OilDQ
    '"hello \$"'
    'x + "hello \$" + y'
    # TODO: Also do every other kind of string:
    # r'raw' r"raw $sub" ''' """ r''' r"""

    # Regex -> CharClass
    #'$/ any* "." [a-z A-Z _] [a-z A-Z _ 0-9]+ /'
    '$/ "." [a-z A-Z _] [a-z A-Z _ 0-9] /'
    '$/ a [b] c /'

    # Array -> CharClass
    '@[one two *.[c h] *.[NOT c h] ]'

    # Expr -> Array -> CharClass
    'left + @[one two *.[c h] ] + right'
    # Array brace sub.  Not PARSED yet, but no lexer mode change AFAICT
    #'@[ -{one,two}- *.[c h] ]'

    ## OilDQ -> Expr
    '"var expr $(2 + 3)"'

    ## OilDQ -> Command
    '"command $[echo hi]"'

    # OilDQ -> OilVS -- % is not an operator
    #'"quoted ${x %02d}"'
    '"quoted ${x}"'

    #)
    #local -a exprs=(

  )

  local e
  for e in "${exprs[@]}"; do
    printf '%s\n' "$e"
    # Quoted so that a grammar path containing spaces stays one argument.
    parse "$OIL_GRAMMAR" eval_input "$e"
  done

  # Command stuff.  TODO: we don't have a parser for this!
  # Maybe add 'echo' do everything?
  exprs+=(
    #'x = $[echo one; echo *.[c h] ]'

    # Command -> Expr (PROBLEM: requires lookahead to =)
    'x = a + b'
    'var x = a + b'
    'setvar x = a + b'

    # Command -> Expr
    'echo $(a + b)'
    'echo ${x|html}'

    # Command -> Expr
    'echo $stringfunc(x, y)'
    'echo @arrayfunc(x, y)'

    # The signature must be parsed in expression mode if it has
    # defaults.
    'func foo(x Int, y Int = 42 + 1) Int {
       echo $x $y
     }
    '
    # I guess [] is parsed in expression mode too.  It's a very simple grammar.
    # It only accepts strings.  Maybe there is a special "BLOCK" var you can
    # evaluate.
    'proc copy [src dest="default $value"] {
       echo $src $dest
     }
    '

    'if (x > 1) { echo hi }'

    'while (x > 0) {
       set x -= 1
     }
    '
    'for (x in y) {  # "var" is implied; error if x is already defined?
       echo $y
     }
    '
    'for (i = 0; i < 10; ++i) {
       echo $i
     }
    '
    'switch (i+1) {
       case 1:
         echo "one"
       case 2:
         echo "two"
     }
    '
    'match (x) {
       1 { echo "one" }
       2 { echo "two" }
     }
    '

    # Command -> OilVS -- % is not an operator
    'echo ${x %02d}'

    # Command -> CharClass is DISALLOWED.  Must go through array?
    # @() could be synonym for array expression.
    # Although if you could come up with a custom syntax error for this: it
    # might be OK.
    # a[x] = 1
    #'echo *.[c h]'
    #
    # I think you could restrict the first words
  )

  # I don't think these are essential.
  local -a deferred=(
    # Expr -> Command (PROBLEM: mode is grammatical; needs state machine)
    'x = func(x, y={}) {
       echo hi
     }
    '

    # Expr -> Command (PROBLEM: ditto)
    # This one is even harder, because technically the expression on the left
    # could have {}?  Or we can ban that in patterns?
    'x = match(x) {
       1 { echo one }
       2 { echo two }
     }
    '

    # stays in Expr for comparison
    'x = match(x) {
       1 => "one"
       2 => "two"
     }
    '
  )
}
329
# Parse a fixed list of inputs with pgen2/enum.grammar, using eval_input as
# the start symbol.  Each one is handed to 'parse'.
enum-test() {
  # 'local', not 'readonly': 'readonly' inside a function creates a global
  # read-only variable, so a second call would fail.
  local -a enums=(
    # second alternative
    'for 3 a'
    'for 3 { a, b }'
    'for 3 a { a, b }'
    #'for'
    #'a'
  )
  local expr
  for expr in "${enums[@]}"; do
    parse pgen2/enum.grammar eval_input "$expr"
  done
}
343
# Run the enabled test groups in order, printing a banner before each one:
# parse-exprs, calc-test, mode-test, oil-productions.
# parse-arglists is commented out and enum-test is not called.
all() {
  banner 'exprs'
  parse-exprs

  #banner 'arglists'
  #parse-arglists

  banner 'calc'
  calc-test

  banner 'mode-test'
  mode-test

  banner 'oil-productions'
  oil-productions

  # enum-test doesn't work?
}
362
# Hm Python 3 has type syntax!  But we may not use it.
# And it has async/await.
# And walrus operator :=.
# @ matrix multiplication operator.

# Print line counts for every Python-*/Grammar/Grammar file under
# ~/src/languages, then diff the 2.7.15 grammar against the 3.7.3 one.
#
# Uses the external commands 'wc' and 'cdiff'; the Python source trees are
# expected under ~/src/languages.
diff-grammars() {
  wc -l ~/src/languages/Python-*/Grammar/Grammar

  cdiff ~/src/languages/Python-{2.7.15,3.7.3}/Grammar/Grammar
}
373
# Run 'pgen2 stdlib-test'.
# NOTE(review): 'pgen2' is not defined in this file; presumably it is a
# command available on PATH -- confirm.
stdlib-test() {
  pgen2 stdlib-test
}

# Dispatch: run the function named by the first command-line argument,
# passing the remaining arguments to it.
"$@"