OILS / doctools / micro-syntax.sh View on Github | oilshell.org

369 lines, 144 significant
1#!/usr/bin/env bash
2#
3# Lexing / Parsing experiment
4#
5# Usage:
6# doctools/micro-syntax.sh <function name>
7
8# TODO:
9# - Rename to micro-syntax, from micro-grammars and uchex?
10# - micro-segmenting and lexing - comments, strings, and maybe { }
11# - micro-parsing: for indent/dedent
12#
13# - use GNU long flags, test them
14
15# C++
16#
17# - ANSI should cat all argv, and it should print line numbers
18# - HTML string can append with netstrings!
19# - (path, html, path, html, ...) should be sufficient, though not fully general
20# - print SLOC at the top
21# - COALESCE tokens to save space
22
23# Then src-tree reads this stream
24# - actually it can take the filenames directly from here
25# - it can discard the big HTML!
26
27# Later: port some kind of parser combinator for
28# - def class, etc.
29
# Strict mode: abort on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory containing this script.
# "$0" is quoted so a checkout path containing spaces still resolves, and
# '&&' makes a failed cd abort (under errexit) instead of silently reporting
# the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this

#source build/dev-shell.sh  # 're2c' in path
# NOTE(review): die and log are called below but not defined in this file;
# presumably they come from this sourced file -- confirm.
source build/ninja-rules-cpp.sh
38
# Run re2c with this project's warning flags plus --tags.
#
# Arguments:
#   $1 - re2c input file
#   $2 - path of the generated output file
my-re2c() {
  local in=$1
  local out=$2

  # Copied from build/py.sh, and added --tags
  # Both paths are quoted so that names containing spaces stay one argument.
  re2c --tags -W -Wno-match-empty-string -Werror -o "$out" "$in"
}
46
# Output directory: build() writes the generated header and the compiled
# micro_syntax binary here, and the test functions run the binary from here.
BASE_DIR='_tmp/micro-syntax'
readonly BASE_DIR
48
# Generate the lexer header with re2c, then compile and strip the
# micro_syntax binary under $BASE_DIR.
#
# Arguments:
#   $1 - build variant: 'asan' (default; -O0 with AddressSanitizer) or
#        'opt' (-O2).  Any other value is rejected via die.
# Outputs:
#   $BASE_DIR/micro_syntax.h, $BASE_DIR/micro_syntax and
#   $BASE_DIR/micro_syntax.stripped
build() {
  local variant=${1:-asan}

  # A local array: the flags no longer leak into the global scope, and each
  # flag reaches g++ as its own argument without relying on word splitting.
  local -a cxxflags=()

  case $variant in
    asan)
      cxxflags=(-O0 -fsanitize=address)
      ;;
    opt)
      cxxflags=(-O2)
      ;;
    *)
      die "Invalid variant $variant"
      ;;
  esac

  mkdir -p "$BASE_DIR"

  local cc=doctools/micro_syntax.cc
  local h=$BASE_DIR/micro_syntax.h
  local bin=$BASE_DIR/micro_syntax

  my-re2c doctools/micro_syntax.re2c.h "$h"

  # Note: with cc, you need gnu99 instead of c99 for fdopen() and getline()

  # g++ - otherwise virtual functions don't work!

  # Trace only the compiler invocation.
  set -o xtrace
  g++ -std=c++11 -Wall -I "$BASE_DIR" "${cxxflags[@]}" \
    -o "$bin" "$cc"
  set +o xtrace

  strip -o "$bin.stripped" "$bin"

  log " CXX $cc"
}
86
# Python inputs for the highlighter.  test-py and run-tests hand every element
# to `run-cases py`, which echoes each one into the micro_syntax binary.
# The elements exercise double- and single-quoted strings with backslash
# escapes, an unterminated string, trailing comments, raw strings, and
# triple-quoted strings that span several lines.
# NOTE(review): the digits at the start of each line look like source-viewer
# line numbers fused into the text; several fall inside multi-line string
# literals, so the original whitespace cannot be recovered from this view --
# confirm against the upstream file before editing any element.
87readonly -a PY_TESTS=(
88 'abc' '""'
89 '"dq \" backslash \\"' '"missing '
90 "'sq \\' backslash \\\\'"
91 '"line\n"' '"quote \" backslash \\ "'
92 '"\n"'
93 'hi # comment'
94 '"hi" # comment'
95 '(r"raw dq")'
96 "(r'raw \\' sq')"
97
98' "L1" # first
99 L2 # second'
100
101' def f():
102 """docstring
103 with "quote"
104 """
105 pass'
106
107" def f():
108 '''docstring
109 with 'quote'
110 '''
111 pass"
112
113 " print(r'''hello''')"
114 ' print(r"""hi there""")'
115
# NOTE(review): the next element repeats an entry that already appears earlier
# in this array.
116 '"hi" # comment'
117)
118
# C/C++ inputs for the highlighter.  test-cpp and run-tests hand every element
# to `run-cases cpp`.
# The elements exercise preprocessor lines (including a backslash-continued
# #define), // and /* */ comments, a char literal, and raw string literals
# with and without a custom delimiter, one of which is never closed.
# NOTE(review): the digits at the start of each line look like source-viewer
# line numbers fused into the text; several fall inside multi-line string
# literals, so the original whitespace cannot be recovered from this view --
# confirm against the upstream file before editing any element.
119readonly -a CPP_TESTS=(
120 '#if 0'
121 'not prepreproc #ifdef 0'
122 "// comment can't "
123 "f(); // comment isn't "
124
125 # Char literal in C
126 "'\\''"
127
128 'void f(); /* multi-line
129 comment
130 */
131 void g(int x);'
132
133 '#include "foo.h"'
134 '#include <foo.h> // comment'
135
136 '#define X 3 // comment
137 int g();'
138
139 '// hello
140 #include <stdio.h>
141 #define SUM(x, y) \
142 (x) + \
143 (y) // comment
144 void f();'
145
146 '#undef x'
147
148 '#define F(x) x##name'
149
150 'char* s = f(R"(one
151 two
152 three)");
153 '
154
155 'char* s = f(R"zzXX(hi
156 world
157 )zzX" (not the end)
158 )zzXX");
159 '
160
161 'char* unclosed = f(R"zzXX(hi
162 world
163 )oops");
164 '
165)
166
# Shell inputs for the highlighter.  test-shell and run-tests hand every
# element to `run-cases shell`.
# The elements exercise $'...' strings, a backslash-escaped quote, '#' inside
# a word and inside $(( )) versus real comments, and here docs: plain, <<-
# with a quoted or backslash-escaped delimiter, and two here docs on one
# command line.
# NOTE(review): the digits at the start of each line look like source-viewer
# line numbers fused into the text; several fall inside multi-line string
# literals (including here-doc bodies and terminators), so the original bytes
# cannot be recovered from this view -- confirm against the upstream file
# before editing any element.
167readonly -a SHELL_TESTS=(
168 "echo $'multi \\n
169 sq \\' line'"
170
171 # Quoted backslash
172 "echo hi \\' there"
173
174 'echo one#two'
175 'echo $(( 16#ff ))'
176
177 '# comment'
178 '### comment'
179
180 'echo one # comment'
181
182 'cat <<EOF
183hello $world
184EOF'
185
186 'cat <<- "EOF"
187$3.99
188EOF '
189
190 'cat <<- \_ACAWK
191$3.99
192more
193_ACAWK
194echo yo'
195
196 'echo multiple << EOF1 << EOF2 > out
197one
198EOF1
199...
200two
201EOF2
202echo done'
203)
204
# R inputs for the highlighter.  test-R and run-tests hand every element to
# `run-cases R`.
# The elements exercise a trailing comment and double- and single-quoted
# strings that span several lines.
# NOTE(review): the digits at the start of each line look like source-viewer
# line numbers fused into the text; some fall inside multi-line string
# literals, so the original whitespace cannot be recovered from this view --
# confirm against the upstream file before editing any element.
205readonly -a R_TESTS=(
206 'f() # hello'
207 'x = f("1
208 2 \"quote\"
209 3")'
210
211 "x = f('1
212 2
213 3')"
214)
215
# Feed each test input to the micro_syntax binary and print its output.
#
# Arguments:
#   $1    - language name passed to the binary's -l flag
#   $2... - test inputs; each is written to the binary's stdin followed by a
#           newline
# Outputs:
#   For every input: a '==== <input>' header, whatever the binary prints, and
#   a blank line.
run-cases() {
  local lang=$1
  shift

  local bin=$BASE_DIR/micro_syntax

  # Declared local so the loop variable does not leak into the caller.
  local s
  for s in "$@"; do
    echo "==== $s"
    # printf rather than echo: an input such as '-n' or '-e' would otherwise
    # be consumed as an echo option instead of reaching the binary.
    printf '%s\n' "$s" | "$bin" -l "$lang"
    echo
  done
}
228
# Rebuild the binary, then run every SHELL_TESTS input through it with the
# shell lexer selected.
test-shell() {
  local -r lang=shell

  build  # TODO: use Ninja
  run-cases "$lang" "${SHELL_TESTS[@]}"
}
233
# Rebuild the binary, then run every CPP_TESTS input through it with the
# cpp lexer selected.
test-cpp() {
  local -r lang=cpp

  build
  run-cases "$lang" "${CPP_TESTS[@]}"
}
238
# Rebuild the binary, then run every PY_TESTS input through it with the
# py lexer selected.
test-py() {
  local -r lang=py

  build
  run-cases "$lang" "${PY_TESTS[@]}"
}
243
# Rebuild the binary, then run every R_TESTS input through it with the
# R lexer selected.
test-R() {
  local -r lang=R

  build
  run-cases "$lang" "${R_TESTS[@]}"
}
248
# Build once, run the test inputs for every language, then run the binary
# without a -l flag and on empty input.
run-tests() {
  local bin=$BASE_DIR/micro_syntax

  build

  run-cases shell "${SHELL_TESTS[@]}"
  run-cases cpp "${CPP_TESTS[@]}"
  run-cases py "${PY_TESTS[@]}"
  run-cases R "${R_TESTS[@]}"

  # No language specified
  echo '==== No language'
  # "$0" is quoted so the script still finds itself when its path contains
  # spaces or glob characters.
  head -- "$0" | "$bin"
  echo

  echo '/dev/null'
  "$bin" < /dev/null
}
267
# Highlight the tool's own C++ sources (the re2c header, then the .cc file)
# and page the result.
cpp-self() {
  build

  local tool=$BASE_DIR/micro_syntax
  cat doctools/micro_syntax.re2c.h doctools/micro_syntax.cc \
    | "$tool" -l cpp \
    | less -r
}
272
# Highlight this script itself, passing its path as a file argument.
sh-self() {
  build
  #$BASE_DIR/micro_syntax -l shell < doctools/micro_syntax.sh | less -r

  local tool=$BASE_DIR/micro_syntax
  local script=doctools/micro-syntax.sh
  "$tool" -l shell "$script"
}
279
lexer-def() {
  ### Test on a hard Python file

  build

  # Read frontend/lexer_def.py on stdin and page the highlighted output.
  local tool=$BASE_DIR/micro_syntax
  "$tool" -l py < frontend/lexer_def.py \
    | less -r
}
286
git-comp() {
  ### Test on a hard shell file

  # Exposes nested double quote issue
  build

  # Read testdata/completion/git on stdin and page the highlighted output.
  local tool=$BASE_DIR/micro_syntax
  "$tool" -l shell < testdata/completion/git \
    | less -r
}
294
# Highlight every file matching mycpp/gc_str.* as C++ and page the result.
mycpp-runtime() {
  build

  local tool=$BASE_DIR/micro_syntax
  cat mycpp/gc_str.* \
    | "$tool" -l cpp \
    | less -r
}
299
# Print line counts for the micro_syntax sources and for the headers in the
# output directory, then list the output directory.
count() {
  local out_dir=$BASE_DIR

  wc -l doctools/micro_syntax*
  echo

  wc -l "$out_dir"/*.h
  echo

  ls -l --si -h "$out_dir"
}
307
# Smoke-test the command line interface: help, each output mode shown below
# (default, -w, -t), a file argument, and the exit status for a missing file.
test-usage() {
  build

  local bin=$BASE_DIR/micro_syntax
  local sample='echo "hi $name"'

  # help
  "$bin" -h

  echo 'ANSI'
  echo "$sample" | "$bin" -l shell
  echo

  echo 'WEB'
  echo "$sample" | "$bin" -l shell -w
  echo

  set -x
  echo 'TSV'
  echo "$sample" | "$bin" -l shell -t

  echo
  echo
  echo '"dq"' | "$bin" -l shell

  "$bin" -l shell configure | wc -l

  # A missing input file must yield exit status 1.  Capturing the status with
  # '||' keeps errexit enabled for the whole function instead of switching it
  # off and back on around the command.
  local status=0
  "$bin" -l shell _nonexistent_ZZ || status=$?
  if test "$status" -ne 1; then
    die 'Expected status 1'
  fi

  # Tracing was enabled above; switch it off so it does not leak into callers.
  set +x
}
341
# Run the usage checks, print a blank separator line, then run the
# per-language tests.
soil-run() {
  test-usage
  printf '\n'

  run-tests
}
348
349### Shell Tests
350
here-doc-syntax() {
  ### Test here doc syntax with $0 sh-self

  # Writes _tmp/42.txt, then concatenates it with the here-doc body into
  # _tmp/here.txt and prints the result.  The directory is created first so
  # the function also works in a fresh checkout without _tmp/.
  mkdir -p _tmp
  echo 42 > _tmp/42.txt

  # _tmp/42 and - are arguments to cat!  Vim doesn't understand
  # and >_tmp/here.txt is not part of the here doc
  # The unusual word order on the next line is deliberate: it is the syntax
  # under test, so do not "tidy" it.

  cat <<EOF _tmp/42.txt - >_tmp/here.txt
x
short
hello there
EOF

  cat _tmp/here.txt
}
367
# Entry point: run the command line as a command, so the first argument names
# one of the functions above and the remaining arguments are passed to it.
"$@"
369