| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Lexing / Parsing experiment
 | 
| 4 | #
 | 
| 5 | # Usage:
 | 
| 6 | #   doctools/micro-syntax.sh <function name>
 | 
| 7 | 
 | 
| 8 | # TODO:
 | 
| 9 | # - Rename to micro-syntax, from micro-grammars and uchex?
 | 
| 10 | #   - micro-segmenting and lexing - comments, strings, and maybe { }
 | 
| 11 | #   - micro-parsing: for indent/dedent
 | 
| 12 | #
 | 
| 13 | # - use GNU long flags, test them
 | 
| 14 | 
 | 
| 15 | # C++
 | 
| 16 | #
 | 
| 17 | # - ANSI should cat all argv, and it should print line numbers
 | 
| 18 | # - HTML string can append with netstrings!
 | 
| 19 | #   - (path, html, path, html, ...) should be sufficient, though not fully general
 | 
| 20 | #   - print SLOC at the top
 | 
| 21 | # - COALESCE tokens to save space
 | 
| 22 | 
 | 
| 23 | # Then src-tree reads this stream
 | 
| 24 | # - actually it can take the filenames directly from here
 | 
| 25 | #   - it can discard the big HTML!
 | 
| 26 | 
 | 
| 27 | # Later: port some kind of parser combinator for
 | 
| 28 | # - def class, etc.
 | 
| 29 | 
 | 
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on unhandled command errors.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that contains this script.
# "$0" is quoted so a checkout path with spaces works, and '&&' makes a failed
# 'cd' abort the script instead of silently recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this
 | 
| 35 | 
 | 
| 36 | #source build/dev-shell.sh  # 're2c' in path
 | 
| 37 | source build/ninja-rules-cpp.sh
 | 
| 38 | 
 | 
my-re2c() {
  ### Run re2c on one input file, with tags enabled and warnings as errors
  #
  # Arguments:
  #   $1 - path of the re2c input file
  #   $2 - path of the file to generate

  local in=$1
  local out=$2

  # Copied from build/py.sh, and added --tags
  # Both paths are quoted so that names with spaces stay single arguments.
  re2c --tags -W -Wno-match-empty-string -Werror -o "$out" "$in"
}
 | 
| 46 | 
 | 
# Directory that receives the generated header and the compiled binaries.
BASE_DIR=_tmp/micro-syntax
readonly BASE_DIR
 | 
| 48 | 
 | 
build() {
  ### Generate the lexer header with re2c, then compile and strip the binary
  #
  # Arguments:
  #   $1 - build variant: 'asan' (default; -O0 -fsanitize=address) or
  #        'opt' (-O2).  Any other value is reported through die.
  # Outputs:
  #   $BASE_DIR/micro_syntax.h         header generated by my-re2c
  #   $BASE_DIR/micro_syntax           compiled binary
  #   $BASE_DIR/micro_syntax.stripped  stripped copy of the binary

  local variant=${1:-asan}

  # Local (it used to leak into the global scope), and an array so that
  # several flags are passed without relying on word splitting.
  local -a cxxflags
  case $variant in
    asan)
      cxxflags=(-O0 -fsanitize=address)
      ;;
    opt)
      cxxflags=(-O2)
      ;;
    *)
      die "Invalid variant $variant"
      ;;
  esac

  mkdir -p "$BASE_DIR"

  local cc=doctools/micro_syntax.cc
  local h=$BASE_DIR/micro_syntax.h
  local bin=$BASE_DIR/micro_syntax

  my-re2c doctools/micro_syntax.re2c.h "$h"

  # Note: with cc, you need gnu99 instead of c99 for fdopen() and getline()

  # g++ - otherwise virtual functions don't work!

  # Trace only the compiler invocation
  set -o xtrace
  g++ -std=c++11 -Wall -I "$BASE_DIR" "${cxxflags[@]}" \
    -o "$bin" "$cc"
  set +o xtrace

  strip -o "$bin.stripped" "$bin"

  log "  CXX $cc"
}
 | 
| 86 | 
 | 
# Python snippets that test-py and run-tests hand to run-cases, one array
# element per snippet.
PY_TESTS=(
  # Single-line snippets: strings, escapes, comments, raw strings
  'abc'
  '""'
  '"dq \" backslash \\"'
  '"missing '
  "'sq \\' backslash \\\\'"
  '"line\n"'
  '"quote \" backslash \\ "'
  '"\n"'
  'hi # comment'
  '"hi"  # comment'
  '(r"raw dq")'
  "(r'raw \\' sq')"

  # Two lines, each ending in a comment
  ' "L1"  # first
  L2 # second'

  # Triple-quoted docstrings: double quotes, then single quotes
  ' def f():
    """docstring
    with "quote"
    """
    pass'

  " def f():
    '''docstring
    with 'quote'
    '''
    pass"

  # Raw triple-quoted strings
  " print(r'''hello''')"
  ' print(r"""hi there""")'

  # Same snippet as earlier in the list
  '"hi"  # comment'
)
readonly -a PY_TESTS
 | 
| 118 | 
 | 
# C / C++ snippets that test-cpp and run-tests hand to run-cases, one array
# element per snippet.
CPP_TESTS=(
  # Preprocessor lines and // comments
  '#if 0'
  'not prepreproc #ifdef 0'
  "// comment can't "
  "f(); // comment isn't "

  # Char literal in C
  "'\\''"

  # Block comment that spans several lines
  'void f(); /* multi-line
                comment
             */
  void g(int x);'

  '#include "foo.h"'
  '#include <foo.h> // comment'

  '#define X 3  // comment
   int g();'

  # Macro continued over several lines with backslashes
  '// hello
   #include <stdio.h>
   #define SUM(x, y) \
      (x) + \
      (y)      // comment
   void f();'

  '#undef x'

  '#define F(x) x##name'

  # Raw string literals: no delimiter, a custom delimiter, and an unclosed one
  'char* s = f(R"(one
  two
  three)");
  '

  'char* s = f(R"zzXX(hi
  world
  )zzX"  (not the end)
  )zzXX");
  '

  'char* unclosed = f(R"zzXX(hi
  world
  )oops");
  '
)
readonly -a CPP_TESTS
 | 
| 166 | 
 | 
# Shell snippets that test-shell and run-tests hand to run-cases, one array
# element per snippet.
SHELL_TESTS=(
  # $'...' string that spans two lines
  "echo $'multi \\n
     sq \\' line'"

  # Backslash followed by a single quote, outside any quotes
  "echo hi \\' there"

  # '#' that does not start a comment
  'echo one#two'
  'echo $(( 16#ff ))'

  # Comments
  '# comment'
  '### comment'

  'echo one # comment'

  # Here docs.  Two of the snippets below have a space after the delimiter
  # word on its own line; those spaces are part of the test data.
  'cat <<EOF
hello $world
EOF'

  'cat <<- "EOF"
$3.99
EOF '

  'cat <<- \_ACAWK
$3.99
more
_ACAWK 
echo yo'

  'echo multiple << EOF1 << EOF2 > out
one
EOF1
...
two
EOF2
echo done'
)
readonly -a SHELL_TESTS
 | 
| 204 | 
 | 
# R snippets that test-R and run-tests hand to run-cases, one array element
# per snippet.
R_TESTS=(
  # Trailing comment
  'f() # hello'

  # Double-quoted string over three lines, with escaped quotes
  'x = f("1
  2 \"quote\"
  3")'

  # Single-quoted string over three lines
  "x = f('1
  2
  3')"
)
readonly -a R_TESTS
 | 
| 215 | 
 | 
run-cases() {
  ### Run the micro_syntax binary once per snippet, with a header for each
  #
  # Arguments:
  #   $1   - language name, passed to the binary's -l flag
  #   $2.. - snippets; each one is written to the binary's stdin, followed by
  #          a newline
  # Output: for every snippet, a '==== <snippet>' line, whatever the binary
  #   prints, and a blank line.

  local lang=$1
  shift

  local bin=$BASE_DIR/micro_syntax

  local s  # keep the loop variable out of the global scope
  for s in "$@"; do
    echo "==== $s"
    # printf rather than echo, so a snippet such as '-n' is sent as data
    printf '%s\n' "$s" | "$bin" -l "$lang"
    echo
  done
}
 | 
| 228 | 
 | 
test-shell() {
  ### Build, then run every SHELL_TESTS snippet with language 'shell'

  local -a cases=("${SHELL_TESTS[@]}")

  build  # TODO: use Ninja
  run-cases shell "${cases[@]}"
}
 | 
| 233 | 
 | 
test-cpp() {
  ### Build, then run every CPP_TESTS snippet with language 'cpp'

  local -a cases=("${CPP_TESTS[@]}")

  build
  run-cases cpp "${cases[@]}"
}
 | 
| 238 | 
 | 
test-py() {
  ### Build, then run every PY_TESTS snippet with language 'py'

  local -a cases=("${PY_TESTS[@]}")

  build
  run-cases py "${cases[@]}"
}
 | 
| 243 | 
 | 
test-R() {
  ### Build, then run every R_TESTS snippet with language 'R'

  local -a cases=("${R_TESTS[@]}")

  build
  run-cases R "${cases[@]}"
}
 | 
| 248 | 
 | 
run-tests() {
  ### Build, run every language's snippets, then run the binary without -l
  #
  # The last two checks call the binary with no arguments: once on the first
  # lines of this script (head's default count), once on empty input.

  local bin=$BASE_DIR/micro_syntax

  build

  run-cases shell "${SHELL_TESTS[@]}"
  run-cases cpp "${CPP_TESTS[@]}"
  run-cases py "${PY_TESTS[@]}"
  run-cases R "${R_TESTS[@]}"

  # No language specified
  echo '==== No language'
  # "$0" and "$bin" are quoted so paths containing spaces stay one word
  head "$0" | "$bin"
  echo

  echo '/dev/null'
  "$bin" < /dev/null
}
 | 
| 267 | 
 | 
cpp-self() {
  ### Run the binary with -l cpp over its own two source files, through less

  build
  # The binary path is quoted so a BASE_DIR containing spaces stays one word
  cat doctools/micro_syntax.{re2c.h,cc} \
    | "$BASE_DIR/micro_syntax" -l cpp \
    | less -r
}
 | 
| 272 | 
 | 
sh-self() {
  ### Run the binary with -l shell on this script, given as a file argument

  build

  # Alternative that feeds the file on stdin and pages the result:
  #"$BASE_DIR/micro_syntax" -l shell < doctools/micro-syntax.sh | less -r

  # The binary path is quoted so a BASE_DIR containing spaces stays one word
  "$BASE_DIR/micro_syntax" -l shell doctools/micro-syntax.sh
}
 | 
| 279 | 
 | 
lexer-def() {
  ### Test on a hard Python file
  #
  # Feeds frontend/lexer_def.py to the binary on stdin and pages the result.

  build
  # The binary path is quoted so a BASE_DIR containing spaces stays one word
  "$BASE_DIR/micro_syntax" -l py < frontend/lexer_def.py | less -r
}
 | 
| 286 | 
 | 
git-comp() {
  ### Test on a hard shell file
  #
  # Feeds testdata/completion/git to the binary on stdin and pages the result.

  # Exposes nested double quote issue
  build
  # The binary path is quoted so a BASE_DIR containing spaces stays one word
  "$BASE_DIR/micro_syntax" -l shell < testdata/completion/git | less -r
}
 | 
| 294 | 
 | 
mycpp-runtime() {
  ### Run the binary with -l cpp over every mycpp/gc_str.* file, through less

  build
  # The binary path is quoted so a BASE_DIR containing spaces stays one word
  cat mycpp/gc_str.* \
    | "$BASE_DIR/micro_syntax" -l cpp \
    | less -r
}
 | 
| 299 | 
 | 
count() {
  ### Print line counts of the sources and generated headers, then list BASE_DIR

  wc -l doctools/micro_syntax*
  echo
  # Only the directory part is quoted, so the *.h glob still expands
  wc -l "$BASE_DIR"/*.h
  echo
  ls -l --si -h "$BASE_DIR"
}
 | 
| 307 | 
 | 
test-usage() {
  ### Exercise the command line: -h, the labelled output flags, file arguments
  #
  # Calls die unless the binary exits with status 1 for a nonexistent input
  # file.  Note that xtrace is switched on part way through and is still on
  # when this function returns.

  build

  local bin=$BASE_DIR/micro_syntax

  # help
  "$bin" -h

  echo 'ANSI'
  echo 'echo "hi $name"' | "$bin" -l shell
  echo

  echo 'WEB'
  echo 'echo "hi $name"' | "$bin" -l shell -w
  echo

  set -x
  echo 'TSV'
  echo 'echo "hi $name"' | "$bin" -l shell -t

  echo
  echo
  echo '"dq"' | "$bin" -l shell

  "$bin" -l shell configure | wc -l

  # '|| status=$?' records the exit status without tripping errexit, so
  # errexit no longer has to be switched off and back on around the command.
  local status=0
  "$bin" -l shell _nonexistent_ZZ || status=$?
  if test "$status" -ne 1; then
    die 'Expected status 1'
  fi
}
 | 
| 341 | 
 | 
soil-run() {
  ### Run test-usage, print a blank line, then run run-tests

  test-usage
  printf '\n'

  run-tests
}
 | 
| 348 | 
 | 
| 349 | ### Shell Tests
 | 
| 350 | 
 | 
here-doc-syntax() {
  ### Test here doc syntax with $0 sh-self
  #
  # Writes _tmp/42.txt and _tmp/here.txt relative to the current directory,
  # then prints _tmp/here.txt.

  # Create the scratch directory first, so the redirects below do not fail
  # when _tmp/ does not exist yet
  mkdir -p _tmp

  echo 42 > _tmp/42.txt

  # _tmp/42 and - are arguments to cat!  Vim doesn't understand
  # and >_tmp/here.txt is not part of the here doc

  cat <<EOF _tmp/42.txt - >_tmp/here.txt
x
short
hello there
EOF

  cat _tmp/here.txt
}
 | 
| 367 | 
 | 
| 368 | "$@"  # Dispatch: run the function named by the first argument, passing the remaining arguments to it
 | 
| 369 | 
 |