#!/usr/bin/env bash
#
# Quick test for a potential rewrite of mycpp.
#
# Usage:
#   pea/TEST.sh <function name>

: ${LIB_OSH=stdlib/osh}
source $LIB_OSH/bash-strict.sh
source $LIB_OSH/no-quotes.sh

source test/common.sh  # run-test-funcs
source devtools/common.sh

source build/dev-shell.sh  # find python3 in /wedge PATH component

# This is just like the yapf problem in devtools/format.sh!
# Pea needs a newer version of MyPy -- one that supports 'math'
unset PYTHONPATH
export PYTHONPATH=.

readonly MYPY_VENV='_tmp/mypy-venv'

install-mypy() {
  local venv=$MYPY_VENV

  rm -r -f -v $venv

  python3 -m venv $venv

  . $venv/bin/activate

  python3 -m pip install mypy

  # Says 1.5.1 (compiled: yes)
  mypy-version
}

mypy-version() {
  . $MYPY_VENV/bin/activate
  python3 -m mypy --version
}

#
# Run Pea
#

pea-main() {
  pea/pea_main.py "$@"
}

parse-one() {
  pea-main parse "$@"
}
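
# Usage sketch: parse a single file through Pea, e.g.
#   pea/TEST.sh parse-one pea/pea_main.py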

translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh

  pea-main cpp "$@"
}

all-files() {
  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack

  cat pea/oils-typecheck.txt

  for path in */*.pyi; do
    echo $path
  done
}

parse-all() {
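  # $0 is this script, so xargs runs 'pea/TEST.sh pea-main parse <files>' in
  # a child process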
  time all-files | xargs --verbose -- $0 pea-main parse
}

# Good illustration of "distributing your overhead"
#
# Total work goes up, while latency goes down.  To a point.  Then it goes back
# up.

# batch size 30
#
# real    0m0.342s
# user    0m0.735s
# sys     0m0.059s
#
# batch size 20
#
# real    0m0.305s
# user    0m0.993s
# sys     0m0.081s
#
# batch size 15
#
# real    0m0.299s
# user    0m1.110s
# sys     0m0.123s
#
# batch size 10
#
# real    0m0.272s
# user    0m1.362s
# sys     0m0.145s
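#
# Doing the arithmetic: at batch size 30, user+sys is ~0.79s of CPU packed
# into 0.34s of wall time (~2.3x parallelism); at batch size 10 it's ~1.5s of
# CPU in 0.27s (~5.5x).  Smaller batches spread the load better, but the
# per-process startup overhead makes total work grow.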

batch-size() {
  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors
  local files_per_process=$(( num_files / (num_procs - 1) ))

  echo "$num_procs $files_per_process"
}
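
# Hypothetical sanity check of the arithmetic above -- this helper is an
# illustration, not used elsewhere.  Assuming nproc reports 8, it prints
# "8 14", "8 28", and "8 2".
demo-batch-size() {
  local n
  for n in 103 200 20; do
    batch-size $n
  done
}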

demo-par() {
  ### Demo parallelism of Python processes

  local num_files
  num_files=$(all-files | wc -l)

  # 103 files
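
  # lastpipe makes the last component of the pipeline below run in this
  # shell, so 'read' can set num_procs and optimal in the current scope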
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      $0 parse-one > /dev/null 2>&1
    echo
  done
}

# - 0.40 secs to parse
# - 0.56 secs to parse and pickle, so pickling itself is 160 ms
# Then
#
# - 0.39 secs to load the pickle
#
# That's definitely slower than I want.  It's 6.6 MB of data.
#
# So
# - parallel parsing can be done in <300 ms
# - parallel pickling
# - serial unpickling (reduce) in 390 ms
#
# So now we're at ~700 ms or so.  Can we type check in 300 ms in pure Python?
#
# What if we compress the generated ASDL?  Those are very repetitive.

# Problem statement:

_serial-pickle() {
  mkdir -p _tmp
  local tmp=_tmp/serial

  time all-files | xargs --verbose -- $0 pea-main dump-pickles > $tmp

  ls -l -h $tmp

  echo 'loading'
  time pea-main load-pickles < $tmp
}

# 1.07 seconds
serial-pickle() { time $0 _serial-pickle; }

pickle-one() {
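  # one output file per batch: $$ is the PID of this (re-invoked) script, so
  # parallel invocations don't clobber each other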
  pea-main dump-pickles "$@" > _tmp/p/$$
}

_par-pickle() {
  local num_files
  num_files=$(all-files | wc -l)

  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- $0 pickle-one

  ls -l -h $dir

  # This takes 410-430 ms?  Wow that's slow.
  time cat $dir/* | pea-main load-pickles
}

# Can get this down to ~700 ms
#
# Note parsing serially in a single process is 410 ms!!!  So this is NOT a win
# unless we have more work besides parsing to parallelize.
#
# We can extract constants and forward declarations in parallel, I suppose.
#
# BUT immutable string constants have to be de-duplicated!  Though I guess
# that is a natural 'reduce' step.
#
# And we can even do implementations and prototypes in parallel too?
#
# I think the entire algorithm can be OPTIMISTIC without serialized type
# checking?
#
# For example:
#
# a = 5
# b = a  # do not know the type without a global algorithm
#
# Or I guess you can do type checking within a function.  Functions require
# signatures.  So yes, let's do that in parallel.
#
# --
#
# The ideal way to do this would be to split Oils up into MODULES, like
#
# _debuild/
# builtin/
# core/
# data_lang/
# frontend/
# osh/
# ysh/
# Smaller: pgen2/ pylib/ tools/
#
# And modules are acyclic, and can compile on their own with dependencies.  If
# you pick random .py files and spit out header files, I think they won't
# compile.  The forward declarations and constants will work, but the
# prototypes won't.

par-pickle() { time $0 _par-pickle; }

sum1() {
  awk '{ sum += $1 } END { print sum }'
}
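
# Example: printf '1\n2\n3\n' | sum1  =>  6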

sum-sizes() {
  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}

size-ratio() {
  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly 'echo _tmp/p/*' doesn't work here -- with -I, xargs takes one
  # name per line, and echo puts them all on one line
  for f in _tmp/p/*; do echo $f; done | sum-sizes
}
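
# So the pickles are ~3.6x bigger than the source they encode (7.13 / 1.96)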

# Only 47 ms!
# I want the overhead to be less than 1 second:
#   1. parallel parsing + pickle
#   2. serial unpickle + type check
#   3. starting the process
#
# So unpickling is slow.

osh-overhead() {
  time bin/osh -c 'echo hi'
}

# MyPy dev version takes 10.2 seconds the first time (without their mypyc
# speedups)
#
# 0.150 seconds the second time, WITHOUT code changes
# 0.136 seconds

# 4.1 seconds: whitespace change
# 3.9 seconds: again, and this is on my fast hoover machine

# 5.0 seconds - Invalid type!
# 4.9 seconds - again invalid

mypy-compare() {
  devtools/types.sh check-oils
}

check-types() {
  # install-mypy creates this.  It may not be present on the CI machine.
  local activate=$MYPY_VENV/bin/activate
  if test -f $activate; then
    . $activate
  fi

  time python3 -m mypy --strict pea/pea_main.py
}

test-translate() {
  translate-cpp bin/oils_for_unix.py
}

test-syntax-error() {
  set +o errexit

  # error in Python syntax
  parse-one pea/testdata/py_err.py
  nq-assert $? -eq 1

  # error in signature
  parse-one pea/testdata/sig_err.py
  nq-assert $? -eq 1

  # error in assignment
  parse-one pea/testdata/assign_err.py
  nq-assert $? -eq 1
}

run-tests() {
  # Making this separate for soil/worker.sh

  echo 'Running test functions'
  run-test-funcs
}

"$@"