| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Run the osh parser on shell scripts found in the wild.
 | 
| 4 | #
 | 
| 5 | # Usage:
 | 
| 6 | #   test/wild.sh <function name>
 | 
| 7 | #
 | 
| 8 | # Examples:
 | 
| 9 | #   test/wild.sh all
 | 
| 10 | #   test/wild.sh all '^oil'  # subset
 | 
| 11 | 
 | 
| 12 | # TODO:
 | 
| 13 | # - Add more scripts, like gentoo package defs
 | 
| 14 | 
 | 
# Strict mode: abort on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit
# The failure of this shopt is silenced on purpose (stderr dropped, '|| true');
# presumably shells other than OSH don't know the option -- TODO confirm.
shopt -s strict:all 2>/dev/null || true  # dogfood for OSH

# Sourced relative to the current directory.
source test/common.sh  # export-osh-cpp

# This persists across build/clean.sh
# Used by manifest-from-archive, which lists the files under $DEPS_WILD_DIR/src.
readonly DEPS_WILD_DIR=../oil_DEPS/wild
 | 
| 24 | 
 | 
| 25 | 
 | 
| 26 | #
 | 
| 27 | # Helpers
 | 
| 28 | #
 | 
| 29 | 
 | 
# Print one manifest line per relative path.
#
# Arguments:
#   $1      project name (first column)
#   $2      base directory that the relative paths live under
#   $3...   paths relative to the base directory
# Outputs:
#   For each path, a line "<project> <base_dir>/<path> <path>" on stdout.
#   Nothing is printed when no paths are given.
_manifest() {
  local proj=$1
  local base_dir=$2
  shift 2

  # Keep the loop variable out of the caller's scope.
  local path
  for path in "$@"; do
    # Quoted printf: the arguments are printed verbatim, never re-globbed
    # against the current directory.
    printf '%s %s %s\n' "$proj" "$base_dir/$path" "$path"
  done
}
 | 
| 39 | 
 | 
| 40 | # generic helper
 | 
# Generic helper: manifest of every *.sh file under a directory.
#
# Arguments:
#   $1   base directory to search
#   $2   optional category; when given, the project is "<category>/<dirname>",
#        otherwise just "<dirname>"
_sh-manifest() {
  local base_dir=$1
  local category=${2:-}

  local leaf
  leaf="$(basename $base_dir)"

  # Prefix "category/" only when a category was passed.
  local proj=${category:+$category/}$leaf

  # The find output is word-split on purpose: each relative path becomes one
  # argument of _manifest.
  _manifest $proj $base_dir \
    $(find $base_dir -name '*.sh' -a -printf '%P\n')
}
 | 
| 55 | 
 | 
# Manifest of every file named 'configure' under a directory.
#
# Arguments:
#   $1   base directory to search
# Outputs:
#   Manifest lines (via _manifest) under the project "<dirname>__configure".
_configure-manifest() {
  local base_dir=$1
  shift

  # Declared separately so a failing basename is not masked by 'local'.
  local name
  name=$(basename "$base_dir")

  # Search the directory that was passed in.  The previous version searched
  # $src, which this function never sets, so it picked up whatever value the
  # caller happened to have (or tripped 'set -o nounset').
  _manifest "${name}__configure" "$base_dir" \
    $(find "$base_dir" -name 'configure' -a -printf '%P\n')
}
 | 
| 64 | 
 | 
| 65 | #
 | 
| 66 | # Special Case Corpora Using Explicit Globs
 | 
| 67 | #
 | 
| 68 | 
 | 
| 69 | # TODO: Where do we write the base dir?
 | 
# Manifest of the oil-sketch repo, built from explicit globs: the top-level
# *.sh files plus *.sh in a fixed list of subdirectories.
#
# Outputs:
#   Lines "oil-sketch <base_dir>/<name> <name>" on stdout.  A glob that
#   matches nothing is printed as the literal pattern.
oil-sketch-manifest() {
  local base_dir=~/git/oil-sketch
  # Keep the loop variable out of the caller's scope.
  local name

  # The globs below are relative, so expand them from inside the repo.
  pushd "$base_dir" >/dev/null
  for name in *.sh {awk,demo,make,misc,regex,tools}/*.sh; do
    printf '%s %s %s\n' oil-sketch "$base_dir/$name" "$name"
  done
  popd >/dev/null
}
 | 
| 78 | 
 | 
# Manifest of this repo's own shell code, built from explicit names and globs
# relative to the current directory.
#
# Outputs:
#   Lines "oil <cwd>/<name> <name>" on stdout.  'configure' and 'install' are
#   always listed; a glob that matches nothing is printed as the literal
#   pattern.
oil-manifest() {
  local base_dir=$PWD
  # Keep the loop variable out of the caller's scope.
  local name

  for name in \
    configure install *.sh {benchmarks,build,test,scripts,opy}/*.sh; do
    printf '%s %s %s\n' oil "$base_dir/$name" "$name"
  done
}
 | 
| 86 | 
 | 
# Root of the aboriginal source tree; read by all-manifests (distro/aboriginal)
# and by parse-aboriginal.
readonly ABORIGINAL_DIR=~/src/aboriginal-1.4.5
 | 
| 88 | 
 | 
| 89 | #
 | 
| 90 | # All
 | 
| 91 | #
 | 
| 92 | 
 | 
# Print the manifest for every corpus on stdout.
#
# Each output line comes from _manifest (directly or via _sh-manifest) or from
# oil-manifest, and has three space-separated fields:
#   <project> <absolute path of file> <path relative to the project dir>
#
# The corpora live at hard-coded locations (mostly under the home directory).
# 'src' is reused as a scratch variable for the directory currently being
# listed.  The find output is deliberately word-split into separate arguments,
# which is why repos containing file names with spaces are commented out.
#
# The "Big" section at the bottom is disabled by the bare 'return' before it.
all-manifests() {
  # Don't expose this repo for now
  #oil-sketch-manifest

  #
  # My Code
  #

  oil-manifest

  local src

  #
  # Books
  #
  src=~/git/wild/books/wicked_cool_shell_scripts_2e
  _manifest "books/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  #
  # Shell/Bash Frameworks/Collections
  #

  src=~/git/other/bash-completion
  _manifest "shell/$(basename $src)" $src \
    $(find $src/completions -type f -a -printf 'completions/%P\n')

  # Bats bash test framework.  It appears to be fairly popular.
  src=~/git/other/bats
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -wholename '*/libexec/*' -a -type f -a \
         -executable -a -printf '%P\n' \) )

  # Bash debugger?
  src=~/src/bashdb-4.4-0.92
  _manifest shell/bashdb $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  src=~/git/other/Bash-Snippets
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # ASDF meta package/version manager.
  # Note that the language-specific plugins are specified (as remote repos)
  # here: https://github.com/asdf-vm/asdf-plugins/tree/master/plugins
  # They could be used for more tests.

  src=~/git/other/asdf
  _manifest "shell/$(basename $src)" $src \
    $(find $src \( -name '*.sh' -o -name '*.bash' \) -a -printf '%P\n' )

  src=~/git/other/scripts-to-rule-them-all
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  src=~/git/wild/shell/shflags
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      '(' -name .git -a -prune ')' -o \
      '(' -name '*.sh' -o \
        -name shflags -o -name shlib -o -name shunit2 -o -name versions \
      ')' -a -printf '%P\n')

  _sh-manifest ~/hg/wild/shell-script-library shell
  _sh-manifest ~/git/other/modernish shell
  _sh-manifest ~/git/other/posixcube shell

  # Shells themselves
  _sh-manifest ~/git/languages/ast shell  # korn shell stuff
  _sh-manifest ~/git/languages/mwc-sh shell
  _sh-manifest ~/src/mksh shell

  #
  # Linux Distros
  #

  _sh-manifest ~/git/other/minimal distro
  _sh-manifest ~/git/other/linuxkit distro
  _sh-manifest ~/git/other/portage distro
  _sh-manifest ~/git/wild/distro/woof-CE distro
  _sh-manifest ~/git/wild/distro/crankshaft distro

  src=~/git/alpine/aports
  _manifest distro/alpine-aports $src \
    $(find $src -name APKBUILD -a -printf '%P\n')

  src=$ABORIGINAL_DIR
  _manifest distro/aboriginal $src \
    $(find $src -name '*.sh' -printf '%P\n')

  src=/etc/init.d
  _manifest distro/initd $src \
    $(find $src -type f -a -executable -a -printf '%P\n')

  src=/usr/bin
  _manifest distro/usr-bin $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  # Version 1.0.89 extracts to a version-less dir.
  src=~/git/basis-build/_tmp/debootstrap
  _manifest distro/debootstrap $src \
    $(find $src '(' -name debootstrap -o -name functions ')' -a -printf '%P\n') \
    $(find $src/scripts -type f -a -printf 'scripts/%P\n')

  # There are a lot of dietpi-* bash scripts that aren't executable, for some
  # reason.  Big hairy shell scripts here.
  src=~/git/wild/distro/DietPi
  _manifest distro/DietPi $src \
    $(find $src '(' -name '*.sh' -o -name 'dietpi-*' ')' -a -printf '%P\n') \

  src=~/src/grep-2.24
  _sh-manifest $src gnu

  src=~/src/coreutils-8.22
  _sh-manifest $src gnu

  src=~/src/glibc-2.23
  _sh-manifest $src gnu

  src=~/src/binutils-2.26
  _sh-manifest $src gnu

  #
  # Operating Systems
  #

  # Skip the autoconf stuff here.  Could skip it elsewhere too.
  src=~/src/freebsd-11.1/usr/src
  _manifest freebsd-11.1 $src \
    $(find $src -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')

  _sh-manifest ~/git/other/minix
  _sh-manifest ~/git/other/illumos-gate
  _sh-manifest ~/git/other/daemontools-encore

  #
  # Cloud Stuff
  #
  _sh-manifest ~/git/other/mesos cloud
  _sh-manifest ~/git/other/rocket cloud
  _sh-manifest ~/git/other/docker cloud
  _sh-manifest ~/git/other/chef-bcpc cloud
  _sh-manifest ~/git/other/sandstorm cloud
  _sh-manifest ~/git/other/kubernetes cloud
  _sh-manifest ~/git/other/manta-marlin cloud

  src=~/git/other/dokku
  _manifest cloud/dokku $src \
    $(find $src '(' -name '*.sh' -o -name dokku ')' -a -printf '%P\n')

  #
  # Google
  #
  _sh-manifest ~/git/other/bazel google
  _sh-manifest ~/git/other/protobuf google
  _sh-manifest ~/git/other/kythe google
  _sh-manifest ~/git/other/tensorflow google
  # Filenames with spaces!
  #_sh-manifest ~/git/other/grpc google

  #
  # Esoteric
  #

  _sh-manifest ~/git/scratch/shasm esoteric
  _sh-manifest ~/git/wild/esoteric/wwwoosh esoteric
  _sh-manifest ~/git/wild/esoteric/lishp esoteric

  # OpenGL for bash is interesting because there is a lot of arithmetic.
  # Not surprisingly, there are 6 parse errors, almost all of which are due to
  # "dynamic arithmetic".
  src=~/git/other/CmdlineGL
  _manifest esoteric/CmdlineGL $src \
    $(find $src \
      -type f -a \
      '(' -name '*.sh' -o -name '*.lib' ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/setup.shl
  _manifest esoteric/setup.shl $src \
    $(find $src \
      -type f -a \
      '(' -name '*.shl' -o -name setup -o -name Setup ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/mal/bash
  _manifest esoteric/make-a-lisp-bash $src \
    $(find $src '(' -name '*.sh' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/gherkin
  _manifest esoteric/gherkin $src \
    $(find $src '(' -name '*.sh' -o -name 'gherkin' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/balls
  _manifest esoteric/balls $src \
    $(find $src '(' -name '*.sh' -o -name balls -o -name esh ')' -a \
                -printf '%P\n')

  src=~/git/wild/esoteric/bashcached
  _manifest esoteric/bashcached $src \
    $(find $src '(' -name '*.sh' -o -name 'bashcached' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/quinedb
  _manifest esoteric/quinedb $src \
    $(find $src '(' -name '*.sh' -o -name 'quinedb' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/bashttpd
  _manifest esoteric/bashttpd $src \
    $(find $src -name 'bashttpd' -a -printf '%P\n')

  # JSON Parsers
  src=~/git/other/j
  _manifest esoteric/j $src \
    $(find $src -type f -a  -name j -a -printf '%P\n')

  _sh-manifest ~/git/other/JSON.sh esoteric

  #
  # Other Languages
  #

  _sh-manifest ~/git/languages/julia
  _sh-manifest ~/git/languages/reason
  _sh-manifest ~/git/languages/sdk  # Dart SDK?

  _sh-manifest ~/git/languages/micropython
  _sh-manifest ~/git/other/staticpython  # statically linked build

  _sh-manifest ~/git/other/exp  # Go experimental repo

  #
  # Grab Bag
  #

  # This overlaps with git too much
  #src=~/git/other
  #local depth=3
  #_manifest git-other $src \
  #  $(find $src -maxdepth $depth -name '*.sh' -a -printf '%P\n')

  src=~/hg/other
  _manifest hg-other $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  #
  # Top-Level
  #

  # Kernel
  _sh-manifest ~/src/linux-4.8.7

  # Git
  # git-gui.sh and po2msg.sh are actually Tcl!  We could stop parsing at 'exec'
  # but there's no point right now.
  src=~/git/other/git
  _manifest $(basename $src) $src \
    $(find $src -name '*.sh' -a \
      ! -name 'git-gui.sh' \
      ! -name 'po2msg.sh' \
      -a -printf '%P\n')

  _sh-manifest ~/git/other/liballocs
  _sh-manifest ~/git/other/boringssl
  _sh-manifest ~/git/other/arrow

  #
  # Uncategorized
  #

  # Has filenames with spaces!
  #_sh-manifest ~/git/other/linguist

  # Brendan Gregg's performance scripts.
  # Find executable scripts, since they don't end in sh.
  # net/tcpretrans is written in Perl.
  src=~/git/other/perf-tools
  _manifest $(basename $src) $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -name tcpretrans -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # Most of these scripts have no extension.  So look at executable ones and
  # then see if the shebang ends with sh!

  # NOTE: In YSH it would be nice if shebang-is-shell could be a function call.
  # Don't need to fork every time.
  src=~/git/other/pixelb-scripts
  _manifest pixelb-scripts $src \
    $(find $src \( -name .git -a -prune \) -o \
                \( -type f -a \
                   -executable -a \
                   ! -name '*.py' -a \
                   -exec test/shebang.sh is-shell {} ';' -a \
                   -printf '%P\n' \) )

  # Something related to WebDriver
  # Doesn't parse because of extended glob.
  src=~/git/other/wd
  _manifest $(basename $src) $src \
    $(find $src -type f -a  -name wd -a -printf '%P\n')

  #
  # Big
  #

  # Everything below this early return is currently disabled.
  return
  log "Finding Files in Big Projects"
  readonly BIG_BUILD_ROOT=/media/andy/hdd-8T/big-build/ssd-backup/sdb/build

  # 2m 18s the first time.
  # 2 seconds the second time.  This is a big slow drive.
  time {
    _sh-manifest $BIG_BUILD_ROOT/hg/other/mozilla-central/

    _sh-manifest $BIG_BUILD_ROOT/chrome
    _configure-manifest $BIG_BUILD_ROOT/chrome

    _sh-manifest $BIG_BUILD_ROOT/android
    _configure-manifest $BIG_BUILD_ROOT/android

    _sh-manifest $BIG_BUILD_ROOT/openwrt
    _sh-manifest $BIG_BUILD_ROOT/OpenWireless
  }
}
 | 
| 425 | 
 | 
# TODO: Note: duplicated in wild-runner.sh
# Path of the manifest file, relative to the current directory.  Written by
# write-manifest and manifest-from-archive; read by make-archive and abspaths.
readonly MANIFEST=_tmp/wild/MANIFEST.txt
 | 
| 428 | 
 | 
# Regenerate $MANIFEST from all-manifests, then print its line count.
write-manifest() {
  mkdir -p _tmp/wild

  local manifest_path=$MANIFEST
  all-manifests >$manifest_path

  # One manifest line per file, so this is the number of files.
  wc -l $manifest_path
}
 | 
| 435 | 
 | 
| 436 | # TODO: Publish this script
 | 
# Thin wrapper around the 'multi' tool from tree-tools; all arguments are
# forwarded unchanged.
multi() {
  ~/hg/tree-tools/bin/multi "$@"
}
 | 
| 438 | 
 | 
# Build _tmp/wild/wild-source.tar.gz from the files listed in $MANIFEST.
#
# Manifest fields:
#   $1 is project
#   $2 is abspath of source
#   $3 is rel path within project
# 'multi tar' receives "<abspath> <project>/<relpath>" lines on stdin.
make-archive() {
  local tarball=_tmp/wild/wild-source.tar.gz

  # Start from scratch rather than reusing an old archive.
  rm -f $tarball

  awk '{ printf "%s %s/%s\n", $2, $1, $3 }' $MANIFEST \
    | multi tar $tarball

  ls -l $tarball
}
 | 
| 450 | 
 | 
| 451 | # This is opposed to crawling the file system with 'find'.
 | 
# Write $MANIFEST by listing the regular files under $DEPS_WILD_DIR/src,
# instead of running the find commands in all-manifests.
manifest-from-archive() {
  local manifest_dir
  manifest_dir=$(dirname $MANIFEST)
  mkdir -p $manifest_dir

  # Each line: path relative to the src/ dir, then the path as find saw it
  find $DEPS_WILD_DIR/src -type f -printf '%P %p\n' > $MANIFEST
}
 | 
| 457 | 
 | 
| 458 | # 442K lines without "big" and without ltmain.sh
 | 
| 459 | # TODO: Include a few ltmain.sh.  Have to de-dupe them.
 | 
| 460 | #
 | 
| 461 | # 767K lines with aports (It's 250K lines by itself.)
 | 
| 462 | 
 | 
| 463 | # 1.30 M lines with "big".
 | 
| 464 | # 760K lines without ltmain.sh.  Hm need to get up to 1M.
 | 
| 465 | 
 | 
# Print the second manifest column (the source path) of entries in $MANIFEST.
#
# Arguments:
#   $1   optional project name; when given, only lines whose first column
#        equals it exactly are printed
abspaths() {
  local proj=${1:-}
  if test -n "$proj"; then
    # Quoted so an argument containing whitespace stays one awk variable
    # instead of being split into extra awk operands.
    awk -v proj="$proj" '$1 == proj {print $2}' "$MANIFEST"
  else
    awk '{print $2}' "$MANIFEST"
  fi
}
 | 
| 474 | 
 | 
| 475 | # Excluding ltmain.sh, goes from 910K lines to 830K.
 | 
# Print per-file line counts for every manifest file except ltmain.sh, sorted
# ascending.
count-lines() {
  # wc takes its file list NUL-separated on stdin (--files0-from -) because
  # there are too many files: xargs would split them over several wc
  # invocations.
  #
  # It would be nicer if wc just had an option not to sum?
  time abspaths \
    | grep -v ltmain.sh \
    | tr '\n' '\0' \
    | wc -l --files0-from - \
    | sort -n
}
 | 
| 485 | 
 | 
# Search the manifest files (except ltmain.sh) for lines that start with an
# indexed assignment like a[x]=
grep-features1() {
  # Hm only 608 files out of 10,000 use a[x]=
  # But it is used in
  # /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  # portage, bash-completion, uses it

  # 'grep -E' rather than 'egrep': GNU grep documents egrep as obsolescent
  # and recent versions print a warning for it.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z0-9]+\[.*\]='
}
 | 
| 494 | 
 | 
# Search the manifest files (except ltmain.sh) for the literal string '=([',
# i.e. associative array literals.
grep-features2() {
  # Outside of illumos/ast/esoteric, there's only one real usage of associative
  # array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh:  WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F '=(['
}
 | 
| 502 | 
 | 
# Search the manifest files (except ltmain.sh) for the literal string ';&'.
grep-features3() {
  # Wow this is ONLY used in a handful of files in bash-completions!  And tests.
  # That might be enough to justify it.
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F ';&'
}
 | 
| 509 | 
 | 
# Search the manifest files for lines that contain '[[ ' followed later by one
# of the operators -eq -ne -le -ge -lt -gt.
grep-features4() {
  # Wow this is ONLY used in a handful of files in bash-completions!  And tests.
  # That might be enough to justify it.
  # NOTE(review): the two lines above are identical to the comment in
  # grep-features3 -- possibly copy-pasted; confirm they apply to this pattern.

  # Unlike the other grep-features functions, ltmain.sh is not filtered out
  # (the 'grep -v ltmain.sh' stage was commented out).
  #
  # Alternative searches, kept for reference:
  #   xargs grep -E '\${[a-zA-Z0-9_]+\[[^@*]'  # looks like ${a[i]}
  #   xargs grep -F '$(('
  time abspaths \
    | xargs grep -E '\[\[ .*-(eq|ne|le|ge|lt|gt)'
}
 | 
| 518 | 
 | 
| 519 | # Takes ~15 seconds for 8,000+ files.
 | 
| 520 | #
 | 
| 521 | # NOTE: APKBUILD don't have shebang lines!  So there are a bunch of false
 | 
| 522 | # detections, e.g. APKBUILD as Makefile, C, etc.
 | 
# Run file(1) over every manifest file and save the result, piping through pv.
detect-all-types() {
  local report=_tmp/wild/file-types.txt

  time abspaths \
    | xargs file \
    | pv > $report
}
 | 
| 526 | 
 | 
# Feed the output of detect-all-types to test/wild_types.py on stdin.
wild-types() {
  # Redirect instead of 'cat file |': one process fewer, and a missing input
  # file stops the command before the script starts.
  test/wild_types.py < _tmp/wild/file-types.txt
}
 | 
| 530 | 
 | 
all() {
  ### Run by devtools/release.sh

  # The report is made for all projects, but only some are run; the arguments
  # are passed straight through to the runner.
  local runner=test/wild-runner.sh
  $runner parse-and-report "$@"
}
 | 
| 538 | 
 | 
smoke-test() {
  ### Smoke test on Oils source; takes a few seconds

  local filter='oil/'
  all "$filter"
}
 | 
| 543 | 
 | 
# List the parse stderr files under _tmp/wild/raw that contain 'Traceback'.
find-tracebacks() {
  # NUL-delimited hand-off so that paths containing whitespace reach grep
  # intact instead of being split by xargs.
  find _tmp/wild/raw -name '*__parse.stderr.txt*' -print0 |
    xargs -0 grep -l 'Traceback'
}
 | 
| 548 | 
 | 
# Print the executable files under a directory whose name has no extension
# and which test/shebang.sh classifies as shell.
#
# Arguments:
#   $1   directory to search (.git directories are pruned)
find-with-shebang() {
  local root=$1

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  # * there are TEN different kinds
  # * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # Because the regex is applied to the whole path, a directory component
  # such as freebsd-11.1 has to be matched too; that is what the leading
  # '.+/' is for.

  # What might be faster here is to find all the executables first, then put
  # them in a text file.  test/shebang.sh can be invoked with stdin as a path
  # list and filter them.  It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD.  There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  # Adjacent find tests are ANDed implicitly.
  time find $root \
    '(' -name .git -prune ')' -o \
    '(' -regex '.+/[a-zA-Z0-9_\-]+' \
        -type f \
        -executable \
        -exec test/shebang.sh is-shell {} ';' \
        -printf '%p\n' ')'
}
 | 
| 584 | 
 | 
# Print per-file line counts of the *.ebuild files in the gentoo repo, sorted
# ascending.  The manifest step after the 'return' is disabled.
gentoo() {
  # 38,000 ebuild files
  local src
  src=~/git/gentoo/gentoo

  # 2M lines, because there are a lot of duplicate versions.

  time find $src -name '*.ebuild' -a -print0 |
    wc -l --files0-from - | sort -n

  # Disabled for now: only the line counts above are produced.
  return

  # Kept in the same shape as the _manifest calls in all-manifests: one search
  # root and paths relative to it.  The earlier draft also searched '.' and
  # printed full paths.
  _manifest distro/gentoo $src \
    $(find $src -name '*.ebuild' -a -printf '%P\n')
}
 | 
| 599 | 
 | 
| 600 | #
 | 
| 601 | # ANALYSIS: Find Biggest Shell Scripts in Aboriginal Source Tarballs
 | 
| 602 | #
 | 
| 603 | 
 | 
# Directory holding the *.tar.gz and *.tar.bz2 source tarballs that
# aboriginal-packages enumerates.
readonly AB_PACKAGES=~/hg/scratch/aboriginal/aboriginal-1.2.2/packages
 | 
| 605 | 
 | 
# List the source tarballs in $AB_PACKAGES.
#
# Outputs:
#   One line per tarball: "<path> <tar flag> <name>", where the flag is -z for
#   *.tar.gz and -j for *.tar.bz2, and <name> is the file name without the
#   extension.
aboriginal-packages() {
  # Keep the loop variables out of the caller's scope.
  local z name

  for z in "$AB_PACKAGES"/*.tar.gz; do
    # An unmatched glob is left as the literal pattern; don't report it as a
    # package.
    test -e "$z" || continue
    name=$(basename "$z" .tar.gz)
    printf '%s %s %s\n' "$z" -z "$name"
  done
  for z in "$AB_PACKAGES"/*.tar.bz2; do
    test -e "$z" || continue
    name=$(basename "$z" .tar.bz2)
    printf '%s %s %s\n' "$z" -j "$name"
  done
}
 | 
| 616 | 
 | 
# Directory where aboriginal-manifest writes one <name>.txt listing per
# tarball; aboriginal-biggest reads them back.
readonly AB_OUT=_tmp/aboriginal
 | 
| 618 | 
 | 
# For every tarball reported by aboriginal-packages, write the verbose tar
# listing of its *.sh entries to $AB_OUT/<name>.txt.
#
# Outputs:
#   "<tarball path> <name>" on stdout for each tarball processed.
aboriginal-manifest() {
  mkdir -p "$AB_OUT"

  local z tar_flag name listing
  # -r: take each line literally, without backslash processing.
  aboriginal-packages | while read -r z tar_flag name; do
    echo "$z" "$name"
    listing=$AB_OUT/${name}.txt
    # grep exits non-zero when an archive has no *.sh entries; '|| true' keeps
    # errexit/pipefail from aborting the loop in that case.
    tar --list --verbose $tar_flag < "$z" | grep '\.sh$' > "$listing" || true
  done
}
 | 
| 628 | 
 | 
# Print "<size> <filename>" for every entry in the listings written by
# aboriginal-manifest, smallest first.
aboriginal-biggest() {
  # Columns 3 and 6 of each listing line are printed; sort -n orders by the
  # first of them.
  cat $AB_OUT/*.txt \
    | awk '{ printf "%s %s\n", $3, $6 }' \
    | sort -n
}
 | 
| 633 | 
 | 
# CSV file that parse-aboriginal appends timings to and ab-times reads.
readonly AB_TIMES=_tmp/parse-aboriginal.csv 
 | 
| 635 | 
 | 
# Run 'bin/osh -n' on every *.sh file under $ABORIGINAL_DIR, one file per
# invocation, wrapped in benchmarks/time_.py which appends to $AB_TIMES.
parse-aboriginal() {
  #find $ABORIGINAL_DIR -name '*.sh' | xargs wc -l | sort -n
  #return

  # Timing wrapper and the command it runs; xargs appends the file name.
  local -a timer=(benchmarks/time_.py --append --output $AB_TIMES --)
  local -a parser=(bin/osh -n --ast-format none)

  find $ABORIGINAL_DIR -name '*.sh' \
    | xargs -n 1 -- "${timer[@]}" "${parser[@]}"
}
 | 
| 644 | 
 | 
| 645 | # 80 ms max.  That is good enough for sure.
 | 
# Print the largest value found in the second CSV column of $AB_TIMES.
ab-times() {
  awk -F ',' '
    $2 > slowest { slowest = $2 }
    END { print(slowest) }
  ' $AB_TIMES
}
 | 
| 649 | 
 | 
| 650 | # biggest scripts besides ltmain:
 | 
| 651 | #
 | 
| 652 | # 8406 binutils-397a64b3/binutils/embedspu.sh
 | 
| 653 | # 8597 binutils-397a64b3/ld/emulparams/msp430all.sh
 | 
| 654 | # 9951 bash-2.05b/examples/scripts/dd-ex.sh
 | 
| 655 | # 12558 binutils-397a64b3/ld/genscripts.sh
 | 
| 656 | # 14148 bash-2.05b/examples/scripts/adventure.sh
 | 
| 657 | # 21811 binutils-397a64b3/gas/testsuite/gas/xstormy16/allinsn.sh
 | 
| 658 | # 28004 bash-2.05b/examples/scripts/bcsh.sh
 | 
| 659 | # 29666 gcc-4.2.1/ltcf-gcj.sh
 | 
| 660 | # 33972 gcc-4.2.1/ltcf-c.sh
 | 
| 661 | # 39048 gcc-4.2.1/ltcf-cxx.sh
 | 
| 662 | 
 | 
| 663 | #
 | 
| 664 | # ANALYSIS: Number of comment lines
 | 
| 665 | #
 | 
| 666 | # TODO: Determine if we should try to save comment lines?  I think we should
 | 
| 667 | # save more than that.
 | 
| 668 | 
 | 
| 669 | 
 | 
| 670 | #
 | 
| 671 | # ANALYSIS: Which scripts use set -C / set -o noclobber?
 | 
| 672 | #
 | 
| 673 | 
 | 
| 674 | # VERY rare, only 13 instances, in ast, freebsd, and illumos-gate.
 | 
# Collect every line in the manifest files that mentions 'noclobber' or
# starts with 'set -C' / 'set +C' into _tmp/noclobber.txt, then print that
# file's line count.
analyze-noclobber() {
  local out=_tmp/noclobber.txt
  # Ignore this script
  #
  # The '+' is escaped: in an extended regex a bare '+' is a repetition
  # operator, so '^set +C' would match 'set' followed by spaces and 'C' and
  # never a literal 'set +C'.
  #
  # '|| true': grep finding nothing is not an error here.
  time abspaths | grep -v 'test/wild.sh' |
    xargs grep -E 'noclobber|^set -C|^set \+C' > $out || true
  wc -l $out
}
 | 
| 682 | 
 | 
| 683 | # Quick golden test.  Test that pretty-printing doesn't regress.
 | 
# Quick golden test: re-invoke this script's 'all' function on the 'esoteric'
# subset.
golden-subset() {
  local self=$0
  $self all esoteric
}
 | 
| 687 | 
 | 
| 688 | # Make a copy of the output for comparison.
 | 
# Copy the *__ast.html files under _tmp/wild-www/esoteric/ to a destination
# directory, for later comparison.
#
# Arguments:
#   $1   destination directory (default: _tmp/wild-gold)
copy-golden-ast() {
  local dest=${1:-_tmp/wild-gold}
  local ast_root=_tmp/wild-www/esoteric/

  # 'multi cp' gets "<source path> <path relative to ast_root>" per line.
  # NOTE(review): this calls ~/git/tree-tools/bin/multi directly, while the
  # multi() helper in this file points at ~/hg/tree-tools/bin/multi -- confirm
  # which location is the right one.
  find $ast_root -name '*__ast.html' -printf '%p %P\n' \
    | ~/git/tree-tools/bin/multi cp $dest
}
 | 
| 694 | 
 | 
| 695 | # Find shell scripts on the root file system.
 | 
| 696 | # 1302 files on my system.
 | 
# Find shell scripts on the root file system: executable regular files that
# test/shebang.sh classifies as shell.  The list is printed and also saved to
# _tmp/rootfs.txt.
rootfs-manifest() {
  local -a roots=(/bin /lib /sbin /etc/ /opt /root /run /usr /var)

  # Adjacent find tests are ANDed implicitly.
  find "${roots[@]}" \
    -type f \
    -executable \
    -exec test/shebang.sh is-shell {} ';' \
    -print \
    | tee _tmp/rootfs.txt
}
 | 
| 704 | 
 | 
# Entry point for the Soil job: set up the osh binary via export-osh-cpp,
# then run 'all' on one subset of the corpus.
#
# Environment:
#   QUICKLY   when non-empty, only the '^oil' subset is run
soil-run() {
  export-osh-cpp _tmp/native-tar-test opt

  # Default subset.
  #
  # This takes longer than 15 minutes with build/dev.sh minimal !
  # That's with xargs -P $MAX_PROCS in test/wild-runner.sh
  #
  # The whole thing takes 7:25, which means that the 'wild' Soil job takes 10
  # minutes.  It waits for the tarball, then tests it.
  #
  # For now, just do 'distro', since that's about half the files.
  #
  # Other choices that were tried:
  #   all '^cloud'
  #   all '^cloud|^gnu|^freebsd'
  #   all
  local filter='^distro'

  if test -n "${QUICKLY:-}"; then
    # Do a quick version
    filter='^oil'
  fi

  all "$filter"
}
 | 
| 726 | 
 | 
# Dispatch: when this file is run as wild.sh, treat the command line as
# "<function name> [args...]" and run it.  Under any other name nothing is
# executed.
case "$(basename $0)" in
  wild.sh)
    "$@"
    ;;
esac
 |