| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Run the osh parser on shell scripts found in the wild.
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # test/wild.sh <function name>
|
| 7 | #
|
| 8 | # Examples:
|
| 9 | # test/wild.sh all
|
| 10 | # test/wild.sh all '^oil' # subset
|
| 11 |
|
| 12 | # TODO:
|
| 13 | # - Add more scripts, like gentoo package defs
|
| 14 |
|
| 15 | set -o nounset
|
| 16 | set -o pipefail
|
| 17 | set -o errexit
|
| 18 | shopt -s strict:all 2>/dev/null || true # dogfood for OSH
|
| 19 |
|
| 20 | source test/common.sh # export-osh-cpp
|
| 21 |
|
| 22 | # This persists across build/clean.sh
|
| 23 | readonly DEPS_WILD_DIR=../oil_DEPS/wild
|
| 24 |
|
| 25 |
|
| 26 | #
|
| 27 | # Helpers
|
| 28 | #
|
| 29 |
|
# Print one manifest line per file.
#
# Arguments:
#   $1    - project name (first manifest column)
#   $2    - base directory that contains the files
#   $3... - file paths relative to the base directory
# Outputs:
#   One line per path on stdout: "<project> <base_dir>/<path> <path>".
#   Columns are separated by single spaces, so paths containing whitespace
#   cannot be represented in this format.
_manifest() {
  local proj=$1
  local base_dir=$2
  shift 2

  # Keep the loop variable local so a caller's 'path' is not clobbered.
  local path
  for path in "$@"; do
    # printf with quoted arguments emits each field verbatim; an unquoted
    # echo would re-split and re-glob the values.
    printf '%s %s/%s %s\n' "$proj" "$base_dir" "$path" "$path"
  done
}
|
| 39 |
|
# Generic helper: emit manifest lines for every *.sh file below a directory.
#
# Arguments:
#   $1 - base directory to search
#   $2 - optional category; when non-empty the project column becomes
#        "<category>/<basename of $1>", otherwise just the basename
_sh-manifest() {
  local base_dir=$1
  local category=${2:-}

  # ${category:+...} contributes the "<category>/" prefix only when a
  # non-empty category was given.
  local proj
  proj="${category:+$category/}$(basename $base_dir)"

  # The find output is left unquoted on purpose: every relative path must
  # become its own argument to _manifest.
  _manifest $proj $base_dir \
    $(find $base_dir -name '*.sh' -a -printf '%P\n')
}
|
| 55 |
|
# Emit manifest lines for every file named 'configure' below a directory.
#
# Arguments:
#   $1 - base directory to search
# Outputs:
#   Manifest lines whose project column is "<basename of $1>__configure".
_configure-manifest() {
  local base_dir=$1
  shift

  # Declare and assign separately so a basename failure is not masked by
  # the exit status of 'local'.
  local name
  name=$(basename "$base_dir")

  # Search the directory that was passed in. The previous code searched
  # "$src", a variable this function never defines.
  # The find output stays unquoted so each path is a separate argument.
  _manifest "${name}__configure" "$base_dir" \
    $(find "$base_dir" -name 'configure' -a -printf '%P\n')
}
|
| 64 |
|
| 65 | #
|
| 66 | # Special Case Corpora Using Explicit Globs
|
| 67 | #
|
| 68 |
|
| 69 | # TODO: Where do we write the base dir?
|
# Emit manifest lines for the oil-sketch checkout in ~/git/oil-sketch, using
# explicit globs: top-level *.sh plus *.sh in a fixed set of subdirectories.
#
# Returns non-zero without printing anything when the checkout is missing.
# A glob that matches nothing is emitted literally (no nullglob).
oil-sketch-manifest() {
  local base_dir=~/git/oil-sketch
  local name  # keep the loop variable out of the caller's scope

  # Bail out if the checkout is missing; otherwise the globs below would be
  # evaluated in whatever directory we happen to be in.
  pushd "$base_dir" >/dev/null || return
  for name in *.sh {awk,demo,make,misc,regex,tools}/*.sh; do
    echo oil-sketch $base_dir/$name $name
  done
  popd >/dev/null
}
|
| 78 |
|
# Emit manifest lines for this repository's own shell scripts, relative to
# the current working directory: 'configure', 'install', top-level *.sh, and
# *.sh in a fixed set of subdirectories.
#
# NOTE: a glob that matches nothing is emitted literally (no nullglob).
oil-manifest() {
  local base_dir=$PWD
  local name  # keep the loop variable out of the caller's scope
  for name in \
    configure install *.sh {benchmarks,build,test,scripts,opy}/*.sh; do
    echo oil $base_dir/$name $name
  done
}
|
| 86 |
|
| 87 | readonly ABORIGINAL_DIR=~/src/aboriginal-1.4.5
|
| 88 |
|
| 89 | #
|
| 90 | # All
|
| 91 | #
|
| 92 |
|
# Print the manifest for every corpus to stdout.
#
# Each section points 'src' (or a helper argument) at a checkout or tarball
# directory and emits "<project> <abspath> <relpath>" lines via oil-manifest,
# _manifest, or _sh-manifest. The $(find ...) substitutions are deliberately
# unquoted so each relative path becomes a separate argument; repositories
# with spaces in file names are therefore commented out below.
#
# Everything after the bare 'return' near the end (the "Big" section) is
# unreachable and kept only for reference.
all-manifests() {
  # Don't expose this repo for now
  #oil-sketch-manifest

  #
  # My Code
  #

  oil-manifest

  local src

  #
  # Books
  #
  src=~/git/wild/books/wicked_cool_shell_scripts_2e
  _manifest "books/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  #
  # Shell/Bash Frameworks/Collections
  #

  src=~/git/other/bash-completion
  _manifest "shell/$(basename $src)" $src \
    $(find $src/completions -type f -a -printf 'completions/%P\n')

  # Bats bash test framework. It appears to be fairly popular.
  src=~/git/other/bats
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -wholename '*/libexec/*' -a -type f -a \
         -executable -a -printf '%P\n' \) )

  # Bash debugger?
  src=~/src/bashdb-4.4-0.92
  _manifest shell/bashdb $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  src=~/git/other/Bash-Snippets
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # ASDF meta package/version manager.
  # Note that the language-specific plugins are specified (as remote repos)
  # here: https://github.com/asdf-vm/asdf-plugins/tree/master/plugins
  # They could be used for more tests.

  src=~/git/other/asdf
  _manifest "shell/$(basename $src)" $src \
    $(find $src \( -name '*.sh' -o -name '*.bash' \) -a -printf '%P\n' )

  src=~/git/other/scripts-to-rule-them-all
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  src=~/git/wild/shell/shflags
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      '(' -name .git -a -prune ')' -o \
      '(' -name '*.sh' -o \
          -name shflags -o -name shlib -o -name shunit2 -o -name versions \
      ')' -a -printf '%P\n')

  _sh-manifest ~/hg/wild/shell-script-library shell
  _sh-manifest ~/git/other/modernish shell
  _sh-manifest ~/git/other/posixcube shell

  # Shells themselves
  _sh-manifest ~/git/languages/ast shell # korn shell stuff
  _sh-manifest ~/git/languages/mwc-sh shell
  _sh-manifest ~/src/mksh shell

  #
  # Linux Distros
  #

  _sh-manifest ~/git/other/minimal distro
  _sh-manifest ~/git/other/linuxkit distro
  _sh-manifest ~/git/other/portage distro
  _sh-manifest ~/git/wild/distro/woof-CE distro
  _sh-manifest ~/git/wild/distro/crankshaft distro

  src=~/git/alpine/aports
  _manifest distro/alpine-aports $src \
    $(find $src -name APKBUILD -a -printf '%P\n')

  src=$ABORIGINAL_DIR
  _manifest distro/aboriginal $src \
    $(find $src -name '*.sh' -printf '%P\n')

  src=/etc/init.d
  _manifest distro/initd $src \
    $(find $src -type f -a -executable -a -printf '%P\n')

  src=/usr/bin
  _manifest distro/usr-bin $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  # Version 1.0.89 extracts to a version-less dir.
  src=~/git/basis-build/_tmp/debootstrap
  _manifest distro/debootstrap $src \
    $(find $src '(' -name debootstrap -o -name functions ')' -a -printf '%P\n') \
    $(find $src/scripts -type f -a -printf 'scripts/%P\n')

  # There are a lot of dietpi-* bash scripts that aren't executable, for some
  # reason. Big hairy shell scripts here.
  # (No trailing backslash after the find: a dangling continuation here would
  # swallow the next statement if the following blank line were ever removed.)
  src=~/git/wild/distro/DietPi
  _manifest distro/DietPi $src \
    $(find $src '(' -name '*.sh' -o -name 'dietpi-*' ')' -a -printf '%P\n')

  src=~/src/grep-2.24
  _sh-manifest $src gnu

  src=~/src/coreutils-8.22
  _sh-manifest $src gnu

  src=~/src/glibc-2.23
  _sh-manifest $src gnu

  src=~/src/binutils-2.26
  _sh-manifest $src gnu

  #
  # Operating Systems
  #

  # Skip the autoconf stuff here. Could skip it elsewhere too.
  src=~/src/freebsd-11.1/usr/src
  _manifest freebsd-11.1 $src \
    $(find $src -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')

  _sh-manifest ~/git/other/minix
  _sh-manifest ~/git/other/illumos-gate
  _sh-manifest ~/git/other/daemontools-encore

  #
  # Cloud Stuff
  #
  _sh-manifest ~/git/other/mesos cloud
  _sh-manifest ~/git/other/rocket cloud
  _sh-manifest ~/git/other/docker cloud
  _sh-manifest ~/git/other/chef-bcpc cloud
  _sh-manifest ~/git/other/sandstorm cloud
  _sh-manifest ~/git/other/kubernetes cloud
  _sh-manifest ~/git/other/manta-marlin cloud

  src=~/git/other/dokku
  _manifest cloud/dokku $src \
    $(find $src '(' -name '*.sh' -o -name dokku ')' -a -printf '%P\n')

  #
  # Google
  #
  _sh-manifest ~/git/other/bazel google
  _sh-manifest ~/git/other/protobuf google
  _sh-manifest ~/git/other/kythe google
  _sh-manifest ~/git/other/tensorflow google
  # Filenames with spaces!
  #_sh-manifest ~/git/other/grpc google

  #
  # Esoteric
  #

  _sh-manifest ~/git/scratch/shasm esoteric
  _sh-manifest ~/git/wild/esoteric/wwwoosh esoteric
  _sh-manifest ~/git/wild/esoteric/lishp esoteric

  # OpenGL for bash is interesting because there is a lot of arithmetic.
  # Not surprisingly, there are 6 parse errors, almost all of which are due to
  # "dynamic arithmetic".
  src=~/git/other/CmdlineGL
  _manifest esoteric/CmdlineGL $src \
    $(find $src \
      -type f -a \
      '(' -name '*.sh' -o -name '*.lib' ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/setup.shl
  _manifest esoteric/setup.shl $src \
    $(find $src \
      -type f -a \
      '(' -name '*.shl' -o -name setup -o -name Setup ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/mal/bash
  _manifest esoteric/make-a-lisp-bash $src \
    $(find $src '(' -name '*.sh' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/gherkin
  _manifest esoteric/gherkin $src \
    $(find $src '(' -name '*.sh' -o -name 'gherkin' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/balls
  _manifest esoteric/balls $src \
    $(find $src '(' -name '*.sh' -o -name balls -o -name esh ')' -a \
                -printf '%P\n')

  src=~/git/wild/esoteric/bashcached
  _manifest esoteric/bashcached $src \
    $(find $src '(' -name '*.sh' -o -name 'bashcached' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/quinedb
  _manifest esoteric/quinedb $src \
    $(find $src '(' -name '*.sh' -o -name 'quinedb' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/bashttpd
  _manifest esoteric/bashttpd $src \
    $(find $src -name 'bashttpd' -a -printf '%P\n')

  # JSON Parsers
  src=~/git/other/j
  _manifest esoteric/j $src \
    $(find $src -type f -a -name j -a -printf '%P\n')

  _sh-manifest ~/git/other/JSON.sh esoteric

  #
  # Other Languages
  #

  _sh-manifest ~/git/languages/julia
  _sh-manifest ~/git/languages/reason
  _sh-manifest ~/git/languages/sdk # Dart SDK?

  _sh-manifest ~/git/languages/micropython
  _sh-manifest ~/git/other/staticpython # statically linked build

  _sh-manifest ~/git/other/exp # Go experimental repo

  #
  # Grab Bag
  #

  # This overlaps with git too much
  #src=~/git/other
  #local depth=3
  #_manifest git-other $src \
  # $(find $src -maxdepth $depth -name '*.sh' -a -printf '%P\n')

  src=~/hg/other
  _manifest hg-other $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  #
  # Top-Level
  #

  # Kernel
  _sh-manifest ~/src/linux-4.8.7

  # Git
  # git-gui.sh and po2msg.sh are actually Tcl! We could stop parsing at 'exec'
  # but there's no point right now.
  src=~/git/other/git
  _manifest $(basename $src) $src \
    $(find $src -name '*.sh' -a \
      ! -name 'git-gui.sh' \
      ! -name 'po2msg.sh' \
      -a -printf '%P\n')

  _sh-manifest ~/git/other/liballocs
  _sh-manifest ~/git/other/boringssl
  _sh-manifest ~/git/other/arrow

  #
  # Uncategorized
  #

  # Has filenames with spaces!
  #_sh-manifest ~/git/other/linguist

  # Brendan Gregg's performance scripts.
  # Find executable scripts, since they don't end in sh.
  # net/tcpretrans is written in Perl.
  src=~/git/other/perf-tools
  _manifest $(basename $src) $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -name tcpretrans -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # Most of these scripts have no extension. So look at executable ones and
  # then see if the shebang ends with sh!

  # NOTE: In YSH it would be nice if shebang-is-shell could be a function call.
  # Don't need to fork every time.
  src=~/git/other/pixelb-scripts
  _manifest pixelb-scripts $src \
    $(find $src \( -name .git -a -prune \) -o \
      \( -type f -a \
         -executable -a \
         ! -name '*.py' -a \
         -exec test/shebang.sh is-shell {} ';' -a \
         -printf '%P\n' \) )

  # Something related to WebDriver
  # Doesn't parse because of extended glob.
  src=~/git/other/wd
  _manifest $(basename $src) $src \
    $(find $src -type f -a -name wd -a -printf '%P\n')

  #
  # Big
  #

  # Everything below this 'return' is currently disabled.
  return
  log "Finding Files in Big Projects"
  readonly BIG_BUILD_ROOT=/media/andy/hdd-8T/big-build/ssd-backup/sdb/build

  # 2m 18s the first time.
  # 2 seconds the second time. This is a big slow drive.
  time {
    _sh-manifest $BIG_BUILD_ROOT/hg/other/mozilla-central/

    _sh-manifest $BIG_BUILD_ROOT/chrome
    _configure-manifest $BIG_BUILD_ROOT/chrome

    _sh-manifest $BIG_BUILD_ROOT/android
    _configure-manifest $BIG_BUILD_ROOT/android

    _sh-manifest $BIG_BUILD_ROOT/openwrt
    _sh-manifest $BIG_BUILD_ROOT/OpenWireless
  }
}
|
| 425 |
|
| 426 | # TODO: Note: duplicated in wild-runner.sh
|
| 427 | readonly MANIFEST=_tmp/wild/MANIFEST.txt
|
| 428 |
|
# Regenerate the manifest file by crawling the local corpora.
#
# Globals:
#   MANIFEST (read) - path of the manifest file to write
# Outputs:
#   Writes the output of all-manifests to $MANIFEST, then prints its line
#   count ('wc -l') to stdout.
write-manifest() {
  local out=$MANIFEST

  # Derive the directory from the manifest path (as manifest-from-archive
  # does) rather than hard-coding it.
  mkdir -p "$(dirname "$out")"

  # Quoted so a path containing whitespace is not an "ambiguous redirect".
  all-manifests > "$out"
  wc -l "$out"
}
|
| 435 |
|
| 436 | # TODO: Publish this script
|
# Wrapper that forwards all of its arguments to the 'multi' program from a
# local tree-tools checkout.
multi() {
  local tool=~/hg/tree-tools/bin/multi
  "$tool" "$@"
}
|
| 438 |
|
# Build _tmp/wild/wild-source.tar.gz from the files listed in $MANIFEST.
make-archive() {
  # Manifest columns:
  #   $1 project
  #   $2 absolute path of the source file
  #   $3 path relative to the project
  local archive=_tmp/wild/wild-source.tar.gz

  # Remove any previous archive first.
  rm -f "$archive"

  # Hand "<abspath> <project>/<relpath>" pairs to 'multi tar'.
  awk '{ printf "%s %s/%s\n", $2, $1, $3 }' $MANIFEST |
    multi tar "$archive"

  ls -l "$archive"
}
|
| 450 |
|
| 451 | # This is opposed to crawling the file system with 'find'.
|
# Build $MANIFEST from the extracted archive under $DEPS_WILD_DIR/src rather
# than by crawling the original checkouts.
manifest-from-archive() {
  local manifest_dir
  manifest_dir=$(dirname $MANIFEST)
  mkdir -p $manifest_dir

  # Two columns per file: the path relative to the search root (%P), then
  # the path including the search root (%p).
  find $DEPS_WILD_DIR/src -type f -a -printf '%P %p\n' > $MANIFEST
}
|
| 457 |
|
| 458 | # 442K lines without "big" and without ltmain.sh
|
| 459 | # TODO: Include a few ltmain.sh. Have to de-dupe them.
|
| 460 | #
|
| 461 | # 767K lines with aports (It's 250K lines by itself.)
|
| 462 |
|
| 463 | # 1.30 M lines with "big".
|
| 464 | # 760K lines without ltmain.sh. Hm need to get up to 1M.
|
| 465 |
|
# Print the second manifest column (the source path) of each entry.
#
# Arguments:
#   $1 - optional project name; when given, only entries whose first column
#        equals it exactly are printed
# Globals:
#   MANIFEST (read) - manifest file to scan
abspaths() {
  local proj=${1:-}
  if test -n "$proj"; then
    # Quote the -v assignment: unquoted, a value containing whitespace is
    # split and its second word becomes awk's program text.
    awk -v proj="$proj" '$1 == proj {print $2}' "$MANIFEST"
  else
    awk '{print $2}' "$MANIFEST"
  fi
}
|
| 474 |
|
| 475 | # Excluding ltmain.sh, goes from 910K lines to 830K.
|
# Print per-file line counts for every manifest entry except ltmain.sh,
# sorted numerically.
count-lines() {
  # There are too many files for xargs, which would split them across
  # several wc invocations. So the list is converted to NUL-separated names
  # and fed to one wc through --files0-from.
  #
  # It would be nicer if wc just had an option not to sum?
  time abspaths \
    | grep -v ltmain.sh \
    | tr '\n' '\0' \
    | wc -l --files0-from - \
    | sort -n
}
|
| 485 |
|
# Search every manifest file except ltmain.sh for lines that start with an
# indexed assignment such as a[x]=
grep-features1() {
  # Hm only 608 files out of 10,000 use a[x]=
  # But it is used in
  # /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  # portage, bash-completion, uses it
  #
  # 'grep -E' replaces 'egrep', which GNU grep treats as obsolescent.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z0-9]+\[.*\]='
}
|
| 494 |
|
# Search every manifest file except ltmain.sh for the literal text '=(['
# (the start of an associative array literal).
grep-features2() {
  # Outside of illumos/ast/esoteric, there's only one real usage of associative
  # array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh: WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  local literal='=(['
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F "$literal"
}
|
| 502 |
|
# Search every manifest file except ltmain.sh for the literal text ';&'
# (the fall-through terminator in a case arm).
grep-features3() {
  # Wow this is ONLY used in a handful of files in bash-completions! And tests.
  # That might be enough to justify it.
  local literal=';&'
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F "$literal"
}
|
| 509 |
|
# Search every manifest file (ltmain.sh included; that filter is commented
# out) for '[[ ' followed later on the line by -eq, -ne, -le, -ge, -lt or -gt.
grep-features4() {
  # NOTE(review): the two lines below are identical to the note in
  # grep-features3 and may have been copied from there - confirm they apply.
  # Wow this is ONLY used in a handful of files in bash-completions! And tests.
  # That might be enough to justify it.
  local regex='\[\[ .*-(eq|ne|le|ge|lt|gt)'
  time abspaths | #| grep -v ltmain.sh |
    xargs grep -E "$regex"
    #xargs grep -E '\${[a-zA-Z0-9_]+\[[^@*]' # looks like ${a[i]}
    #xargs grep -F '$(('
}
|
| 518 |
|
| 519 | # Takes ~15 seconds for 8,000+ files.
|
| 520 | #
|
| 521 | # NOTE: APKBUILD don't have shebang lines! So there are a bunch of false
|
| 522 | # detections, e.g. APKBUILD as Makefile, C, etc.
|
# Run 'file' on every manifest entry and save the result in
# _tmp/wild/file-types.txt; the stream is passed through 'pv' on the way.
detect-all-types() {
  local report=_tmp/wild/file-types.txt
  time abspaths \
    | xargs file \
    | pv > "$report"
}
|
| 526 |
|
# Feed the saved 'file' output (_tmp/wild/file-types.txt) to
# test/wild_types.py on stdin.
#
# The file is attached by redirection instead of 'cat |'. If it is missing,
# the redirection fails and the analyzer is not started at all, rather than
# being run on empty input.
wild-types() {
  test/wild_types.py < _tmp/wild/file-types.txt
}
|
| 530 |
|
all() {
  ### Run by devtools/release.sh

  # Make a report for all, but only run some

  # Every argument is forwarded unchanged to the runner's parse-and-report.
  local runner=test/wild-runner.sh
  "$runner" parse-and-report "$@"
}
|
| 538 |
|
smoke-test() {
  ### Smoke test on Oils source; takes a few seconds

  # Restrict the run to manifest entries matching 'oil/'.
  local filter='oil/'
  all "$filter"
}
|
| 543 |
|
# List the parse stderr files under _tmp/wild/raw that contain 'Traceback'.
find-tracebacks() {
  local raw_dir=_tmp/wild/raw
  find "$raw_dir" -name '*__parse.stderr.txt*' \
    | xargs grep -l 'Traceback'
}
|
| 548 |
|
# Print executable regular files under a directory whose path ends in a
# component made only of the characters in the regex class below (so no
# '.'-style extension) and which test/shebang.sh is-shell accepts.
# Anything under a .git directory is skipped.
#
# Arguments:
#   $1 - directory to search
find-with-shebang() {
  local dir=$1

  # Look for files without an extension that have shell shebang lines.

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  # * there are TEN different kinds
  # * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # The regex matches the whole path, e.g. so freebsd-11.1 must be matched.

  # What might be faster here is to find all the executables first, then put
  # them in a text file. test/shebang.sh can be invoked with stdin as a path
  # list and filter them. It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD. There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  # The expression is assembled in an array so each clause sits on its own
  # line without backslash continuations.
  local -a expr=(
    '(' -name .git -a -prune ')' -o
    '(' -regex '.+/[a-zA-Z0-9_\-]+' -a
        -type f -a
        -executable -a
        -exec test/shebang.sh is-shell {} ';' -a
        -printf '%p\n' ')'
  )

  time find $dir "${expr[@]}"
}
|
| 584 |
|
# Print per-file line counts for the *.ebuild files in the gentoo checkout
# at ~/git/gentoo/gentoo, sorted numerically.
gentoo() {
  # 38,000 ebuild files
  local src=~/git/gentoo/gentoo

  # 2M lines, because there are a lot of duplicate versions.

  time find $src -name '*.ebuild' -a -print0 \
    | wc -l --files0-from - \
    | sort -n

  # The manifest call below is disabled by this early return.
  return
  _manifest distro/gentoo $src \
    $(find $src . -name '*.ebuild')
}
|
| 599 |
|
| 600 | #
|
| 601 | # ANALYSIS: Find Biggest Shell Scripts in Aboriginal Source Tarballs
|
| 602 | #
|
| 603 |
|
| 604 | readonly AB_PACKAGES=~/hg/scratch/aboriginal/aboriginal-1.2.2/packages
|
| 605 |
|
# List the source tarballs in $AB_PACKAGES.
#
# Outputs:
#   One line per tarball: "<path> <tar flag> <name>", where the flag is -z
#   for *.tar.gz and -j for *.tar.bz2, and <name> is the file name without
#   that suffix.
aboriginal-packages() {
  local z name

  for z in "$AB_PACKAGES"/*.tar.gz; do
    # An unmatched glob stays literal; skip it rather than print a bogus
    # entry whose name would be '*'.
    test -e "$z" || continue
    name=$(basename "$z" .tar.gz)
    echo "$z" -z "$name"
  done

  for z in "$AB_PACKAGES"/*.tar.bz2; do
    test -e "$z" || continue
    name=$(basename "$z" .tar.bz2)
    echo "$z" -j "$name"
  done
}
|
| 616 |
|
| 617 | readonly AB_OUT=_tmp/aboriginal
|
| 618 |
|
# For each tarball reported by aboriginal-packages, save the verbose tar
# listing of its *.sh members to $AB_OUT/<name>.txt.
#
# Outputs:
#   Prints "<tarball> <name>" to stdout for each package as it is processed.
aboriginal-manifest() {
  mkdir -p "$AB_OUT"

  local z tar_flag name listing
  # -r keeps backslashes in the paths literal.
  aboriginal-packages | while read -r z tar_flag name; do
    echo "$z" "$name"
    listing=$AB_OUT/${name}.txt
    # grep exits non-zero when an archive contains no *.sh members; that is
    # not an error here, hence '|| true'.
    tar --list --verbose "$tar_flag" < "$z" | grep '\.sh$' > "$listing" || true
  done
}
|
| 628 |
|
# Print "<size> <file name>" for every saved listing line in $AB_OUT,
# smallest first.
aboriginal-biggest() {
  # Fields 3 and 6 of each listing line are used as size and file name.
  cat $AB_OUT/*.txt \
    | awk '{ print $3, $6 }' \
    | sort -n
}
|
| 633 |
|
| 634 | readonly AB_TIMES=_tmp/parse-aboriginal.csv
|
| 635 |
|
# Run 'bin/osh -n' under benchmarks/time_.py on each *.sh file below
# $ABORIGINAL_DIR, one file per invocation, appending to $AB_TIMES.
parse-aboriginal() {
  #find $ABORIGINAL_DIR -name '*.sh' | xargs wc -l | sort -n
  #return

  # Command prefix; xargs appends one file name per invocation.
  local -a timed_parse=(
    benchmarks/time_.py --append --output $AB_TIMES --
    bin/osh -n --ast-format none
  )

  find $ABORIGINAL_DIR -name '*.sh' \
    | xargs -n 1 -- "${timed_parse[@]}"
}
|
| 644 |
|
| 645 | # 80 ms max. That is good enough for sure.
|
# Print the largest value found in the second comma-separated column of
# $AB_TIMES.
ab-times() {
  awk -F ',' '
    $2 > max_elapsed { max_elapsed = $2 }
    END { print(max_elapsed) }
  ' $AB_TIMES
}
|
| 649 |
|
| 650 | # biggest scripts besides ltmain:
|
| 651 | #
|
| 652 | # 8406 binutils-397a64b3/binutils/embedspu.sh
|
| 653 | # 8597 binutils-397a64b3/ld/emulparams/msp430all.sh
|
| 654 | # 9951 bash-2.05b/examples/scripts/dd-ex.sh
|
| 655 | # 12558 binutils-397a64b3/ld/genscripts.sh
|
| 656 | # 14148 bash-2.05b/examples/scripts/adventure.sh
|
| 657 | # 21811 binutils-397a64b3/gas/testsuite/gas/xstormy16/allinsn.sh
|
| 658 | # 28004 bash-2.05b/examples/scripts/bcsh.sh
|
| 659 | # 29666 gcc-4.2.1/ltcf-gcj.sh
|
| 660 | # 33972 gcc-4.2.1/ltcf-c.sh
|
| 661 | # 39048 gcc-4.2.1/ltcf-cxx.sh
|
| 662 |
|
| 663 | #
|
| 664 | # ANALYSIS: Number of comment lines
|
| 665 | #
|
| 666 | # TODO: Determine if we should try to save comment lines? I think we should
|
| 667 | # save more than that.
|
| 668 |
|
| 669 |
|
| 670 | #
|
| 671 | # ANALYSIS: Which scripts use set -C / set -o noclobber?
|
| 672 | #
|
| 673 |
|
| 674 | # VERY rare, only 13 instances, in ast, freebsd, and illumos-gate.
|
# Collect lines that mention noclobber (or start with 'set -C' / 'set +C')
# from every manifest file into _tmp/noclobber.txt, then print the line
# count of that report.
analyze-noclobber() {
  local report=_tmp/noclobber.txt

  # Ignore this script
  # '|| true' keeps a pipeline failure (e.g. grep finding nothing) from
  # aborting the function.
  time abspaths \
    | grep -v 'test/wild.sh' \
    | xargs grep -E 'noclobber|^set -C|^set +C' > "$report" \
    || true

  wc -l "$report"
}
|
| 682 |
|
| 683 | # Quick golden test. Test that pretty-printing doesn't regress.
|
# Re-invoke this script as '<script> all esoteric'.
golden-subset() {
  # Quoted so the call still works when the script path contains spaces.
  "$0" all esoteric
}
|
| 687 |
|
| 688 | # Make a copy of the output for comparison.
|
# Copy the *__ast.html files under _tmp/wild-www/esoteric/ with 'multi cp'.
#
# Arguments:
#   $1 - destination directory (default: _tmp/wild-gold)
copy-golden-ast() {
  local dest=${1:-_tmp/wild-gold}
  local ast_root=_tmp/wild-www/esoteric/

  # NOTE(review): this calls ~/git/tree-tools/bin/multi directly, while the
  # multi() helper in this file uses ~/hg/tree-tools/bin/multi - confirm
  # which location is intended.
  #
  # Each input line is "<path as found> <path relative to the search root>".
  find "$ast_root" -name '*__ast.html' -a -printf '%p %P\n' |
    ~/git/tree-tools/bin/multi cp $dest
}
|
| 694 |
|
| 695 | # Find shell scripts on the root file system.
|
| 696 | # 1302 files on my system.
|
# Print executable regular files under the listed system directories that
# test/shebang.sh is-shell accepts; the list is also saved to
# _tmp/rootfs.txt.
rootfs-manifest() {
  local -a roots=(/bin /lib /sbin /etc/ /opt /root /run /usr /var)

  find "${roots[@]}" \
    -type f -a \
    -executable -a \
    -exec test/shebang.sh is-shell {} ';' \
    -a -print \
    | tee _tmp/rootfs.txt
}
|
| 704 |
|
# Calls export-osh-cpp, then runs 'all' on one subset of the corpus.
#
# Globals:
#   QUICKLY (read) - when set and non-empty, run the small '^oil' subset
#                    instead of '^distro'
soil-run() {
  export-osh-cpp _tmp/native-tar-test opt

  local filter
  if test -z "${QUICKLY:-}"; then
    # This takes longer than 15 minutes with build/dev.sh minimal !
    # That's with xargs -P $MAX_PROCS in test/wild-runner.sh

    # The whole thing takes 7:25, which means that the 'wild' Soil job takes 10
    # minutes. It waits for the tarball, then tests it.
    #
    # For now, just do 'distro', since that's about half the files.
    filter='^distro'
    # Other filters that have been tried:
    # all '^cloud'
    # all '^cloud|^gnu|^freebsd'
    # all
  else
    # Do a quick version
    filter='^oil'
  fi

  all "$filter"
}
|
| 726 |
|
# Dispatch: when this file is executed as 'wild.sh', treat the command-line
# arguments as a function name followed by that function's arguments.
# "$0" is quoted (with '--') so a script path that contains spaces or starts
# with '-' is handled by basename correctly.
if test "$(basename -- "$0")" = 'wild.sh'; then
  "$@"
fi
|