1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Run the osh parser on shell scripts found in the wild.
|
4 | #
|
5 | # Usage:
|
6 | # test/wild.sh <function name>
|
7 | #
|
8 | # Examples:
|
9 | # test/wild.sh all
|
10 | # test/wild.sh all '^oil' # subset
|
11 |
|
12 | # TODO:
|
13 | # - Add more scripts, like gentoo package defs
|
14 |
|
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit
# Ask for the extra OSH strictness checks.  Errors from this shopt are
# silenced and ignored, so a shell that rejects the option keeps going.
shopt -s strict:all 2>/dev/null || true # dogfood for OSH

# Sourced by a path relative to the current directory.
source test/common.sh # export-osh-cpp

# This persists across build/clean.sh
readonly DEPS_WILD_DIR=../oil_DEPS/wild
|
24 |
|
25 |
|
26 | #
|
27 | # Helpers
|
28 | #
|
29 |
|
# Print one manifest line per relative path.
#
# Arguments:
#   $1   - project name (first column of the manifest)
#   $2   - base directory that the relative paths live under
#   $3.. - paths relative to $2
# Outputs:
#   For each path, the line "<proj> <base_dir>/<path> <path>" on stdout.
_manifest() {
  local proj=$1
  local base_dir=$2
  shift 2

  # Keep the loop variable out of the caller's scope.
  local path
  for path in "$@"; do
    # Quoted printf emits every field verbatim; an unquoted echo would
    # re-split the values and glob-expand them against the current directory.
    printf '%s %s/%s %s\n' "$proj" "$base_dir" "$path" "$path"
  done
}
|
39 |
|
# Generic helper: emit manifest lines for every *.sh file under a directory.
#
# Arguments:
#   $1 - directory to crawl; its basename becomes the project name
#   $2 - optional category, prepended to the project name as "<category>/"
_sh-manifest() {
  local base_dir=$1
  local category=${2:-}

  # "<category>/<name>" when a category was given, otherwise just "<name>".
  local proj
  proj="${category:+$category/}$(basename $base_dir)"

  # The command substitution is left unquoted on purpose: every line that
  # find prints becomes a separate argument of _manifest.
  _manifest $proj $base_dir \
    $(find $base_dir -name '*.sh' -a -printf '%P\n')
}
|
55 |
|
# Emit manifest lines for every file named 'configure' under a directory.
#
# Arguments:
#   $1 - directory to crawl; the project name is "<basename of $1>__configure"
_configure-manifest() {
  local base_dir=$1

  # Declared separately so a failing basename isn't masked by 'local'.
  local name
  name=$(basename $base_dir)

  # Crawl the directory that was passed in.  The previous version searched
  # $src, a variable this function never sets, so it either picked up whatever
  # the caller last assigned to it or died under 'set -o nounset'.
  _manifest ${name}__configure $base_dir \
    $(find $base_dir -name 'configure' -a -printf '%P\n')
}
|
64 |
|
65 | #
|
66 | # Special Case Corpora Using Explicit Globs
|
67 | #
|
68 |
|
# TODO: Where do we write the base dir?
#
# Print manifest lines for ~/git/oil-sketch: the *.sh files at the top level
# and in a fixed set of subdirectories.  A glob that matches nothing is
# printed as the literal pattern.
oil-sketch-manifest() {
  local base_dir=~/git/oil-sketch
  # Keep the loop variable out of the caller's scope.
  local name

  # The globs below are relative, so expand them from inside the checkout.
  pushd $base_dir >/dev/null
  for name in *.sh {awk,demo,make,misc,regex,tools}/*.sh; do
    # Quoted printf keeps each name verbatim (no re-splitting or re-globbing).
    printf 'oil-sketch %s/%s %s\n' "$base_dir" "$name" "$name"
  done
  popd >/dev/null
}
|
78 |
|
# Print manifest lines for the repo in the current directory: 'configure',
# 'install', the top-level *.sh files and the *.sh files in a fixed set of
# subdirectories.  The project column is always 'oil'.  A glob that matches
# nothing is printed as the literal pattern.
oil-manifest() {
  local base_dir=$PWD
  # Keep the loop variable out of the caller's scope.
  local name

  for name in \
    configure install *.sh {benchmarks,build,test,scripts,opy}/*.sh; do
    # Quoted printf keeps each name verbatim (no re-splitting or re-globbing).
    printf 'oil %s/%s %s\n' "$base_dir" "$name" "$name"
  done
}
|
86 |
|
87 | readonly ABORIGINAL_DIR=~/src/aboriginal-1.4.5
|
88 |
|
89 | #
|
90 | # All
|
91 | #
|
92 |
|
# Print the manifest for the whole corpus on stdout.
#
# The repo in the current directory comes first (oil-manifest), followed by
# third-party projects at hard-coded locations under ~/git, ~/hg and ~/src,
# plus /etc/init.d and /usr/bin.  Most projects only contribute *.sh files
# (via _sh-manifest); the rest use a hand-written 'find' expression because
# their scripts have no extension.
#
# 'src' is reused as scratch for the directory currently being crawled.
#
# NOTE: everything after the bare 'return' near the end (the "Big" section) is
# unreachable.
all-manifests() {
  # Don't expose this repo for now
  #oil-sketch-manifest

  #
  # My Code
  #

  oil-manifest

  local src

  #
  # Books
  #
  src=~/git/wild/books/wicked_cool_shell_scripts_2e
  _manifest "books/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  #
  # Shell/Bash Frameworks/Collections
  #

  src=~/git/other/bash-completion
  _manifest "shell/$(basename $src)" $src \
    $(find $src/completions -type f -a -printf 'completions/%P\n')

  # Bats bash test framework. It appears to be fairly popular.
  src=~/git/other/bats
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -wholename '*/libexec/*' -a -type f -a \
         -executable -a -printf '%P\n' \) )

  # Bash debugger?
  src=~/src/bashdb-4.4-0.92
  _manifest shell/bashdb $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  src=~/git/other/Bash-Snippets
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # ASDF meta package/version manager.
  # Note that the language-specific plugins are specified (as remote repos)
  # here: https://github.com/asdf-vm/asdf-plugins/tree/master/plugins
  # They could be used for more tests.

  src=~/git/other/asdf
  _manifest "shell/$(basename $src)" $src \
    $(find $src \( -name '*.sh' -o -name '*.bash' \) -a -printf '%P\n' )

  src=~/git/other/scripts-to-rule-them-all
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  src=~/git/wild/shell/shflags
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      '(' -name .git -a -prune ')' -o \
      '(' -name '*.sh' -o \
          -name shflags -o -name shlib -o -name shunit2 -o -name versions \
      ')' -a -printf '%P\n')

  _sh-manifest ~/hg/wild/shell-script-library shell
  _sh-manifest ~/git/other/modernish shell
  _sh-manifest ~/git/other/posixcube shell

  # Shells themselves
  _sh-manifest ~/git/languages/ast shell # korn shell stuff
  _sh-manifest ~/git/languages/mwc-sh shell
  _sh-manifest ~/src/mksh shell

  #
  # Linux Distros
  #

  _sh-manifest ~/git/other/minimal distro
  _sh-manifest ~/git/other/linuxkit distro
  _sh-manifest ~/git/other/portage distro
  _sh-manifest ~/git/wild/distro/woof-CE distro
  _sh-manifest ~/git/wild/distro/crankshaft distro

  src=~/git/alpine/aports
  _manifest distro/alpine-aports $src \
    $(find $src -name APKBUILD -a -printf '%P\n')

  src=$ABORIGINAL_DIR
  _manifest distro/aboriginal $src \
    $(find $src -name '*.sh' -printf '%P\n')

  src=/etc/init.d
  _manifest distro/initd $src \
    $(find $src -type f -a -executable -a -printf '%P\n')

  src=/usr/bin
  _manifest distro/usr-bin $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  # Version 1.0.89 extracts to a version-less dir.
  src=~/git/basis-build/_tmp/debootstrap
  _manifest distro/debootstrap $src \
    $(find $src '(' -name debootstrap -o -name functions ')' -a -printf '%P\n') \
    $(find $src/scripts -type f -a -printf 'scripts/%P\n')

  # There are a lot of dietpi-* bash scripts that aren't executable, for some
  # reason. Big hairy shell scripts here.
  # NOTE: the command below ends with a backslash that continues onto an empty
  # line, so the blank line after it is part of the command.
  src=~/git/wild/distro/DietPi
  _manifest distro/DietPi $src \
    $(find $src '(' -name '*.sh' -o -name 'dietpi-*' ')' -a -printf '%P\n') \

  src=~/src/grep-2.24
  _sh-manifest $src gnu

  src=~/src/coreutils-8.22
  _sh-manifest $src gnu

  src=~/src/glibc-2.23
  _sh-manifest $src gnu

  src=~/src/binutils-2.26
  _sh-manifest $src gnu

  #
  # Operating Systems
  #

  # Skip the autoconf stuff here. Could skip it elsewhere too.
  src=~/src/freebsd-11.1/usr/src
  _manifest freebsd-11.1 $src \
    $(find $src -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')

  _sh-manifest ~/git/other/minix
  _sh-manifest ~/git/other/illumos-gate
  _sh-manifest ~/git/other/daemontools-encore

  #
  # Cloud Stuff
  #
  _sh-manifest ~/git/other/mesos cloud
  _sh-manifest ~/git/other/rocket cloud
  _sh-manifest ~/git/other/docker cloud
  _sh-manifest ~/git/other/chef-bcpc cloud
  _sh-manifest ~/git/other/sandstorm cloud
  _sh-manifest ~/git/other/kubernetes cloud
  _sh-manifest ~/git/other/manta-marlin cloud

  src=~/git/other/dokku
  _manifest cloud/dokku $src \
    $(find $src '(' -name '*.sh' -o -name dokku ')' -a -printf '%P\n')

  #
  # Google
  #
  _sh-manifest ~/git/other/bazel google
  _sh-manifest ~/git/other/protobuf google
  _sh-manifest ~/git/other/kythe google
  _sh-manifest ~/git/other/tensorflow google
  # Filenames with spaces!
  #_sh-manifest ~/git/other/grpc google

  #
  # Esoteric
  #

  _sh-manifest ~/git/scratch/shasm esoteric
  _sh-manifest ~/git/wild/esoteric/wwwoosh esoteric
  _sh-manifest ~/git/wild/esoteric/lishp esoteric

  # OpenGL for bash is interesting because there is a lot of arithmetic.
  # Not surprisingly, there are 6 parse errors, almost all of which are due to
  # "dynamic arithmetic".
  src=~/git/other/CmdlineGL
  _manifest esoteric/CmdlineGL $src \
    $(find $src \
      -type f -a \
      '(' -name '*.sh' -o -name '*.lib' ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/setup.shl
  _manifest esoteric/setup.shl $src \
    $(find $src \
      -type f -a \
      '(' -name '*.shl' -o -name setup -o -name Setup ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/mal/bash
  _manifest esoteric/make-a-lisp-bash $src \
    $(find $src '(' -name '*.sh' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/gherkin
  _manifest esoteric/gherkin $src \
    $(find $src '(' -name '*.sh' -o -name 'gherkin' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/balls
  _manifest esoteric/balls $src \
    $(find $src '(' -name '*.sh' -o -name balls -o -name esh ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/bashcached
  _manifest esoteric/bashcached $src \
    $(find $src '(' -name '*.sh' -o -name 'bashcached' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/quinedb
  _manifest esoteric/quinedb $src \
    $(find $src '(' -name '*.sh' -o -name 'quinedb' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/bashttpd
  _manifest esoteric/bashttpd $src \
    $(find $src -name 'bashttpd' -a -printf '%P\n')

  # JSON Parsers
  src=~/git/other/j
  _manifest esoteric/j $src \
    $(find $src -type f -a -name j -a -printf '%P\n')

  _sh-manifest ~/git/other/JSON.sh esoteric

  #
  # Other Languages
  #

  _sh-manifest ~/git/languages/julia
  _sh-manifest ~/git/languages/reason
  _sh-manifest ~/git/languages/sdk # Dart SDK?

  _sh-manifest ~/git/languages/micropython
  _sh-manifest ~/git/other/staticpython # statically linked build

  _sh-manifest ~/git/other/exp # Go experimental repo

  #
  # Grab Bag
  #

  # This overlaps with git too much
  #src=~/git/other
  #local depth=3
  #_manifest git-other $src \
  # $(find $src -maxdepth $depth -name '*.sh' -a -printf '%P\n')

  src=~/hg/other
  _manifest hg-other $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  #
  # Top-Level
  #

  # Kernel
  _sh-manifest ~/src/linux-4.8.7

  # Git
  # git-gui.sh and po2msg.sh are actually Tcl! We could stop parsing at 'exec'
  # but there's no point right now.
  src=~/git/other/git
  _manifest $(basename $src) $src \
    $(find $src -name '*.sh' -a \
      ! -name 'git-gui.sh' \
      ! -name 'po2msg.sh' \
      -a -printf '%P\n')

  _sh-manifest ~/git/other/liballocs
  _sh-manifest ~/git/other/boringssl
  _sh-manifest ~/git/other/arrow

  #
  # Uncategorized
  #

  # Has filenames with spaces!
  #_sh-manifest ~/git/other/linguist

  # Brendan Gregg's performance scripts.
  # Find executable scripts, since they don't end in sh.
  # net/tcpretrans is written in Perl.
  src=~/git/other/perf-tools
  _manifest $(basename $src) $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -name tcpretrans -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # Most of these scripts have no extension. So look at executable ones and
  # then see if the shebang ends with sh!

  # NOTE: In YSH it would be nice if shebang-is-shell could be a function call.
  # Don't need to fork every time.
  src=~/git/other/pixelb-scripts
  _manifest pixelb-scripts $src \
    $(find $src \( -name .git -a -prune \) -o \
      \( -type f -a \
         -executable -a \
         ! -name '*.py' -a \
         -exec test/shebang.sh is-shell {} ';' -a \
         -printf '%P\n' \) )

  # Something related to WebDriver
  # Doesn't parse because of extended glob.
  src=~/git/other/wd
  _manifest $(basename $src) $src \
    $(find $src -type f -a -name wd -a -printf '%P\n')

  #
  # Big
  #

  # Unconditional return: nothing below this line runs.
  return
  log "Finding Files in Big Projects"
  readonly BIG_BUILD_ROOT=/media/andy/hdd-8T/big-build/ssd-backup/sdb/build

  # 2m 18s the first time.
  # 2 seconds the second time. This is a big slow drive.
  time {
    _sh-manifest $BIG_BUILD_ROOT/hg/other/mozilla-central/

    _sh-manifest $BIG_BUILD_ROOT/chrome
    _configure-manifest $BIG_BUILD_ROOT/chrome

    _sh-manifest $BIG_BUILD_ROOT/android
    _configure-manifest $BIG_BUILD_ROOT/android

    _sh-manifest $BIG_BUILD_ROOT/openwrt
    _sh-manifest $BIG_BUILD_ROOT/OpenWireless
  }
}
|
425 |
|
# Manifest file, relative to the current directory.  Written by write-manifest
# and manifest-from-archive; read by make-archive and abspaths.
# TODO: Note: duplicated in wild-runner.sh
readonly MANIFEST=_tmp/wild/MANIFEST.txt
|
428 |
|
# Regenerate $MANIFEST from the output of all-manifests, then print its line
# count.
write-manifest() {
  local manifest_path=$MANIFEST

  mkdir -p _tmp/wild
  all-manifests > $manifest_path

  # One line per file in the corpus.
  wc -l $manifest_path
}
|
435 |
|
# TODO: Publish this script
#
# Wrapper that forwards all of its arguments to ~/hg/tree-tools/bin/multi.
multi() {
  ~/hg/tree-tools/bin/multi "$@"
}
|
438 |
|
# Build _tmp/wild/wild-source.tar.gz from the files listed in $MANIFEST.
#
# Manifest columns:
#   $1 is project
#   $2 is abspath of source
#   $3 is rel path within project
#
# 'multi tar' is fed one "<source path> <project>/<rel path>" line per
# manifest entry on stdin.
make-archive() {
  local tarball=_tmp/wild/wild-source.tar.gz

  # Start from scratch rather than reusing an older archive.
  rm -f $tarball

  awk '{ printf "%s %s/%s\n", $2, $1, $3 }' $MANIFEST | multi tar $tarball

  ls -l $tarball
}
|
450 |
|
# Write $MANIFEST from the files under $DEPS_WILD_DIR/src.
# This is opposed to crawling the file system with 'find' the way
# all-manifests does.
#
# Each line is the path relative to src/, then the path as find reports it.
manifest-from-archive() {
  local manifest_dir
  manifest_dir=$(dirname $MANIFEST)
  mkdir -p $manifest_dir

  find $DEPS_WILD_DIR/src -type f -a -printf '%P %p\n' > $MANIFEST
}
|
457 |
|
458 | # 442K lines without "big" and without ltmain.sh
|
459 | # TODO: Include a few ltmain.sh. Have to de-dupe them.
|
460 | #
|
461 | # 767K lines with aports (It's 250K lines by itself.)
|
462 |
|
463 | # 1.30 M lines with "big".
|
464 | # 760K lines without ltmain.sh. Hm need to get up to 1M.
|
465 |
|
# Print the second column of $MANIFEST (the source path of each file).
#
# Arguments:
#   $1 - optional project name; when given, only lines whose first column
#        equals it exactly are printed
abspaths() {
  local proj=${1:-}

  # One awk program covers both cases: an empty 'proj' selects every line.
  # "$proj" is quoted so a name containing whitespace or glob characters
  # reaches awk as a single -v assignment.
  awk -v proj="$proj" 'proj == "" || $1 == proj { print $2 }' "$MANIFEST"
}
|
474 |
|
# Print the line count of every manifest file except ltmain.sh, sorted by
# count.
#
# Excluding ltmain.sh, goes from 910K lines to 830K.
count-lines() {
  # The paths go to wc through --files0-from because there are too many files:
  # xargs would split them across several wc invocations.
  #
  # It would be nicer if wc just had an option not to sum?
  time abspaths \
    | grep -v ltmain.sh \
    | tr '\n' '\0' \
    | wc -l --files0-from - \
    | sort -n
}
|
485 |
|
# Search the corpus (minus ltmain.sh) for lines that start with an indexed
# assignment such as a[x]=
grep-features1() {
  # Hm only 608 files out of 10,000 use a[x]=
  # But it is used in
  # /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  # portage, bash-completion, uses it
  #
  # 'grep -E' instead of 'egrep': current GNU grep prints an obsolescence
  # warning for every egrep invocation.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z0-9]+\[.*\]='
}
|
494 |
|
# Search the corpus (minus ltmain.sh) for the literal string '=(['
grep-features2() {
  # Outside of illumos/ast/esoteric, there's only one real usage of associative
  # array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh: WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F -e '=(['
}
|
502 |
|
# Search the corpus (minus ltmain.sh) for the literal string ';&'
grep-features3() {
  # Wow this is ONLY used in a handful of files in bash-completions! And tests.
  # That might be enough to justify it.
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F -e ';&'
}
|
509 |
|
# Search the whole corpus for lines where '[[ ' is later followed by one of
# -eq -ne -le -ge -lt -gt.
grep-features4() {
  # Unlike the other grep-features functions, ltmain.sh is not filtered out;
  # that stage is disabled:
  #   | grep -v ltmain.sh
  time abspaths \
    | xargs grep -E '\[\[ .*-(eq|ne|le|ge|lt|gt)'

  # Alternative patterns, kept for reference:
  #xargs grep -E '\${[a-zA-Z0-9_]+\[[^@*]' # looks like ${a[i]}
  #xargs grep -F '$(('
}
|
518 |
|
# Run 'file' on every manifest path and save the report in
# _tmp/wild/file-types.txt.
#
# Takes ~15 seconds for 8,000+ files.
#
# NOTE: APKBUILD don't have shebang lines! So there are a bunch of false
# detections, e.g. APKBUILD as Makefile, C, etc.
detect-all-types() {
  time abspaths \
    | xargs file \
    | pv > _tmp/wild/file-types.txt
}
|
526 |
|
# Feed the report written by detect-all-types to test/wild_types.py on stdin.
wild-types() {
  # Plain redirect instead of 'cat file |': one process fewer, and a missing
  # report fails up front instead of running the script on empty input.
  test/wild_types.py < _tmp/wild/file-types.txt
}
|
530 |
|
all() {
  ### Run by devtools/release.sh

  # Every argument is handed to the runner unchanged.
  # Make a report for all, but only run some
  test/wild-runner.sh parse-and-report "$@"
}
|
538 |
|
smoke-test() {
  ### Smoke test on Oils source; takes a few seconds

  # Same as 'all', restricted with the argument 'oil/'.
  all 'oil/'
}
|
543 |
|
# List the parse stderr files under _tmp/wild/raw that contain the string
# 'Traceback'.
find-tracebacks() {
  find _tmp/wild/raw -name '*__parse.stderr.txt*' \
    | xargs grep -l 'Traceback'
}
|
548 |
|
# Print the executable, extension-less files under a directory that
# test/shebang.sh accepts as shell scripts.  Anything below a .git directory
# is skipped.
#
# Arguments:
#   $1 - directory to crawl
find-with-shebang() {
  local root=$1

  # Look for files without an extension that have shell shebang lines.

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  #   * there are TEN different kinds
  #   * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # The regex matches the whole path, e.g. so freebsd-11.1 must be matched.

  # What might be faster here is to find all the executables first, then put
  # them in a text file. test/shebang.sh can be invoked with stdin as a path
  # list and filter them. It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD. There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  time find $root \
    '(' -name .git -a -prune ')' \
    -o \
    '(' \
      -regex '.+/[a-zA-Z0-9_\-]+' -a \
      -type f -a -executable -a \
      -exec test/shebang.sh is-shell {} ';' -a \
      -printf '%p\n' \
    ')'
}
|
584 |
|
# Print the line count of every *.ebuild file under ~/git/gentoo/gentoo,
# sorted by count.  The manifest part at the end is disabled.
gentoo() {
  # 38,000 ebuild files
  local src
  src=~/git/gentoo/gentoo

  # 2M lines, because there are a lot of duplicate versions.

  time find $src -name '*.ebuild' -a -print0 |
    wc -l --files0-from - | sort -n

  # Nothing below runs.
  return
  # Fixed for whenever this is enabled: crawl only $src (the old expression
  # was 'find $src .', which also walked the current directory) and print
  # paths relative to it, which is what _manifest expects.
  _manifest distro/gentoo $src \
    $(find $src -name '*.ebuild' -a -printf '%P\n')
}
|
599 |
|
600 | #
|
601 | # ANALYSIS: Find Biggest Shell Scripts in Aboriginal Source Tarballs
|
602 | #
|
603 |
|
604 | readonly AB_PACKAGES=~/hg/scratch/aboriginal/aboriginal-1.2.2/packages
|
605 |
|
# Print one line per tarball in $AB_PACKAGES:
#
#   <tarball path> <tar decompression flag> <name without the extension>
#
# The flag is -z for *.tar.gz and -j for *.tar.bz2.  All .tar.gz files are
# listed before the .tar.bz2 files.
aboriginal-packages() {
  # Both are function-local so nothing leaks into the caller's scope.
  local z name

  for z in $AB_PACKAGES/*.tar.gz $AB_PACKAGES/*.tar.bz2; do
    # An unmatched glob stays as the literal pattern; don't report it as if
    # it were a tarball.
    test -e "$z" || continue

    case $z in
      *.tar.gz)
        name=$(basename "$z" .tar.gz)
        printf '%s -z %s\n' "$z" "$name"
        ;;
      *.tar.bz2)
        name=$(basename "$z" .tar.bz2)
        printf '%s -j %s\n' "$z" "$name"
        ;;
    esac
  done
}
|
616 |
|
617 | readonly AB_OUT=_tmp/aboriginal
|
618 |
|
# For each line printed by aboriginal-packages, save the verbose tar listing
# of that package's *.sh members in $AB_OUT/<name>.txt.  The tarball path and
# name are echoed as progress.
aboriginal-manifest() {
  mkdir -p $AB_OUT

  aboriginal-packages | while read z tar_flag name; do
    echo $z $name

    # grep exits non-zero when a package has no .sh files; '|| true' keeps
    # that from aborting the loop under errexit/pipefail.
    tar --list --verbose $tar_flag < $z \
      | grep '\.sh$' > $AB_OUT/${name}.txt \
      || true
  done
}
|
628 |
|
# From the listings in $AB_OUT, print fields 3 and 6 of every line, sorted
# numerically.
aboriginal-biggest() {
  # print size and filename
  awk '{print $3 " " $6}' $AB_OUT/*.txt | sort -n
}
|
633 |
|
634 | readonly AB_TIMES=_tmp/parse-aboriginal.csv
|
635 |
|
# Run 'bin/osh -n --ast-format none' under benchmarks/time_.py once for each
# *.sh file below $ABORIGINAL_DIR, with --append --output $AB_TIMES.
parse-aboriginal() {
  # Disabled alternative that only prints line counts:
  #find $ABORIGINAL_DIR -name '*.sh' | xargs wc -l | sort -n
  #return

  # xargs -n 1: one invocation per script.
  find $ABORIGINAL_DIR -name '*.sh' \
    | xargs -n 1 -- \
        benchmarks/time_.py --append --output $AB_TIMES -- \
        bin/osh -n --ast-format none
}
|
644 |
|
# Print the largest value in the second comma-separated column of $AB_TIMES.
#
# 80 ms max. That is good enough for sure.
ab-times() {
  awk -F ',' '
    $2 > max_elapsed { max_elapsed = $2 }
    END { print(max_elapsed) }
  ' $AB_TIMES
}
|
649 |
|
650 | # biggest scripts besides ltmain:
|
651 | #
|
652 | # 8406 binutils-397a64b3/binutils/embedspu.sh
|
653 | # 8597 binutils-397a64b3/ld/emulparams/msp430all.sh
|
654 | # 9951 bash-2.05b/examples/scripts/dd-ex.sh
|
655 | # 12558 binutils-397a64b3/ld/genscripts.sh
|
656 | # 14148 bash-2.05b/examples/scripts/adventure.sh
|
657 | # 21811 binutils-397a64b3/gas/testsuite/gas/xstormy16/allinsn.sh
|
658 | # 28004 bash-2.05b/examples/scripts/bcsh.sh
|
659 | # 29666 gcc-4.2.1/ltcf-gcj.sh
|
660 | # 33972 gcc-4.2.1/ltcf-c.sh
|
661 | # 39048 gcc-4.2.1/ltcf-cxx.sh
|
662 |
|
663 | #
|
664 | # ANALYSIS: Number of comment lines
|
665 | #
|
666 | # TODO: Determine if we should try to save comment lines? I think we should
|
667 | # save more than that.
|
668 |
|
669 |
|
670 | #
|
671 | # ANALYSIS: Which scripts use set -C / set -o noclobber?
|
672 | #
|
673 |
|
# Collect lines that mention 'noclobber', or that start with 'set -C' or
# 'set +C', into _tmp/noclobber.txt, then print how many were found.
#
# VERY rare, only 13 instances, in ast, freebsd, and illumos-gate.
analyze-noclobber() {
  local out=_tmp/noclobber.txt

  # Ignore this script
  #
  # The '+' is escaped: in an extended regex a bare '+' is a quantifier, so
  # '^set +C' means "set, one or more spaces, C" and never matches 'set +C'.
  #
  # '|| true' because grep exits non-zero when nothing matches.
  time abspaths | grep -v 'test/wild.sh' |
    xargs grep -E 'noclobber|^set -C|^set \+C' > $out || true
  wc -l $out
}
|
682 |
|
# Quick golden test. Test that pretty-printing doesn't regress.
#
# Re-invokes this script ($0) as 'all esoteric'.
golden-subset() {
  $0 all esoteric
}
|
687 |
|
# Make a copy of the output for comparison.
#
# Arguments:
#   $1 - destination directory (default: _tmp/wild-gold)
#
# Each *__ast.html file under _tmp/wild-www/esoteric/ is sent to
# '~/git/tree-tools/bin/multi cp' as a "<path> <path relative to esoteric/>"
# line on stdin.
copy-golden-ast() {
  local gold_dir=${1:-_tmp/wild-gold}

  find _tmp/wild-www/esoteric/ -name '*__ast.html' -a -printf '%p %P\n' |
    ~/git/tree-tools/bin/multi cp $gold_dir
}
|
694 |
|
# Find shell scripts on the root file system.
# 1302 files on my system.
#
# Prints every executable regular file that test/shebang.sh accepts, and also
# saves the list in _tmp/rootfs.txt.
rootfs-manifest() {
  local -a roots=(/bin /lib /sbin /etc/ /opt /root /run /usr /var)

  find "${roots[@]}" \
    -type f -a -executable -a \
    -exec test/shebang.sh is-shell {} ';' -a \
    -print \
    | tee _tmp/rootfs.txt
}
|
704 |
|
# Call export-osh-cpp, then run 'all' on one subset of the corpus:
# '^oil' when QUICKLY is set and non-empty, '^distro' otherwise.
soil-run() {
  export-osh-cpp _tmp/native-tar-test opt

  # This takes longer than 15 minutes with build/dev.sh minimal !
  # That's with xargs -P $MAX_PROCS in test/wild-runner.sh
  #
  # The whole thing takes 7:25, which means that the 'wild' Soil job takes 10
  # minutes. It waits for the tarball, then tests it.
  #
  # For now, just do 'distro', since that's about half the files.
  #
  # Other subsets that could be run instead:
  #   all '^cloud'
  #   all '^cloud|^gnu|^freebsd'
  #   all
  local subset='^distro'

  if test -n "${QUICKLY:-}"; then
    # Do a quick version
    subset='^oil'
  fi

  all "$subset"
}
|
726 |
|
# Dispatch: when this file is invoked under the name 'wild.sh', run the
# command line as "<function name> [args...]".  Under any other name, only
# the definitions above take effect.
case "$(basename $0)" in
  wild.sh) "$@" ;;
esac
|