OILS / metrics / source-code.sh View on Github | oilshell.org

575 lines, 313 significant
1#!/usr/bin/env bash
2#
3# Count lines of code in various ways.
4#
5# Usage:
6# metrics/source-code.sh <function name>
7
# Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of this script's directory.  tsv-lib.sh uses this.
# Every expansion is quoted so that a checkout path containing spaces works.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
14
15source test/common.sh
16source test/tsv-lib.sh
17
# Filter a list of paths on stdin, dropping Python files that should not be
# counted: __init__.py, *_gen.py code generators, *_test.py / *_tests.py, and
# NINJA_subgraph.py.
#
# The dots are escaped so that '.' matches only a literal dot, not any
# character.
filter-py() {
  grep -E -v '__init__\.py$|_gen\.py|_test\.py|_tests\.py|NINJA_subgraph\.py$'
}
21
# ASDL schema files under frontend/ and core/.  The globs are expanded once,
# when this line runs, relative to the current directory.
OSH_ASDL=( frontend/*.asdl core/*.asdl )
readonly -a OSH_ASDL
23
# OSH and common
#
# Print the paths of the OSH source files, as listed by ls.
osh-files() {
  # Exclude:
  # - line_input.c because I didn't write it.  It still should be minimized.
  # - code generators
  # - test library
  local -a candidates=(
    bin/oils_for_unix.py
    {osh,core,frontend}/*.py
    builtin/*_osh.py
    pyext/*.c
    */*.pyi
    "${OSH_ASDL[@]}"
  )
  local exclude='posixmodule.c$|line_input.c$|_gen.py$|test_lib.py$|os.pyi$'

  ls "${candidates[@]}" \
    | filter-py \
    | grep -E -v "$exclude"
}
36
# cloc doesn't understand ASDL files.
# Use a wc-like format, filtering out blank lines and comments.
#
# Args:   paths of the files to count
# Output: one "<count> <path>" line per file, followed by a "<count> total"
#         line; counts are right-aligned in 5 columns.
#
# Implemented with grep and printf instead of an embedded Python 2 snippet
# (which used the print statement), so the result does not depend on what the
# unversioned `python` command resolves to.
asdl-cloc() {
  local path num_lines status
  local total=0

  for path in "$@"; do
    # Count lines that are neither blank nor start with '#' once leading
    # whitespace is ignored.  grep -c exits 1 when the count is 0, which is
    # not an error here; a status above 1 is a real failure (e.g. the file
    # can't be read).
    status=0
    num_lines=$(grep -c -v -E '^[[:space:]]*(#|$)' -- "$path") || status=$?
    if (( status > 1 )); then
      return "$status"
    fi

    printf '%5d %s\n' "$num_lines" "$path"
    total=$(( total + num_lines ))
  done

  printf '%5d %s\n' "$total" total
}
59
# Print a plain-text report of cloc results, one section per group of files.
# All arguments are forwarded to every cloc invocation.
cloc-report() {
  printf '%s\n\n' '(non-blank non-comment lines)'

  printf '%s\n\n' 'OSH'
  osh-files | xargs cloc --quiet "$@"
  printf '\n\n'

  printf '%s\n\n' 'YSH'
  ysh-files | xargs cloc --quiet "$@"
  printf '\n\n'

  printf '%s\n\n' 'Data Languages'
  data-lang-files | xargs cloc --quiet "$@"
  printf '\n\n'

  printf '%s\n\n' 'Tools'
  tools-files | xargs cloc --quiet "$@"
  printf '\n\n'

  # cloc can't count ASDL, so this section uses asdl-cloc instead.
  printf '%s\n' 'ASDL SCHEMAS (non-blank non-comment lines)'
  asdl-cloc "${OSH_ASDL[@]}" data_lang/*.asdl
  printf '\n\n'

  # NOTE: --csv option could be parsed into HTML.
  # Or just sum with asdl-cloc!

  printf '%s\n\n' 'Hand-Written C++ code (non-blank non-comment lines)'
  { cpp-binding-files; mycpp-runtime-files; } | xargs cloc --quiet "$@"
}
100
# Run ./NINJA-config.sh, have ninja build the
# _build/preprocessed/cxx-{dbg,opt}.txt targets, copy them into
# _tmp/metrics/preprocessed, and print the first 100 lines of each copy.
preprocessed() {
  ./NINJA-config.sh

  # Clang has slightly fewer lines, but it's not on the CI machine
  #local -a files=(_build/preprocessed/{cxx,clang}-{dbg,opt}.txt)
  local -a targets=(
    _build/preprocessed/cxx-dbg.txt
    _build/preprocessed/cxx-opt.txt
  )
  ninja "${targets[@]}"

  # Publish with release and show and CI
  local out_dir=_tmp/metrics/preprocessed
  mkdir -p "$out_dir"
  cp -v "${targets[@]}" "$out_dir"

  head -n 100 "$out_dir"/*.txt
}
119
120#
121# Two variants of the $count function: text and html
122#
123
# Text variant of the $count function.
#
#   $1:    header, printed first
#   $2:    comment; required but not printed in this variant
#   stdin: the files to count
#
# Prints the header, the `wc -l` lines sorted numerically, and a blank line.
category-text() {
  local header=$1 comment=$2

  echo "$header"
  xargs wc -l \
    | sort --numeric
  echo
}
135
# This is overly clever ...
# category-html increments SECTION_ID from the last stage of a pipeline.  With
# lastpipe that stage runs in the current shell, so the increment is kept.
SECTION_ID=0  # mutable global
shopt -s lastpipe
139
# HTML variant of the $count function.
#
#   stdin: the files to count
#   args:  forwarded to metrics/line_counts.py after the section number
#
# Each call bumps the global SECTION_ID and passes the new value as the first
# argument of metrics/line_counts.py, which reads the `wc -l` output on stdin.
category-html() {
  # TODO: Don't use wc -l, and just count and sum the lines yourself

  local section_id=$((++SECTION_ID))
  xargs wc -l \
    | metrics/line_counts.py "$section_id" "$@"
}
145
146#
147# Functions That Count
148#
149
150# Note this style is OVERLY ABSTRACT, but it's hard to do better in shell. We
151# want to parameterize over text and HTML. In Oils I think we would use this:
152#
153# proc p1 {
154# category 'OSH (and common libraries)' {
155# comment = 'This is the input'
156# osh-files | read --lines :files
157# }
158# }
159#
160# This produces a series of dicts that looks like
161# { name: 'OSH ...', comment: "This ...", files: %(one two three) }
162#
163# Then we iterate over the categories and produce text or HTML.
164
# Feed the OSH file list to a counting function.
#
#   $1:   command that reads the file list on stdin (e.g. category-text)
#   rest: extra arguments appended after the header and comment
osh-counts() {
  local count_fn=$1; shift

  osh-files \
    | $count_fn \
        'OSH (and common libraries)' \
        'This is the input to the translators, written in statically-typed Python. Note that bash is at least 140K lines of code, and OSH implements a large part of bash and more.' \
        "$@"
}
174
# Print the paths of the YSH source files, as listed by ls and filter-py.
ysh-files() {
  ls ysh/*.py ysh/*.pgen2 \
     builtin/func*.py builtin/method*.py \
     builtin/*_ysh.py \
    | filter-py
}
178
# Feed the YSH file list to a counting function.
#
#   $1:   command that reads the file list on stdin
#   rest: extra arguments appended after the header and comment
ysh-counts() {
  local count_fn=$1; shift

  ysh-files \
    | $count_fn \
        'YSH' \
        'Expression grammar, parser, evaluator, etc.' \
        "$@"
}
186
# Print the paths of the data_lang/ files to count: the ASDL schemas, the
# Python files that pass filter-py, and the C sources/headers whose names
# don't contain '_test'.
#
# Uses `grep -E` rather than the obsolescent `egrep`, which newer GNU grep
# releases warn about on stderr.
data-lang-files() {
  ls data_lang/*.asdl
  ls data_lang/*.py | filter-py
  ls data_lang/*.{c,h} | grep -E -v '_test'  # exclude j8_test_lib as well
}
192
# Feed the data-language file list to a counting function.
#
#   $1:   command that reads the file list on stdin
#   rest: extra arguments appended after the header and comment
data-lang-counts() {
  local count_fn=$1; shift

  data-lang-files \
    | $count_fn \
        'Data Languages' \
        'JSON, J8 Notation, ...' \
        "$@"
}
200
# Print the paths of the Python files in tools/ that pass filter-py.
tools-files() {
  ls tools/*.py \
    | filter-py
}
204
# Feed the tools/ file list to a counting function.
#
#   $1:   command that reads the file list on stdin
#   rest: extra arguments appended after the header and the (empty) comment
tools-counts() {
  local count_fn=$1; shift

  tools-files \
    | $count_fn \
        'Tools' \
        '' \
        "$@"
}
212
# Print the paths of the .cc and .h files in cpp/, leaving out *_test.cc.
#
# Uses `grep -E` rather than the obsolescent `egrep`, and escapes the dot so
# that it matches only a literal '.'.
cpp-binding-files() {
  ls cpp/*.{cc,h} | grep -E -v '_test\.cc'
}
216
# Print the paths of the .cc and .h files in mycpp/, leaving out *_test.cc and
# anything with bump_leak_heap in its name.
#
# Uses `grep -E` rather than the obsolescent `egrep`, and escapes the dot so
# that it matches only a literal '.'.
mycpp-runtime-files() {
  ls mycpp/*.{cc,h} | grep -E -v '_test\.cc|bump_leak_heap'
}
220
# Count the C++ related categories: bindings, runtime, C++ unit tests, and the
# Ninja build files.
#
#   $1:   command that reads a file list on stdin
#   rest: extra arguments appended after each header and comment
cpp-counts() {
  local count_fn=$1; shift

  cpp-binding-files \
    | $count_fn \
        'Hand-written C++ Code' \
        'Includes OS bindings. Small C++ files like cpp/osh_arith_parse.{cc,h} correspond to larger Python files like osh/arith_parse.py.' \
        "$@"

  # Remove code that isn't "in production"
  mycpp-runtime-files \
    | $count_fn \
        'Garbage-Collected Runtime' \
        'Uses a fork-friendly Mark-Sweep collector.' \
        "$@"

  ls mycpp/*_test.cc cpp/*_test.cc \
    | $count_fn \
        'Unit tests in C++' \
        'The goal is to make the spec tests pass, but unit tests are helpful too.' \
        "$@"

  ls NINJA*.sh */NINJA*.py build/ninja*.{sh,py} \
    | $count_fn \
        'Incremental C++ Build' \
        '' \
        "$@"
}
244
# Count the .cc and .h files found one directory below _gen/.
#
#   $1:   command that reads the file list on stdin
#   rest: extra arguments appended after the header and comment
gen-cpp-counts() {
  local count_fn=$1; shift

  # NOTE: this excludes .re2c.h file
  ls _gen/*/*.{cc,h} \
    | $count_fn \
        'Generated C++ Code' \
        'mycpp generates the big file _gen/bin/oils-for-unix.mycpp.cc. Other programs like Zephyr ASDL and re2c generate other files.' \
        "$@"
}
255
# Count the mycpp translator sources and its example programs.
#
#   $1:   command that reads a file list on stdin
#   rest: extra arguments appended after each header and comment
mycpp-counts() {
  local count_fn=$1; shift

  ls mycpp/*.py \
    | grep -v 'NINJA_subgraph.py' \
    | filter-py \
    | $count_fn \
        'mycpp Translator' \
        "This prototype uses the MyPy frontend to translate statically-typed Python to C++. The generated code calls a small runtime which implements things like List[T], Dict[K, V], and Python's len()." \
        "$@"

  ls mycpp/examples/*.py \
    | $count_fn \
        'mycpp Test Data' \
        'Small Python examples that translate to C++, compile, and run.' \
        "$@"
}
270
# Count the code generator categories: asdl/, pgen2/, the */*_gen.py files,
# and yaks/.
#
#   $1:   command that reads a file list on stdin
#   rest: extra arguments appended after each header and comment
code-generator-counts() {
  local count_fn=$1; shift

  ls asdl/*.py \
    | filter-py \
    | grep -v -E 'arith_|tdop|_demo' \
    | $count_fn \
        'Zephyr ASDL' \
        'A DSL for algebraic data types, borrowed from Python. Oils is the most strongly typed Bourne shell implementation!' \
        "$@"

  ls pgen2/*.py \
    | filter-py \
    | $count_fn \
        'pgen2 Parser Generator' \
        'An LL(1) parser generator used to parse YSH expressions. Also borrowed from CPython.' \
        "$@"

  ls */*_gen.py \
    | $count_fn \
        'Other Code Generators' \
        'In order to make Oils statically typed, we had to abandon Python reflection and use C++ source code generation instead. The lexer, flag definitions, and constants can be easily compiled to C++.' \
        "$@"

  ls yaks/*.py \
    | filter-py \
    | $count_fn \
        'Yaks' \
        'Experimental replacement for mycpp' \
        "$@"
}
295
# Count the spec tests (spec/*.test.sh) and the gold tests (test/gold/*.sh).
#
#   $1:   command that reads a file list on stdin
#   rest: extra arguments appended after each header and comment
spec-gold-counts() {
  local count_fn=$1; shift

  ls spec/*.test.sh \
    | $count_fn \
        'Spec Tests' \
        'A comprehensive test suite that compares OSH against other shells. If OSH passes these tests in BOTH Python and C++, it means that the translation works.' \
        "$@"

  ls test/gold/*.sh \
    | $count_fn \
        'Gold Tests' \
        'Another suite that tests shells "from the outside". Instead of making explicit assertions, we verify that OSH behaves like bash.' \
        "$@"
}
310
311#
312# Top Level Summaries
313#
314
# Run every *-counts section of the translation report, in order.
#
#   $1:   counting command handed to each section
#   rest: extra arguments handed to each section
_for-translation() {
  local count=$1
  shift

  local section
  for section in \
    mycpp-counts \
    code-generator-counts \
    cpp-counts \
    osh-counts \
    ysh-counts \
    data-lang-counts \
    tools-counts \
    spec-gold-counts \
    gen-cpp-counts
  do
    "$section" $count "$@"
  done
}
337
# Run every section of the repository overview report, in order.
#
#   $1:   counting command; each section pipes a file list into it
#   rest: extra arguments appended after each header and comment
_overview() {
  local count=$1
  shift

  local section
  for section in osh-counts ysh-counts data-lang-counts tools-counts; do
    "$section" $count "$@"
  done

  ls stdlib/*.ysh \
    | $count "YSH stdlib" '' "$@"

  ls pylib/*.py \
    | filter-py \
    | $count "Code Borrowed from Python's stdlib" '' "$@"

  spec-gold-counts $count "$@"

  test/unit.sh files-to-count \
    | $count 'Python Unit Tests' '' "$@"

  ls test/*.{sh,py,R} \
    | filter-py \
    | grep -v jsontemplate.py \
    | $count 'Other Shell Tests' '' "$@"

  ls */TEST.sh \
    | $count 'Test Automation' '' "$@"

  for section in mycpp-counts code-generator-counts cpp-counts; do
    "$section" $count "$@"
  done

  # Leaving off gen-cpp-counts since that requires a C++ build

  ls build/*.{mk,sh,py,c} Makefile configure install \
    | filter-py \
    | egrep -v 'NINJA|TEST' \
    | $count 'Build Automation' '' "$@"

  ls devtools/release*.sh \
    | $count 'Release Automation' '' "$@"

  ls soil/*.{sh,py} \
    | $count 'Soil: Multi-cloud CI with containers' '' "$@"

  ls benchmarks/*.{sh,py,R} \
    | $count 'Benchmarks' '' "$@"

  ls metrics/*.{sh,R} \
    | $count 'Metrics' '' "$@"

  ls _devbuild/gen/*.py \
    | $count \
        'Generated Python Code' \
        'For the Python App Bundle.' \
        "$@"

  ls {doctools,lazylex}/*.py doctools/*.{h,cc} \
    | filter-py \
    | $count 'Doc Tools' '' "$@"

  ls web/*.js web/*/*.{js,py} \
    | $count 'Web' '' "$@"
}
402
# Text version of the translation report.
for-translation() {
  local count_fn=category-text
  _for-translation "$count_fn"
}
406
# Text version of the repository overview report.
overview() {
  local count_fn=category-text
  _overview "$count_fn"
}
410
# A $count function that only lists its input: every whitespace-separated
# item read from stdin is echoed on its own line.  Arguments are ignored.
print-files() {
  xargs -n 1 -- \
    echo
}
414
# Print the files that the overview report counts.
overview-list() {
  local count_fn=print-files
  _overview "$count_fn"
}
418
419#
420# HTML Versions
421#
422
# Run doctools/html_head.py with the given arguments, with the current
# directory as PYTHONPATH.
html-head() {
  PYTHONPATH=. \
    doctools/html_head.py "$@"
}
426
# Emit the HTML head for a metrics page.
#
#   $1: page title
#
# The three stylesheets are referenced relative to ../../../web.
metrics-html-head() {
  local title="$1"

  local web='../../../web'
  local -a stylesheets=(
    "$web/base.css"
    "$web/table/table-sort.css"
    "$web/line-counts.css"
  )

  html-head --title "$title" "${stylesheets[@]}"
}
434
# Write a complete HTML report to stdout.
#
#   $1: report name; the function "_$1" is called to produce the sections
#   $2: page title
#
# Working files go under _tmp/metrics/line-counts/$1, which is recreated on
# every run.  The rm/mkdir chatter is sent to stderr so that stdout holds only
# HTML.
counts-html() {
  local name=$1
  local title=$2

  local tmp_dir=_tmp/metrics/line-counts/$name

  rm -r -f -v $tmp_dir >& 2
  mkdir -v -p $tmp_dir >& 2

  # Header row of the index table, plus the schema describing its columns.
  tsv-row category category_HREF total_lines num_files > $tmp_dir/INDEX.tsv

  printf '%s\t%s\n' \
    column_name type \
    category string \
    category_HREF string \
    total_lines integer \
    num_files integer \
    >$tmp_dir/INDEX.schema.tsv

  # Generate the HTML
  "_$name" category-html $tmp_dir

  metrics-html-head "$title"
  printf '%s\n' ' <body class="width40">'

  printf '%s\n' "<h1>$title</h1>"

  tsv2html $tmp_dir/INDEX.tsv

  printf '%s\n' '<hr/>'

  printf '%s\n' \
    '<h2>Related Documents</h2>' \
    ' <p>The <a href="https://www.oilshell.org/release/latest/doc/README.html">README for oilshell/oil</a>' \
    ' has another overview of the repository.' \
    ' </p>'

  # All the parts
  cat $tmp_dir/*.html

  printf '%s\n' ' </body>'
  printf '%s\n' '</html>'
}
475
# HTML version of the translation report, written to stdout.
for-translation-html() {
  counts-html for-translation 'Overview: Translating Oils to C++'
}
480
# HTML version of the repository overview report, written to stdout.
overview-html() {
  counts-html overview 'Overview of Oils Code'
}
485
# Write both HTML reports into a directory and list the result.
#
#   $1: output directory (default: _tmp/metrics/line-counts)
#
# The directory is quoted everywhere so that a path containing spaces works;
# unquoted, the redirections fail with "ambiguous redirect".
write-reports() {
  local out_dir=${1:-_tmp/metrics/line-counts}

  mkdir -v -p -- "$out_dir"

  for-translation-html > "$out_dir/for-translation.html"

  overview-html > "$out_dir/overview.html"

  ls -l -- "$out_dir"
}
497
498#
499# Misc
500#
501
502# count instructions, for fun
# Print a histogram of the instruction mnemonics in the disassembly of
# _build/oil/ovm-opt.stripped.
instructions() {
  # http://pepijndevos.nl/2016/08/24/x86-instruction-distribution.html

  local binary=_build/oil/ovm-opt.stripped

  # Take the third tab-separated field of each disassembly line and keep only
  # its leading run of lowercase letters.
  objdump -d "$binary" \
    | cut -f3 \
    | grep -oE "^[a-z]+" \
    | hist
}
509
# Histogram of the lines on stdin: each distinct line is printed once,
# prefixed by its number of occurrences, in ascending order of that count.
hist() {
  sort \
    | uniq -c \
    | sort -n
}
513
# Histogram of the lines starting with 'import' in the OSH files.
#
# Uses osh-files for the file list: the previously referenced oil-osh-files is
# not defined anywhere in this file.
stdlib-imports() {
  osh-files | xargs grep --no-filename '^import' | hist
}
517
# Histogram of the lines containing the word 'import' in the OSH files.
#
# Uses osh-files for the file list: the previously referenced oil-osh-files is
# not defined anywhere in this file.
imports() {
  osh-files | xargs grep --no-filename -w import | hist
}
521
# Print the 'import' lines that appear after line 100 of an OSH file, in
# grep's path:line:text format.
#
# Uses osh-files for the file list: the previously referenced oil-osh-files is
# not defined anywhere in this file.
imports-not-at-top() {
  # -H forces the "path:" prefix even when grep gets a single file, so that
  # the line number is always the second colon-separated field.
  osh-files | xargs grep -H -n -w import | awk -F : ' $2 > 100'
}
525
# For the compiler, see what's at the top level.
#
# Prints, in grep's path:text format, the lines of {core,osh}/*.py that start
# with a letter in column 0 and are not import/from/class/def statements.
# Lines mentioning _test.py are skipped.
top-level() {
  # grep prefixes every line with "path:" (-H forces this even for a single
  # file), so a statement keyword directly follows the first colon.  The
  # alternation is grouped and anchored there: ungrouped, it dropped every
  # line that merely contained "from", "class" or "def" anywhere.
  grep -H '^[a-zA-Z]' {core,osh}/*.py \
    | grep -v '_test.py' \
    | grep -E -v '^[^:]*:(import|from|class|def)[[:space:]]'
}
532
# Run a program with CALLGRAPH=1 and save what it prints.
#
#   $1: command to run
#   $2: name used for the output file, <out_dir>/<name>-symbols.txt
#   $3: output directory, created if missing
#
# The program's output is shown on stdout and also written to the file, whose
# line count and path are reported at the end.  Paths are quoted so that an
# output directory containing spaces works.
_python-symbols() {
  local main=$1
  local name=$2
  local out_dir=$3

  mkdir -p -- "$out_dir"
  local out=${out_dir}/${name}-symbols.txt

  # To debug what version we're running
  /usr/bin/env python2 -V
  echo

  # Run this from the repository root.
  # $main is left unquoted on purpose: it is the command to execute.
  PYTHONPATH='.:vendor/' CALLGRAPH=1 $main | tee "$out"

  wc -l "$out"
  echo
  echo "Wrote $out"
}
552
# Run _python-symbols on bin/oil.py, naming the output 'oil'.
#
#   $1: output directory (default: _tmp/opy-test)
#
# The directory is passed on quoted, so a path with spaces stays one argument.
oil-python-symbols() {
  local out_dir=${1:-_tmp/opy-test}
  _python-symbols bin/oil.py oil "$out_dir"
}
557
# Print the lines of the oil-python-symbols output that contain no '<'.
old-style-classes() {
  oil-python-symbols \
    | grep -v -e '<'
}
561
562# Some of these are "abstract classes" like ChildStateChange
# Print every line mentioning NotImplementedError in the Python files one
# directory below the current one.
NotImplementedError() {
  grep -e NotImplementedError */*.py
}
566
# Print the lines in the OSH files that import one of the native extension
# modules named in the pattern below.
#
# Uses `grep -E` rather than the obsolescent `egrep`, which newer GNU grep
# releases warn about on stderr.
py-ext() {
  # for the py-source build
  # 35 imports
  osh-files | xargs -- grep -E 'import (fanos|libc|line_input|posix_|yajl)'
}
572
# When this file is executed as source-code.sh, run the command line as a
# function call: the first argument names the function, the rest are its
# arguments.  Nothing runs when the file is invoked under another name.
#
# ${0##*/} takes the basename without word splitting, so a script path
# containing spaces doesn't break the comparison.
if [[ "${0##*/}" == 'source-code.sh' ]]; then
  "$@"
fi