OILS / doctools / src-tree.sh View on Github | oilshell.org

266 lines, 158 significant
#!/usr/bin/env bash
#
# Source code -> HTML tree
#
# Usage:
#   doctools/src-tree.sh <function name>
#
# Paths below (build/common.sh, _gen/, _tmp/, ...) are relative, so the script
# has to be started from the repository root.

set -o nounset
set -o pipefail
set -o errexit

# Parent of the directory holding this script.  Every expansion is quoted so a
# checkout path containing whitespace still resolves.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)  # tsv-lib.sh uses this
readonly REPO_ROOT

source build/common.sh  # log

export PYTHONPATH=.
18
install-deps() {
  ### Install the Debian packages this script needs (uses sudo)
  local -a packages=(
    moreutils  # for isutf8
  )
  sudo apt-get install "${packages[@]}"
}
22
lexer-files() {
  ### linked from doc/release-quality.md
  #
  # Prints three paths under _gen/, one per line.

  # printf re-applies its format to each argument, so no loop is needed.
  printf '%s\n' \
    _gen/_tmp/match.re2c-input.h \
    _gen/frontend/match.re2c.h \
    _gen/frontend/id_kind.asdl_c.h
}
33
_print-files() {
  ### Print the paths that go into the source tree, one per line
  #
  # Combines generated files found on disk with the files tracked by git.
  # The exit status is that of the final 'git ls-files | grep' pipeline.

  #lexer-files

  find _gen/ -type f

  # TODO: move _devbuild/bin/time-helper elsewhere?
  find _devbuild/ -type f -a -name '*.py'
  find _devbuild/help -type f

  # For some reason it shows py-yajl
  # Remove binary file (probably should delete it altogether, but it's a nice
  # test of UTF-8)
  #
  # 'grep -E' replaces the obsolescent 'egrep', and the dots are escaped so
  # they only match a literal '.'.
  git ls-files | grep -E -v 'Python-2\.7\.13|^py-yajl|rsa_travis\.enc'

  # We also had this way of categorizing.  Should unify these line counts with
  # micro-syntax.  (This used to sit after an unconditional 'return', so it
  # never ran.)
  #
  #   metrics/source-code.sh overview-list
}
55
# overview-list has dupes
sorted-files() {
  ### Print the output of _print-files sorted, with duplicate lines removed
  _print-files | sort -u
}
60
# Directory that receives manifest.txt, attrs.txt and the per-language lists.
BASE_DIR=_tmp/src-tree
readonly BASE_DIR
62
classify() {
  ### Classify files on stdin
  #
  # Reads one path per line and writes it to $BASE_DIR/<lang>.txt, chosen by
  # the shape of the file name.  Every list is truncated first, so each call
  # starts from scratch.  Finally prints 'wc -l' for all $BASE_DIR/*.txt.
  #
  # $BASE_DIR must already exist.

  # Hold the descriptor numbers bash allocates for the {name}> redirections
  # below.  Local so they don't clobber the caller's variables.
  local cpp py shell asdl R js css md yaml txt other
  local path

  # IFS= keeps leading/trailing whitespace in a path; the || clause still
  # processes a last line that has no trailing newline.
  while IFS= read -r path || [[ -n "$path" ]]; do
    case $path in
      */here-doc.test.sh|*/posix.test.sh|*/gold/complex-here-docs.sh|*/07-unterminated-here-doc.sh)
        # Plain text since they can have invalid here docs
        #
        # TODO: make a style for *.test.sh?
        printf '%s\n' "$path" >&"$txt"
        ;;
      *.cc|*.c|*.h)
        printf '%s\n' "$path" >&"$cpp"
        ;;
      *.py|*.pyi|*.pgen2)  # pgen2 uses Python lexical syntax
        printf '%s\n' "$path" >&"$py"
        ;;
      *.sh|*.bash|*.osh|*.ysh|configure|install|uninstall)
        printf '%s\n' "$path" >&"$shell"
        ;;
      *.asdl)
        printf '%s\n' "$path" >&"$asdl"
        ;;
      *.R)
        printf '%s\n' "$path" >&"$R"
        ;;
      *.js)
        printf '%s\n' "$path" >&"$js"
        ;;
      *.css)
        printf '%s\n' "$path" >&"$css"
        ;;
      *.md)
        printf '%s\n' "$path" >&"$md"
        ;;
      *.yml)
        printf '%s\n' "$path" >&"$yaml"
        ;;
      *.txt)
        printf '%s\n' "$path" >&"$txt"
        ;;
      *)
        printf '%s\n' "$path" >&"$other"
        ;;
    esac
  done {cpp}>"$BASE_DIR/cpp.txt" \
       {py}>"$BASE_DIR/py.txt" \
       {shell}>"$BASE_DIR/shell.txt" \
       {asdl}>"$BASE_DIR/asdl.txt" \
       {R}>"$BASE_DIR/R.txt" \
       {js}>"$BASE_DIR/js.txt" \
       {css}>"$BASE_DIR/css.txt" \
       {md}>"$BASE_DIR/md.txt" \
       {yaml}>"$BASE_DIR/yaml.txt" \
       {txt}>"$BASE_DIR/txt.txt" \
       {other}>"$BASE_DIR/other.txt"

  # Descriptors opened with {name}> outlive the command they are attached to,
  # so close them explicitly instead of leaking 11 open files per call.
  exec {cpp}>&- {py}>&- {shell}>&- {asdl}>&- {R}>&- {js}>&- {css}>&- \
       {md}>&- {yaml}>&- {txt}>&- {other}>&-

  # Other
  # .mk
  # .re2c.txt - rename this one to .h
  #
  # Just leave those un-highlighted for now

  wc -l "$BASE_DIR"/*.txt
}
127
all-html-to-files() {
  ### Run micro_syntax over each per-language list and write HTML fragments
  #
  # Args:
  #   $1: directory handed to 'src_tree.py write-html-fragments'
  #
  # Reads $BASE_DIR/<lang>.txt for every language.  Stdout is whatever
  # src_tree.py prints; progress messages go through 'log'.
  local out_dir=$1
  local lang  # keep the loop variable out of the caller's scope

  for lang in cpp py shell asdl R js css md yaml txt other; do
    log "=== $lang ==="

    # Feed the list to xargs directly rather than through 'cat'.
    xargs _tmp/micro-syntax/micro_syntax -l "$lang" -w < "$BASE_DIR/$lang.txt" \
      | doctools/src_tree.py write-html-fragments "$out_dir"
    log ''
  done
}
138
check-is-utf8() {
  ### Die unless isutf8 accepts every file named in the manifest
  #
  # Args:
  #   $1: manifest file; its contents are handed to 'xargs isutf8 --list'
  local manifest=$1

  log '--- Checking that files are UTF-8'
  log ''

  # The redirect target is quoted so a manifest path with spaces works.
  if ! xargs isutf8 --list < "$manifest"; then
    echo
    die "The files shown aren't UTF-8"
  fi
}
150
highlight() {
  ### Build micro_syntax, then render the listed source files as HTML
  #
  # Steps: build the tool, write the manifest, classify it by language,
  # write the HTML fragments, then write the directory listings.
  local build_variant=opt
  #local build_variant=asan

  doctools/micro-syntax.sh build "$build_variant"
  echo

  local html_root=_tmp/src-tree-www
  local manifest_file=$BASE_DIR/manifest.txt
  local attrs_file=$BASE_DIR/attrs.txt

  mkdir -p "$BASE_DIR" "$html_root"

  sorted-files > "$manifest_file"
  wc -l "$manifest_file"
  echo

  # Fails if there is non UTF-8
  # Disable until moreutils is in our Soil CI images
  # check-is-utf8 "$manifest_file"

  # Figure file types
  classify < "$manifest_file"

  # Stdout of the fragment pass is saved and fed to the 'dirs' pass below.
  time all-html-to-files "$html_root" > "$attrs_file"

  # Now write index.html dir listings
  time doctools/src_tree.py dirs "$html_root" < "$attrs_file"
}
179
soil-run() {
  ### Write the HTML source tree by running 'highlight'
  #
  # highlight passes _tmp/src-tree-www (its www_dir) to src_tree.py, so the
  # index.html listings are presumably written there.  This comment used to
  # say _tmp/src-tree/index.html -- TODO confirm which location is current.

  highlight
}
185
cat-benchmark() {
  ### Time a plain 'cat' of every listed file, as a baseline
  #
  # 355 ms to cat the files!  It takes 2.75 seconds to syntax highlight
  # 'src_tree.py files'
  #
  # Producing 5.9 MB of text.
  #
  # Prints the total byte count on stdout.
  time sorted-files \
    | xargs cat \
    | wc -c

  # Note: wc -l is not much slower.
}
194
micro-bench() {
  ### Time micro_syntax over every listed file, without and then with -w
  #
  # ~435 ms, not bad.  cat is ~355 ms, so that's only 70 ms more.
  #
  # Each run prints the number of bytes micro_syntax produced.

  local build_variant=opt
  #local build_variant=asan
  doctools/micro-syntax.sh build "$build_variant"

  #local lang=cpp

  # Buggy!
  local lang=py

  local tool=_tmp/micro-syntax/micro_syntax

  # optimization:
  # lang=cpp: 11.4 MB -> 11.3 MB
  time sorted-files | xargs "$tool" -l "$lang" | wc -c

  # optimization:
  # lang=cpp: 18.5 MB -> 18.4 MB
  time sorted-files | xargs "$tool" -l "$lang" -w | wc -c
}
215
216
217#
218# Misc ways of counting files
219# TODO: unify or remove these
220#
221
repo() {
  ### Print the files tracked by git, as reported by 'git ls-files'
  git ls-files
}
225
no-cpython() {
  ### Filter stdin, dropping every line that contains 'Python-2.7.13'
  #
  # -F matches the text literally; as a regex the dots would match any
  # character.
  grep -F -v 'Python-2.7.13'
}
229
compress() {
  ### Zip the files from 'repo | no-cpython' into _tmp/source-code.zip
  #
  # Also prints how many files were selected and the size of the archive.
  local zip_path=_tmp/source-code.zip

  # Remove any archive left over from an earlier run.
  rm -f -v "$zip_path"

  repo \
    | no-cpython \
    | xargs --verbose -- zip "$zip_path"
  echo

  # 1688 files in 3.6 MB, OK seems fine
  repo | no-cpython | wc -l

  ls -l -h "$zip_path"
}
243
extensions() {
  ### Count files by extension, least common first
  #
  # Paths containing 'testdata/' are skipped.  The "extension" is the text
  # after the last '.', so a path without any dot is counted under its whole
  # path.
  #
  # -F is the POSIX spelling of the field separator option;
  # --field-separator is a gawk long option.
  repo \
    | no-cpython \
    | grep -v 'testdata/' \
    | awk -F . '{ print $NF }' \
    | sort | uniq -c | sort -n
}
251
252#
253# Debug CSS
254#
255
css-deploy() {
  ### Copy src-tree.css and one sample page to oilshell.org for CSS debugging
  local host=oilshell.org
  local remote_dir=$host/tmp  # relative path on the remote side
  local sample_page=_tmp/configure.html

  ssh "$host" mkdir -p "$remote_dir"
  scp web/src-tree.css "$host:$remote_dir"

  # Drop the ../../../web/ prefix from the page before uploading it;
  # presumably so it picks up the CSS copied next to it.
  #
  # NOTE(review): this reads _tmp/src-tree/www/ while highlight writes to
  # _tmp/src-tree-www -- confirm the input path is still right.
  sed 's;../../../web/;;g' _tmp/src-tree/www/configure.html > "$sample_page"
  scp "$sample_page" "$host:$remote_dir"
}
263
# Run the function named on the command line, but only when this file is
# invoked under its own name.
#
# ${0##*/} strips the directory part without spawning basename, and [[ ]]
# does no word splitting, so a path with spaces can't break the test.
if [[ "${0##*/}" == 'src-tree.sh' ]]; then
  "$@"
fi