OILS / doctools / src-tree.sh View on Github | oilshell.org

272 lines, 161 significant
1#!/usr/bin/env bash
2#
3# Source code -> HTML tree
4#
5# Usage:
6# doctools/src-tree.sh <function name>
7
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above this script's directory.
# tsv-lib.sh uses this.  Every expansion is quoted so that a checkout path
# containing whitespace or glob characters still resolves correctly.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
14
15source build/common.sh # log
16
17export PYTHONPATH=.
18
install-deps() {
  ### Install the system packages this script needs (runs apt-get via sudo)

  # moreutils provides isutf8, which check-is-utf8 invokes
  local -a packages=(moreutils)

  sudo apt-get install "${packages[@]}"
}
22
lexer-files() {
  ### Print the lexer file paths, one per line
  ### (linked from doc/release-quality.md)

  printf '%s\n' \
    _gen/_tmp/match.re2c-input.h \
    _gen/frontend/match.re2c.h \
    _gen/frontend/id_kind.asdl_c.h
}
33
_print-files() {
  ### Print the paths that make up the source tree, one per line.
  ###
  ### Output is unsorted and may contain duplicates; sorted-files cleans it up.

  #lexer-files

  # Generated sources
  find _gen/ -type f

  # TODO: move _devbuild/bin/time-helper elsewhere?
  find _devbuild/ -type f -a -name '*.py'
  find _devbuild/help -type f

  # For some reason it shows py-yajl
  # Remove binary file (probably should delete it altogether, but it's a nice
  # test of UTF-8)
  #
  # 'grep -E' replaces the obsolescent 'egrep', which warns on stderr with
  # GNU grep >= 3.8.  Dots are escaped so they only match a literal '.'.
  git ls-files | grep -E -v 'Python-2\.7\.13|^py-yajl|rsa_travis\.enc'

  # We also had this way of categorizing.  Should unify these line counts with
  # micro-syntax.  (Kept as a note; it was previously unreachable code after a
  # 'return'.)
  #
  #   metrics/source-code.sh overview-list
}
55
# The raw listing can repeat a path (overview-list has dupes)
sorted-files() {
  ### Print the source tree paths in sorted order, each distinct path once
  _print-files \
    | sort \
    | uniq
}
60
# Scratch directory holding the manifest and the per-language file lists
BASE_DIR=_tmp/src-tree
readonly BASE_DIR
62
classify() {
  ### Classify files on stdin
  #
  # Reads one path per line from stdin and writes each path to exactly one of
  # $BASE_DIR/{cpp,py,shell,asdl,R,js,css,md,yaml,txt,other}.txt, chosen by
  # the first matching pattern below.  The list files are truncated on every
  # call.  Afterwards prints 'wc -l' for every *.txt file in $BASE_DIR.
  #
  # Globals: BASE_DIR (read); the directory must already exist.

  # The loop runs in a subshell on purpose.  Descriptors opened with {name}>
  # persist after the command they are attached to, so without the subshell
  # every call would leak 11 open fds and 11 global variables (plus 'path').
  (
    while IFS= read -r path; do   # IFS= keeps leading/trailing whitespace
      case $path in
        */here-doc.test.sh|*/posix.test.sh|*/gold/complex-here-docs.sh|*/07-unterminated-here-doc.sh)
          # Plain text since they can have invalid here docs
          #
          # TODO: make a style for *.test.sh?
          fd=$txt
          ;;

        # TODO: Fix BUG in micro-syntax: $(( 1 << i )) is confused for here doc!
        demo/sparse-array.sh)
          fd=$txt
          ;;

        *.cc|*.c|*.h)
          fd=$cpp
          ;;
        *.py|*.pyi|*.pgen2)  # pgen2 uses Python lexical syntax
          fd=$py
          ;;
        *.sh|*.bash|*.osh|*.ysh|configure|install|uninstall)
          fd=$shell
          ;;
        *.asdl)
          fd=$asdl
          ;;
        *.R)
          fd=$R
          ;;
        *.js)
          fd=$js
          ;;
        *.css)
          fd=$css
          ;;
        *.md)
          fd=$md
          ;;
        *.yml)
          fd=$yaml
          ;;
        *.txt)
          fd=$txt
          ;;
        *)
          fd=$other
          ;;
      esac

      # printf rather than echo, so a path like '-n' is written verbatim
      printf '%s\n' "$path" >&"$fd"
    done {cpp}>"$BASE_DIR/cpp.txt" \
         {py}>"$BASE_DIR/py.txt" \
         {shell}>"$BASE_DIR/shell.txt" \
         {asdl}>"$BASE_DIR/asdl.txt" \
         {R}>"$BASE_DIR/R.txt" \
         {js}>"$BASE_DIR/js.txt" \
         {css}>"$BASE_DIR/css.txt" \
         {md}>"$BASE_DIR/md.txt" \
         {yaml}>"$BASE_DIR/yaml.txt" \
         {txt}>"$BASE_DIR/txt.txt" \
         {other}>"$BASE_DIR/other.txt"
  )

  # Other
  # .mk
  # .re2c.txt - rename this one to .h
  #
  # Just leave those un-highlighted for now

  wc -l "$BASE_DIR"/*.txt
}
133
all-html-to-files() {
  ### Run micro_syntax over each per-language file list and write HTML
  #
  # Args:
  #   $1 - output directory handed to 'src_tree.py write-html-fragments'
  # Globals:
  #   BASE_DIR (read) - holds the <lang>.txt lists produced by classify
  # Outputs:
  #   stdout - whatever src_tree.py prints; progress goes through 'log'
  local out_dir=$1
  local lang   # keep the loop variable out of the global namespace

  for lang in cpp py shell asdl R js css md yaml txt other; do
    log "=== $lang ==="

    # xargs turns the list into micro_syntax arguments; expansions are quoted
    # so directories containing spaces survive word splitting.
    xargs _tmp/micro-syntax/micro_syntax -l "$lang" -w < "$BASE_DIR/$lang.txt" \
      | doctools/src_tree.py write-html-fragments "$out_dir"
    log ''
  done
}
144
check-is-utf8() {
  ### Die unless isutf8 accepts every file named in the manifest
  #
  # Args:
  #   $1 - manifest file with one path per line
  local manifest=$1

  log '--- Checking that files are UTF-8'
  log ''

  # xargs exits non-zero when isutf8 does.  The manifest path is quoted so a
  # name containing spaces is not rejected as an ambiguous redirect.
  if ! xargs isutf8 --list < "$manifest"; then
    echo
    die "The files shown aren't UTF-8"
  fi
}
156
highlight() {
  ### Build micro_syntax, then render the source tree as HTML
  #
  # Writes the manifest and attrs.txt under $BASE_DIR and the HTML under
  # _tmp/src-tree-www.

  # Alternative variant: asan
  local build_variant=opt
  local html_root=_tmp/src-tree-www
  local manifest_file="$BASE_DIR/manifest.txt"
  local attrs_file="$BASE_DIR/attrs.txt"

  doctools/micro-syntax.sh build "$build_variant"
  echo

  mkdir -p "$BASE_DIR" "$html_root"

  # Step 1: list every file once and report how many there are
  sorted-files > "$manifest_file"
  wc -l "$manifest_file"
  echo

  # Fails if there is non UTF-8
  # Disable until moreutils is in our Soil CI images
  # check-is-utf8 "$manifest_file"

  # Step 2: split the manifest into per-language lists
  classify < "$manifest_file"

  # Step 3: write the HTML fragments, saving their stdout for the next step
  time all-html-to-files "$html_root" > "$attrs_file"

  # Step 4: write the index.html dir listings
  time doctools/src_tree.py dirs "$html_root" < "$attrs_file"
}
185
soil-run() {
  ### Render the HTML source tree; this simply delegates to highlight

  highlight
}
191
cat-benchmark() {
  ### Baseline timing: cat every file in the tree and print the byte count

  # 355 ms to cat the files! It takes 2.75 seconds to syntax highlight
  # 'src_tree.py files'
  #
  # Producing 5.9 MB of text.
  #
  # Note: wc -l is not much slower.
  time sorted-files \
    | xargs cat \
    | wc -c
}
200
micro-bench() {
  ### Time micro_syntax over the whole tree, without and then with -w;
  ### each run prints the number of bytes it produced.

  # ~435 ms, not bad. cat is ~355 ms, so that's only 70 ms more.

  # Alternative variant: asan
  local build_variant=opt
  doctools/micro-syntax.sh build "$build_variant"

  # Alternative language: cpp.  py is the one marked "Buggy!"
  local lang=py
  local highlighter=_tmp/micro-syntax/micro_syntax

  # optimization:
  # lang=cpp: 11.4 MB -> 11.3 MB
  time sorted-files | xargs "$highlighter" -l "$lang" | wc -c

  # optimization:
  # lang=cpp: 18.5 MB -> 18.4 MB
  time sorted-files | xargs "$highlighter" -l "$lang" -w | wc -c
}
221
222
223#
224# Misc ways of counting files
225# TODO: unify or remove these
226#
227
repo() {
  ### Print the output of 'git ls-files' for the current directory

  git ls-files
}
231
no-cpython() {
  ### Filter stdin, dropping every line that contains 'Python-2.7.13'

  # -F matches the text literally.  As a regex, each '.' would match any
  # character and could drop unrelated paths such as 'Python-2x7y13'.
  grep -F -v 'Python-2.7.13'
}
235
compress() {
  ### Zip the files listed by 'repo | no-cpython' into _tmp/source-code.zip,
  ### then print the file count and the archive's size.
  local archive=_tmp/source-code.zip

  # Remove any previous archive first
  rm -f -v "$archive"

  repo \
    | no-cpython \
    | xargs --verbose -- zip "$archive"
  echo

  # 1688 files in 3.6 MB, OK seems fine
  repo \
    | no-cpython \
    | wc -l

  ls -l -h "$archive"
}
249
extensions() {
  ### Count files by extension, least common first
  #
  # Input is 'repo | no-cpython' minus anything under testdata/.  The
  # "extension" is the text after the last '.', so a path without any dot is
  # counted under its full path.

  # -F is the POSIX spelling of the field separator option; the long form
  # --field-separator is a gawk extension that other awks reject.
  repo \
    | no-cpython \
    | grep -v 'testdata/' \
    | awk -F . '{ print $NF }' \
    | sort | uniq -c | sort -n
}
257
258#
259# Debug CSS
260#
261
css-deploy() {
  ### Copy the stylesheet and one rendered page to the web host, for
  ### debugging the CSS there.
  local host=oilshell.org
  local remote_dir=$host/tmp
  local page=_tmp/configure.html

  ssh "$host" mkdir -p "$remote_dir"
  scp web/src-tree.css "$host:$remote_dir"

  # Drop the '../../../web/' prefix from the page so its references point at
  # files sitting next to it on the host.
  #
  # NOTE(review): highlight writes its HTML to _tmp/src-tree-www, while this
  # reads from _tmp/src-tree/www -- confirm which path is current.
  sed 's;../../../web/;;g' _tmp/src-tree/www/configure.html > "$page"
  scp "$page" "$host:$remote_dir"
}
269
# Run "$1" as a function with the remaining arguments, but only when this
# file is executed under its own name (not when another script sources it).
# ${0##*/} is the basename of $0 without forking; unlike an unquoted
# $(basename $0) it also works when the script path contains spaces.
if [[ ${0##*/} == 'src-tree.sh' ]]; then
  "$@"
fi