OILS / build / cpython-defs.sh View on Github | oilshell.org

354 lines, 155 significant
1#!/usr/bin/env bash
2#
3# Usage:
4# build/cpython-defs.sh <function name>
5#
6# Example:
7#
8# # make clean tree of .c files
9# devtools/release.sh quick-oil-tarball
10# devtools/release.sh test-oil-tar # can Ctrl-C this
11#
12# build/cpython-defs.sh oil-py-names # extract names
13# build/cpython-defs.sh filter-methods
14#
15# NOTE: 'build/ovm-compile.sh make-tar' is complex, so it's easier to just extract
16# the tarball, even though it leads to a weird dependency.
17
# Strict mode: abort on use of an unset variable, on a failure in any stage of
# a pipeline, and on any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
21
# Absolute path of the parent of the directory that contains this script.
# "$0" is quoted so that a script path containing whitespace survives, and
# '&&' keeps a failed 'cd' from silently yielding the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
24
25source build/common.sh # $PY27
26source build/dev-shell.sh # R_LIBS_USER
27
28readonly BASE_DIR=_tmp/cpython-defs
29
30# Could be published in metrics?
31readonly PY_NAMES=_tmp/oil-py-names.txt
32
33# Print the .py files in the tarball in their original locations. For slimming
34# down the build. Similar to build/metrics.sh linecounts-pydeps.
35# Hm that doesn't seem to duplicate posixpath while this does?
oil-py-deps() {
  # Emit the first column of each manifest line whose first column ends in
  # '.py'.  The manifest path is relative to the current directory.
  local manifest=_build/oil/opy-app-deps.txt
  awk ' $1 ~ /\.py$/ { print $1 }' "$manifest"
}
39
oil-py-names() {
  # Feed every path printed by oil-py-deps to 'bin/opyc lex-names' and store
  # the sorted, de-duplicated output in $PY_NAMES.  'time' reports how long
  # the whole pipeline took.
  time oil-py-deps \
    | xargs bin/opyc lex-names \
    | sort \
    | uniq \
    > "$PY_NAMES"

  # Show how many lines were written.
  wc -l "$PY_NAMES"
}
45
46# NOTE: We can replace os with posix. Will save 700 lines of code, 25K + 25K.
47# os.getenv() is a trivial wrapper around os.environ.get(). It gets
48# initialized in posixmodule.c.
os-module-deps() {
  # List (grep -l) the files printed by oil-py-deps that contain 'os.' at a
  # word boundary.
  #
  # 'grep -E' replaces 'egrep', which GNU grep has declared obsolescent and
  # which prints a warning on stderr since grep 3.8.
  #oil-py-deps | xargs grep -E --no-filename -o '\bos\.[a-z]+' */*.py | sort | uniq -c |sort -n
  oil-py-deps | xargs grep -E -l '\bos\.'
}
53
54# TODO:
55# Write to a separate file like _build/pydefs/intobject.include
56# #ifdef OVM_MAIN
57# #include "intobject.include"
58# #else
59# ...
60# #end
61#
# Should those files be checked in and edited by hand? Or join them somehow
63# with oil-symbols.txt?
64# I think this is hard because of METHODS.
65# Maybe you should have a config file that controls it. It takes a .include
66# file and then whitelist/blacklist, and then generates a new one.
67# could put it in build/pydefs-config.txt
68#
# And then reprint the PyMethodDef without docstrings? It shouldn't be that
70# hard to parse. You can almost do it with a regex, since commas don't appear
71# in the string.
72
# Print every 'static ... PyMethodDef NAME[] = ...' array found in the given
# .c files and record where each one lives.
#
# Arguments:
#   $1    - path prefix; its length is stripped from the front of each file
#           name to form the relative path
#   $2... - files to scan
# Outputs:
#   stdout - a 'FILE <relative path>' header before the first array of each
#            file, followed by the lines of every array
#   stderr - progress and summary messages
#   $BASE_DIR/method-edit-list.txt - one
#            'rel_path def_name line_begin line_end' record per array
extract-methods() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/method-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  /static.*PyMethodDef/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # NOTE: We had to adjust stringobject.c and _weakref.c so that the name is
    # on one line!  Not a big deal.
    if (match($0, /static.*PyMethodDef ([a-zA-Z0-9_]+)\[\]/, m)) {
      def_name = m[1];
    } else {
      printf("%s:%d Could not parse declaration name\n",
             FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    printing = 1;
    line_begin = FNR;

    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  printing { print }

  # Looking for closing brace (with leading space).  The POSIX class has to sit
  # inside a bracket expression: a bare [:space:] is only the character set
  # : s p a c e, which matches neither blanks nor tabs.
  /^[[:space:]]*\}/ && printing {
    # Print the edit list for #ifdef #endif.
    line_end = FNR;
    printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
    printing = 0;
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGV[0] is the awk command name, so the number of input files is
    # ARGC - 1.
    printf("extract-methods.awk: Found definitions in %d out of %d files\n",
           num_found, ARGC - 1) > "/dev/stderr";
  }
  ' "$@"
}
130
preprocess() {
  # Run 'gcc -E' over stdin ('-') with $PY27 as an include directory and
  # OVM_MAIN and WTERMSIG defined.
  #
  # TODO: Use PREPROC_FLAGS from build/ovm-compile.sh.
  # - What about stuff in pyconfig.h?
  # - Hack to define WTERMSIG!  We really need to include <sys/wait.h>, but
  #   that causes parse errors in cpython_defs.py.  Really we should get rid of
  #   this whole hack!
  # - WIFSTOPPED is another likely thing...
  #
  # $PY27 is quoted so that an include directory containing whitespace reaches
  # gcc as a single argument.
  gcc -I "$PY27" -E -D OVM_MAIN -D WTERMSIG -
}
140
# The glob is expanded once, when the script starts.  If nothing matches, the
# pattern itself is stored; several matches are joined with single spaces.
TARBALL_ROOT=$(echo _tmp/oil-tar-test/oil-*)
readonly TARBALL_ROOT
142
extract-all-methods() {
  # Emit the pyconfig.h include line ahead of the extracted definitions.
  echo '#include "pyconfig.h"'
  # 52 different instances.  Sometimes multiple ones per file.
  #
  # Re-invoke this script's extract-methods on every .c file under
  # $TARBALL_ROOT.  File names are passed NUL-delimited and "$0" is quoted so
  # that paths containing whitespace or quote characters are not split or
  # mangled by xargs or the shell.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-methods "$TARBALL_ROOT/"
}
149
cpython-defs() {
  # Run build/cpython_defs.py with the caller's arguments and with PYTHONPATH
  # set to '.:vendor' for that one process.
  #
  # Annoying: this depends on Oils for 'R' and 'C', and so indirectly imports
  # the 'typing' module.
  env PYTHONPATH='.:vendor' build/cpython_defs.py "$@"
}
155
filter-methods() {
  # Pipeline: extract-all-methods -> preprocess -> 'cpython-defs filter'.
  # Intermediate text lands in $BASE_DIR; 'cpython-defs filter' is given
  # $PY_NAMES and build/oil-defs as arguments.
  local work_dir=$BASE_DIR
  mkdir -p "$work_dir"

  extract-all-methods > "$work_dir/extracted.txt"
  preprocess < "$work_dir/extracted.txt" > "$work_dir/preprocessed.txt"

  local defs_dir=build/oil-defs
  mkdir -p "$defs_dir"

  #head -n 30 $tmp
  cpython-defs filter "$PY_NAMES" "$defs_dir" < "$work_dir/preprocessed.txt"

  # Line counts of the .def files, smallest first.
  echo
  find "$defs_dir" -name '*.def' | xargs wc -l | sort -n

  # Line counts of the intermediate files.
  echo
  wc -l "$work_dir"/*.txt

  # syntax check
  #cc _tmp/filtered.c
}
178
# Rewrite a file in place so that lines line_begin..line_end are wrapped in
# '#ifdef OVM_MAIN / #include "<file>/<name>.def" / #else ... #endif'.
#
# Arguments:
#   $1 - path of the file to rewrite
#   $2 - definition name, used to build the #include path
#   $3 - line number of the first wrapped line
#   $4 - line number of the last wrapped line
edit-file() {
  local target=$1
  local array_name=$2
  local first_line=$3
  local last_line=$4

  local include_path="${target}/${array_name}.def"
  local backup=_tmp/buf.txt

  # DESTRUCTIVE: the original is moved aside, then regenerated from the copy.
  mv "$target" "$backup"

  gawk -v def_path="$include_path" \
       -v line_begin="$first_line" \
       -v line_end="$last_line" '
  {
    if (NR == line_begin) {
      print("#ifdef OVM_MAIN")
      printf("#include \"%s\"\n", def_path)
      print("#else")
      print  # the first wrapped line itself
    } else if (NR == line_end) {
      print  # the last wrapped line itself
      print("#endif");
    } else {
      print  # every other line passes through unchanged
    }
  }
  ' "$backup" > "$target"

  echo "Wrote $target"
}
213
edit-all() {
  # Apply edit-file to records of the method edit list, four fields at a time.
  #
  # Reversed so that edits to the same file work!  We are always inserting
  # lines.
  #tac $BASE_DIR/method-edit-list.txt | xargs -n 4 -- $0 edit-file

  # One-off editing.  'grep -F' treats the file name as a literal string (as a
  # regex its '.' would match any character), and "$0" is quoted so that a
  # script path containing whitespace is passed to xargs as one word.
  grep -F -- typeobject.c "$BASE_DIR/method-edit-list.txt" \
    | tac | xargs -n 4 -- "$0" edit-file

}
224
# Print every 'PyTypeObject ... = ...' initializer found in the given .c
# files and record where each multi-line one lives.
#
# Arguments:
#   $1    - path prefix; its length is stripped from the front of each file
#           name to form the relative path
#   $2... - files to scan
# Outputs:
#   stdout - one-line initializers as-is; for multi-line initializers a
#            'FILE <relative path>' header before the first one of each file,
#            followed by the initializer lines
#   stderr - progress and summary messages
#   $BASE_DIR/type-edit-list.txt - one
#            'rel_path def_name line_begin line_end' record per multi-line
#            initializer
extract-types() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/type-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  function maybe_print_file_header() {
    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  /PyTypeObject.*=.*\{.*\}/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Found it all on one line.  (This used to be written with a C++ style
    # double slash, which awk parses as an empty regex, not as a comment.)
    print
    num_one_line_types++;
    next
  }

  /PyTypeObject.*=.*\{/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Remember the variable name for the edit list.  It was previously never
    # assigned, so the record had an empty second field.  It stays empty when
    # the declaration does not have the usual PyTypeObject NAME shape.
    if (match($0, /PyTypeObject[[:space:]]+([a-zA-Z0-9_]+)/, m)) {
      def_name = m[1];
    } else {
      def_name = "";
    }
    printing = 1;
    line_begin = FNR;

    maybe_print_file_header()
    num_types++;
  }

  {
    if (printing) {
      print
    }
  }

  # Closing brace, possibly indented.  The POSIX class has to sit inside a
  # bracket expression: a bare [:space:] is only the character set
  # : s p a c e, which matches neither blanks nor tabs.
  /^[[:space:]]*\}/ {
    if (printing) {
      # Print the edit list for #ifdef #endif.
      line_end = FNR;
      printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
      printing = 0;
    }
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGV[0] is the awk command name, so the number of input files is
    # ARGC - 1.
    printf("extract-types.awk: Found %d definitions in %d files (of %d files)\n",
           num_types, num_found, ARGC - 1) > "/dev/stderr";
    printf("extract-types.awk: Also found %d types on one line\n",
           num_one_line_types) > "/dev/stderr";
  }
  ' "$@"
}
295
extract-all-types() {
  # Re-invoke this script's extract-types on every .c file under
  # $TARBALL_ROOT.  File names are passed NUL-delimited and "$0" is quoted so
  # that paths containing whitespace or quote characters are not split or
  # mangled by xargs or the shell.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-types "$TARBALL_ROOT/"
}
300
301#
302# Analysis
303#
304
305readonly METRICS_DIR=_tmp/metrics/cpython-defs
306
307# Show current Oil definitions literally.
show-oil() {
  # Concatenate every *.def file under build/oil-defs and page the result.
  local defs_dir=build/oil-defs
  find "$defs_dir" -name '*.def' \
    | xargs cat \
    | less
}
311
# Show in a condensed format.
methods-audit() {
  # Feed $BASE_DIR/preprocessed.txt to 'cpython-defs audit $PY_NAMES', showing
  # the output while also saving it to _tmp/methods.txt.
  mkdir -p "$METRICS_DIR"
  cpython-defs audit "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee _tmp/methods.txt

  # Line count of the saved copy.
  wc -l _tmp/methods.txt
}
320
methods-tsv() {
  # Feed $BASE_DIR/preprocessed.txt to 'cpython-defs tsv $PY_NAMES', showing
  # the output while also saving it to $METRICS_DIR/methods.tsv.
  mkdir -p "$METRICS_DIR"
  local tsv_path=$METRICS_DIR/methods.tsv
  cpython-defs tsv "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" | tee "$tsv_path"
}
326
_report() {
  # Forward all arguments, unchanged, to the R script.
  local r_script=metrics/cpython-defs.R
  "$r_script" "$@"
}
330
report() {
  # Call _report with the literal word 'metrics' followed by $METRICS_DIR.
  _report metrics "$METRICS_DIR"
}
334
run-for-release() {
  # Repeats what we did at the beginning of the release process, because _tmp/
  # was deleted.
  local step
  for step in oil-py-names filter-methods methods-tsv; do
    "$step"
  done

  # Show the report and keep a copy next to the other metrics.
  report | tee "$METRICS_DIR/overview.txt"
}
344
unfiltered() {
  # Diff the sorted output of 'cpython-defs filtered' (left) against the
  # sorted, de-duplicated base names taken from the first column of the edit
  # list (right).  diff exits non-zero when the two differ.
  #
  # NOTE(review): this reads $BASE_DIR/edit-list.txt, while extract-methods
  # writes $BASE_DIR/method-edit-list.txt -- confirm which file is intended.
  cpython-defs filtered | sort > _tmp/left.txt
  # 'grep -E' replaces 'egrep', which GNU grep has declared obsolescent.
  awk '{print $1}' "$BASE_DIR/edit-list.txt" \
    | grep -E -o '[^/]+$' \
    | sort | uniq > _tmp/right.txt
  diff -u _tmp/{left,right}.txt
}
352
353
354"$@"