1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # build/cpython-defs.sh <function name>
|
5 | #
|
6 | # Example:
|
7 | #
|
8 | # # make clean tree of .c files
|
9 | # devtools/release.sh quick-oil-tarball
|
10 | # devtools/release.sh test-oil-tar # can Ctrl-C this
|
11 | #
|
12 | # build/cpython-defs.sh oil-py-names # extract names
|
13 | # build/cpython-defs.sh filter-methods
|
14 | #
|
15 | # NOTE: 'build/ovm-compile.sh make-tar' is complex, so it's easier to just extract
|
16 | # the tarball, even though it leads to a weird dependency.
|
17 |
|
# Strict mode: abort on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory containing this script.
# "$0" is quoted so a checkout path containing spaces still works, and '&&'
# (rather than ';') keeps a failed 'cd' from silently recording the caller's
# working directory instead.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT

# NOTE: these are resolved relative to the current directory, not REPO_ROOT.
source build/common.sh  # $PY27
source build/dev-shell.sh  # R_LIBS_USER

# Scratch directory holding the extracted/preprocessed text and the edit lists.
readonly BASE_DIR=_tmp/cpython-defs

# Sorted, de-duplicated names written by oil-py-names.
# Could be published in metrics?
readonly PY_NAMES=_tmp/oil-py-names.txt
|
32 |
|
# List the .py files in the tarball at their original locations: the first
# column of every row of _build/oil/opy-app-deps.txt whose first column ends
# in ".py".  For slimming down the build.  Similar to build/metrics.sh
# linecounts-pydeps.
# Hm that doesn't seem to duplicate posixpath while this does?
oil-py-deps() {
  local manifest=_build/oil/opy-app-deps.txt

  awk ' $1 ~ /\.py$/ { print $1 }' < "$manifest"
}
|
39 |
|
# Run 'bin/opyc lex-names' over the files listed by oil-py-deps, write the
# sorted, de-duplicated result to $PY_NAMES, and print its line count.
# The pipeline is timed; the timing report goes to stderr.
oil-py-names() {
  local names_file=$PY_NAMES

  time oil-py-deps \
    | xargs bin/opyc lex-names \
    | sort \
    | uniq \
    > "$names_file"

  wc -l "$names_file"
}
|
45 |
|
# NOTE: We can replace os with posix.  Will save 700 lines of code, 25K + 25K.
# os.getenv() is a trivial wrapper around os.environ.get().  It gets
# initialized in posixmodule.c.
#
# Prints the names of the files listed by oil-py-deps that contain "os." at a
# word boundary.
os-module-deps() {
  #oil-py-deps | xargs egrep --no-filename -o '\bos\.[a-z]+' */*.py | sort | uniq -c |sort -n

  # 'grep -E' instead of the obsolescent 'egrep', which prints a warning on
  # stderr with current GNU grep.
  oil-py-deps | xargs grep -E -l '\bos\.'
}
|
53 |
|
54 | # TODO:
|
55 | # Write to a separate file like _build/pydefs/intobject.include
|
56 | # #ifdef OVM_MAIN
|
57 | # #include "intobject.include"
|
58 | # #else
|
59 | # ...
|
60 | # #end
|
61 | #
|
# Should those files be checked in and edited by hand? Or join them somehow
|
63 | # with oil-symbols.txt?
|
64 | # I think this is hard because of METHODS.
|
65 | # Maybe you should have a config file that controls it. It takes a .include
|
66 | # file and then whitelist/blacklist, and then generates a new one.
|
67 | # could put it in build/pydefs-config.txt
|
68 | #
|
# And then reprint the PyMethodDef without docstrings? It shouldn't be that
|
70 | # hard to parse. You can almost do it with a regex, since commas don't appear
|
71 | # in the string.
|
72 |
|
# Print every "static ... PyMethodDef NAME[] = { ... }" definition found in the
# given C files to stdout.  The first match in each file is preceded by a
# "FILE <relative path>" marker line.  For each definition, one line
#   <relative path> <name> <first line> <last line>
# is written to $BASE_DIR/method-edit-list.txt.  Progress messages and the
# final summary go to stderr.
#
# Arguments:
#   $1    path prefix to strip from each file name (only its length is used)
#   $2..  C source files to scan
#
# Exits non-zero if a definition starts while another is still open, or if the
# array name cannot be parsed from the declaration line.
extract-methods() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/method-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  /static.*PyMethodDef/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # NOTE: We had to adjust stringobject.c and _weakref.c so that the name is
    # on one line!  Not a big deal.
    if (match($0, /static.*PyMethodDef ([a-zA-Z0-9_]+)\[\]/, m)) {
      def_name = m[1];
    } else {
      printf("%s:%d Could not parse declaration name\n",
             FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    printing = 1;
    line_begin = FNR;

    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  printing { print }

  # Looking for closing brace (with leading space).  The POSIX class has to be
  # written [[:space:]]: a bare [:space:] is a bracket expression matching the
  # literal characters ":", "s", "p", "a", "c" and "e", so an indented closing
  # brace was never recognized.
  /^[[:space:]]*\}/ && printing {
    # Print the edit list for #ifdef #endif.
    line_end = FNR;
    printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
    printing = 0;
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts ARGV[0] (the program name), so subtract it to get the
    # number of input files.
    printf("extract-methods.awk: Found definitions in %d out of %d files\n",
           num_found, ARGC - 1) > "/dev/stderr";
  }
  ' "$@"
}
|
130 |
|
# Run the C preprocessor (gcc -E) over stdin and write the result to stdout,
# with $PY27 on the include path and OVM_MAIN and WTERMSIG defined.
preprocess() {
  # TODO: Use PREPROC_FLAGS from build/ovm-compile.sh.
  # - What about stuff in pyconfig.h?
  # - Hack to define WTERMSIG!  We really need to include <sys/wait.h>, but
  #   that causes parse errors in cpython_defs.py.  Really we should get rid of
  #   this whole hack!
  # - WIFSTOPPED is another likely thing...

  # "$PY27" is quoted so an include directory containing spaces stays one
  # argument.
  gcc -I "$PY27" -E -D OVM_MAIN -D WTERMSIG -
}
|
140 |
|
# Result of expanding the glob _tmp/oil-tar-test/oil-* once, when this line
# runs.  If nothing matches, bash leaves the pattern itself as the value.
readonly TARBALL_ROOT=$(echo _tmp/oil-tar-test/oil-*)

# Print an '#include "pyconfig.h"' line followed by the extract-methods output
# for every *.c file under $TARBALL_ROOT.  extract-methods is run by
# re-invoking this script through xargs.
extract-all-methods() {
  echo '#include "pyconfig.h"'
  # 52 different instances.  Sometimes multiple ones per file.
  # File names are passed NUL-delimited so that whitespace or quotes in a path
  # cannot split or mangle an argument; "$0" is quoted for the same reason.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-methods "$TARBALL_ROOT/"
}
|
149 |
|
# Run build/cpython_defs.py with the caller's arguments and PYTHONPATH set to
# '.:vendor'.
#
# Annoying: this depends on Oils for 'R' and 'C', and so indirectly imports
# the 'typing' module.
cpython-defs() {
  local -r module_path='.:vendor'

  PYTHONPATH=$module_path build/cpython_defs.py "$@"
}
|
155 |
|
# Pipeline: extract-all-methods -> preprocess -> 'cpython-defs filter'.
# The intermediate text is kept in $BASE_DIR (extracted.txt and
# preprocessed.txt); 'cpython-defs filter' is given $PY_NAMES and the
# build/oil-defs directory.  Finishes by printing line counts of the *.def
# files and of the intermediate *.txt files.
filter-methods() {
  local work_dir=$BASE_DIR
  local defs_dir=build/oil-defs

  mkdir -p "$work_dir"

  extract-all-methods > "$work_dir/extracted.txt"
  preprocess < "$work_dir/extracted.txt" > "$work_dir/preprocessed.txt"

  mkdir -p "$defs_dir"

  #head -n 30 $tmp
  cpython-defs filter "$PY_NAMES" "$defs_dir" < "$work_dir/preprocessed.txt"

  echo
  find "$defs_dir" -name '*.def' \
    | xargs wc -l \
    | sort -n

  echo
  wc -l "$work_dir"/*.txt

  # syntax check
  #cc _tmp/filtered.c
}
|
178 |
|
# Rewrite one C file in place so that lines line_begin..line_end are wrapped:
#
#   #ifdef OVM_MAIN
#   #include "<rel_path>/<def_name>.def"
#   #else
#   ...original lines line_begin..line_end...
#   #endif
#
# Arguments:
#   $1  rel_path    file to edit (also the directory part of the include path)
#   $2  def_name    name used for the .def include
#   $3  line_begin  line number of the first line of the definition
#   $4  line_end    line number of the last line of the definition
#
# The original file is moved to _tmp/buf.txt and the edited text is generated
# from that copy.  All expansions are quoted so that paths containing
# whitespace or glob characters are handled as single arguments.
edit-file() {
  local rel_path=$1
  local def_name=$2
  local line_begin=$3
  local line_end=$4

  local def_path="${rel_path}/${def_name}.def"

  local tmp=_tmp/buf.txt

  # DESTRUCTIVE
  mv -- "$rel_path" "$tmp"

  gawk -v def_path="$def_path" \
       -v line_begin="$line_begin" \
       -v line_end="$line_end" '
  NR == line_begin {
    print("#ifdef OVM_MAIN")
    printf("#include \"%s\"\n", def_path)
    print("#else")
    print  # print the PyMethodDef line {
    next
  }
  NR == line_end {
    print  # print the }
    print("#endif");
    next
  }
  # All other lines just get printed
  {
    print
  }
  ' "$tmp" > "$rel_path"

  echo "Wrote $rel_path"
}
|
213 |
|
# Apply edit-file to the entries of $BASE_DIR/method-edit-list.txt that
# mention typeobject.c, by re-invoking this script with four fields at a time.
edit-all() {
  # Reversed so that edits to the same file work!  We are always inserting
  # lines.
  #tac $BASE_DIR/method-edit-list.txt | xargs -n 4 -- $0 edit-file

  # One-off editing
  # -F matches "typeobject.c" literally (as a regex the "." would match any
  # character); "$0" is quoted so a script path with spaces stays one word.
  grep -F -- typeobject.c "$BASE_DIR/method-edit-list.txt" \
    | tac | xargs -n 4 -- "$0" edit-file
}
|
224 |
|
# Print every multi-line "PyTypeObject NAME = { ... }" definition found in the
# given C files to stdout.  The first such match in each file is preceded by a
# "FILE <relative path>" marker line.  Definitions that open and close on a
# single line are printed as-is and only counted.  For each multi-line
# definition, one line
#   <relative path> <name> <first line> <last line>
# is written to $BASE_DIR/type-edit-list.txt.  Progress messages and the final
# summary go to stderr.
#
# Arguments:
#   $1    path prefix to strip from each file name (only its length is used)
#   $2..  C source files to scan
#
# Exits non-zero if a definition starts while another is still open.
extract-types() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/type-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  function maybe_print_file_header() {
    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  /PyTypeObject.*=.*\{.*\}/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Found it all on one line.  (awk comments start with "#"; the previous
    # "//" was parsed as an empty regexp, not as a comment.)
    print
    num_one_line_types++;
    next
  }

  /PyTypeObject.*=.*\{/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Remember the variable name for the edit list.  It used to be printed
    # without ever being assigned, which left that field empty.
    if (match($0, /PyTypeObject[[:space:]]+([a-zA-Z0-9_]+)/, m)) {
      def_name = m[1];
    } else {
      def_name = "";
    }
    printing = 1;
    line_begin = FNR;

    maybe_print_file_header()
    num_types++;
  }

  {
    if (printing) {
      print
    }
  }

  # Closing brace, possibly indented.  The POSIX class has to be written
  # [[:space:]]: a bare [:space:] is a bracket expression matching the literal
  # characters ":", "s", "p", "a", "c" and "e".
  /^[[:space:]]*\}/ {
    if (printing) {
      # Print the edit list for #ifdef #endif.
      line_end = FNR;
      printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
      printing = 0;
    }
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts ARGV[0] (the program name), so subtract it to get the
    # number of input files.
    printf("extract-types.awk: Found %d definitions in %d files (of %d files)\n",
           num_types, num_found, ARGC - 1) > "/dev/stderr";
    printf("extract-types.awk: Also found %d types on one line\n",
           num_one_line_types) > "/dev/stderr";
  }
  ' "$@"
}
|
295 |
|
# Print the extract-types output for every *.c file under $TARBALL_ROOT.
# extract-types is run by re-invoking this script through xargs.
extract-all-types() {
  # File names are passed NUL-delimited so that whitespace or quotes in a path
  # cannot split or mangle an argument; "$0" is quoted for the same reason.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-types "$TARBALL_ROOT/"
}
|
300 |
|
#
# Analysis
#

# Directory created by methods-audit and methods-tsv; methods-tsv writes
# methods.tsv there, report passes it to the R script, and run-for-release
# writes overview.txt there.
readonly METRICS_DIR=_tmp/metrics/cpython-defs
|
306 |
|
# Page through the current Oil definitions verbatim: the concatenated contents
# of every *.def file under build/oil-defs, piped to less.
show-oil() {
  local defs_dir=build/oil-defs

  find "$defs_dir" -name '*.def' \
    | xargs cat \
    | less
}
|
311 |
|
# Show in a condensed format: feed $BASE_DIR/preprocessed.txt to
# 'cpython-defs audit', echo the result while saving it to _tmp/methods.txt,
# then print that file's line count.
methods-audit() {
  local audit_out=_tmp/methods.txt

  mkdir -p "$METRICS_DIR"
  cpython-defs audit "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$audit_out"

  wc -l "$audit_out"
}
|
320 |
|
# Feed $BASE_DIR/preprocessed.txt to 'cpython-defs tsv', echoing the result
# while saving it as $METRICS_DIR/methods.tsv.
methods-tsv() {
  mkdir -p "$METRICS_DIR"

  cpython-defs tsv "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$METRICS_DIR/methods.tsv"
}
|
326 |
|
# Run metrics/cpython-defs.R, forwarding every argument unchanged.
_report() {
  local -r r_script=metrics/cpython-defs.R

  "$r_script" "$@"
}
|
330 |
|
# Invoke _report with the action 'metrics' and the metrics directory.
report() {
  local -a report_args=(metrics "$METRICS_DIR")

  _report "${report_args[@]}"
}
|
334 |
|
# Repeats what we did at the beginning of the release process, because _tmp/
# was deleted: regenerate the names and filtered definitions, then the TSV,
# and finally save the report output as $METRICS_DIR/overview.txt while also
# printing it.
run-for-release() {
  local overview=$METRICS_DIR/overview.txt

  oil-py-names
  filter-methods

  methods-tsv
  report | tee "$overview"
}
|
344 |
|
# Diff the sorted output of 'cpython-defs filtered' (_tmp/left.txt) against the
# sorted, de-duplicated base names of the files in the edit list
# (_tmp/right.txt).
unfiltered() {
  # NOTE(review): nothing in this file writes edit-list.txt; extract-methods
  # writes method-edit-list.txt.  Keep honoring the old name when it exists,
  # otherwise fall back to the file that is actually generated.
  local edit_list=$BASE_DIR/edit-list.txt
  if [[ ! -e $edit_list ]]; then
    edit_list=$BASE_DIR/method-edit-list.txt
  fi

  cpython-defs filtered | sort > _tmp/left.txt

  # 'grep -E' instead of the obsolescent 'egrep'.
  awk '{print $1}' "$edit_list" \
    | grep -E -o '[^/]+$' \
    | sort | uniq > _tmp/right.txt

  diff -u _tmp/{left,right}.txt
}
|
352 |
|
353 |
|
354 | "$@"
|