| 1 | #!/usr/bin/env bash
 | 
| 2 | #
 | 
| 3 | # Usage:
 | 
| 4 | #   build/cpython-defs.sh <function name>
 | 
| 5 | #
 | 
| 6 | # Example:
 | 
| 7 | #
 | 
| 8 | #   # make clean tree of .c files
 | 
| 9 | #   devtools/release.sh quick-oil-tarball
 | 
| 10 | #   devtools/release.sh test-oil-tar  # can Ctrl-C this
 | 
| 11 | #
 | 
| 12 | #   build/cpython-defs.sh oil-py-names  # extract names
 | 
| 13 | #   build/cpython-defs.sh filter-methods
 | 
| 14 | #
 | 
| 15 | # NOTE: 'build/ovm-compile.sh make-tar' is complex, so it's easier to just extract
 | 
| 16 | # the tarball, even though it leads to a weird dependency.
 | 
| 17 | 
 | 
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above this script's directory.
# "$0" is quoted so a checkout path containing whitespace still resolves, and
# '&&' makes a failed 'cd' abort (via errexit) instead of silently recording
# the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
 | 
| 24 | 
 | 
| 25 | source build/common.sh  # $PY27
 | 
| 26 | source build/dev-shell.sh  # R_LIBS_USER
 | 
| 27 | 
 | 
# Scratch directory for the intermediate files written by this script.
BASE_DIR=_tmp/cpython-defs

# List of names written by oil-py-names.
# Could be published in metrics?
PY_NAMES=_tmp/oil-py-names.txt

readonly BASE_DIR PY_NAMES
 | 
| 32 | 
 | 
# Print the .py files in the tarball in their original locations.  For slimming
# down the build.  Similar to build/metrics.sh linecounts-pydeps.
# Hm that doesn't seem to duplicate posixpath while this does?
#
# Reads _build/oil/opy-app-deps.txt and prints the first field of every line
# whose first field ends in '.py'.
oil-py-deps() {
  awk ' $1 ~ /\.py$/ { print $1 }' _build/oil/opy-app-deps.txt
}
 | 
| 39 | 
 | 
# Write the sorted, de-duplicated output of 'bin/opyc lex-names', run over the
# files listed by oil-py-deps, to $PY_NAMES, then print that file's line count.
# The whole pipeline is timed.
oil-py-names() {
  time oil-py-deps \
    | xargs bin/opyc lex-names \
    | sort \
    | uniq > "$PY_NAMES"

  wc -l "$PY_NAMES"
}
 | 
| 45 | 
 | 
# NOTE: We can replace os with posix.  Will save 700 lines of code, 25K + 25K.
# os.getenv() is a trivial wrapper around os.environ.get().  It gets
# initialized in posixmodule.c.
#
# Print the files listed by oil-py-deps that contain a reference of the form
# 'os.<something>'.  Uses 'grep -E' rather than the obsolescent 'egrep', which
# prints a warning on stderr with current GNU grep.
os-module-deps() {
  #oil-py-deps | xargs grep -E --no-filename -o '\bos\.[a-z]+' */*.py | sort | uniq -c |sort -n
  oil-py-deps | xargs grep -E -l '\bos\.'
}
 | 
| 53 | 
 | 
| 54 | # TODO:
 | 
| 55 | # Write to a separate file like _build/pydefs/intobject.include
 | 
| 56 | # #ifdef OVM_MAIN
 | 
| 57 | # #include "intobject.include"
 | 
| 58 | # #else
 | 
| 59 | # ...
 | 
| 60 | # #end
 | 
| 61 | #
 | 
# Should those files be checked in and edited by hand?  Or join them somehow
 | 
| 63 | # with oil-symbols.txt?
 | 
| 64 | # I think this is hard because of METHODS.
 | 
| 65 | # Maybe you should have a config file that controls it.  It takes a .include
 | 
| 66 | # file and then whitelist/blacklist, and then generates a new one.
 | 
| 67 | # could put it in build/pydefs-config.txt
 | 
| 68 | #
 | 
# And then reprint the PyMethodDef without docstrings?  It shouldn't be that
 | 
| 70 | # hard to parse.  You can almost do it with a regex, since commas don't appear
 | 
| 71 | # in the string.
 | 
| 72 | 
 | 
# Extract every 'static ... PyMethodDef name[] = { ... }' table from C files.
#
# Arguments:
#   $1   - path prefix to strip from each file name in the output
#   $2.. - C source files to scan
# Outputs:
#   stdout - for each file with a match, a 'FILE <relative path>' marker line,
#            then the text of every table found in that file
#   stderr - progress messages and a summary count
#   $BASE_DIR/method-edit-list.txt - one line per table:
#            '<relative path> <table name> <first line> <last line>'
#            (the format edit-all feeds to edit-file)
#
# Requires gawk: the three-argument match() is a gawk extension.
#
# NOTE(review): awk truncates the edit list the first time it writes to it in
# each invocation, so if xargs splits the file list across several invocations
# only the last batch survives -- confirm whether that matters in practice.
extract-methods() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/method-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  /static.*PyMethodDef/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # NOTE: We had to adjust stringobject.c and _weakref.c so that the name is
    # on one line!  Not a big deal.
    if (match($0, /static.*PyMethodDef ([a-zA-Z0-9_]+)\[\]/, m)) {
      def_name = m[1];
    } else {
      printf("%s:%d Could not parse declaration name\n",
             FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    printing = 1;
    line_begin = FNR;

    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  printing { print }

  # Looking for closing brace (with optional leading whitespace).  The POSIX
  # class has to sit inside a bracket expression: a bare [:space:] is just the
  # character set ":space" and never matches blanks.
  /^[[:space:]]*\}/ && printing {
    # Print the edit list for #ifdef #endif.
    line_end = FNR;
    printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
    printing = 0;
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts ARGV[0] (the program name), so subtract it to get the
    # number of input files.
    printf("extract-methods.awk: Found definitions in %d out of %d files\n",
           num_found, ARGC - 1) > "/dev/stderr";
  }
  ' "$@"
}
 | 
| 130 | 
 | 
# Run the C preprocessor over stdin (the trailing '-') and write the result to
# stdout, with OVM_MAIN and WTERMSIG defined and $PY27 on the include path.
# $PY27 is quoted so an include directory containing whitespace is passed to
# gcc as a single argument.
preprocess() {
  # TODO: Use PREPROC_FLAGS from build/ovm-compile.sh.
  # - What about stuff in pyconfig.h?
  # - Hack to define WTERMSIG!  We really need to include <sys/wait.h>, but
  # that causes parse errors in cpython_defs.py.  Really we should get rid of
  # this whole hack!
  # - WIFSTOPPED is another likely thing...
  gcc -I "$PY27" -E -D OVM_MAIN -D WTERMSIG -
}
 | 
| 140 | 
 | 
# Directory of the extracted tarball: whatever the glob below expands to.  With
# bash's default glob settings an unmatched pattern is left as the literal
# string (NOTE(review): a sourced file could change that -- confirm).
TARBALL_ROOT=$(echo _tmp/oil-tar-test/oil-*)
readonly TARBALL_ROOT
 | 
| 142 | 
 | 
# Print an '#include "pyconfig.h"' line followed by the output of
# extract-methods run (by re-invoking this script) over every *.c file under
# $TARBALL_ROOT.
#
# File names are passed NUL-delimited and "$0" is quoted, so paths containing
# whitespace reach extract-methods intact.
extract-all-methods() {
  echo '#include "pyconfig.h"'
  # 52 different instances.  Sometimes multiple ones per file.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-methods "$TARBALL_ROOT/"
}
 | 
| 149 | 
 | 
# Run build/cpython_defs.py with the given arguments and PYTHONPATH set to
# '.:vendor' for that one process.
cpython-defs() {
  # Annoying: this depends on Oils for 'R' and 'C', then indirectly imports on
  # 'typing' module.
  env PYTHONPATH='.:vendor' build/cpython_defs.py "$@"
}
 | 
| 155 | 
 | 
# Pipeline: extract-all-methods -> preprocess -> 'cpython-defs filter'.
#
# Intermediate results go to $BASE_DIR/extracted.txt and
# $BASE_DIR/preprocessed.txt; 'cpython-defs filter' is handed $PY_NAMES and the
# output directory build/oil-defs.  Finishes by printing line counts for the
# *.def files under build/oil-defs and for $BASE_DIR/*.txt.
filter-methods() {
  local work_dir=$BASE_DIR
  mkdir -p "$work_dir"

  extract-all-methods > "$work_dir/extracted.txt"
  preprocess < "$work_dir/extracted.txt" > "$work_dir/preprocessed.txt"

  local defs_dir=build/oil-defs
  mkdir -p "$defs_dir"

  #head -n 30 $tmp
  cpython-defs filter "$PY_NAMES" "$defs_dir" < "$work_dir/preprocessed.txt"

  echo
  find "$defs_dir" -name '*.def' \
    | xargs wc -l \
    | sort -n

  echo
  wc -l "$work_dir"/*.txt

  # syntax check
  #cc _tmp/filtered.c
}
 | 
| 178 | 
 | 
# Wrap one definition in a C file with an OVM_MAIN #ifdef, in place.
#
# Arguments:
#   $1 - path of the file to edit, relative to the current directory
#   $2 - name of the definition; the emitted #include is "<$1>/<$2>.def"
#   $3 - line number of the first line of the definition
#   $4 - line number of the last line of the definition
#
# The result looks like:
#   #ifdef OVM_MAIN
#   #include "<path>/<name>.def"
#   #else
#   ...original lines $3..$4...
#   #endif
#
# The original file is first moved to _tmp/buf.txt (which is overwritten on
# every call) and the edited text is written back to the original path.
# All path expansions are quoted so names containing whitespace work.
edit-file() {
  local rel_path=$1
  local def_name=$2
  local line_begin=$3
  local line_end=$4

  local def_path="${rel_path}/${def_name}.def"

  local tmp=_tmp/buf.txt

  # DESTRUCTIVE
  mv -- "$rel_path" "$tmp"

  gawk -v def_path="$def_path" \
       -v line_begin="$line_begin" \
       -v line_end="$line_end" '
  NR == line_begin {
    print("#ifdef OVM_MAIN")
    printf("#include \"%s\"\n", def_path)
    print("#else")
    print  # print the PyMethodDef line {
    next
  }
  NR == line_end {
    print  # print the }
    print("#endif");
    next
  }
  # All other lines just get printed
  {
    print
  }
  ' "$tmp" > "$rel_path"

  echo "Wrote $rel_path"
}
 | 
| 213 | 
 | 
# Apply edit-file (by re-invoking this script) to entries of
# $BASE_DIR/method-edit-list.txt, four fields at a time.  Currently restricted
# to entries mentioning typeobject.c.
#
# 'grep -F' matches the file name literally (the '.' is not a wildcard), and
# "$0" / the list path are quoted so whitespace in them is preserved.
edit-all() {
  # Reversed so that edits to the same file work!  We are always inserting
  # lines.
  #tac $BASE_DIR/method-edit-list.txt | xargs -n 4 -- $0 edit-file

  # One-off editing
  grep -F -- typeobject.c "$BASE_DIR/method-edit-list.txt" \
    | tac \
    | xargs -n 4 -- "$0" edit-file
}
 | 
| 224 | 
 | 
# Extract 'PyTypeObject name = { ... }' initializers from C files.
#
# Arguments:
#   $1   - path prefix to strip from each file name in the output
#   $2.. - C source files to scan
# Outputs:
#   stdout - initializers that fit on one line are printed as-is; for
#            multi-line ones, a 'FILE <relative path>' marker is printed once
#            per file, followed by the lines of each initializer
#   stderr - progress messages and summary counts
#   $BASE_DIR/type-edit-list.txt - one line per multi-line initializer:
#            '<relative path> <type name> <first line> <last line>'
#
# Requires gawk: the three-argument match() is a gawk extension.
#
# NOTE(review): awk truncates the edit list the first time it writes to it in
# each invocation, so if xargs splits the file list across several invocations
# only the last batch survives -- confirm whether that matters in practice.
extract-types() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/type-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  function maybe_print_file_header() {
    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  /PyTypeObject.*=.*\{.*\}/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Found it all on one line
    print
    num_one_line_types++;
    next
  }

  /PyTypeObject.*=.*\{/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Record the declared name for the edit list.  Left empty when it cannot
    # be parsed, rather than reusing the name of a previous definition.
    if (match($0, /PyTypeObject[[:space:]]+([a-zA-Z0-9_]+)/, m)) {
      def_name = m[1];
    } else {
      def_name = "";
    }
    printing = 1;
    line_begin = FNR;

    maybe_print_file_header()
    num_types++;
  }

  {
    if (printing) {
      print
    }
  }

  # Closing brace, with optional leading whitespace.  The POSIX class has to
  # sit inside a bracket expression: a bare [:space:] is just the character
  # set ":space" and never matches blanks.
  /^[[:space:]]*\}/ {
    if (printing) {
      # Print the edit list for #ifdef #endif.
      line_end = FNR;
      printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
      printing = 0;
    }
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts ARGV[0] (the program name), so subtract it to get the
    # number of input files.
    printf("extract-types.awk: Found %d definitions in %d files (of %d files)\n",
           num_types, num_found, ARGC - 1) > "/dev/stderr";
    printf("extract-types.awk: Also found %d types on one line\n",
           num_one_line_types) > "/dev/stderr";
  }
  ' "$@"
}
 | 
| 295 | 
 | 
# Run extract-types (by re-invoking this script) over every *.c file under
# $TARBALL_ROOT.
#
# File names are passed NUL-delimited and "$0" is quoted, so paths containing
# whitespace reach extract-types intact.
extract-all-types() {
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-types "$TARBALL_ROOT/"
}
 | 
| 300 | 
 | 
| 301 | #
 | 
| 302 | # Analysis
 | 
| 303 | #
 | 
| 304 | 
 | 
# Output directory for the analysis functions below.
METRICS_DIR=_tmp/metrics/cpython-defs
readonly METRICS_DIR
 | 
| 306 | 
 | 
# Show current Oil definitions literally: concatenate every *.def file under
# build/oil-defs and page the result with less.
show-oil() {
  find build/oil-defs -name '*.def' \
    | xargs cat \
    | less
}
 | 
| 311 | 
 | 
# Show in a condensed format: feed $BASE_DIR/preprocessed.txt to
# 'cpython-defs audit', echo the result while saving it to _tmp/methods.txt,
# then print that file's line count.
methods-audit() {
  local audit_out=_tmp/methods.txt

  mkdir -p "$METRICS_DIR"
  cpython-defs audit "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$audit_out"

  wc -l "$audit_out"
}
 | 
| 320 | 
 | 
# Feed $BASE_DIR/preprocessed.txt to 'cpython-defs tsv' and write the result
# both to stdout and to $METRICS_DIR/methods.tsv.
methods-tsv() {
  local tsv_path=$METRICS_DIR/methods.tsv

  mkdir -p "$METRICS_DIR"
  cpython-defs tsv "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$tsv_path"
}
 | 
| 326 | 
 | 
# Run metrics/cpython-defs.R, forwarding all arguments unchanged.
_report() {
  local r_script=metrics/cpython-defs.R
  "$r_script" "$@"
}
 | 
| 330 | 
 | 
# Invoke _report with the 'metrics' action on $METRICS_DIR.
report() {
  _report metrics "$METRICS_DIR"
}
 | 
| 334 | 
 | 
# Repeats what we did at the beginning of the release process, because _tmp/
# was deleted: regenerate the names, filter the methods, write the TSV, and
# save the report to $METRICS_DIR/overview.txt (also echoed to stdout).
run-for-release() {
  local step
  for step in oil-py-names filter-methods methods-tsv; do
    "$step"
  done

  report | tee "$METRICS_DIR/overview.txt"
}
 | 
| 344 | 
 | 
# Diff two sorted lists:
#   _tmp/left.txt  - output of 'cpython-defs filtered'
#   _tmp/right.txt - unique base names taken from the first column of
#                    $BASE_DIR/edit-list.txt
# Returns diff's exit status (non-zero when the lists differ).
#
# Uses 'grep -E' rather than the obsolescent 'egrep', which prints a warning on
# stderr with current GNU grep.
#
# NOTE(review): the extract functions in this file write method-edit-list.txt
# and type-edit-list.txt, not edit-list.txt -- confirm which file is meant.
unfiltered() {
  cpython-defs filtered | sort > _tmp/left.txt
  awk '{print $1}' "$BASE_DIR/edit-list.txt" \
    | grep -E -o '[^/]+$' \
    | sort | uniq > _tmp/right.txt
  diff -u _tmp/{left,right}.txt
}
 | 
| 352 | 
 | 
| 353 | 
 | 
# Dispatch: run the script's arguments as a command line.  The first argument
# is expected to name one of the functions defined above (see Usage at the top
# of the file); the remaining arguments are passed to it.
"$@"
 |