devtools/refactor.sh

OILS / devtools / refactor.sh View on Github | oilshell.org

279 lines, 165 significant

1	#!/usr/bin/env bash
2	#
3	# Junk drawer for refactoring. Also see test/lint.sh
4	#
5	# Usage:
6	# devtools/refactor.sh <function name>
7
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
11
12	source devtools/task-five.sh # run-task
13
# Rewrite identifiers of the form KIND_NAME in every */*.py file, in place.
#
# Arguments:
#   $1 - kind prefix to search for (the text before the underscore)
#   $2 - kind prefix to write out; defaults to $1
#
# Requires GNU sed: \L and \u are GNU case-conversion escapes.
change-kind() {
  local kind=$1
  local kind2=${2:-$kind}

  # First make it all lower case (this pass also adds the 'Id.' prefix)
  sed -r -i "s/${kind}_([A-Z]+)/Id.${kind2}_\\L\\1/g" */*.py

  # Now make the first one upper case
  sed -r -i "s/${kind}_([a-zA-Z]+)/${kind2}_\\u\\1/g" */*.py
}
24
# Rename a kind in 'Id.KIND_' and 'TokenKind.KIND' references in every
# */*.py file, in place.
#
# Arguments:
#   $1 - kind name as currently spelled
#   $2 - replacement name; defaults to $1 lower-cased with the first letter
#        capitalized
k2() {
  local kind=$1
  local lower=${kind,,}

  local title=${lower^}
  local replace=${2:-$title}

  sed -r -i "s/Id.${kind}_/Id.${replace}_/g" */*.py
  sed -r -i "s/TokenKind.${kind}/TokenKind.${replace}/g" */*.py
}
35
# Execute a bunch of find/replace pairs in a text file.
#
# Arguments:
#   $1 - file with one "PATTERN REPLACEMENT" pair per line; PATTERN is a sed
#        extended regex and is split from REPLACEMENT at the first whitespace
#   $2 - optional; when non-empty, */*.asdl files are rewritten as well as
#        */*.py
replace() {
  local file=$1
  local include_asdl=${2:-}

  # NOTE: Escaping here is messed up.  sed doesn't have --name like awk?
  # To match literal parentheses I had to double-escape like this
  # (shell-escape, then sed-escape).
  # MakeMatcher\\(\\) MATCHER

  local -a files=( */*.py )
  if test -n "$include_asdl"; then
    files+=( */*.asdl )
  fi

  # 'read' is used without -r on purpose: existing pair files rely on the
  # backslash processing described in the NOTE above.
  local pat replace
  while read pat replace; do
    sed -r -i "s/${pat}/${replace}/g" "${files[@]}"

    # word-anchored version
    #sed -r -i "s/\b${pat}\b/${replace}/g" "${files[@]}"
  done < "$file"
}
58
# One-off renames applied in place to every */*.py file.  Earlier renames are
# kept as commented-out or disabled history.
replace2() {
  #sed -r -i "s/^from osh import parse_lib/from frontend import parse_lib/g" */*.py
  #sed -r -i "s/^from core import libstr/from osh import string_ops/g" */*.py
  #sed -r -i "s/^from osh import word$/from osh import word_/g" */*.py
  #sed -r -i 's/from _devbuild.gen.syntax_asdl import word as osh_word/from _devbuild.gen.syntax_asdl import word/g' */*.py
  #sed -r -i 's/osh_word/word/g' */*.py

  # Disabled: 'test -n' of an empty string is always false, so the bool_expr
  # renames below never run.
  if test -n ''; then
    sed -r -i 's/bool_expr.BoolUnary/bool_expr.Unary/g' */*.py
    sed -r -i 's/bool_expr.BoolBinary/bool_expr.Binary/g' */*.py
    sed -r -i 's/bool_expr_e.BoolUnary/bool_expr_e.Unary/g' */*.py
    sed -r -i 's/bool_expr_e.BoolBinary/bool_expr_e.Binary/g' */*.py
    sed -r -i 's/bool_expr__BoolUnary/bool_expr__Unary/g' */*.py
    sed -r -i 's/bool_expr__BoolBinary/bool_expr__Binary/g' */*.py
  fi

  sed -r -i 's/command.SimpleCommand/command.Simple/g' */*.py
  sed -r -i 's/command_e.SimpleCommand/command_e.Simple/g' */*.py
  sed -r -i 's/command__SimpleCommand/command__Simple/g' */*.py
}
79
# Strip runs of trailing spaces (spaces only, not tabs) from every line of the
# files given as arguments, editing them in place.
trailing-ws() {
  sed --regexp-extended --in-place 's/[ ]+$//g' "$@"
}
83
84	#
85	# OLD STUFF
86	#
87
88	# Hm all of the solutions involve grep --perl or perl itself?
89	# https://stackoverflow.com/questions/3001177/how-do-i-grep-for-all-non-ascii-characters-in-unix
90
91	# Found a latin-1 character in Python-2.7.13/Lib/heapq.py. Had to add LC_ALL=C.
# Print, with line numbers, each line of the given files that contains a byte
# outside the 0x00-0x7F range.  LC_ALL=C makes grep match byte-wise.
grep-unicode() {
  LC_ALL=C grep --color='auto' --perl-regexp --line-number '[^\x00-\x7F]' "$@"
}
95
# List lines in */*.py that import the names 'runtime' or 'ast', with a '---'
# separator between the two result sets.
find-old-asdl() {
  # '|| true': finding no match is not an error for this report.
  grep -E 'import.*\bruntime\b' */*.py || true
  echo ---

  # Only tests left
  grep -E 'import.*\bast\b' */*.py || true
}
103
104	# This should be cleaned up
# Search the Python files directly under osh/, core/ and frontend/ for
# 'MostSpan'.
grep-span-funcs() {
  grep MostSpan osh/*.py core/*.py frontend/*.py
}
108
# Rename arg_vec / arg_vector identifiers to their cmd_val equivalents in one
# file, in place.
#
# Arguments:
#   $1 - path of the file to edit
cmd-val() {
  local file=$1

  # Order matters: the specific 'arg_vec.*' and 'arg_vector' rewrites must run
  # before the catch-all 'arg_vec' one.
  sed -i \
    -e 's/arg_vec.strs/cmd_val.argv/g' \
    -e 's/arg_vec.spids/cmd_val.arg_spids/g' \
    -e 's/arg_vector/cmd_value__Argv/g' \
    -e 's/arg_vec/cmd_val/g' \
    "$file"
}
116
# Rename the id_kind module to id_kind_def in every */*.py file, in place:
# first imports that end the line, then 'id_kind.' attribute references.
id-kind() {
  sed --regexp-extended -i 's/import id_kind$/import id_kind_def/' */*.py
  sed --regexp-extended -i 's/id_kind[.]/id_kind_def./g' */*.py
}
121
# Template for renaming shell options across */*.py, */*.md and */*.sh.
# As written, every substitution maps a name to itself, so files are rewritten
# with unchanged content; edit the right-hand side before using it.
options() {
  #sed -i 's/simple_echo/simple_echo/g' */*.{py,md,sh}

  #sed -i 's/simple_eval_builtin/simple_eval_builtin/g' */*.{py,md,sh}
  #sed -i 's/simple_trap/simple_trap/g' */*.{py,md,sh}

  sed -i 's/parse_backslash/parse_backslash/g' */*.{py,md,sh}
  sed -i 's/parse_backticks/parse_backticks/g' */*.{py,md,sh}
  sed -i 's/parse_dollar/parse_dollar/g' */*.{py,md,sh}
}
132
# For each file name listed in cpp/cc.txt, print "OLD NEW" and 'git mv'
# cpp/OLD to cpp/NEW, where NEW is leaky_<base>.cc and <base> is the name
# without its .cc extension and without a trailing '_leaky'.
rename() {
  local name base new

  while read -r name; do
    # Assigned separately from 'local' so a basename failure is not masked.
    base=$(basename "$name" .cc)
    new=leaky_${base%_leaky}.cc
    printf '%s %s\n' "$name" "$new"

    #sed -i "s/$name/$new/g" */*.{py,sh} || true
    git mv "cpp/$name" "cpp/$new"
  done < cpp/cc.txt
  return

  # NOTE: everything below is unreachable because of the 'return' above.  It
  # is the same rename for the headers listed in cpp/h.txt, kept disabled.
  while read -r name; do
    base=$(basename "$name" .h)
    new=leaky_${base%_leaky}.h
    printf '%s %s\n' "$name" "$new"

    #sed -i "s/$name/$new/g" */*.{sh,py,cc,h}
    git mv "cpp/$name" "cpp/$new"
  done < cpp/h.txt
  return
}
154
# For each file name listed in cpp/cc.txt, print "NAME <base>_leaky.cc" and
# move cpp/<base>_leaky.cc back to cpp/NAME with plain 'mv'.
revert() {
  local name new

  while read -r name; do
    new=$(basename "$name" .cc)_leaky.cc
    printf '%s %s\n' "$name" "$new"

    mv -- "cpp/$new" "cpp/$name"
  done < cpp/cc.txt
}
163
164	#
165	# Things we want to get rid of
166	#
167
# Run a search command, drop lines that mention _test.py, show the rest and
# save it, then print a blank line and the saved line count.
#
# Arguments:
#   $1   - output file for the filtered lines
#   $2.. - the command to run, with its arguments
show-usages() {
  local out=$1
  shift
  "$@" | grep -v _test.py | tee "$out"
  echo
  wc -l "$out"
}
175
176	# 2024-02: 36 usages. Maybe use mylib.ToMachineInt() or mylib.ToInt32(). exit
177	# status is a machine int?
178	# Sometimes the lexer will validate, as with converting 2>&1
179
# Report 'int(' calls in the files printed by 'metrics/source-code.sh
# osh-files'; results go through show-usages into _tmp/int-convert.
int-convert() {
  # The command substitution is intentionally unquoted so that its output is
  # word-split into separate file arguments.
  show-usages _tmp/int-convert \
    grep -E -n '\bint\(\b' $(metrics/source-code.sh osh-files)
}
184
185	# TokenVal() is generally bad in evaluators. However most of these are in
186	# error paths, which is OK.
187	#
188	# 2024-02: 11 instances
# Report whole-word uses of TokenVal in */*_eval.py; results go through
# show-usages into _tmp/TokenVal-eval.
TokenVal-eval() {
  show-usages _tmp/TokenVal-eval \
    grep -n -w TokenVal */*_eval.py
}
193
# Report whole-word uses of TokenVal in every */*.py file; results go through
# show-usages into _tmp/TokenVal-all.
TokenVal-all() {
  show-usages _tmp/TokenVal-all \
    grep -n -w TokenVal */*.py
}
198
199	# 2024-02: 71 left, mostly in ysh_ify which is the only thing that uses it
# Report lines containing 'span_id' or 'spid' in every */*.py file; results go
# through show-usages into _tmp/spid-all.
spid-all() {
  show-usages _tmp/spid-all \
    grep -E -n 'span_id|spid' */*.py
}
204
205	# 2024-04: 4 left in ysh_ify
# Report 'def' lines that mention 'span_id' or 'spid' in every */*.py file;
# results go through show-usages into _tmp/spid-sig.
spid-sig() {
  show-usages _tmp/spid-sig \
    grep -E -n 'def.*(span_id|spid)' */*.py
}
210
211	# 2024-04: 12 usages, mostly ysh_ify
# Report uses of runtime.NO_SPID in every */*.py file; results go through
# show-usages into _tmp/no-spid.
no-spid() {
  show-usages _tmp/no-spid \
    grep -E -n 'runtime.NO_SPID' */*.py
}
216
217	# 69 instances
# Report literal 'loc.Word(' calls in every */*.py file; results go through
# show-usages into _tmp/loc-word.
loc-word() {
  # should NOT wrap CompoundWord
  show-usages _tmp/loc-word \
    grep -F -n 'loc.Word(' */*.py
}
223
224	# 2023-08: 155
# Report uses of loc.Missing in every */*.py file; results go through
# show-usages into _tmp/loc-m.
loc-missing() {
  show-usages _tmp/loc-m \
    grep -E -n 'loc.Missing' */*.py
}
229
# Report uses of mylib.PYTHON in every */*.py file; results go through
# show-usages into _tmp/py.
mylib-python() {
  show-usages _tmp/py \
    grep -E -n 'mylib.PYTHON' */*.py
}
234
# Report literal 'CreateNull(alloc' occurrences in Python files one and two
# directory levels down, skipping lines that mention _devbuild or _test.py.
# The result is shown and also saved to _tmp/asdl.
asdl-create() {
  grep -F -n 'CreateNull(alloc' */*.py */*/*.py \
    | grep -E -v '_devbuild|_test.py' | tee _tmp/asdl
}
239
# Report literal 'readline(' occurrences in the files printed by
# 'metrics/source-code.sh oils-files'.
readline() {
  metrics/source-code.sh oils-files | xargs grep -F -n 'readline('
}
243
244	#
245	# To improve code formatting
246	#
247
# Print '# type' comment lines from */*.py that are at least 80 characters
# long, each prefixed with its length, sorted numerically by that length.
long-sigs() {
  # 32 of these
  grep -E --no-filename '^[ ]*# type' */*.py \
    | awk 'length($0) >= 80 { print length($0) $0 }' \
    | sort -n
}
254
# Like long-sigs, but keeps grep's file:line: prefix so the location can be
# jumped to.  Only output lines of at least 110 characters (prefix included)
# are kept; the result is shown and also saved to _tmp/long.
long-sigs-where() {
  # jump to the file
  grep -E -n '^[ ]*# type' */*.py \
    | awk 'length($0) >= 110 { print }' | tee _tmp/long
}
260
261	#
262	# Refactor tests
263	#
264
# Read stdin and print every run of letters, underscores and hyphens on its
# own line.
print-names() {
  grep -E -o '[a-zA-Z_-]+'
}
268
# Turn each stdin line NAME into the sed command 's/NAME/unquoted-NAME/g;'.
make-sed() {
  awk '{ name = $0; print "s/" name "/unquoted-" name "/g;" }'
}
272
# Build a sed script from the names found in _tmp/r (print-names, then
# make-sed), show it and save it to _tmp/sedr, then apply it in place to
# test/runtime-errors.sh.
test-files() {
  print-names < _tmp/r | make-sed | tee _tmp/sedr

  sed -i -f _tmp/sedr test/runtime-errors.sh
}
278
279	task-five "$@"