#!/usr/bin/env bash
#
# Word-frequency benchmark: counts whitespace-separated words from stdin.
# Usage: word_freq.sh [iters] < input.txt

set -o noglob  # for unquoted $text splitting — we WANT IFS word splitting
               # below, but must not let words like '*' glob-expand
 | 
| 4 | 
 | 
#######################################
# Split stdin into whitespace-separated words, one per line.
# Inputs:  all of stdin (read to EOF in one shot)
# Outputs: one word per line on stdout
#######################################
tokenize() {
  local text

  # Read everything at once; -d '' reads until NUL/EOF, -r keeps backslashes.
  read -r -d '' text

  # Deliberately unquoted: relies on IFS word splitting (script sets noglob
  # at the top so glob characters in the input don't expand).
  local word
  for word in $text; do
    # printf, not echo: echo would swallow words like "-n" or "-e" as flags.
    printf '%s\n' "$word"
  done
}
 | 
| 13 | 
 | 
#######################################
# Count word frequencies in stdin, repeating the count `iters` times
# (a benchmark knob), then print "count word" per line.
# Arguments: $1 - iteration count (default 100)
# Inputs:    all of stdin
# Outputs:   one "count word" line per distinct word (unspecified order)
#######################################
main() {
  local iters=${1:-100}

  # Read everything at once; -d '' reads until NUL/EOF.
  local text
  read -r -d '' text

  # Word -> count map.  declare inside a function scopes it locally.
  declare -A words

  # do it a bunch of times
  local i word old
  for (( i = 0; i < iters; ++i )); do

    # Relies on unquoted IFS splitting.  Difference with Python: Python will
    # give you \, but IFS splitting won't.
    for word in $text; do

      # Read-modify-write through a temp var: an unset entry arithmetic-
      # evaluates to 0, so the first occurrence correctly yields 1.
      old=${words["$word"]}
      words["$word"]=$((old + 1))

      # BUG in bash, see spec/assoc case #37
      #(( words["$word"] += 1 ))
      #(( words[\$word] += 1 ))
    done
  done

  # note: we can sort the output in the benchmark and assert that it's the same?
  # printf, not echo, so a word like "-n" isn't eaten as an echo flag.
  for word in "${!words[@]}"; do
    printf '%s %s\n' "${words["$word"]}" "$word"
  done
}
 | 
| 45 | 
 | 
# Entry point: pass script args through; input text arrives on stdin.
main "$@"
#tokenize "$@"
 |