#!/usr/bin/env bash
#
# Word-frequency benchmark: reads text on stdin and counts words by
# repeatedly splitting it with unquoted expansion.
#
# Globbing is disabled because the unquoted $text expansions below rely on
# IFS word splitting only; noglob keeps glob characters in the input (e.g.
# '*') from being expanded against the filesystem.
set -o noglob # for unquoted $text splitting
|
# Print each whitespace-separated word of stdin on its own line.
# Reads:   all of stdin (up to a NUL byte or EOF)
# Outputs: one word per line on stdout
tokenize() {
  local text word

  # Read stdin in one shot; read returns non-zero at EOF, which is fine.
  read -r -d '' text

  # Deliberately unquoted: relies on IFS word splitting (and on noglob,
  # set at the top of the file, so glob characters are not expanded).
  for word in $text; do
    # printf, not echo: echo would swallow words like '-n' or '-e'.
    printf '%s\n' "$word"
  done
}
|
13 |
|
# Count word frequencies over stdin, repeated $1 times (benchmark knob).
# Arguments: $1 - number of counting passes (default 100)
# Outputs:   "<count> <word>" per distinct word, in unspecified order
main() {
  local iters=${1:-100}

  local text
  # Read stdin in one shot; read returns non-zero at EOF, which is fine.
  read -r -d '' text

  # Word -> occurrence count.  'declare' inside a function is local.
  declare -A words

  local i word old
  # Do the counting a bunch of times so the work is measurable.
  for (( i = 0; i < iters; ++i )); do
    # Deliberately unquoted: relies on IFS word splitting (noglob is set at
    # the top of the file).  Difference with Python: Python will give you \,
    # but IFS splitting won't.
    for word in $text; do
      # Read-modify-write via a plain assignment.  The arithmetic forms
      # below trip a bash bug (see spec/assoc case #37), so avoid them:
      #(( words["$word"] += 1 ))
      #(( words[\$word] += 1 ))
      old=${words["$word"]}
      words["$word"]=$((old + 1))
    done
  done

  # Associative-array iteration order is unspecified; sort downstream if a
  # stable comparison is needed.
  for word in "${!words[@]}"; do
    printf '%s %s\n' "${words["$word"]}" "$word"
  done
}
|
45 |
|
# Entry point: pass the iteration count as $1 (default 100), text on stdin.
main "$@"
#tokenize "$@"
|