#!/usr/bin/env bash
#
# Word-frequency benchmark.  Reads text on stdin and tallies words by
# relying on the shell's IFS word splitting.

# Disable pathname expansion so an unquoted $text only word-splits and a
# token like '*' is never expanded into filenames.
set -o noglob
|
#######################################
# Print each whitespace-separated word of stdin on its own line.
# Globals:   none (text is local)
# Inputs:    stdin - the text to split
# Outputs:   one word per line on stdout
#######################################
tokenize() {
  local text

  # Read all of stdin once; -d '' means "no delimiter", i.e. read to EOF.
  read -r -d '' text

  # Deliberately unquoted: relies on IFS word splitting (and on the
  # file-level 'set -o noglob' to suppress pattern expansion).
  for word in $text; do
    # printf instead of echo: a word such as '-n' would confuse echo.
    printf '%s\n' "$word"
  done
}
|
| 13 |
|
#######################################
# Count word frequencies in stdin, repeating the tally $1 times so the
# workload is large enough to benchmark.
# Arguments: $1 - number of counting iterations (default 100)
# Inputs:    stdin - the text to tokenize
# Outputs:   one "count word" pair per line, in bash's hash order
#######################################
main() {
  local iters=${1:-100}
  local text i word old

  # Read all of stdin once; -d '' means "no delimiter", i.e. read to EOF.
  read -r -d '' text

  # Associative array: word -> occurrence count.
  local -A words

  # Tally the same input iters times.
  for (( i = 0; i < iters; ++i )); do

    # Deliberately unquoted $text: relies on IFS word splitting.
    # Difference with Python's str.split(): IFS splitting won't hand you
    # a backslash token the same way.
    for word in $text; do

      # Read-modify-write through a temporary.  The direct forms
      #   (( words["$word"] += 1 ))  /  (( words[\$word] += 1 ))
      # hit a bash bug with keys that need quoting (spec/assoc case #37).
      old=${words["$word"]}
      words["$word"]=$((old + 1))
    done
  done

  # NOTE(review): iteration order over "${!words[@]}" is unspecified;
  # sort the output in the benchmark harness before comparing.
  for word in "${!words[@]}"; do
    # printf instead of echo: a word such as '-n' would confuse echo.
    printf '%s %s\n' "${words["$word"]}" "$word"
  done
}
|
| 45 |
|
# Entry point.  Swap the comment to run the tokenize-only variant instead.
main "$@"
#tokenize "$@"
|