| 1 | #!/usr/bin/env bash
|
#
# Survey of locale-dependent case folding in bash, Python 3, and node.js.
#
# Usage:
#   demo/survey-case-fold.sh <function name>

# Locale names are described at
# https://www.gnu.org/software/libc/manual/html_node/Locale-Names.html
# Example: de_DE.UTF-8
|
| 8 |
|
# Print locale information: the output of `locale -a`, the whole
# /usr/share/i18n/SUPPORTED file, and finally only the German (de_DE) and
# Turkish (tr_TR) lines of that file.  Sections are separated by a blank line.
#
# Returns the exit status of the final grep (non-zero when no line matches).
show() {
  # https://serverfault.com/questions/54591/how-to-install-change-locale-on-debian
  local supported=/usr/share/i18n/SUPPORTED

  locale -a
  echo

  cat "$supported"
  echo

  # German or Turkish.  grep -E replaces the deprecated egrep, and reading the
  # file directly avoids an extra cat process.
  grep -E 'de_DE|tr_TR' "$supported"
}
|
| 21 |
|
# Install the "locales" package with apt-get (run through sudo).
# Without it the Turkish and German locales are not available.
install() {
  local -a packages=(locales)

  sudo apt-get install "${packages[@]}"
}
|
| 26 |
|
# Enable the de_DE.UTF-8 and tr_TR.UTF-8 entries in /etc/locale.gen by
# stripping their leading "#", then regenerate locales with locale-gen.
# Both steps run through sudo.
config() {
  # The interactive alternative; it needs a UI, so it is left disabled:
  #sudo dpkg-reconfigure locales

  # Uncomment de_DE.UTF-8 and tr_TR.UTF-8 for inclusion in generation.
  # The dots are escaped so they match a literal "." rather than any
  # character, and both substitutions run in a single pass over the file.
  sudo sed -i \
    -e 's/^# *\(de_DE\.UTF-8\)/\1/' \
    -e 's/^# *\(tr_TR\.UTF-8\)/\1/' \
    /etc/locale.gen

  sudo locale-gen

  # Output
  # Generating locales (this might take a while)...
  #   de_DE.UTF-8... done
  #   en_US.UTF-8... done
  #   tr_TR.UTF-8... done
  # Generation complete.
}
|
| 44 |
|
# Run the var-op-bash and ysh-func-builtin suites through test/spec.sh, in
# that order.  The path is relative, so this must be run from the directory
# that contains test/.
spec-tests() {
  local suite

  for suite in var-op-bash ysh-func-builtin; do
    test/spec.sh "$suite"
  done
}
|
| 49 |
|
# Case conversion is locale dependent:
# https://stackoverflow.com/questions/30326167/getting-the-upper-or-lower-case-of-a-unicode-code-point-as-uint32-t

# Two issues
#
# - Does case folding depend on locale?
#   - No: Python
#   - Where it does, is the locale a global variable (bash) or a parameter
#     (JavaScript)?
#
# - Does case folding take into account MULTIPLE code points (not multiple
#   bytes)?
#   - No: bash, Python 2
#   - Yes: Python 3, node.js
|
| 63 |
|
| 64 |
|
# Compare case conversion across three runtimes with LANG set to a Turkish
# locale:
#
#   - bash:     ${x^} ${x^^} ${x,} ${x,,} on U+00DF (sharp s) and "i"
#   - python3:  str.upper() / str.lower() on U+00DF, "i" and "SS"
#   - node.js:  toUpperCase / toLowerCase and the toLocale* variants with
#               "tr" and "de", on U+00DF, "i" and "SS"
#
# Side effect: exports LANG=tr_TR.UTF-8 into the calling shell.
# Requires bash, python3, and either nodejs or node on PATH.
test-langs() {
  # Older Debian packages install the binary as "nodejs"; fall back to "node"
  # when that name is not available.
  local node_bin=nodejs
  command -v "$node_bin" > /dev/null 2>&1 || node_bin=node

  # OK this works
  # NOTE: LC_ALL or LC_CTYPE, if set in the environment, take precedence
  # over LANG.
  export LANG=tr_TR.UTF-8

  #export LANG=de_DE.UTF-8

  bash << 'EOF'
echo shell
german=$'\u00DF'
turkish='i'
for small in "$german" "$turkish"; do
  echo u "${small^}"
  echo U "${small^^}"

  echo l "${small,}"
  echo L "${small,,}"

  echo
done

EOF
  echo

  echo python3
  python3 -c '
import sys

# Python case folding is NOT locale sensitive!
#
# https://stackoverflow.com/questions/19030948/python-utf-8-lowercase-turkish-specific-letter

import locale
#locale.setlocale(locale.LC_ALL, "tr_TR")
# Does not work?
#locale.setlocale(locale.LC_ALL, "tr_TR.UTF-8")
locale.setlocale(locale.LC_ALL, "tr_TR.utf8")

#print(sys.getdefaultencoding())

for small in [u"\u00DF", "i"]:
  sys.stdout.buffer.write(small.upper().encode("utf-8") + b"\n")
  sys.stdout.buffer.write(small.lower().encode("utf-8") + b"\n")

# Emit the separator through the same byte stream as everything else.
# print() goes through the text layer, which buffers separately, so its
# output could land out of order when stdout is not a terminal.
sys.stdout.buffer.write(b"\n")
big ="SS"
sys.stdout.buffer.write(big.upper().encode("utf-8") + b"\n")
sys.stdout.buffer.write(big.lower().encode("utf-8") + b"\n")

'
  echo

  echo node.js

  "$node_bin" -e '
for (const small of ["\u00DF", "i", "SS"]) {
  console.log("no locale")
  console.log(small.toUpperCase())
  console.log(small.toLowerCase())
  console.log("")

  console.log("turkish")
  console.log(small.toLocaleUpperCase("tr"))
  console.log(small.toLocaleLowerCase("tr"))
  console.log("")

  console.log("german")
  console.log(small.toLocaleUpperCase("de"))
  console.log(small.toLocaleLowerCase("de"))
  console.log("")
}
console.log("")
'
}
|
| 139 |
|
# Dispatch: treat the command-line arguments as a command (normally one of
# the functions defined in this file, followed by its arguments) and run it.
# With no arguments there is nothing to run.
if (( $# > 0 )); then
  "$@"
fi
|
| 141 |
|
| 142 |
|