| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Usage:
|
| 4 | # demo/04-unicode.sh <function name>
|
| 5 | #
|
| 6 | # TODO: Test what happens if you read binary data into a $(command sub)
|
| 7 | # - internal NUL
|
| 8 | # - invalid utf-8 sequence
|
| 9 | #
|
| 10 | # It would be nice to move some of this into test/gold? It depends on the
|
| 11 | # locale.
|
| 12 |
|
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
|
| 16 |
|
| 17 | # https://www.gnu.org/software/bash/manual/bash.html#Shell-Parameter-Expansion
|
| 18 | #
|
| 19 | # See doc/unicode.txt.
|
| 20 |
|
# Print the Greek small letter mu (U+03BC) wrapped in square brackets, encoded
# as UTF-8 and followed by a newline.
#
# Outputs: the 5 bytes '[' 0xCE 0xBC ']' '\n' on stdout.
#
# The bytes are emitted with printf rather than an external interpreter, so the
# result does not depend on which Python version (if any) `python` resolves to.
unicode-char() {
  # \316\274 (octal) == 0xCE 0xBC, the UTF-8 encoding of U+03BC.
  printf '[\316\274]\n'
}
|
| 24 |
|
| 25 | # http://stackoverflow.com/questions/602912/how-do-you-echo-a-4-digit-unicode-character-in-bash
|
# Print U+2620 three ways to compare how each `echo` handles escapes:
#   1. the bash builtin with the UTF-8 bytes spelled as \x escapes
#   2. the bash builtin with a \u escape
#   3. the external /bin/echo with the same \u escape
# See:
# http://stackoverflow.com/questions/602912/how-do-you-echo-a-4-digit-unicode-character-in-bash
echo-char() {
  local as_bytes="\xE2\x98\xA0"
  local as_code_point='\u2620'

  # An earlier experiment, left disabled:
  #echo -e "\x03\xbc"

  echo -e "$as_bytes"

  # Woah bash has this!  Interesting.  Not documented in "help echo" though.
  echo -e "$as_code_point"

  # GNU echo does not have it.
  /bin/echo -e "$as_code_point"
}
|
| 38 |
|
# Print "[μ]" where the non-ASCII character appears literally and unquoted in
# the script source.
#
# To type the character in vim:
#   1. i to enter Insert mode
#   2. Ctrl-V
#   3. u
#   4. 03bc -- 4 hex digits
raw-char() {
  # The brackets are backslash-escaped so the word is not treated as a glob
  # bracket expression: an unescaped [μ] would be replaced by a matching file
  # name in the current directory.  The μ itself is still unquoted.
  echo \[μ\]
}
|
| 47 |
|
# Echo the same kinds of text through each of bash's quoting forms, one per
# line: single quotes, double quotes, a $'...' C-escaped string with a \u
# escape, and a $"..." locale-translated string.
#
# Locale translation is not being implemented:
# https://www.gnu.org/software/bash/manual/html_node/Locale-Translation.html
quoted-chars() {
  local word
  for word in '[μ]' "[μ]" $'[\u03bc]' $"hello"; do
    echo "$word"
  done
}
|
| 57 |
|
# Show the current locale, then print the length and several slices of a
# string containing a non-ASCII character, to see how bash counts characters.
#
# Calls: unicode-char and raw-char (defined in this file).
# Outputs: locale state, $LANG, then the string, its length and its slices.
test-unicode() {
  locale  # displays state
  echo
  # LANG may be unset; default to empty so `set -o nounset` does not abort.
  echo "${LANG-}"

  unicode-char

  # Declare separately from the assignment so a failing command substitution
  # is not masked by the exit status of `local`.
  local u
  u=$(unicode-char)
  # Quoted: the value looks like the glob pattern [μ] and must not be expanded
  # against files in the current directory.
  echo "$u"

  # This changes bash behavior!

  #LANG=C
  echo "${#u}"  # three chars

  # OK, bash respects utf-8 when doing string slicing.  Does it have its own
  # unicode support, or does it use libc?
  echo "${u:0}" "${u:1}" "${u:2}"

  u=$(raw-char)
  echo "${u:0}" "${u:1}" "${u:2}"
}
|
| 80 |
|
# Pipe small JSON documents containing \u escapes from one Python process into
# another that parses them with json.loads, to see how the code points come
# out.  The producers and the first two consumers use Python 2 `print`
# statement syntax; the last consumer is python3.
json() {
  # Producers: each prints a double-quoted string containing a \u escape.
  local emit_mu='print "\"\u03bc\""'
  local emit_nul='print "\"[\u0000]\""'

  # Consumers: each reads all of stdin and parses it as JSON.
  local show_value='
import sys, json
print json.loads(sys.stdin.read())
'
  local show_repr='
import sys, json
print repr(json.loads(sys.stdin.read()))
'
  local show_repr_py3='
import sys, json
print(repr(json.loads(sys.stdin.read())))
'

  python -c "$emit_mu" | python -c "$show_value"

  # \u0000 code point seems to be representable
  python -c "$emit_nul" | python -c "$show_repr"

  # Works in python3 too.
  python -c "$emit_nul" | python3 -c "$show_repr_py3"
}
|
| 98 |
|
| 99 | # Right now it's split into (Lit_Other '\xce') and (Lit_Other '\xbc'). This is
|
| 100 | # fine for most purposes, although we could probably simplify this.
|
# Feed a command containing a literal non-ASCII character to bin/osh twice:
# first with -n, then without it.
osh-literal() {
  local snippet='echo [μ]'

  bin/osh -n -c "$snippet"
  # This works fine
  bin/osh -c "$snippet"
}
|
| 106 |
|
# Dispatch: run the command-line arguments as a command, so the first argument
# selects one of the functions above and the rest are passed to it (see Usage
# at the top of the file).  With no arguments this expands to nothing.
"$@"
|