demo/04-unicode.sh

OILS / demo / 04-unicode.sh View on Github | oilshell.org

107 lines, 41 significant

1	#!/usr/bin/env bash
2	#
3	# Usage:
4	# demo/04-unicode.sh <function name>
5	#
6	# TODO: Test what happens if you read binary data into a $(command sub)
7	# - internal NUL
8	# - invalid utf-8 sequence
9	#
10	# It would be nice to move some of this into test/gold? It depends on the
11	# locale.
12
# Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
16
17	# https://www.gnu.org/software/bash/manual/bash.html#Shell-Parameter-Expansion
18	#
19	# See doc/unicode.txt.
20
# Print "[μ]" (U+03BC in square brackets) as UTF-8 bytes, followed by a
# newline.
#
# The two bytes of the character (0xCE 0xBC) are spelled out explicitly, so
# the output does not depend on the locale and no Python 2 interpreter is
# required.
unicode-char() {
  printf '[\xce\xbc]\n'
}
24
25	# http://stackoverflow.com/questions/602912/how-do-you-echo-a-4-digit-unicode-character-in-bash
# Print U+2620 three ways: as raw UTF-8 bytes, via the \u escape of bash's
# builtin echo, and via the external /bin/echo.
echo-char() {
  local utf8_bytes='\xE2\x98\xA0'
  local code_point='\u2620'

  # Hex escapes for the three UTF-8 bytes.
  # (An earlier experiment, left disabled: echo -e "\x03\xbc")
  echo -e "$utf8_bytes"

  # The bash builtin understands \uHHHH, even though "help echo" does not
  # mention it.
  echo -e "$code_point"

  # GNU echo has no \u escape.
  /bin/echo -e "$code_point"
}
38
# Print "[μ]", with the μ typed directly into this source file as UTF-8 and
# the word left unquoted.
#
# Entering the character in vim:
#   1. i      -- enter Insert mode
#   2. Ctrl-V
#   3. u
#   4. 03bc   -- the 4 hex digits of the code point
#
# Note: the unquoted word is also a glob pattern, so a matching filename in
# the current directory may change what is printed.
raw-char() {
  echo [μ]
}
47
# Print the same text through each of the shell's quoting forms, one per line:
#   single quotes, double quotes, a C-escaped $'...' string, and a
#   locale-translated $"..." string.
#
# The $"..." form is not being implemented; see
# https://www.gnu.org/software/bash/manual/html_node/Locale-Translation.html
quoted-chars() {
  local form
  for form in \
    '[μ]' \
    "[μ]" \
    $'[\u03bc]' \
    $"hello"
  do
    echo "$form"
  done
}
57
# Show how bash measures and slices a string that contains a multi-byte UTF-8
# character, next to the locale settings currently in effect.
#
# Uses the sibling helpers unicode-char and raw-char to obtain "[μ]".  The
# length and slices printed depend on the locale.
test-unicode() {
  locale  # displays state
  echo
  # LANG may be unset; default to empty so 'set -o nounset' does not abort.
  echo "${LANG:-}"

  unicode-char

  # Declare and assign separately so that a failing command substitution is
  # not masked by the exit status of 'local'.
  local u
  u=$(unicode-char)
  # $u holds "[μ]", which is a glob pattern when left unquoted, so every
  # expansion below is quoted to print the text itself.
  echo "$u"

  # This changes bash behavior!

  #LANG=C
  echo "${#u}"  # three chars

  # OK bash respects utf-8 when doing string slicing.  Does it have its own
  # unicode support, or does it use libc?
  echo "${u:0}" "${u:1}" "${u:2}"

  u=$(raw-char)
  echo "${u:0}" "${u:1}" "${u:2}"
}
80
# Feed JSON string literals containing \uXXXX escapes to Python's json module
# and print what it decodes.
#
# Each producer writes the JSON text verbatim (the backslash escape is left
# for the JSON decoder to interpret) and pipes it to a consumer that reads
# stdin, calls json.loads, and prints the result.  The consumer code uses the
# print(...) form, which is valid in both Python 2 and Python 3.
json() {
  printf '%s\n' '"\u03bc"' | python -c '
import sys, json
print(json.loads(sys.stdin.read()))
'

  # The \u0000 code point seems to be representable
  printf '%s\n' '"[\u0000]"' | python -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'
  # Works in python3 too.
  printf '%s\n' '"[\u0000]"' | python3 -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'
}
98
# Run a command containing a literal, unquoted μ through bin/osh: first with
# -n, then executing it normally.
#
# Right now the character is split into (Lit_Other '\xce') and
# (Lit_Other '\xbc').  This is fine for most purposes, although we could
# probably simplify this.
osh-literal() {
  local snippet='echo [μ]'

  bin/osh -n -c "$snippet"
  # This works fine
  bin/osh -c "$snippet"
}
106
# Dispatch: run the function named by the first argument, passing along any
# remaining arguments, e.g. `demo/04-unicode.sh test-unicode`.
"${@}"