this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3import unicodedata
4import unittest
5
6
class UnicodedataTests(unittest.TestCase):
    """Checks for `unicodedata` and its `ucd_3_2_0` database object.

    Methods named ``test_UCD_*`` go through ``unicodedata.ucd_3_2_0`` (or the
    ``unicodedata.UCD`` type); the ``*_uses_current_version`` methods call the
    module-level functions instead.
    """

    def test_UCD_dunder_new_raises_type_error(self):
        """Calling the `UCD` type with no arguments raises TypeError."""
        self.assertRaises(TypeError, unicodedata.UCD)

    def test_UCD_bidirectional_uses_old_version(self):
        """`ucd_3_2_0.bidirectional` returns the classes pinned below."""
        bidirectional = unicodedata.ucd_3_2_0.bidirectional
        self.assertEqual(bidirectional(" "), "WS")
        # "+" is the one entry that differs from the current-version test ("ES").
        self.assertEqual(bidirectional("+"), "ET")
        self.assertEqual(bidirectional("A"), "L")
        self.assertEqual(bidirectional("\uFFFE"), "")
        self.assertEqual(bidirectional("\U00020000"), "L")

    def test_UCD_category_uses_old_version(self):
        """`ucd_3_2_0.category` returns the general categories pinned below."""
        category = unicodedata.ucd_3_2_0.category
        self.assertEqual(category("A"), "Lu")
        self.assertEqual(category("a"), "Ll")
        # U+00A7 and U+1012A differ from the current-version test ("Po", "No").
        self.assertEqual(category("\u00A7"), "So")
        self.assertEqual(category("\uFFFE"), "Cn")
        self.assertEqual(category("\U0001012A"), "Cn")
        self.assertEqual(category("\U00020000"), "Lo")

    def test_UCD_decimal_uses_old_version(self):
        """`ucd_3_2_0.decimal` returns decimal values and rejects bad input."""
        decimal = unicodedata.ucd_3_2_0.decimal
        self.assertEqual(decimal("5"), 5)
        self.assertEqual(decimal("\u0E50"), 0)

        # changed since 3.2.0: the current-version test expects ValueError here
        self.assertEqual(decimal("\u00B2"), 2)

        # Missing argument and multi-character strings are type errors.
        with self.assertRaises(TypeError):
            decimal()
        with self.assertRaises(TypeError):
            decimal("xx")
        # Characters without a decimal value are value errors.
        with self.assertRaises(ValueError):
            decimal("a")
        with self.assertRaises(ValueError):
            decimal("\u00BD")

    def test_UCD_decomposition_uses_old_version(self):
        """`ucd_3_2_0.decomposition` returns the mapping strings pinned below."""
        decomposition = unicodedata.ucd_3_2_0.decomposition
        self.assertEqual(decomposition("\uFFFE"), "")
        self.assertEqual(decomposition("\u00BC"), "<fraction> 0031 2044 0034")

        # unassigned in 3.2.0
        self.assertEqual(decomposition("\u0221"), "")

        with self.assertRaises(TypeError):
            decomposition()
        with self.assertRaises(TypeError):
            decomposition("xx")

    def test_UCD_digit_uses_old_version(self):
        """`ucd_3_2_0.digit` returns digit values and rejects bad input."""
        digit = unicodedata.ucd_3_2_0.digit
        self.assertEqual(digit("2"), 2)
        self.assertEqual(digit("\u0E50"), 0)
        self.assertEqual(digit("\u00B2"), 2)

        with self.assertRaises(TypeError):
            digit()
        with self.assertRaises(TypeError):
            digit("xx")
        with self.assertRaises(ValueError):
            digit("a")
        with self.assertRaises(ValueError):
            digit("\u00BD")

    def test_UCD_normalize_with_non_UCD_raises_type_error(self):
        """Calling `UCD.normalize` unbound with a non-UCD receiver raises TypeError."""
        self.assertRaises(TypeError, unicodedata.UCD.normalize, 1, "NFC", "foo")

    def test_UCD_normalize_with_non_str_form_raises_type_error(self):
        """A non-str first argument (the form) raises TypeError."""
        self.assertRaises(TypeError, unicodedata.ucd_3_2_0.normalize, 2, "foo")

    def test_UCD_normalize_with_non_str_src_raises_type_error(self):
        """A non-str second argument (the source string) raises TypeError."""
        self.assertRaises(TypeError, unicodedata.ucd_3_2_0.normalize, "foo", 2)

    def test_UCD_normalize_with_empty_str_ignores_form(self):
        """An empty source string comes back as "" even with the form "invalid"."""
        normalized = unicodedata.ucd_3_2_0.normalize("invalid", "")
        self.assertEqual(normalized, "")

    def test_UCD_normalize_with_invalid_form_raises_value_error(self):
        """The form "invalid" with a non-empty source string raises ValueError."""
        self.assertRaises(
            ValueError, unicodedata.ucd_3_2_0.normalize, "invalid", "foo"
        )

    def test_UCD_normalize_uses_old_version(self):
        """NFD through `ucd_3_2_0` maps the five code points as pinned below."""
        source = "\U0002F868 \U0002F874 \U0002F91F \U0002F95F \U0002F9bF"
        expected = "\U0002136A \u5F33 \u43AB \u7AAE \u4D57"
        self.assertEqual(unicodedata.ucd_3_2_0.normalize("NFD", source), expected)

    def test_bidirectional_uses_current_version(self):
        """Module-level `bidirectional` returns the classes pinned below."""
        bidirectional = unicodedata.bidirectional
        self.assertEqual(bidirectional(" "), "WS")
        self.assertEqual(bidirectional("+"), "ES")
        self.assertEqual(bidirectional("A"), "L")
        self.assertEqual(bidirectional("\uFFFE"), "")
        self.assertEqual(bidirectional("\U00020000"), "L")

    def test_category_uses_current_version(self):
        """Module-level `category` returns the general categories pinned below."""
        category = unicodedata.category
        self.assertEqual(category("A"), "Lu")
        self.assertEqual(category("a"), "Ll")
        self.assertEqual(category("\u00A7"), "Po")
        self.assertEqual(category("\uFFFE"), "Cn")
        self.assertEqual(category("\U0001012A"), "No")
        self.assertEqual(category("\U00020000"), "Lo")

    def test_decomposition_uses_current_version(self):
        """Module-level `decomposition` returns the mapping strings pinned below."""
        decomposition = unicodedata.decomposition
        # unassigned in 3.2.0; expected to have an empty decomposition here too
        self.assertEqual(decomposition("\u0221"), "")
        self.assertEqual(decomposition("\uFFFE"), "")
        self.assertEqual(decomposition("\u00BC"), "<fraction> 0031 2044 0034")

        with self.assertRaises(TypeError):
            decomposition()
        with self.assertRaises(TypeError):
            decomposition("xx")

    def test_decimal_uses_current_version(self):
        """Module-level `decimal` returns decimal values and rejects bad input."""
        decimal = unicodedata.decimal
        self.assertEqual(decimal("2"), 2)
        self.assertEqual(decimal("\u0E50"), 0)

        with self.assertRaises(TypeError):
            decimal()
        with self.assertRaises(TypeError):
            decimal("xx")
        with self.assertRaises(ValueError):
            decimal("a")
        # U+00B2 yields 2 through ucd_3_2_0 but is rejected here.
        with self.assertRaises(ValueError):
            decimal("\u00B2")
        with self.assertRaises(ValueError):
            decimal("\u00BD")

    def test_digit_uses_current_version(self):
        """Module-level `digit` returns digit values and rejects bad input."""
        digit = unicodedata.digit
        self.assertEqual(digit("2"), 2)
        self.assertEqual(digit("\u0E50"), 0)
        self.assertEqual(digit("\u00B2"), 2)

        with self.assertRaises(TypeError):
            digit()
        with self.assertRaises(TypeError):
            digit("xx")
        with self.assertRaises(ValueError):
            digit("a")
        with self.assertRaises(ValueError):
            digit("\u00BD")

    def test_lookup_uses_current_version(self):
        """Module-level `lookup` resolves the names below and rejects the rest."""
        lookup = unicodedata.lookup
        # Mixed-case spellings of ordinary names are expected to resolve.
        self.assertEqual(lookup("latin CAPITAL Letter a"), "A")
        self.assertEqual(lookup("digit zero"), "0")
        self.assertEqual(lookup("TAI VIET LETTER LOW VO"), "\uAAAA")

        # Hangul
        self.assertEqual(lookup("Hangul jongseong RIEUL-PIEUP"), "\u11B2")
        self.assertEqual(lookup("HANGUL SYLLABLE JJWAENH"), "\uCAFA")

        # CJK
        self.assertEqual(lookup("CJK UNIFIED IDEOGRAPH-35AB"), "\u35AB")
        self.assertEqual(lookup("CJK UNIFIED IDEOGRAPH-20000"), "\U00020000")

        # Named Sequences
        # NOTE(review): this repeats the single-code-point TAI VIET lookup above
        # rather than a multi-code-point named sequence -- confirm what was
        # intended before replacing it.
        self.assertEqual(lookup("TAI VIET LETTER LOW VO"), "\uAAAA")

        # Unknown name, and lower-case Hangul-syllable / CJK-ideograph names.
        with self.assertRaises(KeyError):
            lookup("letter b")
        with self.assertRaises(KeyError):
            lookup("hangul syllable son")
        with self.assertRaises(KeyError):
            lookup("cjk unified ideograph-20000")

    def test_numeric_uses_current_version(self):
        """Module-level `numeric` returns float values and honours a default."""
        numeric = unicodedata.numeric
        self.assertEqual(numeric("7"), 7.0)
        self.assertEqual(numeric("\u00BE"), 0.75)
        self.assertEqual(numeric("\u09F7"), 0.25)
        self.assertEqual(numeric("\U000109D3"), 200.0)
        self.assertEqual(numeric("\U00020AFD"), 3.0)

        # A supplied default is returned as-is instead of raising.
        self.assertEqual(numeric("A", "default"), "default")

        # Non-str, empty and multi-character arguments are type errors.
        with self.assertRaises(TypeError):
            numeric(2)
        with self.assertRaises(TypeError):
            numeric("")
        with self.assertRaises(TypeError):
            numeric("foo")
        # Without a default, a character lacking a numeric value raises.
        with self.assertRaises(ValueError):
            numeric("A")
        with self.assertRaises(ValueError):
            numeric("\u4EAC")

    def test_old_unidata_version(self):
        """`ucd_3_2_0.unidata_version` is the string "3.2.0"."""
        version = unicodedata.ucd_3_2_0.unidata_version
        self.assertEqual(version, "3.2.0")

    def test_ucd_3_2_0_isinstance_of_UCD(self):
        """`ucd_3_2_0` is an instance of `unicodedata.UCD`."""
        old_database = unicodedata.ucd_3_2_0
        self.assertIsInstance(old_database, unicodedata.UCD)
177
178
if __name__ == "__main__":
    # Run every test case in this module when the file is executed as a script;
    # importing the module must not start the test runner.
    unittest.main()