this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3# WARNING: This is a temporary copy of code from the cpython library to
4# facilitate bringup. Please file a task for anything you change!
5# flake8: noqa
6# fmt: off
7
8import sys
9import unittest
10from http.client import HTTPException
11from test.support import open_urlresource
12from unicodedata import normalize, is_normalized, unidata_version
13
14
# File name of the normalization conformance data set.
TESTDATAFILE = "NormalizationTest.txt"
# Download location of that file; the path component is the Unicode database
# version reported by the running interpreter's unicodedata module.
TESTDATAURL = "http://www.pythontest.net/unicode/" + "/".join(
    (unidata_version, TESTDATAFILE)
)
17
def check_version(testfile):
    """Tell whether the next line of *testfile* mentions ``unidata_version``.

    Consumes exactly one line from *testfile* (via ``readline``) and returns
    True when the interpreter's Unicode database version string occurs
    anywhere in it, False otherwise.
    """
    return unidata_version in testfile.readline()
21
class RangeError(Exception):
    """Signals a code point larger than ``sys.maxunicode`` in the test data."""
24
def NFC(str):
    """Return the NFC normalization of *str*.

    The parameter name shadows the builtin; it is kept as-is so existing
    callers are unaffected.
    """
    composed = normalize("NFC", str)
    return composed
27
def NFKC(str):
    """Return the NFKC normalization of *str*.

    The parameter name shadows the builtin; it is kept as-is so existing
    callers are unaffected.
    """
    composed = normalize("NFKC", str)
    return composed
30
def NFD(str):
    """Return the NFD normalization of *str*.

    The parameter name shadows the builtin; it is kept as-is so existing
    callers are unaffected.
    """
    decomposed = normalize("NFD", str)
    return decomposed
33
def NFKD(str):
    """Return the NFKD normalization of *str*.

    The parameter name shadows the builtin; it is kept as-is so existing
    callers are unaffected.
    """
    decomposed = normalize("NFKD", str)
    return decomposed
36
def unistr(data):
    """Build a string from space-separated hexadecimal code points.

    *data* is split on single spaces and every piece is parsed with
    ``int(piece, 16)``; the resulting code points are turned into characters
    and concatenated.

    Raises:
        ValueError: if a piece is not valid hexadecimal (raised by ``int``).
        RangeError: if any code point is greater than ``sys.maxunicode``.
    """
    # Parse everything first so a malformed field is reported before any
    # range problem, then validate, then convert.
    code_points = [int(piece, 16) for piece in data.split(" ")]
    if any(cp > sys.maxunicode for cp in code_points):
        raise RangeError
    return "".join(map(chr, code_points))
43
class NormalizationTest(unittest.TestCase):
    """Checks ``normalize`` and ``is_normalized`` against NormalizationTest.txt."""

    # TODO(T66751447): Normalization module fails to download test data
    @unittest.skip("Normalization module fails to download test data")
    def test_main(self):
        """Fetch the test data from TESTDATAURL and run the checks on it."""
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=check_version)
        except PermissionError:
            # PermissionError is an OSError subclass, so it has to be handled
            # before the generic clause below.
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException):
            self.fail(f"Could not retrieve {TESTDATAURL}")

        with testdata:
            self.run_normalization_tests(testdata)

    def run_normalization_tests(self, testdata):
        """Validate every data line of *testdata*, then sweep all code points.

        *testdata* is iterated line by line. Text after ``#`` is dropped and
        blank lines are ignored. A line starting with ``@Part`` selects the
        current part. Every other line is split on ``;``; the last piece is
        discarded and the remaining five are decoded with ``unistr`` into
        ``c1`` .. ``c5``, which must satisfy:

            c2 == NFC(c1)  == NFC(c2)  == NFC(c3)
            c4 == NFC(c4)  == NFC(c5)
            c3 == NFD(c1)  == NFD(c2)  == NFD(c3)
            c5 == NFD(c4)  == NFD(c5)
            c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
            c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)

        and ``is_normalized`` must accept c2/c4 for NFC, c3/c5 for NFD, c4 for
        NFKC and c5 for NFKD.

        Finally, every code point in ``range(sys.maxunicode + 1)`` whose
        character was not seen as ``c1`` in ``@Part1`` must be left unchanged
        by all four normalization forms.
        """
        part = None
        # Characters that appeared as c1 while in @Part1; only membership
        # matters, so a set is used.
        part1_data = set()

        for line in testdata:
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            try:
                c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                            NFKC(c3) == NFKC(c4) == NFKC(c5),
                            line)
            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                            NFKD(c3) == NFKD(c4) == NFKD(c5),
                            line)

            # Pass the data line as the failure message here as well, so a
            # failing is_normalized check identifies the offending input.
            self.assertTrue(is_normalized("NFC", c2), line)
            self.assertTrue(is_normalized("NFC", c4), line)

            self.assertTrue(is_normalized("NFD", c3), line)
            self.assertTrue(is_normalized("NFD", c5), line)

            self.assertTrue(is_normalized("NFKC", c4), line)
            self.assertTrue(is_normalized("NFKD", c5), line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data
        for c in range(sys.maxunicode + 1):
            X = chr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Check for bug 834676: this normalize call must simply not raise."""
        normalize('NFC', '\ud55c\uae00')
123
124
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()