this repo has no description
at trunk 126 lines 4.2 kB view raw
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
# WARNING: This is a temporary copy of code from the cpython library to
# facilitate bringup. Please file a task for anything you change!
# flake8: noqa
# fmt: off

import sys
import unittest
from http.client import HTTPException
from test.support import open_urlresource
from unicodedata import normalize, is_normalized, unidata_version


TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE

def check_version(testfile):
    """Return True if the first line of *testfile* mentions ``unidata_version``.

    Passed as the ``check`` callback to ``open_urlresource`` in ``test_main``.
    Consumes one line from *testfile*.
    """
    hdr = testfile.readline()
    return unidata_version in hdr

class RangeError(Exception):
    """Raised by ``unistr`` for a code point above ``sys.maxunicode``."""

# NOTE: the parameter name ``str`` shadows the builtin; it is kept as-is so
# this copy stays aligned with the code it was taken from.
def NFC(str):
    """Return *str* normalized to form NFC."""
    return normalize("NFC", str)

def NFKC(str):
    """Return *str* normalized to form NFKC."""
    return normalize("NFKC", str)

def NFD(str):
    """Return *str* normalized to form NFD."""
    return normalize("NFD", str)

def NFKD(str):
    """Return *str* normalized to form NFKD."""
    return normalize("NFKD", str)

def unistr(data):
    """Convert space-separated hexadecimal code points into a string.

    For example, ``"0041 0042"`` becomes ``"AB"``.

    Raises:
        RangeError: if any code point is greater than ``sys.maxunicode``.
    """
    data = [int(x, 16) for x in data.split(" ")]
    for x in data:
        if x > sys.maxunicode:
            raise RangeError
    return "".join([chr(x) for x in data])

class NormalizationTest(unittest.TestCase):
    """Checks ``unicodedata.normalize`` against the NormalizationTest.txt data."""

    # TODO(T66751447): Normalization module fails to download test data
    @unittest.skip("Normalization module fails to download test data")
    def test_main(self):
        """Open the test data file and run every check against it."""
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=check_version)
        except PermissionError:
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException):
            self.fail(f"Could not retrieve {TESTDATAURL}")

        with testdata:
            self.run_normalization_tests(testdata)

    def run_normalization_tests(self, testdata):
        """Check the normalization invariants for every line of *testdata*.

        *testdata* is an iterable of text lines. Text after ``#`` is treated
        as a comment, lines starting with ``@Part`` switch the current part,
        and every other non-blank line holds ``;``-terminated fields
        c1..c5 of space-separated hex code points.

        After the file has been consumed, every code point that did not
        appear as c1 in ``@Part1`` must be unchanged by all four forms.
        """
        part = None
        # Strings seen as column c1 while in @Part1; these are exempt from
        # the "normalization is the identity" sweep at the end.
        part1_data = set()

        for line in testdata:
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            try:
                # The trailing ';' leaves an empty last field, hence [:-1].
                c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(line.split(';')[0])
                    except RangeError:
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                            NFKC(c3) == NFKC(c4) == NFKC(c5),
                            line)
            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                            NFKD(c3) == NFKD(c4) == NFKD(c5),
                            line)

            # Report the offending data line on failure, like the checks above.
            self.assertTrue(is_normalized("NFC", c2), line)
            self.assertTrue(is_normalized("NFC", c4), line)

            self.assertTrue(is_normalized("NFD", c3), line)
            self.assertTrue(is_normalized("NFD", c5), line)

            self.assertTrue(is_normalized("NFKC", c4), line)
            self.assertTrue(is_normalized("NFKD", c5), line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data
        for c in range(sys.maxunicode+1):
            X = chr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Regression check: this call must complete without raising."""
        # Check for bug 834676
        normalize('NFC', '\ud55c\uae00')


if __name__ == "__main__":
    unittest.main()