library/test/test_normalization.py at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / library / test / test_normalization.py
at trunk 126 lines 4.2 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
  1#!/usr/bin/env python3
  2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
  3# WARNING: This is a temporary copy of code from the cpython library to
  4# facilitate bringup. Please file a task for anything you change!
  5# flake8: noqa
  6# fmt: off
  7
  8import sys
  9import unittest
 10from http.client import HTTPException
 11from test.support import open_urlresource
 12from unicodedata import normalize, is_normalized, unidata_version
 13
 14
 15TESTDATAFILE = "NormalizationTest.txt"
 16TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE
 17
 18def check_version(testfile):
 19    hdr = testfile.readline()
 20    return unidata_version in hdr
 21
 22class RangeError(Exception):
 23    pass
 24
 25def NFC(str):
 26    return normalize("NFC", str)
 27
 28def NFKC(str):
 29    return normalize("NFKC", str)
 30
 31def NFD(str):
 32    return normalize("NFD", str)
 33
 34def NFKD(str):
 35    return normalize("NFKD", str)
 36
 37def unistr(data):
 38    data = [int(x, 16) for x in data.split(" ")]
 39    for x in data:
 40        if x > sys.maxunicode:
 41            raise RangeError
 42    return "".join([chr(x) for x in data])
 43
 44class NormalizationTest(unittest.TestCase):
 45    # TODO(T66751447): Normalization module fails to download test data
 46    @unittest.skip("Normalization module fails to download test data")
 47    def test_main(self):
 48        # Hit the exception early
 49        try:
 50            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
 51                                        check=check_version)
 52        except PermissionError:
 53            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
 54                          f"into the test data directory")
 55        except (OSError, HTTPException):
 56            self.fail(f"Could not retrieve {TESTDATAURL}")
 57
 58        with testdata:
 59            self.run_normalization_tests(testdata)
 60
 61    def run_normalization_tests(self, testdata):
 62        part = None
 63        part1_data = {}
 64
 65        for line in testdata:
 66            if '#' in line:
 67                line = line.split('#')[0]
 68            line = line.strip()
 69            if not line:
 70                continue
 71            if line.startswith("@Part"):
 72                part = line.split()[0]
 73                continue
 74            try:
 75                c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
 76            except RangeError:
 77                # Skip unsupported characters;
 78                # try at least adding c1 if we are in part1
 79                if part == "@Part1":
 80                    try:
 81                        c1 = unistr(line.split(';')[0])
 82                    except RangeError:
 83                        pass
 84                    else:
 85                        part1_data[c1] = 1
 86                continue
 87
 88            # Perform tests
 89            self.assertTrue(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
 90            self.assertTrue(c4 ==  NFC(c4) ==  NFC(c5), line)
 91            self.assertTrue(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
 92            self.assertTrue(c5 ==  NFD(c4) ==  NFD(c5), line)
 93            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
 94                            NFKC(c3) == NFKC(c4) == NFKC(c5),
 95                            line)
 96            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
 97                            NFKD(c3) == NFKD(c4) == NFKD(c5),
 98                            line)
 99
100            self.assertTrue(is_normalized("NFC", c2))
101            self.assertTrue(is_normalized("NFC", c4))
102
103            self.assertTrue(is_normalized("NFD", c3))
104            self.assertTrue(is_normalized("NFD", c5))
105
106            self.assertTrue(is_normalized("NFKC", c4))
107            self.assertTrue(is_normalized("NFKD", c5))
108
109            # Record part 1 data
110            if part == "@Part1":
111                part1_data[c1] = 1
112
113        # Perform tests for all other data
114        for c in range(sys.maxunicode+1):
115            X = chr(c)
116            if X in part1_data:
117                continue
118            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
119
120    def test_bug_834676(self):
121        # Check for bug 834676
122        normalize('NFC', '\ud55c\uae00')
123
124
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()