this repo has no description
at trunk 1486 lines 61 kB view raw
1#!/usr/bin/env python3 2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 3import _codecs 4import unittest 5from array import array 6 7from test_support import pyro_only 8 9 10try: 11 from builtins import _str_array 12except ImportError: 13 pass 14 15 16class CharmapTests(unittest.TestCase): 17 def test_charmap_decode_with_empty_bytes_input_returns_tuple(self): 18 self.assertEqual(_codecs.charmap_decode(b"", "strict", "abc"), ("", 0)) 19 self.assertEqual( 20 _codecs.charmap_decode(b"", "strict", {0: "a", 1: "b", 2: "c"}), ("", 0) 21 ) 22 23 def test_charmap_decode_with_string_map_raises_unicode_decode_error(self): 24 self.assertRaises( 25 UnicodeDecodeError, _codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab" 26 ) 27 28 def test_charmap_decode_with_string_map_with_bom_raises_unicode_decode_error(self): 29 self.assertRaises( 30 UnicodeDecodeError, 31 _codecs.charmap_decode, 32 b"\x00\x01\x02", 33 "strict", 34 "ab\ufffe", 35 ) 36 37 def test_charmap_decode_with_string_map_returns_tuple(self): 38 self.assertEqual( 39 _codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"), ("abc", 3) 40 ) 41 42 def test_charmap_decode_with_int2str_map_returns_tuple(self): 43 self.assertEqual( 44 _codecs.charmap_decode(b"\x00\x01\x02", "strict", {0: "a", 1: "b", 2: "c"}), 45 ("abc", 3), 46 ) 47 self.assertEqual( 48 _codecs.charmap_decode( 49 b"\x00\x01\x02", "strict", {0: "Aa", 1: "Bb", 2: "Cc"} 50 ), 51 ("AaBbCc", 3), 52 ) 53 self.assertEqual( 54 _codecs.charmap_decode( 55 b"\x00\x01\x02", "strict", {0: "\U0010FFFF", 1: "b", 2: "c"} 56 ), 57 ("\U0010FFFFbc", 3), 58 ) 59 self.assertEqual( 60 _codecs.charmap_decode(b"\x00\x01\x02", "strict", {0: "a", 1: "b", 2: ""}), 61 ("ab", 3), 62 ) 63 self.assertEqual( 64 _codecs.charmap_decode(b"\x00\x01\x02", "strict", {0: "A", 1: "Bb", 2: 37}), 65 ("ABb%", 3), 66 ) 67 68 def test_charmap_decode_with_int2str_map_raises_unicode_decode_error(self): 69 self.assertRaises( 70 UnicodeDecodeError, 71 _codecs.charmap_decode, 72 b"\x00\x01\x02", 73 "strict", 74 {0: "a", 1: "b"}, 75 ) 76 77 def test_charmap_decode_with_int2str_map_with_none_raises_unicode_decode_error( 78 self, 79 ): 80 self.assertRaises( 81 UnicodeDecodeError, 82 _codecs.charmap_decode, 83 b"\x00\x01\x02", 84 "strict", 85 {0: "a", 1: "b", 2: None}, 86 ) 87 88 def test_charmap_decode_with_int2str_map_with_bom_raises_unicode_decode_error(self): 89 self.assertRaises( 90 UnicodeDecodeError, 91 _codecs.charmap_decode, 92 b"\x00\x01\x02", 93 "strict", 94 {0: "a", 1: "b", 2: "\ufffe"}, 95 ) 96 97 def test_charmap_decode_with_mapped_value_tuple_raises_type_error(self): 98 self.assertRaises( 99 TypeError, 100 _codecs.charmap_decode, 101 b"\x00\x01\x02", 102 "strict", 103 {0: "a", 1: "b", 2: ("c",)}, 104 ) 105 106 def test_charmap_decode_with_mapped_value_list_raises_type_error(self): 107 self.assertRaises( 108 TypeError, 109 _codecs.charmap_decode, 110 b"\x00\x01\x02", 111 "strict", 112 {0: "a", 1: "b", 2: ["c"]}, 113 ) 114 115 def test_charmap_decode_with_mapped_value_out_of_range_min_raises_type_error(self): 116 self.assertRaises( 117 TypeError, 118 _codecs.charmap_decode, 119 b"\x00\x01\x02", 120 "strict", 121 {0: "a", 1: "b", 2: -2}, 122 ) 123 124 def test_charmap_decode_with_mapped_value_out_of_range_max_raises_type_error(self): 125 self.assertRaises( 126 TypeError, 127 _codecs.charmap_decode, 128 b"\x00\x01\x02", 129 "strict", 130 {0: "a", 1: "b", 2: 9999999}, 131 ) 132 133 134class CodecsTests(unittest.TestCase): 135 def test_register_error_with_non_string_first_raises_type_error(self): 136 with self.assertRaises(TypeError): 137 _codecs.register_error([], []) 138 139 def test_register_error_with_non_callable_second_raises_type_error(self): 140 with self.assertRaises(TypeError): 141 _codecs.register_error("", []) 142 143 def test_lookup_error_with_non_string_raises_type_error(self): 144 with self.assertRaises(TypeError): 145 _codecs.lookup_error([]) 146 147 def test_lookup_error_with_unknown_name_raises_lookup_error(self): 148 with self.assertRaises(LookupError): 149 _codecs.lookup_error("not-an-error") 150 151 def test_lookup_error_with_ignore_returns_ignore_function(self): 152 func = _codecs.lookup_error("ignore") 153 self.assertEqual(func.__name__, "ignore_errors") 154 155 def test_lookup_error_with_strict_returns_strict_function(self): 156 func = _codecs.lookup_error("strict") 157 self.assertEqual(func.__name__, "strict_errors") 158 159 def test_lookup_error_with_registered_error_returns_function(self): 160 def test_func(): 161 pass 162 163 _codecs.register_error("test", test_func) 164 func = _codecs.lookup_error("test") 165 self.assertEqual(func.__name__, "test_func") 166 167 def test_register_with_non_callable_fails(self): 168 with self.assertRaises(TypeError): 169 _codecs.register("not-a-callable") 170 171 def test_lookup_unknown_codec_raises_lookup_error(self): 172 with self.assertRaises(LookupError): 173 _codecs.lookup("not-the-name-of-a-codec") 174 175 def test_lookup_that_doesnt_return_tuple_raises_type_error(self): 176 def lookup_function(encoding): 177 if encoding == "lookup_that_doesnt_return_tuple": 178 return "not-a-tuple" 179 180 _codecs.register(lookup_function) 181 with self.assertRaises(TypeError): 182 _codecs.lookup("lookup_that_doesnt_return_tuple") 183 184 def test_lookup_that_doesnt_return_four_tuple_raises_type_error(self): 185 def lookup_function(encoding): 186 if encoding == "lookup_that_doesnt_return_four_tuple": 187 return "one", "two", "three" 188 189 _codecs.register(lookup_function) 190 with self.assertRaises(TypeError): 191 _codecs.lookup("lookup_that_doesnt_return_four_tuple") 192 193 def test_lookup_returns_first_registered_codec(self): 194 def first_lookup_function(encoding): 195 if encoding == "lookup_return_of_registered_codec": 196 return 1, 2, 3, 4 197 198 def second_lookup_function(encoding): 199 if encoding == "lookup_return_of_registered_codec": 200 return 5, 6, 7, 8 201 202 _codecs.register(first_lookup_function) 203 _codecs.register(second_lookup_function) 204 self.assertEqual( 205 _codecs.lookup("lookup_return_of_registered_codec"), (1, 2, 3, 4) 206 ) 207 208 def test_lookup_properly_caches_encodings(self): 209 accumulator = [] 210 211 def incrementing_lookup_function(encoding): 212 if ( 213 encoding == "incrementing_state_one" 214 or encoding == "incrementing_state_two" 215 ): 216 accumulator.append(1) 217 return len(accumulator), 0, 0, 0 218 219 _codecs.register(incrementing_lookup_function) 220 first_one = _codecs.lookup("incrementing_state_one") 221 first_two = _codecs.lookup("incrementing_state_two") 222 second_one = _codecs.lookup("incrementing_state_one") 223 self.assertNotEqual(first_one, first_two) 224 self.assertEqual(first_one, second_one) 225 226 def test_lookup_properly_normalizes_encoding_string(self): 227 def lookup_function(encoding): 228 if encoding == "normalized-string": 229 return 0, 0, 0, 0 230 231 _codecs.register(lookup_function) 232 self.assertEqual(_codecs.lookup("normalized string"), (0, 0, 0, 0)) 233 234 def test_decode_with_unknown_codec_raises_lookup_error(self): 235 with self.assertRaises(LookupError) as context: 236 _codecs.decode(b"bytes", "not-a-codec") 237 self.assertEqual(str(context.exception), "unknown encoding: not-a-codec") 238 239 def test_decode_with_function_with_int_codec_raises_type_error(self): 240 def lookup_function(encoding): 241 if encoding == "decode-with-function-with-int-codec": 242 return 0, 0, 0, 0 243 244 _codecs.register(lookup_function) 245 with self.assertRaises(TypeError) as context: 246 _codecs.decode(b"bytes", "decode-with-function-with-int-codec") 247 self.assertIn("'int' object is not callable", str(context.exception)) 248 249 def test_decode_with_function_with_non_tuple_return_raises_type_error(self): 250 def lookup_function(encoding): 251 if encoding == "decode-with-function-with-faulty-codec": 252 return 0, (lambda uni: 0), 0, 0 253 254 _codecs.register(lookup_function) 255 with self.assertRaises(TypeError) as context: 256 _codecs.decode(b"bytes", "decode-with-function-with-faulty-codec") 257 self.assertEqual( 258 str(context.exception), "decoder must return a tuple (object,integer)" 259 ) 260 261 def test_decode_with_function_with_tuple_return_returns_first_element(self): 262 def decoder(s): 263 return ("one", "two") 264 265 def lookup_function(encoding): 266 if encoding == "decode-with-function-with-two-tuple-codec": 267 return 0, decoder, 0, 0 268 269 _codecs.register(lookup_function) 270 self.assertEqual( 271 _codecs.decode(b"bytes", "decode-with-function-with-two-tuple-codec"), "one" 272 ) 273 274 def test_decode_with_errors_passes_multiple_arguments(self): 275 def decoder(s, err): 276 return (s, err) 277 278 def lookup_function(encoding): 279 if encoding == "decode-with-function-with-two-arguments": 280 return 0, decoder, 0, 0 281 282 _codecs.register(lookup_function) 283 self.assertEqual( 284 _codecs.decode( 285 b"bytes", "decode-with-function-with-two-arguments", "error" 286 ), 287 b"bytes", 288 ) 289 290 def test_encode_with_unknown_codec_raises_lookup_error(self): 291 with self.assertRaises(LookupError) as context: 292 _codecs.encode("str", "not-a-codec") 293 self.assertEqual(str(context.exception), "unknown encoding: not-a-codec") 294 295 def test_encode_with_function_with_int_codec_raises_type_error(self): 296 def lookup_function(encoding): 297 if encoding == "encode-with-function-with-int-codec": 298 return 0, 0, 0, 0 299 300 _codecs.register(lookup_function) 301 with self.assertRaises(TypeError) as context: 302 _codecs.encode("str", "encode-with-function-with-int-codec") 303 self.assertIn("'int' object is not callable", str(context.exception)) 304 305 def test_encode_with_function_with_non_tuple_return_raises_type_error(self): 306 def lookup_function(encoding): 307 if encoding == "encode-with-function-with-faulty-codec": 308 return (lambda uni: 0), 0, 0, 0 309 310 _codecs.register(lookup_function) 311 with self.assertRaises(TypeError) as context: 312 _codecs.encode("str", "encode-with-function-with-faulty-codec") 313 self.assertEqual( 314 str(context.exception), "encoder must return a tuple (object, integer)" 315 ) 316 317 def test_encode_with_function_with_tuple_return_returns_first_element(self): 318 def encoder(s): 319 return ("one", "two") 320 321 def lookup_function(encoding): 322 if encoding == "encode-with-function-with-two-tuple-codec": 323 return encoder, 0, 0, 0 324 325 _codecs.register(lookup_function) 326 self.assertEqual( 327 _codecs.encode("str", "encode-with-function-with-two-tuple-codec"), "one" 328 ) 329 330 def test_encode_with_errors_passes_multiple_arguments(self): 331 def encoder(s, err): 332 return (s, err) 333 334 def lookup_function(encoding): 335 if encoding == "encode-with-function-with-two-arguments": 336 return encoder, 0, 0, 0 337 338 _codecs.register(lookup_function) 339 self.assertEqual( 340 _codecs.encode("str", "encode-with-function-with-two-arguments", "error"), 341 "str", 342 ) 343 344 @pyro_only 345 def test_getincrementaldecoder_with_utf_8_returns_utf_8_incremental_decoder(self): 346 inc_dec = _codecs.getincrementaldecoder("utf-8") 347 self.assertIs(inc_dec, _codecs.UTF8IncrementalDecoder) 348 349 @pyro_only 350 def test_getincrementalencoder_with_utf_8_returns_utf_8_incremental_encoder(self): 351 inc_dec = _codecs.getincrementalencoder("utf-8") 352 self.assertIs(inc_dec, _codecs.UTF8IncrementalEncoder) 353 354 355class DecodeASCIITests(unittest.TestCase): 356 def test_decode_ascii_with_non_bytes_first_raises_type_error(self): 357 with self.assertRaises(TypeError): 358 _codecs.ascii_decode([]) 359 360 def test_decode_ascii_with_non_string_second_raises_type_error(self): 361 with self.assertRaises(TypeError): 362 _codecs.ascii_decode(b"", []) 363 364 def test_decode_ascii_with_zero_length_returns_empty_string(self): 365 decoded, consumed = _codecs.ascii_decode(b"") 366 self.assertEqual(decoded, "") 367 self.assertEqual(consumed, 0) 368 369 def test_decode_ascii_with_well_formed_ascii_returns_string(self): 370 decoded, consumed = _codecs.ascii_decode(b"hello") 371 self.assertEqual(decoded, "hello") 372 self.assertEqual(consumed, 5) 373 374 def test_decode_ascii_with_well_formed_ascii_array_returns_string(self): 375 decoded, consumed = _codecs.ascii_decode(array("B", b"hello")) 376 self.assertEqual(decoded, "hello") 377 self.assertEqual(consumed, 5) 378 379 def test_decode_ascii_with_well_formed_ascii_bytearray_returns_string(self): 380 decoded, consumed = _codecs.ascii_decode(bytearray(b"hello")) 381 self.assertEqual(decoded, "hello") 382 self.assertEqual(consumed, 5) 383 384 def test_decode_ascii_with_well_formed_ascii_bytearray_subclass_returns_string( 385 self, 386 ): 387 class B(bytearray): 388 pass 389 390 decoded, consumed = _codecs.ascii_decode(B(b"hello")) 391 self.assertEqual(decoded, "hello") 392 self.assertEqual(consumed, 5) 393 394 def test_decode_ascii_with_well_formed_ascii_memoryview_returns_string(self): 395 decoded, consumed = _codecs.ascii_decode(memoryview(b"hello")) 396 self.assertEqual(decoded, "hello") 397 self.assertEqual(consumed, 5) 398 399 def test_decode_ascii_with_custom_error_handler_returns_string(self): 400 _codecs.register_error("test", lambda x: ("-testing-", x.end)) 401 decoded, consumed = _codecs.ascii_decode(b"ab\x90c", "test") 402 self.assertEqual(decoded, "ab-testing-c") 403 self.assertEqual(consumed, 4) 404 405 406class DecodeEscapeTests(unittest.TestCase): 407 def test_decode_escape_with_non_bytes_first_raises_type_error(self): 408 with self.assertRaises(TypeError): 409 _codecs.escape_decode([]) 410 411 def test_decode_escape_with_non_string_second_raises_type_error(self): 412 with self.assertRaises(TypeError): 413 _codecs.escape_decode(b"", []) 414 415 def test_decode_escape_with_zero_length_returns_empty_string(self): 416 decoded, consumed = _codecs.escape_decode(b"") 417 self.assertEqual(decoded, b"") 418 self.assertEqual(consumed, 0) 419 420 # TODO(atalaba): This should not need @pyro_only 421 @pyro_only 422 def test_decode_escape_with_well_formed_latin_1_returns_string(self): 423 decoded, consumed = _codecs.escape_decode(b"hello\x95") 424 self.assertEqual(decoded, b"hello\xC2\x95") 425 self.assertEqual(consumed, 6) 426 427 def test_decode_escape_with_end_of_string_slash_raises_value_error(self): 428 with self.assertRaises(ValueError) as context: 429 _codecs.escape_decode(b"ab\\") 430 self.assertEqual(str(context.exception), "Trailing \\ in string") 431 432 def test_decode_escape_with_truncated_hex_raises_value_error(self): 433 with self.assertRaises(ValueError) as context: 434 _codecs.escape_decode(b"ab\\x1h") 435 self.assertEqual(str(context.exception), "invalid \\x escape at position 2") 436 437 def test_decode_escape_with_truncated_hex_unknown_error_raises_value_error(self): 438 with self.assertRaises(ValueError) as context: 439 _codecs.escape_decode(b"ab\\x1h", "unknown") 440 self.assertEqual( 441 str(context.exception), 442 "decoding error; unknown error handling code: unknown", 443 ) 444 445 @pyro_only 446 def test_decode_escape_stateful_returns_first_invalid_escape(self): 447 decoded, consumed, first_invalid = _codecs._escape_decode_stateful(b"ab\\yc") 448 self.assertEqual(decoded, b"ab\\yc") 449 self.assertEqual(consumed, 5) 450 self.assertEqual(first_invalid, 3) 451 452 453class DecodeLatin1Tests(unittest.TestCase): 454 def test_decode_latin_1_with_non_bytes_first_raises_type_error(self): 455 with self.assertRaises(TypeError): 456 _codecs.latin_1_decode([]) 457 458 def test_decode_latin_1_with_non_string_second_raises_type_error(self): 459 with self.assertRaises(TypeError): 460 _codecs.latin_1_decode(b"", []) 461 462 def test_decode_latin_1_with_zero_length_returns_empty_string(self): 463 decoded, consumed = _codecs.latin_1_decode(b"") 464 self.assertEqual(decoded, "") 465 self.assertEqual(consumed, 0) 466 467 def test_decode_latin_1_with_ascii_returns_string(self): 468 decoded, consumed = _codecs.latin_1_decode(b"hello") 469 self.assertEqual(decoded, "hello") 470 self.assertEqual(consumed, 5) 471 472 def test_decode_latin_1_with_ascii_array_returns_string(self): 473 decoded, consumed = _codecs.latin_1_decode(array("B", b"hello")) 474 self.assertEqual(decoded, "hello") 475 self.assertEqual(consumed, 5) 476 477 def test_decode_latin_1_with_ascii_bytearray_returns_string(self): 478 decoded, consumed = _codecs.latin_1_decode(bytearray(b"hello")) 479 self.assertEqual(decoded, "hello") 480 self.assertEqual(consumed, 5) 481 482 def test_decode_latin_1_with_ascii_bytearray_subclass_returns_string(self): 483 class B(bytearray): 484 pass 485 486 decoded, consumed = _codecs.latin_1_decode(B(b"hello")) 487 self.assertEqual(decoded, "hello") 488 self.assertEqual(consumed, 5) 489 490 def test_decode_latin_1_with_ascii_memoryview_returns_string(self): 491 decoded, consumed = _codecs.latin_1_decode(memoryview(b"hello")) 492 self.assertEqual(decoded, "hello") 493 self.assertEqual(consumed, 5) 494 495 def test_decode_latin_1_with_latin_1_returns_string(self): 496 decoded, consumed = _codecs.latin_1_decode(b"\x7D\x7E\x7F\x80\x81\x82") 497 self.assertEqual(decoded, "\x7D\x7E\x7F\x80\x81\x82") 498 self.assertEqual(consumed, 6) 499 500 501class DecodeUnicodeEscapeTests(unittest.TestCase): 502 def test_decode_unicode_escape_with_non_bytes_first_raises_type_error(self): 503 with self.assertRaises(TypeError): 504 _codecs.unicode_escape_decode([]) 505 506 def test_decode_unicode_escape_with_non_string_second_raises_type_error(self): 507 with self.assertRaises(TypeError): 508 _codecs.unicode_escape_decode(b"", []) 509 510 def test_decode_unicode_escape_with_zero_length_returns_empty_string(self): 511 decoded, consumed = _codecs.unicode_escape_decode(b"") 512 self.assertEqual(decoded, "") 513 self.assertEqual(consumed, 0) 514 515 def test_decode_unicode_escape_with_well_formed_latin_1_returns_string(self): 516 decoded, consumed = _codecs.unicode_escape_decode(b"hello\x95") 517 self.assertEqual(decoded, "hello\x95") 518 self.assertEqual(consumed, 6) 519 520 def test_decode_unicode_escape_with_well_formed_latin_1_array_returns_string( 521 self, 522 ): 523 decoded, consumed = _codecs.unicode_escape_decode(array("B", b"hello\x95")) 524 self.assertEqual(decoded, "hello\x95") 525 self.assertEqual(consumed, 6) 526 527 def test_decode_unicode_escape_with_well_formed_latin_1_bytearray_returns_string( 528 self, 529 ): 530 decoded, consumed = _codecs.unicode_escape_decode(bytearray(b"hello\x95")) 531 self.assertEqual(decoded, "hello\x95") 532 self.assertEqual(consumed, 6) 533 534 def test_decode_unicode_escape_with_latin_1_bytearray_subclass_returns_string(self): 535 class B(bytearray): 536 pass 537 538 decoded, consumed = _codecs.unicode_escape_decode(B(b"hello\x95")) 539 self.assertEqual(decoded, "hello\x95") 540 self.assertEqual(consumed, 6) 541 542 def test_decode_unicode_escape_with_well_formed_latin_1_memoryview_returns_string( 543 self, 544 ): 545 decoded, consumed = _codecs.unicode_escape_decode(memoryview(b"hello\x95")) 546 self.assertEqual(decoded, "hello\x95") 547 self.assertEqual(consumed, 6) 548 549 def test_decode_unicode_escape_with_escaped_back_slash_returns_string(self): 550 decoded, consumed = _codecs.unicode_escape_decode(b"hello\\x95") 551 self.assertEqual(decoded, "hello\x95") 552 self.assertEqual(consumed, 9) 553 554 def test_decode_unicode_escape_with_valid_hangul_returns_string(self): 555 decoded, consumed = _codecs.unicode_escape_decode( 556 b"\\N{HANGUL SYLLABLE BBYAENG}" 557 ) 558 self.assertEqual(decoded, "\uBEC9") 559 self.assertEqual(consumed, 27) 560 561 def test_decode_unicode_escape_with_lowercase_hangul_raises_exception(self): 562 with self.assertRaises(UnicodeDecodeError): 563 _codecs.unicode_escape_decode(b"\\N{HANGUL SYLLABLE ddalg}") 564 565 def test_decode_unicode_escape_with_invalid_leading_raises_exception(self): 566 with self.assertRaises(UnicodeDecodeError): 567 _codecs.unicode_escape_decode(b"\\N{HANGUL SYLLABLE BLANJ}") 568 569 def test_decode_unicode_escape_with_invalid_vowel_raises_exception(self): 570 with self.assertRaises(UnicodeDecodeError): 571 _codecs.unicode_escape_decode(b"\\N{HANGUL SYLLABLE CAOGS}") 572 573 def test_decode_unicode_escape_with_invalid_trailing_raises_exception(self): 574 with self.assertRaises(UnicodeDecodeError): 575 _codecs.unicode_escape_decode(b"\\N{HANGUL SYLLABLE PYOLL}") 576 577 def test_decode_unicode_escape_with_valid_cjk_ideograph_returns_string(self): 578 decoded, consumed = _codecs.unicode_escape_decode( 579 b"\\N{CJK UNIFIED IDEOGRAPH-4DB0}" 580 ) 581 self.assertEqual(decoded, "\u4DB0") 582 self.assertEqual(consumed, 30) 583 584 decoded, consumed = _codecs.unicode_escape_decode( 585 b"\\N{CJK UNIFIED IDEOGRAPH-2B75A}" 586 ) 587 self.assertEqual(decoded, "\U0002B75A") 588 self.assertEqual(consumed, 31) 589 590 def test_decode_unicode_escape_with_lowercase_cjk_ideograph_returns_string(self): 591 with self.assertRaises(UnicodeDecodeError): 592 _codecs.unicode_escape_decode(b"\\N{CJK UNIFIED IDEOGRAPH-4db0}") 593 594 def test_decode_unicode_escape_with_invalid_cjk_ideograph_returns_string(self): 595 with self.assertRaises(UnicodeDecodeError): 596 _codecs.unicode_escape_decode(b"\\N{CJK UNIFIED IDEOGRAPH-4DB6}") 597 598 def test_decode_unicode_escape_with_valid_name_escape_returns_string(self): 599 decoded, consumed = _codecs.unicode_escape_decode( 600 b"\\N{LATIN SMALL LETTER A WITH MACRON}" 601 ) 602 self.assertEqual(decoded, "\u0101") 603 self.assertEqual(consumed, 36) 604 605 def test_decode_unicode_escape_with_invalid_word_raises_unicode_decode_error(self): 606 with self.assertRaises(UnicodeDecodeError): 607 _codecs.unicode_escape_decode(b"\\N{INVALID}") 608 609 def test_decode_unicode_escape_with_invalid_name_raises_unicode_decode_error(self): 610 with self.assertRaises(UnicodeDecodeError): 611 _codecs.unicode_escape_decode(b"\\N{LATIN S LETTER CAPITAL}") 612 613 def test_decode_unicode_escape_with_custom_error_handler_returns_string(self): 614 _codecs.register_error("test", lambda x: ("-testing-", x.end)) 615 decoded, consumed = _codecs.unicode_escape_decode(b"ab\\U90gc", "test") 616 self.assertEqual(decoded, "ab-testing-gc") 617 self.assertEqual(consumed, 8) 618 619 @pyro_only 620 def test_decode_unicode_escape_stateful_returns_first_invalid_escape(self): 621 decoded, consumed, first_invalid = _codecs._unicode_escape_decode_stateful( 622 b"ab\\yc" 623 ) 624 self.assertEqual(decoded, "ab\\yc") 625 self.assertEqual(consumed, 5) 626 self.assertEqual(first_invalid, 3) 627 628 629class DecodeRawUnicodeEscapeTests(unittest.TestCase): 630 def test_decode_raw_unicode_escape_with_non_bytes_first_raises_type_error(self): 631 with self.assertRaises(TypeError): 632 _codecs.raw_unicode_escape_decode([]) 633 634 def test_decode_raw_unicode_escape_with_non_string_second_raises_type_error(self): 635 with self.assertRaises(TypeError): 636 _codecs.raw_unicode_escape_decode(b"", []) 637 638 def test_decode_raw_unicode_escape_with_zero_length_returns_empty_string(self): 639 decoded, consumed = _codecs.raw_unicode_escape_decode(b"") 640 self.assertEqual(decoded, "") 641 self.assertEqual(consumed, 0) 642 643 def test_decode_raw_unicode_escape_with_well_formed_latin_1_returns_string(self): 644 decoded, consumed = _codecs.raw_unicode_escape_decode(b"hello\x95") 645 self.assertEqual(decoded, "hello\x95") 646 self.assertEqual(consumed, 6) 647 648 def test_decode_raw_unicode_escape_with_escaped_back_slash_returns_string(self): 649 decoded, consumed = _codecs.raw_unicode_escape_decode(b"hello\\x95") 650 self.assertEqual(decoded, "hello\\x95") 651 self.assertEqual(consumed, 9) 652 653 def test_decode_raw_unicode_escape_with_well_formed_latin_1_array_returns_string( 654 self, 655 ): 656 decoded, consumed = _codecs.raw_unicode_escape_decode(array("B", b"hello\x95")) 657 self.assertEqual(decoded, "hello\x95") 658 self.assertEqual(consumed, 6) 659 660 def test_decode_raw_unicode_escape_with_well_formed_latin_1_bytearray_returns_string( 661 self, 662 ): 663 decoded, consumed = _codecs.raw_unicode_escape_decode(bytearray(b"hello\x95")) 664 self.assertEqual(decoded, "hello\x95") 665 self.assertEqual(consumed, 6) 666 667 def test_decode_raw_unicode_escape_with_well_formed_latin_1_memoryview_returns_string( 668 self, 669 ): 670 decoded, consumed = _codecs.raw_unicode_escape_decode(memoryview(b"hello\x95")) 671 self.assertEqual(decoded, "hello\x95") 672 self.assertEqual(consumed, 6) 673 674 def test_decode_raw_unicode_escape_with_latin_1_bytearray_subclass_returns_string( 675 self, 676 ): 677 class B(bytearray): 678 pass 679 680 decoded, consumed = _codecs.raw_unicode_escape_decode(B(b"hello\x95")) 681 self.assertEqual(decoded, "hello\x95") 682 self.assertEqual(consumed, 6) 683 684 def test_decode_raw_unicode_escape_with_out_of_range_32_bit_unicode_raises_error( 685 self, 686 ): 687 with self.assertRaises(UnicodeDecodeError) as context: 688 _codecs.raw_unicode_escape_decode(b"\\U00FFFFFF") 689 exc = context.exception 690 self.assertEqual(exc.encoding, "rawunicodeescape") 691 self.assertEqual(exc.reason, "\\Uxxxxxxxx out of range") 692 self.assertEqual(exc.object, b"\\U00FFFFFF") 693 self.assertEqual(exc.start, 0) 694 self.assertEqual(exc.end, 10) 695 696 def test_decode_raw_unicode_escape_with_truncated_16_bit_unicode_raises_error( 697 self, 698 ): 699 with self.assertRaises(UnicodeDecodeError) as context: 700 _codecs.raw_unicode_escape_decode(b"\\u123") 701 exc = context.exception 702 self.assertEqual(exc.encoding, "rawunicodeescape") 703 self.assertEqual(exc.reason, "truncated \\uXXXX escape") 704 self.assertEqual(exc.object, b"\\u123") 705 self.assertEqual(exc.start, 0) 706 self.assertEqual(exc.end, 5) 707 708 def test_decode_raw_unicode_escape_with_truncated_32_bit_unicode_raises_error( 709 self, 710 ): 711 with self.assertRaises(UnicodeDecodeError) as context: 712 _codecs.raw_unicode_escape_decode(b"\\U001234") 713 exc = context.exception 714 self.assertEqual(exc.encoding, "rawunicodeescape") 715 self.assertEqual(exc.reason, "truncated \\UXXXXXXXX escape") 716 self.assertEqual(exc.object, b"\\U001234") 717 self.assertEqual(exc.start, 0) 718 self.assertEqual(exc.end, 8) 719 720 def test_decode_raw_unicode_escape_with_valid_unicode_returns_string(self): 721 decoded, consumed = _codecs.raw_unicode_escape_decode(b"\u26f7") 722 self.assertEqual(decoded, "\u26F7") 723 self.assertEqual(consumed, 6) 724 725 def test_decode_raw_unicode_escape_with_valid_cjk_ideograph_returns_string(self): 726 decoded, consumed = _codecs.unicode_escape_decode(b"\\u4DB0") 727 self.assertEqual(decoded, "\u4DB0") 728 self.assertEqual(consumed, 6) 729 730 decoded, consumed = _codecs.raw_unicode_escape_decode(b"\\U0002B75A") 731 self.assertEqual(decoded, "\U0002B75A") 732 self.assertEqual(consumed, 10) 733 734 def test_decode_raw_unicode_escape_with_valid_name_escape_returns_string(self): 735 decoded, consumed = _codecs.raw_unicode_escape_decode( 736 b"\\N{HANGUL SYLLABLE BBYAENG}" 737 ) 738 self.assertEqual(decoded, "\\N{HANGUL SYLLABLE BBYAENG}") 739 self.assertEqual(consumed, 27) 740 741 def test_decode_raw_unicode_escape_with_invalid_word_returns_string(self): 742 decoded, consumed = _codecs.raw_unicode_escape_decode(b"\\N{INVALID}") 743 self.assertEqual(decoded, "\\N{INVALID}") 744 self.assertEqual(consumed, 11) 745 746 def test_decode_raw_unicode_escape_with_ignore_error_handler_returns_string(self): 747 decoded, consumed = _codecs.raw_unicode_escape_decode(b"ab\\U90gc", "ignore") 748 self.assertEqual(decoded, "abgc") 749 self.assertEqual(consumed, 8) 750 751 def test_decode_raw_unicode_escape_with_custom_error_handler_returns_string(self): 752 _codecs.register_error("test", lambda x: ("-testing-", x.end)) 753 decoded, consumed = _codecs.raw_unicode_escape_decode(b"ab\\U90gc", "test") 754 self.assertEqual(decoded, "ab-testing-gc") 755 self.assertEqual(consumed, 8) 756 757 def test_decode_raw_unicode_escape_with_replace_error_handler_returns_string(self): 758 decoded, consumed = _codecs.raw_unicode_escape_decode(b"ab\\U90gc", "replace") 759 self.assertEqual(decoded, "ab\uFFFDgc") 760 self.assertEqual(consumed, 8) 761 762 def test_decode_raw_unicode_escape_with_trailing_back_slash_returns_string( 763 self, 764 ): 765 decoded, consumed = _codecs.raw_unicode_escape_decode(b"porcupine\\") 766 self.assertEqual(decoded, "porcupine\\") 767 self.assertEqual(consumed, 10) 768 769 770class DecodeUTF8Tests(unittest.TestCase): 771 def test_decode_utf_8_with_non_bytes_first_raises_type_error(self): 772 with self.assertRaises(TypeError): 773 _codecs.utf_8_decode([]) 774 775 def test_decode_utf_8_with_non_string_second_raises_type_error(self): 776 with self.assertRaises(TypeError): 777 _codecs.utf_8_decode(b"", []) 778 779 def test_decode_utf_8_with_zero_length_returns_empty_string(self): 780 decoded, consumed = _codecs.utf_8_decode(b"") 781 self.assertEqual(decoded, "") 782 self.assertEqual(consumed, 0) 783 784 def test_decode_utf_8_with_well_formed_utf_8_returns_string(self): 785 decoded, consumed = _codecs.utf_8_decode( 786 b"\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80" 787 ) 788 self.assertEqual(decoded, "\U0001f192h\xe4l\u2cc0") 789 self.assertEqual(consumed, 11) 790 791 def test_decode_utf_8_with_well_formed_utf8_array_returns_string(self): 792 decoded, consumed = _codecs.utf_8_decode( 793 array("B", b"\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80") 794 ) 795 self.assertEqual(decoded, "\U0001f192h\xe4l\u2cc0") 796 self.assertEqual(consumed, 11) 797 798 def test_decode_utf_8_with_well_formed_utf8_bytearray_returns_string(self): 799 decoded, consumed = _codecs.utf_8_decode( 800 bytearray(b"\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80") 801 ) 802 self.assertEqual(decoded, "\U0001f192h\xe4l\u2cc0") 803 self.assertEqual(consumed, 11) 804 805 def test_decode_utf_8_with_well_formed_utf8_bytearray_subclass_returns_string(self): 806 class B(bytearray): 807 pass 808 809 decoded, consumed = _codecs.utf_8_decode( 810 B(b"\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80") 811 ) 812 self.assertEqual(decoded, "\U0001f192h\xe4l\u2cc0") 813 self.assertEqual(consumed, 11) 814 815 def test_decode_utf_8_with_well_formed_utf8_memoryview_returns_string(self): 816 decoded, consumed = _codecs.utf_8_decode( 817 memoryview(b"\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80") 818 ) 819 self.assertEqual(decoded, "\U0001f192h\xe4l\u2cc0") 820 self.assertEqual(consumed, 11) 821 822 def test_decode_utf_8_with_custom_error_handler_returns_string(self): 823 _codecs.register_error("test", lambda x: ("-testing-", x.end)) 824 decoded, consumed = _codecs.utf_8_decode(b"ab\x90c", "test") 825 self.assertEqual(decoded, "ab-testing-c") 826 self.assertEqual(consumed, 4) 827 828 def test_decode_utf_8_with_invalid_start_byte_raises_decode_error(self): 829 with self.assertRaises(UnicodeDecodeError) as context: 830 _codecs.utf_8_decode(b"ab\x90c") 831 self.assertEqual(str(context.exception.reason), "invalid start byte") 832 833 834class EncodeASCIITests(unittest.TestCase): 835 def test_encode_ascii_with_non_str_first_argument_raises_type_error(self): 836 with self.assertRaises(TypeError): 837 _codecs.ascii_encode([]) 838 839 def test_encode_ascii_with_non_str_second_argument_raises_type_error(self): 840 with self.assertRaises(TypeError): 841 _codecs.ascii_encode("", []) 842 843 def test_encode_ascii_with_zero_length_returns_empty_bytes(self): 844 encoded, consumed = _codecs.ascii_encode("") 845 self.assertEqual(encoded, b"") 846 self.assertEqual(consumed, 0) 847 848 def test_encode_ascii_with_well_formed_ascii_returns_bytes(self): 849 encoded, consumed = _codecs.ascii_encode("hello") 850 self.assertEqual(encoded, b"hello") 851 self.assertEqual(consumed, 5) 852 853 def test_encode_ascii_with_well_formed_latin_1_raises_encode_error(self): 854 with self.assertRaises(UnicodeEncodeError): 855 _codecs.ascii_encode("hell\xe5") 856 857 def test_encode_ascii_with_custom_error_handler_mid_bytes_error_returns_bytes(self): 858 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 859 encoded, consumed = _codecs.ascii_encode("ab\udc80c", "test") 860 self.assertEqual(encoded, b"ab-testing-c") 861 self.assertEqual(consumed, 4) 862 863 def test_encode_ascii_with_custom_error_handler_end_bytes_error_returns_bytes(self): 864 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 865 encoded, consumed = _codecs.ascii_encode("ab\x80", "test") 866 self.assertEqual(encoded, b"ab-testing-") 867 self.assertEqual(consumed, 3) 868 869 def test_encode_ascii_with_non_ascii_error_handler_raises_encode_error(self): 870 _codecs.register_error("test", lambda x: ("\x80", x.end)) 871 with self.assertRaises(UnicodeEncodeError) as context: 872 _codecs.ascii_encode("ab\x80", "test") 873 exc = context.exception 874 self.assertEqual(exc.encoding, "ascii") 875 self.assertEqual(exc.reason, "ordinal not in range(128)") 876 self.assertEqual(exc.object, "ab\x80") 877 self.assertEqual(exc.start, 2) 878 self.assertEqual(exc.end, 3) 879 880 881class EncodeLatin1Tests(unittest.TestCase): 882 def test_encode_latin_1_with_non_str_first_argument_raises_type_error(self): 883 with self.assertRaises(TypeError): 884 _codecs.latin_1_encode([]) 885 886 def test_encode_latin_1_with_non_str_second_argument_raises_type_error(self): 887 with self.assertRaises(TypeError): 888 _codecs.latin_1_encode("", []) 889 890 def test_encode_latin_1_with_zero_length_returns_empty_bytes(self): 891 encoded, consumed = _codecs.latin_1_encode("") 892 self.assertEqual(encoded, b"") 893 self.assertEqual(consumed, 0) 894 895 def test_encode_latin_1_with_well_formed_latin_1_returns_bytes(self): 896 encoded, consumed = _codecs.latin_1_encode("hell\xe5") 897 self.assertEqual(encoded, b"hell\xe5") 898 self.assertEqual(consumed, 5) 899 900 def test_encode_ascii_with_well_formed_non_latin_1_raises_encode_error(self): 901 with self.assertRaises(UnicodeEncodeError): 902 _codecs.ascii_encode("hell\u01ff") 903 904 def test_encode_latin_1_with_custom_error_handler_mid_bytes_error_returns_bytes( 905 self, 906 ): 907 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 908 encoded, consumed = _codecs.latin_1_encode("ab\udc80c", "test") 909 self.assertEqual(encoded, b"ab-testing-c") 910 self.assertEqual(consumed, 4) 911 912 def test_encode_latin_1_with_custom_error_handler_end_bytes_error_returns_bytes( 913 self, 914 ): 915 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 916 encoded, consumed = _codecs.latin_1_encode("ab\u0180", "test") 917 self.assertEqual(encoded, b"ab-testing-") 918 self.assertEqual(consumed, 3) 919 920 def test_encode_latin_1_with_non_ascii_error_handler_returns_bytes(self): 921 _codecs.register_error("test", lambda x: ("\x80", x.end)) 922 encoded, consumed = _codecs.latin_1_encode("ab\u0180", "test") 923 self.assertEqual(encoded, b"ab\x80") 924 self.assertEqual(consumed, 3) 925 926 def test_encode_latin_1_with_non_latin_1_error_handler_raises_encode_error(self): 927 _codecs.register_error("test", lambda x: ("\u0180", x.end)) 928 with self.assertRaises(UnicodeEncodeError) as context: 929 _codecs.latin_1_encode("ab\u0f80", "test") 930 exc = context.exception 931 self.assertEqual(exc.encoding, "latin-1") 932 self.assertEqual(exc.reason, "ordinal not in range(256)") 933 self.assertEqual(exc.object, "ab\u0f80") 934 self.assertEqual(exc.start, 2) 935 self.assertEqual(exc.end, 3) 936 937 938class EncodeUTF16Tests(unittest.TestCase): 939 def test_encode_utf_16_with_non_str_first_argument_raises_type_error(self): 940 with self.assertRaises(TypeError): 941 _codecs.utf_16_encode([]) 942 943 def test_encode_utf_16_with_non_str_second_argument_raises_type_error(self): 944 with self.assertRaises(TypeError): 945 _codecs.utf_16_encode("", []) 946 947 def test_encode_utf_16_with_zero_length_returns_bom(self): 948 encoded, consumed = _codecs.utf_16_encode("") 949 self.assertEqual(encoded, b"\xff\xfe") 950 self.assertEqual(consumed, 0) 951 952 def test_encode_utf_16_with_ascii_returns_bytes(self): 953 encoded, consumed = _codecs.utf_16_encode("hi") 954 self.assertEqual(encoded, b"\xff\xfeh\x00i\x00") 955 self.assertEqual(consumed, 2) 956 957 def test_encode_utf_16_with_latin_1_returns_bytes(self): 958 encoded, consumed = _codecs.utf_16_encode("h\xe5") 959 self.assertEqual(encoded, b"\xff\xfeh\x00\xe5\x00") 960 self.assertEqual(consumed, 2) 961 962 def test_encode_utf_16_with_bmp_returns_bytes(self): 963 encoded, consumed = _codecs.utf_16_encode("h\u1005") 964 self.assertEqual(encoded, b"\xff\xfeh\x00\x05\x10") 965 self.assertEqual(consumed, 2) 966 967 def test_encode_utf_16_with_supplementary_plane_returns_bytes(self): 968 encoded, consumed = _codecs.utf_16_encode("h\U0001d1f0i") 969 self.assertEqual(encoded, b"\xff\xfeh\x004\xd8\xf0\xddi\x00") 970 self.assertEqual(consumed, 3) 971 972 def test_encode_utf_16_le_with_supplementary_plane_returns_bytes(self): 973 encoded, consumed = _codecs.utf_16_le_encode("h\U0001d1f0i") 974 self.assertEqual(encoded, b"h\x004\xd8\xf0\xddi\x00") 975 self.assertEqual(consumed, 3) 976 977 def test_encode_utf_16_be_with_supplementary_plane_returns_bytes(self): 978 encoded, consumed = _codecs.utf_16_be_encode("h\U0001d1f0i") 979 self.assertEqual(encoded, b"\x00h\xd84\xdd\xf0\x00i") 980 self.assertEqual(consumed, 3) 981 982 def test_encode_utf_16_with_custom_error_handler_mid_bytes_error_returns_bytes( 983 self, 984 ): 985 _codecs.register_error("test", lambda x: (b"--", x.end)) 986 encoded, consumed = _codecs.utf_16_encode("ab\udc80c", "test") 987 self.assertEqual(encoded, b"\xff\xfea\x00b\x00--c\x00") 988 self.assertEqual(consumed, 4) 989 990 def test_encode_utf_16_with_custom_error_handler_end_bytes_error_returns_bytes( 991 self, 992 ): 993 _codecs.register_error("test", lambda x: (b"--", x.end)) 994 encoded, consumed = _codecs.utf_16_encode("ab\udc80", "test") 995 self.assertEqual(encoded, b"\xff\xfea\x00b\x00--") 996 self.assertEqual(consumed, 3) 997 998 def test_encode_utf_16_with_string_returning_error_handler_returns_bytes(self): 999 _codecs.register_error("test", lambda x: ("h", x.end)) 1000 encoded, consumed = _codecs.utf_16_encode("ab\udc80", "test") 1001 self.assertEqual(encoded, b"\xff\xfea\x00b\x00h\x00") 1002 self.assertEqual(consumed, 3) 1003 1004 def test_encode_utf_16_with_non_ascii_error_handler_raises_encode_error(self): 1005 _codecs.register_error("test", lambda x: ("\x80", x.end)) 1006 with self.assertRaises(UnicodeEncodeError) as context: 1007 _codecs.utf_16_encode("ab\udc80", "test") 1008 exc = context.exception 1009 self.assertEqual(exc.encoding, "utf-16") 1010 self.assertEqual(exc.reason, "surrogates not allowed") 1011 self.assertEqual(exc.object, "ab\udc80") 1012 self.assertEqual(exc.start, 2) 1013 self.assertEqual(exc.end, 3) 1014 1015 1016class EncodeUTF32Tests(unittest.TestCase): 1017 def test_encode_utf_32_with_non_str_first_argument_raises_type_error(self): 1018 with self.assertRaises(TypeError): 1019 _codecs.utf_32_encode([]) 1020 1021 def test_encode_utf_32_with_non_str_second_argument_raises_type_error(self): 1022 with self.assertRaises(TypeError): 1023 _codecs.utf_32_encode("", []) 1024 1025 def test_encode_utf_32_with_zero_length_returns_bom(self): 1026 encoded, consumed = _codecs.utf_32_encode("") 1027 self.assertEqual(encoded, b"\xff\xfe\x00\x00") 1028 self.assertEqual(consumed, 0) 1029 1030 def test_encode_utf_32_with_ascii_returns_bytes(self): 1031 encoded, consumed = _codecs.utf_32_encode("hi") 1032 self.assertEqual(encoded, b"\xff\xfe\x00\x00h\x00\x00\x00i\x00\x00\x00") 1033 self.assertEqual(consumed, 2) 1034 1035 def test_encode_utf_32_with_latin_1_returns_bytes(self): 1036 encoded, consumed = _codecs.utf_32_encode("h\xe5") 1037 self.assertEqual(encoded, b"\xff\xfe\x00\x00h\x00\x00\x00\xe5\x00\x00\x00") 1038 self.assertEqual(consumed, 2) 1039 1040 def test_encode_utf_32_with_bmp_returns_bytes(self): 1041 encoded, consumed = _codecs.utf_32_encode("h\u1005") 1042 self.assertEqual(encoded, b"\xff\xfe\x00\x00h\x00\x00\x00\x05\x10\x00\x00") 1043 self.assertEqual(consumed, 2) 1044 1045 def test_encode_utf_32_with_supplementary_plane_returns_bytes(self): 1046 encoded, consumed = _codecs.utf_32_encode("h\U0001d1f0i") 1047 self.assertEqual( 1048 encoded, b"\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00" 1049 ) 1050 self.assertEqual(consumed, 3) 1051 1052 def test_encode_utf_32_le_with_supplementary_plane_returns_bytes(self): 1053 encoded, consumed = _codecs.utf_32_le_encode("h\U0001d1f0i") 1054 self.assertEqual(encoded, b"h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00") 1055 self.assertEqual(consumed, 3) 1056 1057 def test_encode_utf_32_be_with_supplementary_plane_returns_bytes(self): 1058 encoded, consumed = _codecs.utf_32_be_encode("h\U0001d1f0i") 1059 self.assertEqual(encoded, b"\x00\x00\x00h\x00\x01\xd1\xf0\x00\x00\x00i") 1060 self.assertEqual(consumed, 3) 1061 1062 def test_encode_utf_32_with_custom_error_handler_mid_bytes_error_returns_bytes( 1063 self, 1064 ): 1065 _codecs.register_error("test", lambda x: (b"----", x.end)) 1066 encoded, consumed = _codecs.utf_32_encode("ab\udc80c", "test") 1067 self.assertEqual( 1068 encoded, b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00----c\x00\x00\x00" 1069 ) 1070 self.assertEqual(consumed, 4) 1071 1072 def test_encode_utf_32_with_custom_error_handler_end_bytes_error_returns_bytes( 1073 self, 1074 ): 1075 _codecs.register_error("test", lambda x: (b"----", x.end)) 1076 encoded, consumed = _codecs.utf_32_encode("ab\udc80", "test") 1077 self.assertEqual(encoded, b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00----") 1078 self.assertEqual(consumed, 3) 1079 1080 def test_encode_utf_32_with_string_returning_error_handler_returns_bytes(self): 1081 _codecs.register_error("test", lambda x: ("h", x.end)) 1082 encoded, consumed = _codecs.utf_32_encode("ab\udc80", "test") 1083 self.assertEqual( 1084 encoded, b"\xff\xfe\x00\x00a\x00\x00\x00b\x00\x00\x00h\x00\x00\x00" 1085 ) 1086 self.assertEqual(consumed, 3) 1087 1088 def test_encode_utf_32_with_non_ascii_error_handler_raises_encode_error(self): 1089 _codecs.register_error("test", lambda x: ("\x80", x.end)) 1090 with self.assertRaises(UnicodeEncodeError) as context: 1091 _codecs.utf_32_encode("ab\udc80", "test") 1092 exc = context.exception 1093 self.assertEqual(exc.encoding, "utf-32") 1094 self.assertEqual(exc.reason, "surrogates not allowed") 1095 self.assertEqual(exc.object, "ab\udc80") 1096 self.assertEqual(exc.start, 2) 1097 self.assertEqual(exc.end, 3) 1098 1099 1100class EncodeUTF8Tests(unittest.TestCase): 1101 def test_encode_utf_8_with_non_str_first_argument_raises_type_error(self): 1102 with self.assertRaises(TypeError): 1103 _codecs.utf_8_encode([]) 1104 1105 def test_encode_utf_8_with_non_str_second_argument_raises_type_error(self): 1106 with self.assertRaises(TypeError): 1107 _codecs.utf_8_encode("", []) 1108 1109 def test_encode_utf_8_with_zero_length_returns_empty_bytes(self): 1110 encoded, consumed = _codecs.utf_8_encode("") 1111 self.assertEqual(encoded, b"") 1112 self.assertEqual(consumed, 0) 1113 1114 def test_encode_utf_8_with_well_formed_ascii_returns_bytes(self): 1115 encoded, consumed = _codecs.utf_8_encode("hello") 1116 self.assertEqual(encoded, b"hello") 1117 self.assertEqual(consumed, 5) 1118 1119 def test_encode_utf_8_with_surrogatepass_passes_surrogate(self): 1120 # high surrogate 1121 encoded, consumed = _codecs.utf_8_encode("ab\udc80c", "surrogatepass") 1122 self.assertEqual(encoded, b"ab\xed\xb2\x80c") 1123 self.assertEqual(consumed, 4) 1124 1125 # low surrogate 1126 encoded, consumed = _codecs.utf_8_encode("ab\ud9a0c", "surrogatepass") 1127 self.assertEqual(encoded, b"ab\xed\xa6\xa0c") 1128 self.assertEqual(consumed, 4) 1129 1130 def test_encode_utf_8_without_surrogatepass_raises_on_surrogate(self): 1131 with self.assertRaises(UnicodeEncodeError): 1132 _codecs.utf_8_encode("ab\udc80c") 1133 1134 def test_encode_utf_8_with_custom_error_handler_mid_bytes_error_returns_bytes(self): 1135 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 1136 encoded, consumed = _codecs.utf_8_encode("ab\udc80c", "test") 1137 self.assertEqual(encoded, b"ab-testing-c") 1138 self.assertEqual(consumed, 4) 1139 1140 def test_encode_utf_8_with_custom_error_handler_end_bytes_error_returns_bytes(self): 1141 _codecs.register_error("test", lambda x: (b"-testing-", x.end)) 1142 encoded, consumed = _codecs.utf_8_encode("ab\udc80", "test") 1143 self.assertEqual(encoded, b"ab-testing-") 1144 self.assertEqual(consumed, 3) 1145 1146 def test_encode_utf_8_with_non_ascii_error_handler_raises_encode_error(self): 1147 _codecs.register_error("test", lambda x: ("\x80", x.end)) 1148 with self.assertRaises(UnicodeEncodeError) as context: 1149 _codecs.utf_8_encode("ab\udc80", "test") 1150 exc = context.exception 1151 self.assertEqual(exc.encoding, "utf-8") 1152 self.assertEqual(exc.reason, "surrogates not allowed") 1153 self.assertEqual(exc.object, "ab\udc80") 1154 self.assertEqual(exc.start, 2) 1155 self.assertEqual(exc.end, 3) 1156 1157 1158class EncodeRawUnicodeEscapeTests(unittest.TestCase): 1159 def test_encode_raw_unicode_escape_with_non_str_first_argument_raises_type_error( 1160 self, 1161 ): 1162 with self.assertRaises(TypeError): 1163 _codecs.raw_unicode_escape_encode([]) 1164 1165 def test_encode_raw_unicode_escape_with_non_str_second_argument_raises_type_error( 1166 self, 1167 ): 1168 with self.assertRaises(TypeError): 1169 _codecs.raw_unicode_escape_encode("", []) 1170 1171 def test_encode_raw_unicode_escape_with_zero_length_returns_bytes(self): 1172 encoded, consumed = _codecs.raw_unicode_escape_encode("") 1173 self.assertEqual(encoded, b"") 1174 self.assertEqual(consumed, 0) 1175 1176 def test_encode_raw_unicode_escape_with_ascii_returns_bytes(self): 1177 encoded, consumed = _codecs.raw_unicode_escape_encode("hi") 1178 self.assertEqual(encoded, b"hi") 1179 self.assertEqual(consumed, 2) 1180 1181 def test_encode_raw_unicode_escape_with_latin_1_returns_bytes(self): 1182 encoded, consumed = _codecs.raw_unicode_escape_encode("h\xe5") 1183 self.assertEqual(encoded, b"h\xe5") 1184 self.assertEqual(consumed, 2) 1185 1186 def test_encode_raw_unicode_escape_with_bmp_returns_bytes(self): 1187 encoded, consumed = _codecs.raw_unicode_escape_encode("h\u1005") 1188 self.assertEqual(encoded, b"h\\u1005") 1189 self.assertEqual(consumed, 2) 1190 1191 def test_encode_raw_unicode_escape_with_supplementary_plane_returns_bytes(self): 1192 encoded, consumed = _codecs.raw_unicode_escape_encode("h\U0001d1f0i") 1193 self.assertEqual(encoded, b"h\\U0001d1f0i") 1194 self.assertEqual(consumed, 3) 1195 1196 1197class ErrorHandlerTests(unittest.TestCase): 1198 def test_backslashreplace_with_non_unicode_error_raises_type_error(self): 1199 handler = _codecs.lookup_error("backslashreplace") 1200 e = UserWarning() 1201 with self.assertRaisesRegex( 1202 TypeError, "don't know how to handle UserWarning in error callback" 1203 ): 1204 handler(e) 1205 1206 def test_backslashreplace_with_unicode_decode_error_returns_tuple(self): 1207 handler = _codecs.lookup_error("backslashreplace") 1208 result = handler(UnicodeDecodeError("foo", b"a", 0, 1, "baz")) 1209 self.assertIs(type(result), tuple) 1210 self.assertEqual(result, ("\\x61", 1)) 1211 result = handler( 1212 UnicodeDecodeError("foo", b"i need \xca\xfe now!", 6, 11, "baz") 1213 ) 1214 self.assertIs(type(result), tuple) 1215 self.assertEqual(result, ("\\x20\\xca\\xfe\\x20\\x6e", 11)) 1216 1217 def test_backslashreplace_with_unicode_encode_error_returns_tuple(self): 1218 handler = _codecs.lookup_error("backslashreplace") 1219 result = handler(UnicodeEncodeError("foo", "a", 0, 1, "baz")) 1220 self.assertIs(type(result), tuple) 1221 self.assertEqual(result, ("\\x61", 1)) 1222 result = handler(UnicodeEncodeError("foo", "\xdc", 0, 1, "baz")) 1223 self.assertEqual(result, ("\\xdc", 1)) 1224 result = handler(UnicodeEncodeError("foo", "\u1234", 0, 1, "baz")) 1225 self.assertEqual(result, ("\\u1234", 1)) 1226 result = handler(UnicodeEncodeError("foo", "\U00012345", 0, 1, "baz")) 1227 self.assertEqual(result, ("\\U00012345", 1)) 1228 1229 result = handler( 1230 UnicodeEncodeError("foo", "hello\xac\u4213\U0001f40dbaz!", 4, 9, "baz") 1231 ) 1232 self.assertIs(type(result), tuple) 1233 self.assertEqual(result, ("\\x6f\\xac\\u4213\\U0001f40d\\x62", 9)) 1234 1235 def test_backslashreplace_with_unicode_translate_error_returns_tuple(self): 1236 handler = _codecs.lookup_error("backslashreplace") 1237 result = handler(UnicodeTranslateError("a", 0, 1, "baz")) 1238 self.assertIs(type(result), tuple) 1239 self.assertEqual(result, ("\\x61", 1)) 1240 result = handler(UnicodeTranslateError("\xdc", 0, 1, "baz")) 1241 self.assertEqual(result, ("\\xdc", 1)) 1242 result = handler(UnicodeTranslateError("\u1234", 0, 1, "baz")) 1243 self.assertEqual(result, ("\\u1234", 1)) 1244 result = handler(UnicodeTranslateError("\U00012345", 0, 1, "baz")) 1245 self.assertEqual(result, ("\\U00012345", 1)) 1246 1247 result = handler( 1248 UnicodeTranslateError("hello\xac\u4213\U0001f40dbaz!", 4, 9, "baz") 1249 ) 1250 self.assertIs(type(result), tuple) 1251 self.assertEqual(result, ("\\x6f\\xac\\u4213\\U0001f40d\\x62", 9)) 1252 1253 def test_ignore_with_unicode_encode_error_returns_tuple(self): 1254 handler = _codecs.lookup_error("ignore") 1255 e = UnicodeEncodeError("foo", "bar", 44, 2, "baz") 1256 self.assertEqual(handler(e), ("", 2)) 1257 1258 def test_ignore_with_unicode_decode_error_returns_tuple(self): 1259 handler = _codecs.lookup_error("ignore") 1260 e = UnicodeDecodeError("foo", b"barbam", 99, 3, "baz") 1261 self.assertEqual(handler(e), ("", 3)) 1262 1263 def test_ignore_with_unicode_translate_error_returns_tuple(self): 1264 handler = _codecs.lookup_error("ignore") 1265 e = UnicodeTranslateError("barbazbam", 99, 5, "baz") 1266 self.assertEqual(handler(e), ("", 5)) 1267 1268 def test_ignore_with_non_unicode_error_raises_type_error(self): 1269 handler = _codecs.lookup_error("ignore") 1270 with self.assertRaisesRegex( 1271 TypeError, "don't know how to handle int in error callback" 1272 ): 1273 handler(42) 1274 1275 def test_strict_with_exception_raises(self): 1276 handler = _codecs.lookup_error("strict") 1277 e = UserWarning() 1278 with self.assertRaises(UserWarning) as ctx: 1279 handler(e) 1280 self.assertIs(ctx.exception, e) 1281 1282 def test_strict_without_exception_raises_type_error(self): 1283 handler = _codecs.lookup_error("strict") 1284 with self.assertRaisesRegex(TypeError, "codec must pass exception instance"): 1285 handler(42) 1286 1287 1288@pyro_only 1289class GeneralizedErrorHandlerTests(unittest.TestCase): 1290 def test_call_decode_error_with_strict_raises_unicode_decode_error(self): 1291 with self.assertRaises(UnicodeDecodeError): 1292 _codecs._call_decode_errorhandler( 1293 "strict", b"bad input", _str_array(), "reason", "encoding", 0, 0 1294 ) 1295 1296 def test_call_decode_error_with_ignore_returns_tuple(self): 1297 new_input, new_pos = _codecs._call_decode_errorhandler( 1298 "ignore", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1299 ) 1300 self.assertEqual(new_input, b"bad_input") 1301 self.assertEqual(new_pos, 2) 1302 1303 def test_call_decode_error_with_non_tuple_return_raises_type_error(self): 1304 def error_function(exc): 1305 return "not-a-tuple" 1306 1307 _codecs.register_error("not-a-tuple", error_function) 1308 with self.assertRaises(TypeError): 1309 _codecs._call_decode_errorhandler( 1310 "not-a-tuple", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1311 ) 1312 1313 def test_call_decode_error_with_small_tuple_return_raises_type_error(self): 1314 def error_function(exc): 1315 return ("one",) 1316 1317 _codecs.register_error("small-tuple", error_function) 1318 with self.assertRaises(TypeError): 1319 _codecs._call_decode_errorhandler( 1320 "small-tuple", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1321 ) 1322 1323 def test_call_decode_error_with_int_first_tuple_return_raises_type_error(self): 1324 def error_function(exc): 1325 return 1, 1 1326 1327 _codecs.register_error("int-first", error_function) 1328 with self.assertRaises(TypeError): 1329 _codecs._call_decode_errorhandler( 1330 "int-first", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1331 ) 1332 1333 def test_call_decode_error_with_string_second_tuple_return_raises_type_error(self): 1334 def error_function(exc): 1335 return "str_to_append", "new_pos" 1336 1337 _codecs.register_error("str-second", error_function) 1338 with self.assertRaises(TypeError): 1339 _codecs._call_decode_errorhandler( 1340 "str-second", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1341 ) 1342 1343 def test_call_decode_error_with_non_bytes_changed_input_returns_error(self): 1344 def error_function(err): 1345 err.object = 1 1346 return "str_to_append", err.end 1347 1348 _codecs.register_error("change-input-to-int", error_function) 1349 with self.assertRaises(TypeError): 1350 _codecs._call_decode_errorhandler( 1351 "change-input-to-int", 1352 b"bad_input", 1353 _str_array(), 1354 "reason", 1355 "encoding", 1356 1, 1357 2, 1358 ) 1359 1360 def test_call_decode_error_with_improper_index_returns_error(self): 1361 def error_function(exc): 1362 return "str_to_append", 10 1363 1364 _codecs.register_error("out-of-bounds-pos", error_function) 1365 with self.assertRaises(IndexError): 1366 _codecs._call_decode_errorhandler( 1367 "out-of-bounds-pos", 1368 b"bad_input", 1369 _str_array(), 1370 "reason", 1371 "encoding", 1372 1, 1373 2, 1374 ) 1375 1376 def test_call_decode_error_with_negative_index_return_returns_proper_index(self): 1377 def error_function(exc): 1378 return "str_to_append", -1 1379 1380 _codecs.register_error("negative-pos", error_function) 1381 new_input, new_pos = _codecs._call_decode_errorhandler( 1382 "negative-pos", b"bad_input", _str_array(), "reason", "encoding", 1, 2 1383 ) 1384 self.assertEqual(new_input, b"bad_input") 1385 self.assertEqual(new_pos, 8) 1386 1387 def test_call_decode_appends_string_to_output(self): 1388 def error_function(exc): 1389 return "str_to_append", exc.end 1390 1391 _codecs.register_error("well-behaved-test", error_function) 1392 result = _str_array() 1393 _codecs._call_decode_errorhandler( 1394 "well-behaved-test", b"bad_input", result, "reason", "encoding", 1, 2 1395 ) 1396 self.assertEqual(str(result), "str_to_append") 1397 1398 def test_call_encode_error_with_strict_calls_function(self): 1399 with self.assertRaises(UnicodeEncodeError): 1400 _codecs._call_encode_errorhandler( 1401 "strict", "bad_input", "reason", "encoding", 0, 0 1402 ) 1403 1404 def test_call_encode_error_with_ignore_calls_function(self): 1405 result, new_pos = _codecs._call_encode_errorhandler( 1406 "ignore", "bad_input", "reason", "encoding", 1, 2 1407 ) 1408 self.assertEqual(result, "") 1409 self.assertEqual(new_pos, 2) 1410 1411 def test_call_encode_error_with_non_tuple_return_raises_type_error(self): 1412 def error_function(exc): 1413 return "not-a-tuple" 1414 1415 _codecs.register_error("not-a-tuple", error_function) 1416 with self.assertRaises(TypeError): 1417 _codecs._call_encode_errorhandler( 1418 "not-a-tuple", "bad_input", "reason", "encoding", 1, 2 1419 ) 1420 1421 def test_call_encode_error_with_small_tuple_return_raises_type_error(self): 1422 def error_function(exc): 1423 return ("one",) 1424 1425 _codecs.register_error("small-tuple", error_function) 1426 with self.assertRaises(TypeError): 1427 _codecs._call_encode_errorhandler( 1428 "small-tuple", "bad_input", "reason", "encoding", 1, 2 1429 ) 1430 1431 def test_call_encode_error_with_int_first_tuple_return_raises_type_error(self): 1432 def error_function(exc): 1433 return 1, 1 1434 1435 _codecs.register_error("int-first", error_function) 1436 with self.assertRaises(TypeError): 1437 _codecs._call_encode_errorhandler( 1438 "int-first", "bad_input", "reason", "encoding", 1, 2 1439 ) 1440 1441 def test_call_encode_error_with_str_second_tuple_return_raises_type_error(self): 1442 def error_function(exc): 1443 return "str_to_append", "new_pos" 1444 1445 _codecs.register_error("str-second", error_function) 1446 with self.assertRaises(TypeError): 1447 _codecs._call_encode_errorhandler( 1448 "str-second", "bad_input", "reason", "encoding", 1, 2 1449 ) 1450 1451 def test_call_encode_error_with_changed_input_ignores_change(self): 1452 def error_function(err): 1453 err.object = 1 1454 return "str_to_append", err.end 1455 1456 _codecs.register_error("change-input-to-int", error_function) 1457 result, new_pos = _codecs._call_encode_errorhandler( 1458 "change-input-to-int", "bad_input", "reason", "encoding", 1, 2 1459 ) 1460 self.assertEqual(result, "str_to_append") 1461 self.assertEqual(new_pos, 2) 1462 1463 def test_call_encode_error_with_out_of_bounds_index_raises_index_error(self): 1464 def error_function(exc): 1465 return "str_to_append", 10 1466 1467 _codecs.register_error("out-of-bounds-pos", error_function) 1468 with self.assertRaises(IndexError): 1469 _codecs._call_encode_errorhandler( 1470 "out-of-bounds-pos", "bad_input", "reason", "encoding", 1, 2 1471 ) 1472 1473 def test_call_encode_error_with_negative_index_returns_proper_index(self): 1474 def error_function(exc): 1475 return "str_to_append", -1 1476 1477 _codecs.register_error("negative-pos", error_function) 1478 result, new_pos = _codecs._call_encode_errorhandler( 1479 "negative-pos", "bad_input", "reason", "encoding", 1, 2 1480 ) 1481 self.assertEqual(result, "str_to_append") 1482 self.assertEqual(new_pos, 8) 1483 1484 1485if __name__ == "__main__": 1486 unittest.main()