this repo has no description
at trunk 3017 lines 109 kB view raw
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 2#include <cstring> 3 4#include "Python.h" 5#include "gtest/gtest.h" 6 7#include "capi-fixture.h" 8#include "capi-testing.h" 9 10extern "C" int _Py_EncodeUTF8Ex(const wchar_t*, char**, size_t*, const char**, 11 int, _Py_error_handler); 12extern "C" wchar_t* _Py_DecodeUTF8_surrogateescape(const char*, Py_ssize_t, 13 size_t*); 14extern "C" int _Py_DecodeUTF8Ex(const char*, Py_ssize_t, wchar_t**, size_t*, 15 const char**, _Py_error_handler); 16extern "C" int _Py_normalize_encoding(const char*, char*, size_t); 17 18namespace py { 19namespace testing { 20 21using UnicodeExtensionApiTest = ExtensionApi; 22 23TEST_F(UnicodeExtensionApiTest, AsEncodedStringFromNonStringReturnsNull) { 24 EXPECT_EQ(PyUnicode_AsEncodedString(Py_None, nullptr, nullptr), nullptr); 25 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 26} 27 28TEST_F(UnicodeExtensionApiTest, AsEncodedStringWithNullSizeReturnsUTF8) { 29 const char* str = "utf-8 \xc3\xa8"; 30 PyObjectPtr pyunicode(PyUnicode_FromString(str)); 31 32 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, nullptr, nullptr)); 33 EXPECT_TRUE(isBytesEqualsCStr(bytes, str)); 34} 35 36TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIUsesErrorHandler) { 37 PyObjectPtr pyunicode(PyUnicode_FromString("non\xc3\xa8-ascii")); 38 39 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "ascii", "ignore")); 40 EXPECT_TRUE(isBytesEqualsCStr(bytes, "non-ascii")); 41} 42 43TEST_F(UnicodeExtensionApiTest, AsEncodedStringLatin1ReturnsLatin1) { 44 PyObjectPtr pyunicode(PyUnicode_FromString("latin-1 \xc3\xa8")); 45 46 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "latin-1", nullptr)); 47 EXPECT_TRUE(isBytesEqualsCStr(bytes, "latin-1 \xe8")); 48} 49 50TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIWithSubClassReturnsASCII) { 51 PyRun_SimpleString(R"( 52class SubStr(str): pass 53 54substr = SubStr("some string") 55)"); 56 PyObjectPtr 
substr(mainModuleGet("substr")); 57 const char* expected = "some string"; 58 59 PyObjectPtr bytes(PyUnicode_AsEncodedString(substr, "ascii", nullptr)); 60 EXPECT_TRUE(isBytesEqualsCStr(bytes, expected)); 61} 62 63TEST_F(UnicodeExtensionApiTest, 64 AsEncodedStringWithBytearrayReturnRaisesWarning) { 65 CaptureStdStreams streams; 66 PyRun_SimpleString(R"( 67import _codecs 68 69def encoder(s): 70 return bytearray(b"expected"), "two" 71 72def lookup_function(encoding): 73 if encoding == "encode-with-bytearray-return": 74 return encoder, 0, 0, 0 75 76_codecs.register(lookup_function) 77substr = "some test" 78)"); 79 PyObjectPtr substr(mainModuleGet("substr")); 80 PyObjectPtr bytes(PyUnicode_AsEncodedString( 81 substr, "encode-with-bytearray-return", nullptr)); 82 EXPECT_TRUE(isBytesEqualsCStr(bytes, "expected")); 83 EXPECT_EQ(PyErr_Occurred(), nullptr); 84 EXPECT_NE(streams.err().find( 85 "RuntimeWarning: encoder encode-with-bytearray-return " 86 "returned bytearray instead of bytes; use codecs.encode() to " 87 "encode to arbitrary types\n"), 88 std::string::npos); 89} 90 91TEST_F(UnicodeExtensionApiTest, 92 AsEncodedStringWithNonBytelikeReturnRaisesError) { 93 PyRun_SimpleString(R"( 94import _codecs 95 96def encoder(s): 97 return "not-byteslike", "two" 98 99def lookup_function(encoding): 100 if encoding == "encode-with-non-bytelike-return": 101 return encoder, 0, 0, 0 102 103_codecs.register(lookup_function) 104substr = "some test" 105)"); 106 PyObjectPtr substr(mainModuleGet("substr")); 107 EXPECT_EQ(PyUnicode_AsEncodedString(substr, "encode-with-non-bytelike-return", 108 nullptr), 109 nullptr); 110 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 111} 112 113TEST_F(UnicodeExtensionApiTest, AsUTF8FromNonStringReturnsNull) { 114 // Pass a non string object 115 const char* cstring = PyUnicode_AsUTF8AndSize(Py_None, nullptr); 116 EXPECT_EQ(nullptr, cstring); 117} 118 119TEST_F(UnicodeExtensionApiTest, AsUTF8WithNullSizeReturnsCString) { 120 const char* str = "Some C 
String"; 121 PyObjectPtr pyunicode(PyUnicode_FromString(str)); 122 123 // Pass a nullptr size 124 const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, nullptr); 125 ASSERT_NE(nullptr, cstring); 126 EXPECT_STREQ(str, cstring); 127} 128 129TEST_F(UnicodeExtensionApiTest, AsUTF8WithSubClassReturnsCString) { 130 PyRun_SimpleString(R"( 131class SubStr(str): pass 132 133substr = SubStr("some string") 134)"); 135 PyObjectPtr substr(mainModuleGet("substr")); 136 Py_ssize_t size = 0; 137 const char* expected = "some string"; 138 139 const char* c_str = PyUnicode_AsUTF8AndSize(substr, &size); 140 ASSERT_NE(c_str, nullptr); 141 EXPECT_STREQ(c_str, expected); 142} 143 144TEST_F(UnicodeExtensionApiTest, AsUTF8WithReferencedSizeReturnsCString) { 145 const char* str = "Some C String"; 146 PyObjectPtr pyunicode(PyUnicode_FromString(str)); 147 148 // Pass a size reference 149 Py_ssize_t size = 0; 150 const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, &size); 151 ASSERT_NE(nullptr, cstring); 152 EXPECT_STREQ(str, cstring); 153 EXPECT_EQ(size, static_cast<Py_ssize_t>(std::strlen(str))); 154 155 // Repeated calls should return the same buffer and still set the size. 156 size = 0; 157 const char* cstring2 = PyUnicode_AsUTF8AndSize(pyunicode, &size); 158 ASSERT_NE(cstring2, nullptr); 159 EXPECT_EQ(cstring2, cstring); 160} 161 162TEST_F(UnicodeExtensionApiTest, AsUTF8ReturnsCString) { 163 const char* str = "Some other C String"; 164 PyObjectPtr pyobj(PyUnicode_FromString(str)); 165 166 const char* cstring = PyUnicode_AsUTF8(pyobj); 167 ASSERT_NE(cstring, nullptr); 168 EXPECT_STREQ(cstring, str); 169 170 // Make sure repeated calls on the same object return the same buffer. 
171 const char* cstring2 = PyUnicode_AsUTF8(pyobj); 172 ASSERT_NE(cstring2, nullptr); 173 EXPECT_EQ(cstring2, cstring); 174} 175 176TEST_F(UnicodeExtensionApiTest, AsUTF8WithSurrogatesRaisesUnicodeEncodeError) { 177 PyObjectPtr str(PyUnicode_DecodeLocale("hello\x80world", "surrogateescape")); 178 179 EXPECT_EQ(PyUnicode_AsUTF8(str), nullptr); 180 PyObject *exc, *value, *tb; 181 PyErr_Fetch(&exc, &value, &tb); 182 ASSERT_NE(exc, nullptr); 183 ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeEncodeError)); 184 Py_ssize_t temp; 185 PyObjectPtr msg(PyUnicodeEncodeError_GetReason(value)); 186 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "surrogates not allowed")); 187 PyUnicodeEncodeError_GetStart(value, &temp); 188 EXPECT_EQ(temp, 5); 189 PyUnicodeEncodeError_GetEnd(value, &temp); 190 EXPECT_EQ(temp, 6); 191 Py_DECREF(exc); 192 Py_DECREF(value); 193 Py_XDECREF(tb); 194} 195 196TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithNonStringReturnsNull) { 197 PyObjectPtr bytes(_PyUnicode_AsUTF8String(Py_None, nullptr)); 198 ASSERT_EQ(bytes, nullptr); 199 ASSERT_NE(PyErr_Occurred(), nullptr); 200} 201 202TEST_F(UnicodeExtensionApiTest, AsUTF8StringReturnsBytes) { 203 PyObjectPtr unicode(PyUnicode_FromString("foo")); 204 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr)); 205 ASSERT_EQ(PyErr_Occurred(), nullptr); 206 ASSERT_TRUE(PyBytes_Check(bytes)); 207 EXPECT_EQ(PyBytes_Size(bytes), 3); 208 EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); 209} 210 211TEST_F(UnicodeExtensionApiTest, 212 AsUTF8StringWithInvalidCodepointRaisesEncodeError) { 213 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape")); 214 ASSERT_EQ(PyErr_Occurred(), nullptr); 215 ASSERT_TRUE(PyUnicode_CheckExact(unicode)); 216 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr)); 217 ASSERT_NE(PyErr_Occurred(), nullptr); 218 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); 219 EXPECT_EQ(bytes, nullptr); 220} 221 222TEST_F(UnicodeExtensionApiTest, 
AsUTF8StringWithReplaceErrorsReturnsBytes) { 223 PyObjectPtr unicode(PyUnicode_DecodeASCII("foo\x80", 4, "surrogateescape")); 224 ASSERT_EQ(PyErr_Occurred(), nullptr); 225 ASSERT_TRUE(PyUnicode_CheckExact(unicode)); 226 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, "replace")); 227 ASSERT_EQ(PyErr_Occurred(), nullptr); 228 229 ASSERT_TRUE(PyBytes_Check(bytes)); 230 EXPECT_EQ(PyBytes_Size(bytes), 4); 231 EXPECT_STREQ(PyBytes_AsString(bytes), "foo?"); 232} 233 234TEST_F(UnicodeExtensionApiTest, AsUCS4WithNonStringReturnsNull) { 235 // Pass a non string object. 236 Py_UCS4* ucs4_string = PyUnicode_AsUCS4(Py_None, nullptr, 0, 0); 237 EXPECT_EQ(nullptr, ucs4_string); 238} 239 240TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithSubClassReturnsBytes) { 241 PyRun_SimpleString(R"( 242class SubStr(str): pass 243 244substr = SubStr("foo") 245)"); 246 PyObjectPtr substr(mainModuleGet("substr")); 247 PyObjectPtr bytes(_PyUnicode_AsUTF8String(substr, nullptr)); 248 ASSERT_EQ(PyErr_Occurred(), nullptr); 249 ASSERT_TRUE(PyBytes_Check(bytes)); 250 EXPECT_EQ(PyBytes_Size(bytes), 3); 251 EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); 252} 253 254TEST_F(UnicodeExtensionApiTest, AsUCS4WithNullBufferReturnsNull) { 255 PyObjectPtr unicode(PyUnicode_FromString("foo")); 256 Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, nullptr, 0, 0); 257 EXPECT_EQ(nullptr, ucs4_string); 258} 259 260TEST_F(UnicodeExtensionApiTest, 261 AsUCS4WithShortBufferWithoutCopyNullReturnsNotNullTerminated) { 262 PyObjectPtr unicode(PyUnicode_FromString("abc")); 263 Py_UCS4 target[4]; 264 target[0] = 1; 265 Py_UCS4* ucs4_string = 266 PyUnicode_AsUCS4(unicode, target, 2, 0 /* copy_null */); 267 EXPECT_EQ(nullptr, ucs4_string); 268 EXPECT_EQ(Py_UCS4{1}, target[0]); 269} 270 271TEST_F(UnicodeExtensionApiTest, 272 AsUCS4WithShortBufferWithCopyNullReturnsNullTerminated) { 273 PyObjectPtr unicode(PyUnicode_FromString("abc")); 274 Py_UCS4 target[4]; 275 target[0] = 1; 276 Py_UCS4* ucs4_string = 277 
PyUnicode_AsUCS4(unicode, target, 2, 1 /* copy_null */); 278 EXPECT_EQ(nullptr, ucs4_string); 279 EXPECT_EQ(Py_UCS4{0}, target[0]); 280} 281 282TEST_F(UnicodeExtensionApiTest, AsUCS4WithoutCopyNullReturnsNotNullTerminated) { 283 Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0}; 284 PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, 285 Py_ARRAY_LENGTH(buffer))); 286 Py_UCS4 target[6]; 287 target[5] = 1; 288 Py_UCS4* ucs4_string = 289 PyUnicode_AsUCS4(unicode, target, 5, 0 /* copy_null */); 290 EXPECT_EQ(target, ucs4_string); 291 EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]); 292 EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]); 293 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]); 294 EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]); 295 EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]); 296 EXPECT_EQ(Py_UCS4{1}, ucs4_string[5]); 297} 298 299TEST_F(UnicodeExtensionApiTest, AsUCS4WithCopyNullReturnsNullTerminated) { 300 Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0}; 301 PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, 302 Py_ARRAY_LENGTH(buffer))); 303 Py_UCS4 target[6]; 304 target[5] = 1; 305 Py_UCS4* ucs4_string = 306 PyUnicode_AsUCS4(unicode, target, 6, 1 /* copy_null */); 307 EXPECT_EQ(target, ucs4_string); 308 EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]); 309 EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]); 310 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]); 311 EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]); 312 EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]); 313 EXPECT_EQ(Py_UCS4{0}, ucs4_string[5]); 314} 315 316TEST_F(UnicodeExtensionApiTest, 317 AsUCS4WithSubClassAndCopyNullReturnsNullTerminatedString) { 318 PyRun_SimpleString(R"( 319class SubStr(str): pass 320 321substr = SubStr("foo") 322)"); 323 PyObjectPtr unicode(mainModuleGet("substr")); 324 Py_UCS4 target[4]; 325 Py_UCS4* ucs4_string = 326 PyUnicode_AsUCS4(unicode, target, 4, 1 /* copy_null */); 327 EXPECT_EQ(Py_UCS4{'f'}, ucs4_string[0]); 328 EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[1]); 329 
EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[2]); 330 EXPECT_EQ(Py_UCS4{0}, ucs4_string[3]); 331} 332 333// Delegating testing to AsUCS4. 334TEST_F(UnicodeExtensionApiTest, 335 AsUCS4WithNonAsciiReturnsCodePointsNullTerminated) { 336 PyObjectPtr unicode(PyUnicode_FromString("ab\u00e4p")); 337 Py_UCS4* ucs4_string = PyUnicode_AsUCS4Copy(unicode); 338 EXPECT_EQ(Py_UCS4{'a'}, ucs4_string[0]); 339 EXPECT_EQ(Py_UCS4{'b'}, ucs4_string[1]); 340 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]); 341 EXPECT_EQ(Py_UCS4{'p'}, ucs4_string[3]); 342 EXPECT_EQ(Py_UCS4{0}, ucs4_string[4]); 343 PyMem_Free(ucs4_string); 344} 345 346TEST_F(UnicodeExtensionApiTest, AsWideCharWithNullptrRaisesSystemError) { 347 wchar_t wide_string[1]; 348 EXPECT_EQ( 349 PyUnicode_AsWideChar(nullptr, wide_string, Py_ARRAY_LENGTH(wide_string)), 350 -1); 351 ASSERT_NE(PyErr_Occurred(), nullptr); 352 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 353} 354 355TEST_F(UnicodeExtensionApiTest, AsWideCharWithNonStringRaisesTypeError) { 356 PyObjectPtr not_string(PyTuple_New(0)); 357 wchar_t wide_string[1]; 358 EXPECT_EQ(PyUnicode_AsWideChar(not_string, wide_string, 359 Py_ARRAY_LENGTH(wide_string)), 360 -1); 361 ASSERT_NE(PyErr_Occurred(), nullptr); 362 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 363} 364 365TEST_F(UnicodeExtensionApiTest, 366 AsWideCharWithNonASCIICodePointReturnsNullTerminatedWideCharString) { 367 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z")); 368 wchar_t wide_string[4]; 369 EXPECT_EQ(Py_ssize_t{3}, PyUnicode_AsWideChar(unicode, wide_string, 370 Py_ARRAY_LENGTH(wide_string))); 371 ASSERT_EQ(PyErr_Occurred(), nullptr); 372 EXPECT_EQ('a', wide_string[0]); 373 EXPECT_EQ(0xe5, wide_string[1]); 374 EXPECT_EQ('z', wide_string[2]); 375 EXPECT_EQ(0, wide_string[3]); 376} 377 378TEST_F(UnicodeExtensionApiTest, AsWideCharCopiesUpToSizeElements) { 379 PyObjectPtr unicode(PyUnicode_FromString("abcdef")); 380 wchar_t wide_string[5] = {'x', 'x', 'x', 'x', 'x'}; 381 EXPECT_EQ(Py_ssize_t{3}, 
PyUnicode_AsWideChar(unicode, wide_string, 3)); 382 ASSERT_EQ(PyErr_Occurred(), nullptr); 383 EXPECT_EQ('a', wide_string[0]); 384 EXPECT_EQ('b', wide_string[1]); 385 EXPECT_EQ('c', wide_string[2]); 386 EXPECT_EQ('x', wide_string[3]); 387 EXPECT_EQ('x', wide_string[4]); 388} 389 390TEST_F(UnicodeExtensionApiTest, AsWideCharWithEmbeddedNullWritesNullChar) { 391 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4)); 392 wchar_t wide_string[5]; 393 EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string, 394 Py_ARRAY_LENGTH(wide_string))); 395 EXPECT_EQ(PyErr_Occurred(), nullptr); 396 EXPECT_EQ('a', wide_string[0]); 397 EXPECT_EQ('b', wide_string[1]); 398 EXPECT_EQ('\0', wide_string[2]); 399 EXPECT_EQ('c', wide_string[3]); 400 EXPECT_EQ('\0', wide_string[4]); 401} 402 403TEST_F(UnicodeExtensionApiTest, 404 AsWideCharWithSizeEqualsBufferSizeDoesNotWriteNul) { 405 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4)); 406 wchar_t wide_string[4]; 407 EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string, 4)); 408 EXPECT_EQ(PyErr_Occurred(), nullptr); 409 EXPECT_EQ('a', wide_string[0]); 410 EXPECT_EQ('b', wide_string[1]); 411 EXPECT_EQ('\0', wide_string[2]); 412 EXPECT_EQ('c', wide_string[3]); 413} 414 415TEST_F(UnicodeExtensionApiTest, 416 AsWideCharWithBufferSizeLessThanStringSizeWritesUpToBufferSize) { 417 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4)); 418 wchar_t wide_string[2]; 419 EXPECT_EQ(2, PyUnicode_AsWideChar(unicode, wide_string, 420 Py_ARRAY_LENGTH(wide_string))); 421 EXPECT_EQ(PyErr_Occurred(), nullptr); 422 EXPECT_EQ('a', wide_string[0]); 423 EXPECT_EQ('b', wide_string[1]); 424} 425 426TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNullptrRaisesSystemError) { 427 EXPECT_EQ(PyUnicode_AsWideCharString(nullptr, nullptr), nullptr); 428 ASSERT_NE(PyErr_Occurred(), nullptr); 429 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 430} 431 432TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonStringRaisesTypeError) { 433 
PyObjectPtr not_string(PyTuple_New(0)); 434 EXPECT_EQ(PyUnicode_AsWideCharString(not_string, nullptr), nullptr); 435 ASSERT_NE(PyErr_Occurred(), nullptr); 436 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 437} 438 439TEST_F( 440 UnicodeExtensionApiTest, 441 AsWideCharStringWithNonASCIICodePointReturnsNullTerminatedWideCharString) { 442 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z")); 443 wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, nullptr); 444 ASSERT_EQ(PyErr_Occurred(), nullptr); 445 EXPECT_EQ('a', wide_string[0]); 446 EXPECT_EQ(0xe5, wide_string[1]); 447 EXPECT_EQ('z', wide_string[2]); 448 EXPECT_EQ(0, wide_string[3]); 449 PyMem_Free(wide_string); 450} 451 452TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonNullSizeSetsSize) { 453 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z")); 454 Py_ssize_t size = 0xdeadbeef; 455 wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, &size); 456 ASSERT_EQ(PyErr_Occurred(), nullptr); 457 EXPECT_EQ(size, 3); 458 EXPECT_EQ('a', wide_string[0]); 459 EXPECT_EQ(0xe5, wide_string[1]); 460 EXPECT_EQ('z', wide_string[2]); 461 EXPECT_EQ(0, wide_string[3]); 462 PyMem_Free(wide_string); 463} 464 465TEST_F(UnicodeExtensionApiTest, 466 AsWideCharStringWithEmbeddedNullRaisesValueError) { 467 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4)); 468 EXPECT_EQ(PyUnicode_AsWideCharString(unicode, nullptr), nullptr); 469 ASSERT_NE(PyErr_Occurred(), nullptr); 470 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 471} 472 473TEST_F(UnicodeExtensionApiTest, CheckWithStrReturnsTrue) { 474 PyObjectPtr str(PyUnicode_FromString("ab\u00e4p")); 475 EXPECT_TRUE(PyUnicode_Check(str)); 476 EXPECT_TRUE(PyUnicode_CheckExact(str)); 477} 478 479TEST_F(UnicodeExtensionApiTest, CheckWithSubClassIsNotExact) { 480 PyRun_SimpleString(R"( 481class SubStr(str): pass 482 483substr = SubStr('ok') 484)"); 485 PyObjectPtr substr(mainModuleGet("substr")); 486 ASSERT_EQ(PyErr_Occurred(), nullptr); 487 
EXPECT_TRUE(PyUnicode_Check(substr)); 488 EXPECT_FALSE(PyUnicode_CheckExact(substr)); 489} 490 491TEST_F(UnicodeExtensionApiTest, CheckWithUnrelatedTypeReturnsFalse) { 492 PyObjectPtr pylong(PyLong_FromLong(10)); 493 EXPECT_FALSE(PyUnicode_Check(pylong)); 494 EXPECT_FALSE(PyUnicode_CheckExact(pylong)); 495} 496 497TEST_F(UnicodeExtensionApiTest, DATAReturnsCStringContainingStrContents) { 498 const char* cstr = "hello"; 499 PyObjectPtr str(PyUnicode_FromString(cstr)); 500 void* data = PyUnicode_DATA(str.get()); 501 EXPECT_STREQ(reinterpret_cast<char*>(data), cstr); 502} 503 504TEST_F(UnicodeExtensionApiTest, DATAReturnsSamePointer) { 505 PyObjectPtr str(PyUnicode_FromString("hello")); 506 void* p1 = PyUnicode_DATA(str.get()); 507 void* p2 = PyUnicode_DATA(str.get()); 508 EXPECT_EQ(p1, p2); 509} 510 511TEST_F(UnicodeExtensionApiTest, FormatWithNullFormatRaisesBadInternalCall) { 512 PyObjectPtr str(PyUnicode_FromString("foo")); 513 EXPECT_EQ(nullptr, PyUnicode_Format(nullptr, str)); 514 ASSERT_NE(nullptr, PyErr_Occurred()); 515 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 516} 517 518TEST_F(UnicodeExtensionApiTest, FormatWithNullArgsRaisesBadInternalCall) { 519 PyObjectPtr str(PyUnicode_FromString("foo")); 520 EXPECT_EQ(nullptr, PyUnicode_Format(str, nullptr)); 521 ASSERT_NE(nullptr, PyErr_Occurred()); 522 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 523} 524 525TEST_F(UnicodeExtensionApiTest, FormatWithNonStrFormatRaisesTypeError) { 526 PyObjectPtr format(PyLong_FromLong(10)); 527 PyObjectPtr str(PyUnicode_FromString("foo")); 528 EXPECT_EQ(nullptr, PyUnicode_Format(format, str)); 529 ASSERT_NE(nullptr, PyErr_Occurred()); 530 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 531} 532 533TEST_F(UnicodeExtensionApiTest, 534 FormatWithMismatchedFormatAndArgsRaisesTypeError) { 535 PyObjectPtr str(PyUnicode_FromString("foo%s%s")); 536 PyObjectPtr args(PyUnicode_FromString("bar")); 537 EXPECT_EQ(nullptr, PyUnicode_Format(str, args)); 538 
ASSERT_NE(nullptr, PyErr_Occurred()); 539 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 540} 541 542TEST_F(UnicodeExtensionApiTest, FormatWithStrArgsReturnsStr) { 543 PyObjectPtr str(PyUnicode_FromString("foo%s")); 544 PyObjectPtr args(PyUnicode_FromString("bar")); 545 PyObjectPtr result(PyUnicode_Format(str, args)); 546 EXPECT_NE(nullptr, result); 547 EXPECT_EQ(nullptr, PyErr_Occurred()); 548 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobar")); 549} 550 551TEST_F(UnicodeExtensionApiTest, FormatWithTupleArgsReturnsStr) { 552 PyObjectPtr str(PyUnicode_FromString("foo%s%s")); 553 PyObjectPtr args(PyTuple_Pack(2, PyUnicode_FromString("bar"), 554 PyUnicode_FromString("baz"))); 555 PyObjectPtr result(PyUnicode_Format(str, args)); 556 EXPECT_NE(nullptr, result); 557 EXPECT_EQ(nullptr, PyErr_Occurred()); 558 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobarbaz")); 559} 560 561TEST_F(UnicodeExtensionApiTest, FSDecoderWithStrSetsString) { 562 PyObjectPtr str(PyUnicode_FromString("foo")); 563 PyObject* result; 564 EXPECT_EQ(PyUnicode_FSDecoder(str, &result), Py_CLEANUP_SUPPORTED); 565 566 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foo")); 567 568 EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1); 569 EXPECT_EQ(result, nullptr); 570} 571 572TEST_F(UnicodeExtensionApiTest, FSDecoderWithBytesSetsString) { 573 const char bytes[] = "bar"; 574 PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1)); 575 PyObject* result; 576 EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), Py_CLEANUP_SUPPORTED); 577 578 EXPECT_TRUE(isUnicodeEqualsCStr(result, bytes)); 579 580 EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1); 581 EXPECT_EQ(result, nullptr); 582} 583 584TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesValueError) { 585 const char bytes[] = "foo\0bar"; 586 PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1)); 587 PyObject* result; 588 EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), 0); 589 EXPECT_NE(PyErr_Occurred(), nullptr); 590 
EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 591} 592 593TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesTypeError) { 594 PyObjectPtr pyint(PyLong_FromLong(42)); 595 PyObject* result; 596 EXPECT_EQ(PyUnicode_FSDecoder(pyint, &result), 0); 597 EXPECT_NE(PyErr_Occurred(), nullptr); 598 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 599} 600 601TEST_F(UnicodeExtensionApiTest, FindWithNonStrSelfRaisesTypeError) { 602 PyObject* self = Py_None; 603 PyObjectPtr sub(PyUnicode_FromString("ll")); 604 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2); 605 ASSERT_NE(PyErr_Occurred(), nullptr); 606 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 607} 608 609TEST_F(UnicodeExtensionApiTest, FindWithNonStrSubRaisesTypeError) { 610 PyObjectPtr self(PyUnicode_FromString("hello")); 611 PyObject* sub = Py_None; 612 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2); 613 ASSERT_NE(PyErr_Occurred(), nullptr); 614 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 615} 616 617TEST_F(UnicodeExtensionApiTest, FindForwardReturnsLeftmostStartIndex) { 618 PyObjectPtr self(PyUnicode_FromString("hello")); 619 PyObjectPtr sub(PyUnicode_FromString("ll")); 620 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2); 621 EXPECT_EQ(PyErr_Occurred(), nullptr); 622} 623 624TEST_F(UnicodeExtensionApiTest, 625 FindForwardWithSubClassReturnsLeftmostStartIndex) { 626 PyRun_SimpleString(R"( 627class SubStr(str): pass 628 629substr = SubStr('hello') 630)"); 631 PyObjectPtr self(mainModuleGet("substr")); 632 PyObjectPtr sub(PyUnicode_FromString("ll")); 633 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2); 634 EXPECT_EQ(PyErr_Occurred(), nullptr); 635} 636 637TEST_F(UnicodeExtensionApiTest, FindReturnsNegativeOneWithNonexistentSubstr) { 638 PyObjectPtr self(PyUnicode_FromString("hello")); 639 PyObjectPtr sub(PyUnicode_FromString("xx")); 640 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -1); 641 EXPECT_EQ(PyErr_Occurred(), nullptr); 642} 643 644TEST_F(UnicodeExtensionApiTest, 645 
FindReverseReturnsNegativeOneWithNonexistentSubstr) { 646 PyObjectPtr self(PyUnicode_FromString("hello")); 647 PyObjectPtr sub(PyUnicode_FromString("xx")); 648 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, -1), -1); 649 EXPECT_EQ(PyErr_Occurred(), nullptr); 650} 651 652TEST_F(UnicodeExtensionApiTest, FindReverseReturnsRightmostStartIndex) { 653 PyObjectPtr self(PyUnicode_FromString("helloll")); 654 PyObjectPtr sub(PyUnicode_FromString("ll")); 655 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 7, -1), 5); 656 EXPECT_EQ(PyErr_Occurred(), nullptr); 657} 658 659TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeStartSearchesFromEnd) { 660 PyObjectPtr self(PyUnicode_FromString("hello")); 661 EXPECT_EQ(4, PyUnicode_FindChar(self, Py_UCS4{'o'}, -2, 5, 1)); 662 EXPECT_EQ(PyErr_Occurred(), nullptr); 663} 664 665TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeEndSearchesFromEnd) { 666 PyObjectPtr self(PyUnicode_FromString("hello")); 667 EXPECT_EQ(1, PyUnicode_FindChar(self, Py_UCS4{'e'}, 0, -3, 1)); 668 EXPECT_EQ(PyErr_Occurred(), nullptr); 669} 670 671TEST_F(UnicodeExtensionApiTest, 672 FindCharWithExistentCharEndGreaterThanLengthClipsEnd) { 673 PyObjectPtr self(PyUnicode_FromString("hello")); 674 Py_UCS4 ch = 'h'; 675 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), 0); 676 EXPECT_EQ(PyErr_Occurred(), nullptr); 677} 678 679TEST_F(UnicodeExtensionApiTest, 680 FindCharWithNonExistentCharEndGreaterThanLengthClipsEnd) { 681 PyObjectPtr self(PyUnicode_FromString("hello")); 682 Py_UCS4 ch = 'q'; 683 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), -1); 684 EXPECT_EQ(PyErr_Occurred(), nullptr); 685} 686 687TEST_F(UnicodeExtensionApiTest, FindCharFindsChar) { 688 PyObjectPtr self(PyUnicode_FromString("hello")); 689 Py_UCS4 ch = 'h'; 690 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0); 691 EXPECT_EQ(PyErr_Occurred(), nullptr); 692} 693 694TEST_F(UnicodeExtensionApiTest, FindCharWithStrSubClassReturnsLeftmostIndex) { 695 PyRun_SimpleString(R"( 696class SubStr(str): pass 697 
698substr = SubStr('hello') 699)"); 700 PyObjectPtr self(mainModuleGet("substr")); 701 Py_UCS4 ch = 'h'; 702 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0); 703 EXPECT_EQ(PyErr_Occurred(), nullptr); 704} 705 706TEST_F(UnicodeExtensionApiTest, FindCharFindsCharInMiddleOfString) { 707 PyObjectPtr self(PyUnicode_FromString("hello")); 708 Py_UCS4 ch = 'l'; 709 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 2); 710 EXPECT_EQ(PyErr_Occurred(), nullptr); 711} 712 713TEST_F(UnicodeExtensionApiTest, FindCharReverseFindsCharInMiddleOfString) { 714 PyObjectPtr self(PyUnicode_FromString("hello")); 715 Py_UCS4 ch = 'l'; 716 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, -1), 3); 717 EXPECT_EQ(PyErr_Occurred(), nullptr); 718} 719 720TEST_F(UnicodeExtensionApiTest, FindCharWithNonExistentCharDoesNotFindChar) { 721 PyObjectPtr self(PyUnicode_FromString("hello")); 722 Py_UCS4 ch = 'q'; 723 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), -1); 724 EXPECT_EQ(PyErr_Occurred(), nullptr); 725} 726 727TEST_F(UnicodeExtensionApiTest, FindCharWithCharBeforeWindowDoesNotFindChar) { 728 PyObjectPtr self(PyUnicode_FromString("hello")); 729 Py_UCS4 ch = 'h'; 730 EXPECT_EQ(PyUnicode_FindChar(self, ch, 2, 5, 1), -1); 731 EXPECT_EQ(PyErr_Occurred(), nullptr); 732} 733 734TEST_F(UnicodeExtensionApiTest, FindCharWithCharAfterWindowDoesNotFindChar) { 735 PyObjectPtr self(PyUnicode_FromString("hello")); 736 Py_UCS4 ch = 'o'; 737 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), -1); 738 EXPECT_EQ(PyErr_Occurred(), nullptr); 739} 740 741TEST_F(UnicodeExtensionApiTest, FindCharWithUnicodeCharFindsChar) { 742 PyObjectPtr self(PyUnicode_FromString("h\u00e9llo")); 743 Py_UCS4 ch = 0xE9; 744 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), 1); 745 EXPECT_EQ(PyErr_Occurred(), nullptr); 746} 747 748TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesEmptyString) { 749 PyObjectPtr pyuni(PyUnicode_FromStringAndSize(nullptr, 0)); 750 EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, "")); 751 
EXPECT_EQ(PyErr_Occurred(), nullptr); 752} 753 754TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSizedString) { 755 const char* str = "Some string"; 756 PyObjectPtr pyuni(PyUnicode_FromStringAndSize(str, 11)); 757 EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, str)); 758 EXPECT_EQ(PyErr_Occurred(), nullptr); 759} 760 761TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSmallerString) { 762 PyObjectPtr str(PyUnicode_FromStringAndSize("1234567890", 5)); 763 EXPECT_TRUE(isUnicodeEqualsCStr(str, "12345")); 764 EXPECT_EQ(PyErr_Occurred(), nullptr); 765} 766 767TEST_F(UnicodeExtensionApiTest, FromStringAndSizeFailsNegSize) { 768 PyObjectPtr pyuni(PyUnicode_FromStringAndSize("a", -1)); 769 ASSERT_EQ(pyuni, nullptr); 770 771 ASSERT_NE(PyErr_Occurred(), nullptr); 772 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 773} 774 775TEST_F(UnicodeExtensionApiTest, FromStringAndSizeIncrementsRefCount) { 776 PyObject* pyuni = PyUnicode_FromStringAndSize("Some string", 11); 777 ASSERT_NE(pyuni, nullptr); 778 EXPECT_GE(Py_REFCNT(pyuni), 1); 779 Py_DECREF(pyuni); 780 EXPECT_EQ(PyErr_Occurred(), nullptr); 781} 782 783TEST_F(UnicodeExtensionApiTest, READWithOneByteKindReturnsCharAtIndex) { 784 const char* str = "foo"; 785 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 0), Py_UCS4{'f'}); 786 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 1), Py_UCS4{'o'}); 787 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 2), Py_UCS4{'o'}); 788} 789 790TEST_F(UnicodeExtensionApiTest, READWithTwoByteKindReturnsCharAtIndex) { 791 const char* str = "quux"; 792 // This assumes little-endian architecture. No static assert because we can't 793 // include that enum and macro in these tests. 
794 EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 0), 795 Py_UCS4{0x7571}); // qu 796 EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 1), 797 Py_UCS4{0x7875}); // ux 798} 799 800TEST_F(UnicodeExtensionApiTest, READWithFourByteKindReturnsCharAtIndex) { 801 const char* str = "quux"; 802 // This assumes little-endian architecture. No static assert because we can't 803 // include that enum and macro in these tests. 804 EXPECT_EQ(PyUnicode_READ(PyUnicode_4BYTE_KIND, str, 0), Py_UCS4{0x78757571}); 805} 806 807TEST_F(UnicodeExtensionApiTest, READCHARReturnsCharAtIndex) { 808 PyObjectPtr str(PyUnicode_FromString("foo")); 809 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{'f'}); 810 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'o'}); 811 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 2), Py_UCS4{'o'}); 812 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 3), Py_UCS4{'\0'}); 813} 814 815TEST_F(UnicodeExtensionApiTest, READCHARReturnsUnicodeCodePointAtIndex) { 816 PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88")); 817 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1); 818 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{0x10348}); 819 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'\0'}); 820 821 PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me")); 822 EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5); 823 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 0), Py_UCS4{'c'}); 824 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 1), Py_UCS4{'r'}); 825 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 2), Py_UCS4{0xE9}); 826 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 3), Py_UCS4{'m'}); 827 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 4), Py_UCS4{'e'}); 828 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 5), Py_UCS4{'\0'}); 829} 830 831TEST_F(UnicodeExtensionApiTest, READReadsCharsFromDATA) { 832 PyObjectPtr str(PyUnicode_FromString("foo")); 833 void* data = PyUnicode_DATA(str.get()); 834 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 0), Py_UCS4{'f'}); 835 
EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 1), Py_UCS4{'o'}); 836 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 2), Py_UCS4{'o'}); 837} 838 839TEST_F(UnicodeExtensionApiTest, ReadCharReturnsCharAtIndex) { 840 PyObjectPtr str(PyUnicode_FromString("foo")); 841 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{'f'}); 842 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{'o'}); 843 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 2), Py_UCS4{'o'}); 844} 845 846TEST_F(UnicodeExtensionApiTest, ReadCharReturnsUnicodeCodePointAtIndex) { 847 PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88")); 848 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1); 849 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{0x10348}); 850 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{0xFFFFFFFF}); 851 ASSERT_NE(PyErr_Occurred(), nullptr); 852 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); 853 PyErr_Clear(); 854 855 PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me")); 856 EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5); 857 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 0), Py_UCS4{'c'}); 858 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 1), Py_UCS4{'r'}); 859 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 2), Py_UCS4{0xE9}); 860 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 3), Py_UCS4{'m'}); 861 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 4), Py_UCS4{'e'}); 862 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 5), Py_UCS4{0xFFFFFFFF}); 863 ASSERT_NE(PyErr_Occurred(), nullptr); 864 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); 865 PyErr_Clear(); 866} 867 868TEST_F(UnicodeExtensionApiTest, ReadCharWithNonStrRaisesTypeError) { 869 PyObjectPtr list(PyList_New(3)); 870 EXPECT_EQ(PyUnicode_ReadChar(list.get(), 0), Py_UCS4{0xFFFFFFFF}); 871 ASSERT_NE(PyErr_Occurred(), nullptr); 872 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 873} 874 875TEST_F(UnicodeExtensionApiTest, ReadCharWithOutOfBoundIndexRaisesIndexError) { 876 PyObjectPtr str(PyUnicode_FromString("foo")); 
877 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 3), Py_UCS4{0xFFFFFFFF}); 878 ASSERT_NE(PyErr_Occurred(), nullptr); 879 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); 880} 881 882TEST_F(UnicodeExtensionApiTest, ReadyReturnsZero) { 883 PyObject* pyunicode = PyUnicode_FromString("some string"); 884 int is_ready = PyUnicode_READY(pyunicode); 885 EXPECT_EQ(0, is_ready); 886 Py_DECREF(pyunicode); 887} 888 889TEST_F(UnicodeExtensionApiTest, ReplaceWithStrOfNonStringTypeReturnsNull) { 890 PyObjectPtr non_str(PyBool_FromLong(1)); 891 PyObjectPtr substr(PyUnicode_FromString("some string")); 892 PyObjectPtr replstr(PyUnicode_FromString("some string")); 893 EXPECT_EQ(PyUnicode_Replace(non_str, substr, replstr, -1), nullptr); 894 EXPECT_NE(PyErr_Occurred(), nullptr); 895} 896 897TEST_F(UnicodeExtensionApiTest, ReplaceWithSubstrOfNonStringTypeReturnsNull) { 898 PyObjectPtr non_str(PyBool_FromLong(1)); 899 PyObjectPtr str(PyUnicode_FromString("some string")); 900 PyObjectPtr replstr(PyUnicode_FromString("some string")); 901 EXPECT_EQ(PyUnicode_Replace(str, non_str, replstr, -1), nullptr); 902 EXPECT_NE(PyErr_Occurred(), nullptr); 903} 904 905TEST_F(UnicodeExtensionApiTest, ReplaceWithReplstrOfNonStringTypeReturnsNull) { 906 PyObjectPtr non_str(PyBool_FromLong(1)); 907 PyObjectPtr str(PyUnicode_FromString("some string")); 908 PyObjectPtr substr(PyUnicode_FromString("some string")); 909 EXPECT_EQ(PyUnicode_Replace(str, substr, non_str, -1), nullptr); 910 EXPECT_NE(PyErr_Occurred(), nullptr); 911} 912 913TEST_F(UnicodeExtensionApiTest, 914 ReplaceWithStrSubclassReturnStrWithSameContent) { 915 PyRun_SimpleString(R"( 916class SubStr(str): pass 917 918subclass_instance = SubStr("hello world!") 919)"); 920 PyObjectPtr subclass_instance(mainModuleGet("subclass_instance")); 921 PyObjectPtr substr(PyUnicode_FromString("some string")); 922 PyObjectPtr replstr(PyUnicode_FromString("some string")); 923 PyObjectPtr result(PyUnicode_Replace(subclass_instance, substr, replstr, -1)); 924 
ASSERT_EQ(PyErr_Occurred(), nullptr); 925 EXPECT_TRUE(PyUnicode_CheckExact(result)); 926 EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world!")); 927} 928 929TEST_F(UnicodeExtensionApiTest, 930 ReplaceWithNegativeMaxcountReturnsResultReplacingAllSubstr) { 931 PyObjectPtr str(PyUnicode_FromString("22122122122122122")); 932 PyObjectPtr substr(PyUnicode_FromString("22")); 933 PyObjectPtr replstr(PyUnicode_FromString("*")); 934 PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*")); 935 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1)); 936 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1); 937 EXPECT_EQ(PyErr_Occurred(), nullptr); 938} 939 940TEST_F(UnicodeExtensionApiTest, 941 ReplaceWithSubClassAndNegativeMaxcountReturnsResultReplacingAllSubstr) { 942 PyRun_SimpleString(R"( 943class SubStr(str): pass 944 945str_val = SubStr("22122122122122122") 946substr = SubStr("22") 947replstr = SubStr("*") 948)"); 949 PyObjectPtr str(mainModuleGet("str_val")); 950 PyObjectPtr substr(mainModuleGet("substr")); 951 PyObjectPtr replstr(mainModuleGet("replstr")); 952 PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*")); 953 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1)); 954 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1); 955 EXPECT_EQ(PyErr_Occurred(), nullptr); 956} 957 958TEST_F(UnicodeExtensionApiTest, 959 ReplaceWithLimitedMaxcountReturnsResultReplacingUpToMaxcount) { 960 PyObjectPtr str(PyUnicode_FromString("22122122122122122")); 961 PyObjectPtr substr(PyUnicode_FromString("22")); 962 PyObjectPtr replstr(PyUnicode_FromString("*")); 963 PyObjectPtr expected(PyUnicode_FromString("*1*1*122122122")); 964 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, 3)); 965 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1); 966 EXPECT_EQ(PyErr_Occurred(), nullptr); 967} 968 969TEST_F(UnicodeExtensionApiTest, Compare) { 970 PyObject* s1 = PyUnicode_FromString("some string"); 971 PyObject* s2 = PyUnicode_FromString("some longer string"); 972 PyObject* s22 
= PyUnicode_FromString("some longer string"); 973 974 int result = PyUnicode_Compare(s1, s2); 975 EXPECT_EQ(result, 1); 976 EXPECT_EQ(PyErr_Occurred(), nullptr); 977 978 result = PyUnicode_Compare(s2, s1); 979 EXPECT_EQ(result, -1); 980 EXPECT_EQ(PyErr_Occurred(), nullptr); 981 982 result = PyUnicode_Compare(s2, s22); 983 EXPECT_EQ(result, 0); 984 EXPECT_EQ(PyErr_Occurred(), nullptr); 985 986 Py_DECREF(s22); 987 Py_DECREF(s2); 988 Py_DECREF(s1); 989} 990 991TEST_F(UnicodeExtensionApiTest, CompareWithSubClass) { 992 PyRun_SimpleString(R"( 993class SubStr(str): pass 994 995substr = SubStr("some string") 996)"); 997 PyObjectPtr s1(mainModuleGet("substr")); 998 PyObjectPtr s2(PyUnicode_FromString("some longer string")); 999 PyObjectPtr s22(PyUnicode_FromString("some longer string")); 1000 1001 int result = PyUnicode_Compare(s1, s2); 1002 EXPECT_EQ(result, 1); 1003 EXPECT_EQ(PyErr_Occurred(), nullptr); 1004 1005 result = PyUnicode_Compare(s2, s1); 1006 EXPECT_EQ(result, -1); 1007 EXPECT_EQ(PyErr_Occurred(), nullptr); 1008 1009 result = PyUnicode_Compare(s2, s22); 1010 EXPECT_EQ(result, 0); 1011 EXPECT_EQ(PyErr_Occurred(), nullptr); 1012} 1013 1014TEST_F(UnicodeExtensionApiTest, CompareBadInput) { 1015 PyObject* str_obj = PyUnicode_FromString("this is a string"); 1016 PyObject* int_obj = PyLong_FromLong(1234); 1017 1018 PyUnicode_Compare(str_obj, int_obj); 1019 ASSERT_NE(PyErr_Occurred(), nullptr); 1020 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1021 PyErr_Clear(); 1022 1023 PyUnicode_Compare(int_obj, str_obj); 1024 ASSERT_NE(PyErr_Occurred(), nullptr); 1025 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1026 PyErr_Clear(); 1027 1028 PyUnicode_Compare(int_obj, int_obj); 1029 ASSERT_NE(PyErr_Occurred(), nullptr); 1030 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1031 PyErr_Clear(); 1032 1033 Py_DECREF(int_obj); 1034 Py_DECREF(str_obj); 1035} 1036 1037TEST_F(UnicodeExtensionApiTest, EqSameLength) { 1038 PyObject* str1 = PyUnicode_FromString("some 
string"); 1039 1040 PyObject* str2 = PyUnicode_FromString("some other string"); 1041 EXPECT_EQ(_PyUnicode_EQ(str1, str2), 0); 1042 EXPECT_EQ(_PyUnicode_EQ(str2, str1), 0); 1043 Py_DECREF(str2); 1044 1045 PyObject* str3 = PyUnicode_FromString("some string"); 1046 EXPECT_EQ(_PyUnicode_EQ(str1, str3), 1); 1047 EXPECT_EQ(_PyUnicode_EQ(str3, str1), 1); 1048 Py_DECREF(str3); 1049 1050 Py_DECREF(str1); 1051} 1052 1053TEST_F(UnicodeExtensionApiTest, EqWithSubClassSameLength) { 1054 PyRun_SimpleString(R"( 1055class SubStr(str): pass 1056 1057substr = SubStr("some string") 1058)"); 1059 PyObjectPtr str(mainModuleGet("substr")); 1060 PyObjectPtr str1(PyUnicode_FromString("some string")); 1061 EXPECT_EQ(_PyUnicode_EQ(str1.get(), str.get()), 1); 1062 1063 PyObjectPtr str2(PyUnicode_FromString("some other string")); 1064 EXPECT_EQ(_PyUnicode_EQ(str2.get(), str.get()), 0); 1065} 1066 1067TEST_F(UnicodeExtensionApiTest, EqDifferentLength) { 1068 PyObject* small = PyUnicode_FromString("123"); 1069 PyObject* large = PyUnicode_FromString("1234567890"); 1070 EXPECT_EQ(_PyUnicode_EQ(small, large), 0); 1071 EXPECT_EQ(_PyUnicode_EQ(large, small), 0); 1072 Py_DECREF(large); 1073 Py_DECREF(small); 1074} 1075 1076TEST_F(UnicodeExtensionApiTest, EqualToASCIIString) { 1077 PyObject* unicode = PyUnicode_FromString("here's another string"); 1078 1079 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string")); 1080 EXPECT_FALSE( 1081 _PyUnicode_EqualToASCIIString(unicode, "here is another string")); 1082 1083 Py_DECREF(unicode); 1084} 1085 1086TEST_F(UnicodeExtensionApiTest, EqualToASCIIStringWithSubClass) { 1087 PyRun_SimpleString(R"( 1088class SubStr(str): pass 1089 1090substr = SubStr("here's another string") 1091)"); 1092 PyObjectPtr unicode(mainModuleGet("substr")); 1093 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string")); 1094 EXPECT_FALSE( 1095 _PyUnicode_EqualToASCIIString(unicode, "here is another string")); 1096} 1097 
1098TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCIINul) { 1099 PyObjectPtr pyunicode(PyUnicode_FromStringAndSize("large\0st", 8)); 1100 1101 // Less 1102 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "largz"), -1); 1103 1104 // Greater 1105 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large"), 1); 1106} 1107 1108TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCII) { 1109 PyObjectPtr pyunicode(PyUnicode_FromString("large string")); 1110 1111 // Equal 1112 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large string"), 0); 1113 1114 // Less 1115 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strings"), -1); 1116 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large tbigger"), -1); 1117 1118 // Greater 1119 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strin"), 1); 1120 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large smaller"), 1); 1121} 1122 1123TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringWithSubClass) { 1124 PyRun_SimpleString(R"( 1125class SubStr(str): pass 1126 1127substr = SubStr("large string") 1128)"); 1129 PyObjectPtr substr(mainModuleGet("substr")); 1130 1131 // Equal 1132 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large string"), 0); 1133 1134 // Less 1135 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strings"), -1); 1136 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large tbigger"), -1); 1137 1138 // Greater 1139 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strin"), 1); 1140 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large smaller"), 1); 1141} 1142 1143TEST_F(UnicodeExtensionApiTest, GetLengthWithEmptyStrReturnsZero) { 1144 PyObjectPtr str(PyUnicode_FromString("")); 1145 Py_ssize_t expected = 0; 1146 EXPECT_EQ(PyUnicode_GetLength(str), expected); 1147 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); 1148 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); 1149} 1150 1151TEST_F(UnicodeExtensionApiTest, 
GetLengthWithNonEmptyString) { 1152 PyObjectPtr str(PyUnicode_FromString("foo")); 1153 Py_ssize_t expected = 3; 1154 EXPECT_EQ(PyUnicode_GetLength(str), expected); 1155 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); 1156 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); 1157} 1158 1159TEST_F(UnicodeExtensionApiTest, GetLengthWithSubClassOfNonEmptyString) { 1160 PyRun_SimpleString(R"( 1161class SubStr(str): pass 1162 1163substr = SubStr('foo') 1164)"); 1165 PyObjectPtr str(mainModuleGet("substr")); 1166 Py_ssize_t expected = 3; 1167 EXPECT_EQ(PyUnicode_GetLength(str), expected); 1168 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); 1169 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); 1170} 1171 1172TEST_F(UnicodeExtensionApiTest, GetLengthWithUTF8ReturnsCodePointLength) { 1173 PyObjectPtr str(PyUnicode_FromString("\xc3\xa9")); 1174 Py_ssize_t expected = 1; 1175 EXPECT_EQ(PyUnicode_GetLength(str), expected); 1176 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); 1177 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); 1178} 1179 1180TEST_F(UnicodeExtensionApiTest, GetLengthWithNonStrRaisesTypeError) { 1181 PyObjectPtr list(PyList_New(3)); 1182 EXPECT_EQ(PyUnicode_GetLength(list), -1); 1183 ASSERT_NE(PyErr_Occurred(), nullptr); 1184 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1185} 1186 1187TEST_F(UnicodeExtensionApiTest, GetSizeWithNonStrRaisesTypeError) { 1188 PyObjectPtr list(PyList_New(3)); 1189#pragma GCC diagnostic push 1190#pragma GCC diagnostic ignored "-Wdeprecated-declarations" 1191 EXPECT_EQ(PyUnicode_GetSize(list), -1); 1192#pragma GCC diagnostic pop 1193 ASSERT_NE(PyErr_Occurred(), nullptr); 1194 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1195} 1196 1197TEST_F(UnicodeExtensionApiTest, GetSizeWithStrReturnsLength) { 1198 PyObjectPtr unicode(PyUnicode_FromString("abc")); 1199#pragma GCC diagnostic push 1200#pragma GCC diagnostic ignored "-Wdeprecated-declarations" 1201 EXPECT_EQ(PyUnicode_GetSize(unicode), 3); 
1202#pragma GCC diagnostic pop 1203 EXPECT_EQ(PyErr_Occurred(), nullptr); 1204} 1205 1206TEST_F(UnicodeExtensionApiTest, FromUnicodeWithASCIIReturnsString) { 1207 PyObjectPtr unicode(PyUnicode_FromUnicode(L"abc123-", 7)); 1208 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-")); 1209} 1210 1211TEST_F(UnicodeExtensionApiTest, FromUnicodeWithNullBufferAbortsPyro) { 1212 EXPECT_DEATH(PyUnicode_FromUnicode(nullptr, 2), 1213 "unimplemented: _PyUnicode_New"); 1214} 1215 1216TEST_F(UnicodeExtensionApiTest, 1217 FromOrdinalWithNegativeCodePointRaisesValueError) { 1218 EXPECT_EQ(PyUnicode_FromOrdinal(-1), nullptr); 1219 ASSERT_NE(PyErr_Occurred(), nullptr); 1220 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 1221} 1222 1223TEST_F(UnicodeExtensionApiTest, FromOrdinalWithHugeCodePointRaisesValueError) { 1224 EXPECT_EQ(PyUnicode_FromOrdinal(0xFFFFFFFF), nullptr); 1225 ASSERT_NE(PyErr_Occurred(), nullptr); 1226 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 1227} 1228 1229TEST_F(UnicodeExtensionApiTest, FromOrdinalWithValidCodePointReturnsString) { 1230 PyObjectPtr str(PyUnicode_FromOrdinal(1488)); 1231 EXPECT_EQ(PyErr_Occurred(), nullptr); 1232 EXPECT_NE(str, nullptr); 1233 ASSERT_TRUE(PyUnicode_Check(str)); 1234 EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xD7\x90"); 1235} 1236 1237TEST_F(UnicodeExtensionApiTest, 1238 FromWideCharWithNullBufferAndZeroSizeReturnsEmpty) { 1239 PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 0)); 1240 ASSERT_EQ(PyErr_Occurred(), nullptr); 1241 ASSERT_TRUE(PyUnicode_Check(empty)); 1242 EXPECT_EQ(PyUnicode_GetLength(empty), 0); 1243} 1244 1245TEST_F(UnicodeExtensionApiTest, FromWideCharWithNullBufferReturnsError) { 1246 PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 1)); 1247 ASSERT_EQ(empty, nullptr); 1248 ASSERT_NE(PyErr_Occurred(), nullptr); 1249 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 1250} 1251 1252TEST_F(UnicodeExtensionApiTest, FromWideCharWithUnknownSizeReturnsString) { 1253 PyObjectPtr 
unicode(PyUnicode_FromWideChar(L"abc123-", -1)); 1254 ASSERT_EQ(PyErr_Occurred(), nullptr); 1255 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-")); 1256} 1257 1258TEST_F(UnicodeExtensionApiTest, FromWideCharWithGivenSizeReturnsString) { 1259 PyObjectPtr unicode(PyUnicode_FromWideChar(L"abc123-", 6)); 1260 ASSERT_EQ(PyErr_Occurred(), nullptr); 1261 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123")); 1262} 1263 1264TEST_F(UnicodeExtensionApiTest, FromWideCharWithBufferAndZeroSizeReturnsEmpty) { 1265 PyObjectPtr empty(PyUnicode_FromWideChar(L"abc", 0)); 1266 ASSERT_EQ(PyErr_Occurred(), nullptr); 1267 ASSERT_TRUE(PyUnicode_Check(empty)); 1268 EXPECT_EQ(PyUnicode_GetLength(empty), 0); 1269} 1270 1271TEST_F(UnicodeExtensionApiTest, DecodeWithNullEncodingReturnsUTF8) { 1272 const char* str = "utf-8 \xc3\xa8"; 1273 PyObjectPtr uni(PyUnicode_Decode(str, 8, nullptr, nullptr)); 1274 ASSERT_TRUE(PyUnicode_CheckExact(uni)); 1275 EXPECT_STREQ(PyUnicode_AsUTF8(uni), str); 1276} 1277 1278TEST_F(UnicodeExtensionApiTest, DecodeASCIIUsesErrorHandler) { 1279 PyObjectPtr uni(PyUnicode_Decode("non\xc3\xa8-ascii", 11, "ascii", "ignore")); 1280 ASSERT_TRUE(PyUnicode_CheckExact(uni)); 1281 EXPECT_STREQ(PyUnicode_AsUTF8(uni), "non-ascii"); 1282} 1283 1284TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsLatin1) { 1285 PyObjectPtr uni(PyUnicode_Decode("latin-1 \xe8", 9, "latin-1", nullptr)); 1286 ASSERT_TRUE(PyUnicode_CheckExact(uni)); 1287 EXPECT_STREQ(PyUnicode_AsUTF8(uni), "latin-1 \xc3\xa8"); 1288} 1289 1290TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultCreatesString) { 1291 PyObjectPtr unicode(PyUnicode_DecodeFSDefault("hello")); 1292 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello")); 1293 EXPECT_EQ(PyErr_Occurred(), nullptr); 1294} 1295 1296TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultAndSizeReturnsString) { 1297 PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 5)); 1298 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello")); 1299 EXPECT_EQ(PyErr_Occurred(), nullptr); 
1300} 1301 1302TEST_F(UnicodeExtensionApiTest, 1303 DecodeFSDefaultAndSizeWithSmallerSizeReturnsString) { 1304 PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 2)); 1305 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "he")); 1306 EXPECT_EQ(PyErr_Occurred(), nullptr); 1307} 1308 1309TEST_F(UnicodeExtensionApiTest, DecodeASCIIReturnsString) { 1310 PyObjectPtr str(PyUnicode_DecodeASCII("hello world", 11, nullptr)); 1311 ASSERT_EQ(PyErr_Occurred(), nullptr); 1312 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1313} 1314 1315TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsString) { 1316 const char* c_str = "\xBFhello world?"; 1317 PyObjectPtr str(PyUnicode_DecodeLatin1(c_str, std::strlen(c_str), nullptr)); 1318 ASSERT_EQ(PyErr_Occurred(), nullptr); 1319 EXPECT_EQ(PyUnicode_CheckExact(str), 1); 1320 EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xC2\xBFhello world?"); 1321} 1322 1323TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterPrepareWithLenZeroReturnsZero) { 1324 _PyUnicodeWriter writer; 1325 _PyUnicodeWriter_Init(&writer); 1326 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 0, 127), 0); 1327 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1328 1329 ASSERT_EQ(PyErr_Occurred(), nullptr); 1330 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); 1331} 1332 1333TEST_F(UnicodeExtensionApiTest, 1334 PyUnicodeWriterWithOverallocateSetOverallocates) { 1335 _PyUnicodeWriter writer; 1336 _PyUnicodeWriter_Init(&writer); 1337 writer.overallocate = 0; 1338 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0); 1339 ASSERT_EQ(writer.size, 5); 1340 _PyUnicodeWriter_Dealloc(&writer); 1341 1342 _PyUnicodeWriter_Init(&writer); 1343 writer.overallocate = 1; 1344 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0); 1345 ASSERT_GT(writer.size, 5); 1346 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1347 1348 ASSERT_EQ(PyErr_Occurred(), nullptr); 1349 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); 1350} 1351 1352TEST_F(UnicodeExtensionApiTest, 
PyUnicodeWriterCreatesEmptyString) { 1353 _PyUnicodeWriter writer; 1354 _PyUnicodeWriter_Init(&writer); 1355 PyObjectPtr empty(_PyUnicodeWriter_Finish(&writer)); 1356 ASSERT_EQ(PyErr_Occurred(), nullptr); 1357 ASSERT_TRUE(PyUnicode_Check(empty)); 1358 EXPECT_EQ(PyUnicode_GetLength(empty), 0); 1359} 1360 1361TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesASCIIStrings) { 1362 _PyUnicodeWriter writer; 1363 _PyUnicodeWriter_Init(&writer); 1364 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello", 5), 0); 1365 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, " world", 6), 0); 1366 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1367 1368 ASSERT_EQ(PyErr_Occurred(), nullptr); 1369 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); 1370} 1371 1372TEST_F(UnicodeExtensionApiTest, 1373 WriteASCIIStringWithNegativeLengthReturnsString) { 1374 _PyUnicodeWriter writer; 1375 _PyUnicodeWriter_Init(&writer); 1376 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello world", -1), 0); 1377 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1378 1379 ASSERT_EQ(PyErr_Occurred(), nullptr); 1380 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); 1381} 1382 1383TEST_F(UnicodeExtensionApiTest, WriteASCIIStringWithNonASCIIDeathTestPyro) { 1384 _PyUnicodeWriter writer; 1385 _PyUnicodeWriter_Init(&writer); 1386 EXPECT_DEATH(_PyUnicodeWriter_WriteASCIIString(&writer, "\xA0", 1), 1387 "_PyUnicodeWriter_WriteASCIIString only takes ASCII"); 1388 _PyUnicodeWriter_Dealloc(&writer); 1389} 1390 1391TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesChars) { 1392 _PyUnicodeWriter writer; 1393 _PyUnicodeWriter_Init(&writer); 1394 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'a'), 0); 1395 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 0xA0), 0); 1396 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'g'), 0); 1397 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1398 1399 ASSERT_EQ(PyErr_Occurred(), nullptr); 1400 PyObjectPtr 
test(PyUnicode_FromString("a\xc2\xa0g")); 1401 EXPECT_TRUE(_PyUnicode_EQ(unicode, test)); 1402} 1403 1404TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesLatin1String) { 1405 _PyUnicodeWriter writer; 1406 _PyUnicodeWriter_Init(&writer); 1407 ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, "hello\xA0", 6), 0); 1408 ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, " world", 6), 0); 1409 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1410 1411 ASSERT_EQ(PyErr_Occurred(), nullptr); 1412 PyObjectPtr test(PyUnicode_FromString("hello\xc2\xa0 world")); 1413 EXPECT_TRUE(_PyUnicode_EQ(unicode, test)); 1414} 1415 1416TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteStrWritesStringObject) { 1417 _PyUnicodeWriter writer; 1418 _PyUnicodeWriter_Init(&writer); 1419 PyObjectPtr hello_str(PyUnicode_FromString("hello")); 1420 PyObjectPtr world_str(PyUnicode_FromString(" \xf0\x9f\x90\x8d world")); 1421 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0); 1422 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0); 1423 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1424 1425 ASSERT_EQ(PyErr_Occurred(), nullptr); 1426 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello \xf0\x9f\x90\x8d world")); 1427} 1428 1429TEST_F(UnicodeExtensionApiTest, 1430 PyUnicodeWriterWriteStrWithSubClassWritesStringObject) { 1431 PyRun_SimpleString(R"( 1432class SubStr(str): pass 1433 1434hello_str = SubStr("hello") 1435world_str = SubStr(" world") 1436)"); 1437 _PyUnicodeWriter writer; 1438 _PyUnicodeWriter_Init(&writer); 1439 PyObjectPtr hello_str(mainModuleGet("hello_str")); 1440 PyObjectPtr world_str(mainModuleGet("world_str")); 1441 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0); 1442 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0); 1443 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1444 1445 ASSERT_EQ(PyErr_Occurred(), nullptr); 1446 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); 1447} 1448 
1449TEST_F(UnicodeExtensionApiTest, 1450 PyUnicodeWriterWriteSubstringWritesSubStringObject) { 1451 _PyUnicodeWriter writer; 1452 _PyUnicodeWriter_Init(&writer); 1453 PyObjectPtr str(PyUnicode_FromString("hello \xf0\x9f\x90\x8d world")); 1454 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 8, 13), 0); 1455 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 8), 0); 1456 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0); 1457 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1458 1459 ASSERT_EQ(PyErr_Occurred(), nullptr); 1460 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "world \xf0\x9f\x90\x8d hello")); 1461} 1462 1463TEST_F(UnicodeExtensionApiTest, 1464 PyUnicodeWriterWriteSubstringWithSubClassWritesSubStringObject) { 1465 PyRun_SimpleString(R"( 1466class SubStr(str): pass 1467 1468str_value = SubStr("hello world") 1469)"); 1470 _PyUnicodeWriter writer; 1471 _PyUnicodeWriter_Init(&writer); 1472 PyObjectPtr str(mainModuleGet("str_value")); 1473 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0); 1474 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 11), 0); 1475 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1476 1477 ASSERT_EQ(PyErr_Occurred(), nullptr); 1478 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); 1479} 1480 1481TEST_F(UnicodeExtensionApiTest, WriteSubstringWithZeroEndReturnsString) { 1482 _PyUnicodeWriter writer; 1483 _PyUnicodeWriter_Init(&writer); 1484 PyObjectPtr str(PyUnicode_FromString("hello")); 1485 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 0), 0); 1486 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); 1487 1488 ASSERT_EQ(PyErr_Occurred(), nullptr); 1489 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); 1490} 1491 1492TEST_F(UnicodeExtensionApiTest, DecodeUTF8ReturnsString) { 1493 PyObjectPtr str(PyUnicode_DecodeUTF8("hello world", 11, nullptr)); 1494 ASSERT_EQ(PyErr_Occurred(), nullptr); 1495 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1496} 
1497 1498TEST_F(UnicodeExtensionApiTest, 1499 DecodeUTF8WithUnfinishedBytesRaisesUnicodeDecodeError) { 1500 EXPECT_EQ(PyUnicode_DecodeUTF8("hello world\xC3", 12, nullptr), nullptr); 1501 PyObject *exc, *value, *tb; 1502 PyErr_Fetch(&exc, &value, &tb); 1503 ASSERT_NE(exc, nullptr); 1504 ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeDecodeError)); 1505 Py_ssize_t temp; 1506 PyObjectPtr msg(PyUnicodeDecodeError_GetReason(value)); 1507 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "unexpected end of data")); 1508 PyUnicodeDecodeError_GetStart(value, &temp); 1509 EXPECT_EQ(temp, 11); 1510 PyUnicodeDecodeError_GetEnd(value, &temp); 1511 EXPECT_EQ(temp, 12); 1512 Py_XDECREF(exc); 1513 Py_XDECREF(value); 1514 Py_XDECREF(tb); 1515} 1516 1517TEST_F(UnicodeExtensionApiTest, DecodeUTF8StatefulReturnsString) { 1518 Py_ssize_t consumed; 1519 PyObjectPtr str( 1520 PyUnicode_DecodeUTF8Stateful("hello world", 11, nullptr, &consumed)); 1521 ASSERT_EQ(PyErr_Occurred(), nullptr); 1522 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1523 EXPECT_EQ(consumed, 11); 1524} 1525 1526TEST_F(UnicodeExtensionApiTest, 1527 DecodeUTF8StatefulWithUnfinishedBytesReturnsString) { 1528 Py_ssize_t consumed; 1529 PyObjectPtr str( 1530 PyUnicode_DecodeUTF8Stateful("hello world\xC3", 12, nullptr, &consumed)); 1531 ASSERT_EQ(PyErr_Occurred(), nullptr); 1532 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1533 EXPECT_EQ(consumed, 11); 1534} 1535 1536TEST_F(UnicodeExtensionApiTest, DecodeUnicodeEscapeReturnsString) { 1537 PyObjectPtr str( 1538 PyUnicode_DecodeUnicodeEscape("hello \\\nworld", 13, nullptr)); 1539 ASSERT_EQ(PyErr_Occurred(), nullptr); 1540 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1541} 1542 1543TEST_F(UnicodeExtensionApiTest, UnderDecodeUnicodeEscapeReturnsFirstInvalid) { 1544 const char* invalid; 1545 PyObjectPtr str( 1546 _PyUnicode_DecodeUnicodeEscape("hello \\yworld", 13, nullptr, &invalid)); 1547 ASSERT_EQ(PyErr_Occurred(), nullptr); 1548 
EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello \\yworld")); 1549 EXPECT_EQ(*invalid, 'y'); 1550} 1551 1552TEST_F(UnicodeExtensionApiTest, 1553 UnderDecodeUnicodeEscapeSetsFirstInvalidEscapeToNull) { 1554 const char* invalid = reinterpret_cast<const char*>(0x100); 1555 PyObjectPtr result( 1556 _PyUnicode_DecodeUnicodeEscape("hello", 5, nullptr, &invalid)); 1557 EXPECT_NE(result, nullptr); 1558 EXPECT_EQ(PyErr_Occurred(), nullptr); 1559 EXPECT_EQ(invalid, nullptr); 1560} 1561 1562TEST_F(UnicodeExtensionApiTest, FromFormatWithNoArgsReturnsString) { 1563 PyObjectPtr str(PyUnicode_FromFormat("hello world")); 1564 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); 1565} 1566 1567TEST_F(UnicodeExtensionApiTest, FromFormatWithManyArgsReturnsString) { 1568 PyObjectPtr str(PyUnicode_FromFormat("h%c%s%%%2.i", 'e', "llo world", 2)); 1569 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world% 2")); 1570} 1571 1572TEST_F(UnicodeExtensionApiTest, FromFormatParsesNumberTypes) { 1573 { 1574 PyObjectPtr str(PyUnicode_FromFormat("%x", 123)); 1575 EXPECT_TRUE(isUnicodeEqualsCStr(str, "7b")); 1576 } 1577 1578 { 1579 PyObjectPtr str(PyUnicode_FromFormat("%d", 124)); 1580 EXPECT_TRUE(isUnicodeEqualsCStr(str, "124")); 1581 } 1582 1583 { 1584 PyObjectPtr str(PyUnicode_FromFormat("%i", 125)); 1585 EXPECT_TRUE(isUnicodeEqualsCStr(str, "125")); 1586 } 1587 1588 { 1589 PyObjectPtr str(PyUnicode_FromFormat("%ld", 126)); 1590 EXPECT_TRUE(isUnicodeEqualsCStr(str, "126")); 1591 } 1592 1593 { 1594 PyObjectPtr str(PyUnicode_FromFormat("%li", 127)); 1595 EXPECT_TRUE(isUnicodeEqualsCStr(str, "127")); 1596 } 1597 1598 { 1599 PyObjectPtr str(PyUnicode_FromFormat("%lld", 128)); 1600 EXPECT_TRUE(isUnicodeEqualsCStr(str, "128")); 1601 } 1602 1603 { 1604 PyObjectPtr str(PyUnicode_FromFormat("%lli", 129)); 1605 EXPECT_TRUE(isUnicodeEqualsCStr(str, "129")); 1606 } 1607 1608 { 1609 PyObjectPtr str(PyUnicode_FromFormat("%u", 130)); 1610 EXPECT_TRUE(isUnicodeEqualsCStr(str, "130")); 1611 } 1612 1613 { 1614 
PyObjectPtr str(PyUnicode_FromFormat("%lu", 131)); 1615 EXPECT_TRUE(isUnicodeEqualsCStr(str, "131")); 1616 } 1617 1618 { 1619 PyObjectPtr str(PyUnicode_FromFormat("%llu", 132)); 1620 EXPECT_TRUE(isUnicodeEqualsCStr(str, "132")); 1621 } 1622 1623 { 1624 PyObjectPtr str(PyUnicode_FromFormat("%zd", 133)); 1625 EXPECT_TRUE(isUnicodeEqualsCStr(str, "133")); 1626 } 1627 1628 { 1629 PyObjectPtr str(PyUnicode_FromFormat("%zu", 134)); 1630 EXPECT_TRUE(isUnicodeEqualsCStr(str, "134")); 1631 } 1632 1633 { 1634 PyObjectPtr str(PyUnicode_FromFormat("%zi", 135)); 1635 EXPECT_TRUE(isUnicodeEqualsCStr(str, "135")); 1636 } 1637} 1638 1639TEST_F(UnicodeExtensionApiTest, FromFormatParsesCharacters) { 1640 PyObjectPtr str(PyUnicode_FromFormat("%c%c", 'h', 'w')); 1641 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hw")); 1642} 1643 1644TEST_F(UnicodeExtensionApiTest, FromFormatParsesPointer) { 1645 long value = 0; 1646 void* test = &value; 1647 char buff[18]; 1648 std::snprintf(buff, 18, "%p", test); 1649 PyObjectPtr str(PyUnicode_FromFormat("%p", test)); 1650 EXPECT_TRUE(isUnicodeEqualsCStr(str, buff)); 1651} 1652 1653TEST_F(UnicodeExtensionApiTest, FromFormatParsesString) { 1654 PyObjectPtr str(PyUnicode_FromFormat("%s", "UTF-8")); 1655 EXPECT_TRUE(isUnicodeEqualsCStr(str, "UTF-8")); 1656} 1657 1658TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObject) { 1659 PyObjectPtr unicode(PyUnicode_FromString("hello")); 1660 PyObjectPtr str(PyUnicode_FromFormat("%U", static_cast<PyObject*>(unicode))); 1661 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello")); 1662} 1663 1664TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObjectAndString) { 1665 PyObjectPtr unicode(PyUnicode_FromString("hello")); 1666 PyObjectPtr str( 1667 PyUnicode_FromFormat("%V", static_cast<PyObject*>(unicode), "world")); 1668 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello")); 1669} 1670 1671TEST_F(UnicodeExtensionApiTest, FromFormatParsesNullAndString) { 1672 PyObjectPtr str(PyUnicode_FromFormat("%V", nullptr, "world")); 1673 
EXPECT_TRUE(isUnicodeEqualsCStr(str, "world")); 1674} 1675 1676TEST_F(UnicodeExtensionApiTest, ConcatWithNonStringFails) { 1677 PyObjectPtr i(PyLong_FromLong(1)); 1678 EXPECT_EQ(PyUnicode_Concat(i, i), nullptr); 1679 ASSERT_NE(PyErr_Occurred(), nullptr); 1680 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1681} 1682 1683TEST_F(UnicodeExtensionApiTest, ConcatWithEmptyArgumentReturnsString) { 1684 PyObjectPtr hello(PyUnicode_FromString("hello")); 1685 PyObjectPtr empty(PyUnicode_FromString("")); 1686 PyObjectPtr empty_right(PyUnicode_Concat(hello, empty)); 1687 ASSERT_EQ(PyErr_Occurred(), nullptr); 1688 EXPECT_TRUE(isUnicodeEqualsCStr(empty_right, "hello")); 1689 1690 PyObjectPtr empty_left(PyUnicode_Concat(empty, hello)); 1691 ASSERT_EQ(PyErr_Occurred(), nullptr); 1692 EXPECT_TRUE(isUnicodeEqualsCStr(empty_left, "hello")); 1693} 1694 1695TEST_F(UnicodeExtensionApiTest, ConcatWithTwoStringsReturnsString) { 1696 PyObjectPtr hello(PyUnicode_FromString("hello ")); 1697 PyObjectPtr world(PyUnicode_FromString("world")); 1698 PyObjectPtr result(PyUnicode_Concat(hello, world)); 1699 ASSERT_EQ(PyErr_Occurred(), nullptr); 1700 EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world")); 1701} 1702 1703TEST_F(UnicodeExtensionApiTest, AppendWithNullFails) { 1704 PyUnicode_Append(nullptr, nullptr); 1705 ASSERT_NE(PyErr_Occurred(), nullptr); 1706 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 1707} 1708 1709TEST_F(UnicodeExtensionApiTest, AppendWithNonStringFails) { 1710 PyObject* not_str = PyLong_FromLong(1); 1711 PyUnicode_Append(&not_str, not_str); 1712 ASSERT_NE(PyErr_Occurred(), nullptr); 1713 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 1714} 1715 1716TEST_F(UnicodeExtensionApiTest, AppendWithEmptyArgumentReturnsString) { 1717 PyObject* hello(PyUnicode_FromString("hello")); 1718 PyObject* empty(PyUnicode_FromString("")); 1719 PyUnicode_Append(&hello, empty); 1720 ASSERT_EQ(PyErr_Occurred(), nullptr); 1721 EXPECT_TRUE(isUnicodeEqualsCStr(hello, 
"hello")); 1722 1723 PyUnicode_Append(&empty, hello); 1724 ASSERT_EQ(PyErr_Occurred(), nullptr); 1725 EXPECT_TRUE(isUnicodeEqualsCStr(empty, "hello")); 1726 Py_DECREF(hello); 1727 Py_DECREF(empty); 1728} 1729 1730TEST_F(UnicodeExtensionApiTest, AppendWithTwoStringsReturnsString) { 1731 PyObject* hello = PyUnicode_FromString("hello "); 1732 PyObjectPtr world(PyUnicode_FromString("world")); 1733 PyUnicode_Append(&hello, world); 1734 ASSERT_EQ(PyErr_Occurred(), nullptr); 1735 EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world")); 1736 Py_DECREF(hello); 1737} 1738 1739TEST_F(UnicodeExtensionApiTest, AppendAndDelWithStringDecreasesRefcnt) { 1740 PyObject* hello = PyUnicode_FromString("hello "); 1741 PyObject* world = PyUnicode_FromString("world"); 1742 Py_INCREF(world); 1743 Py_ssize_t original_refcnt = Py_REFCNT(world); 1744 PyUnicode_AppendAndDel(&hello, world); 1745 1746 ASSERT_EQ(PyErr_Occurred(), nullptr); 1747 EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world")); 1748 Py_DECREF(hello); 1749 1750 EXPECT_LT(Py_REFCNT(world), original_refcnt); 1751 Py_DECREF(world); 1752} 1753 1754TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultWithNonStringReturnsNull) { 1755 PyObjectPtr bytes(PyUnicode_EncodeFSDefault(Py_None)); 1756 EXPECT_EQ(bytes, nullptr); 1757 ASSERT_NE(PyErr_Occurred(), nullptr); 1758 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1759} 1760 1761TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultReturnsBytes) { 1762 PyObjectPtr unicode(PyUnicode_FromString("foo")); 1763 PyObjectPtr bytes(PyUnicode_EncodeFSDefault(unicode)); 1764 EXPECT_EQ(PyErr_Occurred(), nullptr); 1765 ASSERT_TRUE(PyBytes_Check(bytes)); 1766 EXPECT_EQ(PyBytes_Size(bytes), 3); 1767 EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); 1768} 1769 1770TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithEmbeddedNulRaisesValueError) { 1771 PyObjectPtr nul_str(PyUnicode_FromStringAndSize("a\0b", 3)); 1772 PyObject* bytes = PyUnicode_EncodeLocale(nul_str, nullptr); 1773 ASSERT_NE(PyErr_Occurred(), 
nullptr); 1774 EXPECT_EQ(bytes, nullptr); 1775 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 1776} 1777 1778TEST_F(UnicodeExtensionApiTest, 1779 EncodeLocaleWithUnknownErrorHandlerNameRaisesValueError) { 1780 PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3)); 1781 PyObject* bytes = PyUnicode_EncodeLocale(str, "nonexistant"); 1782 ASSERT_NE(PyErr_Occurred(), nullptr); 1783 EXPECT_EQ(bytes, nullptr); 1784 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 1785} 1786 1787TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithStrReturnsBytes) { 1788 PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3)); 1789 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, nullptr)); 1790 ASSERT_EQ(PyErr_Occurred(), nullptr); 1791 ASSERT_TRUE(PyBytes_Check(bytes)); 1792 EXPECT_STREQ(PyBytes_AsString(bytes), "abc"); 1793} 1794 1795TEST_F(UnicodeExtensionApiTest, 1796 EncodeLocaleWithStrictErrorsAndSurrogatesRaisesError) { 1797 PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape")); 1798 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "strict")); 1799 ASSERT_NE(PyErr_Occurred(), nullptr); 1800 ASSERT_EQ(bytes, nullptr); 1801 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); 1802} 1803 1804TEST_F(UnicodeExtensionApiTest, 1805 EncodeLocaleWithSurrogateescapeAndSurrogatesReturnsBytes) { 1806 PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape")); 1807 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "surrogateescape")); 1808 ASSERT_EQ(PyErr_Occurred(), nullptr); 1809 ASSERT_TRUE(PyBytes_Check(bytes)); 1810 EXPECT_STREQ(PyBytes_AsString(bytes), "abc\x80"); 1811} 1812 1813TEST_F(UnicodeExtensionApiTest, FSConverterWithNullSetAddrToNull) { 1814 PyObject* result = PyLong_FromLong(1); 1815 ASSERT_EQ(PyUnicode_FSConverter(nullptr, &result), 1); 1816 ASSERT_EQ(PyErr_Occurred(), nullptr); 1817 EXPECT_EQ(result, nullptr); 1818} 1819 1820TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesReturnsBytes) { 1821 PyObjectPtr bytes(PyBytes_FromString("foo")); 
1822 PyObject* result = nullptr; 1823 ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), Py_CLEANUP_SUPPORTED); 1824 ASSERT_EQ(PyErr_Occurred(), nullptr); 1825 ASSERT_NE(result, nullptr); 1826 EXPECT_TRUE(PyBytes_Check(result)); 1827 Py_DECREF(result); 1828} 1829 1830TEST_F(UnicodeExtensionApiTest, FSConverterWithUnicodeReturnsBytes) { 1831 PyObjectPtr unicode(PyUnicode_FromString("foo")); 1832 PyObject* result = nullptr; 1833 ASSERT_EQ(PyUnicode_FSConverter(unicode, &result), Py_CLEANUP_SUPPORTED); 1834 ASSERT_EQ(PyErr_Occurred(), nullptr); 1835 ASSERT_NE(result, nullptr); 1836 EXPECT_TRUE(PyBytes_Check(result)); 1837 Py_DECREF(result); 1838} 1839 1840TEST_F(UnicodeExtensionApiTest, FSConverterCallsDunderFspath) { 1841 PyRun_SimpleString(R"( 1842class C: 1843 def __fspath__(self): 1844 return "foo" 1845 1846foo = C() 1847)"); 1848 PyObjectPtr path(mainModuleGet("foo")); 1849 PyObject* result = nullptr; 1850 ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED); 1851 ASSERT_EQ(PyErr_Occurred(), nullptr); 1852 ASSERT_NE(result, nullptr); 1853 EXPECT_TRUE(PyBytes_Check(result)); 1854 Py_DECREF(result); 1855} 1856 1857TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesSubclassReturnsSubclass) { 1858 PyRun_SimpleString(R"( 1859class C(bytes): 1860 pass 1861 1862foo = C() 1863)"); 1864 PyObjectPtr path(mainModuleGet("foo")); 1865 PyObject* result = nullptr; 1866 ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED); 1867 ASSERT_EQ(PyErr_Occurred(), nullptr); 1868 ASSERT_NE(result, nullptr); 1869 EXPECT_TRUE(PyBytes_Check(result)); 1870 EXPECT_EQ(result, path); 1871 Py_DECREF(result); 1872} 1873 1874TEST_F(UnicodeExtensionApiTest, FSConverterWithEmbeddedNullRaisesValueError) { 1875 PyObjectPtr bytes(PyBytes_FromStringAndSize("foo \0 bar", 9)); 1876 PyObject* result = nullptr; 1877 ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), 0); 1878 ASSERT_NE(PyErr_Occurred(), nullptr); 1879 ASSERT_EQ(result, nullptr); 1880 
EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

// Interning a second, equal string must rewrite the caller's pointer.
TEST_F(UnicodeExtensionApiTest, InternInPlaceWritesNewHandleBack) {
  PyObject* first = PyUnicode_FromString("hello world aaaaaaaaaa");
  PyObject* second = PyUnicode_FromString("hello world aaaaaaaaaa");
  PyObject* second_before = second;
  EXPECT_NE(first, second);
  PyUnicode_InternInPlace(&first);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  PyUnicode_InternInPlace(&second);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  // `second` no longer points at the object it was created as.
  EXPECT_NE(second, second_before);
  Py_DECREF(first);
  Py_DECREF(second);
}

// Interning a C string produces an exact str instance.
TEST_F(UnicodeExtensionApiTest, InternFromStringReturnsStr) {
  PyObjectPtr interned(PyUnicode_InternFromString("szechuan broccoli"));
  ASSERT_NE(interned, nullptr);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyUnicode_CheckExact(interned));
}

// Joining an empty list gives the empty string.
TEST_F(UnicodeExtensionApiTest, JoinWithEmptySeqReturnsEmptyStr) {
  PyObjectPtr separator(PyUnicode_FromString("."));
  PyObjectPtr items(PyList_New(0));
  PyObjectPtr joined(PyUnicode_Join(separator, items));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(isUnicodeEqualsCStr(joined, ""));
}

// Joining ["a", "b"] with "." gives "a.b".
TEST_F(UnicodeExtensionApiTest, JoinWithSeqJoinsElements) {
  PyObjectPtr separator(PyUnicode_FromString("."));
  PyObjectPtr items(PyList_New(0));
  PyObjectPtr first(PyUnicode_FromString("a"));
  PyList_Append(items, first);
  PyObjectPtr second(PyUnicode_FromString("b"));
  PyList_Append(items, second);
  PyObjectPtr joined(PyUnicode_Join(separator, items));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(isUnicodeEqualsCStr(joined, "a.b"));
}

TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingNonStrRaisesTypeError) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr seq(PyList_New(0));
  PyList_Append(seq, Py_None);
  PyObjectPtr result(PyUnicode_Join(sep, seq));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1932} 1933 1934TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingBytesRaisesTypeError) { 1935 PyObjectPtr sep(PyUnicode_FromString(".")); 1936 PyObjectPtr seq(PyList_New(0)); 1937 PyObjectPtr elt0(PyBytes_FromString("a")); 1938 PyList_Append(seq, elt0); 1939 PyObjectPtr result(PyUnicode_Join(sep, seq)); 1940 EXPECT_EQ(result, nullptr); 1941 ASSERT_NE(PyErr_Occurred(), nullptr); 1942 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1943} 1944 1945TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrStrRaisesTypeError) { 1946 PyObjectPtr sep(PyUnicode_FromString(".")); 1947 PyObjectPtr result(PyUnicode_Partition(Py_None, sep)); 1948 EXPECT_EQ(result, nullptr); 1949 ASSERT_NE(PyErr_Occurred(), nullptr); 1950 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1951} 1952 1953TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrSepRaisesTypeError) { 1954 PyObjectPtr str(PyUnicode_FromString("hello")); 1955 PyObjectPtr result(PyUnicode_Partition(str, Py_None)); 1956 EXPECT_EQ(result, nullptr); 1957 ASSERT_NE(PyErr_Occurred(), nullptr); 1958 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1959} 1960 1961TEST_F(UnicodeExtensionApiTest, PartitionReturnsTuple) { 1962 PyObjectPtr str(PyUnicode_FromString("a.b")); 1963 PyObjectPtr sep(PyUnicode_FromString(".")); 1964 PyObjectPtr result(PyUnicode_Partition(str, sep)); 1965 EXPECT_EQ(PyErr_Occurred(), nullptr); 1966 ASSERT_NE(result, nullptr); 1967 EXPECT_TRUE(PyTuple_CheckExact(result)); 1968} 1969 1970TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrStrRaisesTypeError) { 1971 PyObjectPtr sep(PyUnicode_FromString(".")); 1972 PyObjectPtr result(PyUnicode_RPartition(Py_None, sep)); 1973 EXPECT_EQ(result, nullptr); 1974 ASSERT_NE(PyErr_Occurred(), nullptr); 1975 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1976} 1977 1978TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrSepRaisesTypeError) { 1979 PyObjectPtr str(PyUnicode_FromString("hello")); 1980 PyObjectPtr result(PyUnicode_RPartition(str, Py_None)); 
1981 EXPECT_EQ(result, nullptr); 1982 ASSERT_NE(PyErr_Occurred(), nullptr); 1983 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 1984} 1985 1986TEST_F(UnicodeExtensionApiTest, RPartitionReturnsTuple) { 1987 PyObjectPtr str(PyUnicode_FromString("a.b")); 1988 PyObjectPtr sep(PyUnicode_FromString(".")); 1989 PyObjectPtr result(PyUnicode_RPartition(str, sep)); 1990 EXPECT_EQ(PyErr_Occurred(), nullptr); 1991 ASSERT_NE(result, nullptr); 1992 EXPECT_TRUE(PyTuple_CheckExact(result)); 1993} 1994 1995TEST_F(UnicodeExtensionApiTest, SplitlinesWithNonStrStrRaisesTypeError) { 1996 PyObjectPtr result(PyUnicode_Splitlines(Py_None, 0)); 1997 EXPECT_EQ(result, nullptr); 1998 ASSERT_NE(PyErr_Occurred(), nullptr); 1999 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2000} 2001 2002TEST_F(UnicodeExtensionApiTest, SplitlinesReturnsList) { 2003 PyObjectPtr str(PyUnicode_FromString("hello\nworld")); 2004 PyObjectPtr result(PyUnicode_Splitlines(str, 1)); 2005 EXPECT_EQ(PyErr_Occurred(), nullptr); 2006 ASSERT_NE(result, nullptr); 2007 EXPECT_TRUE(PyList_CheckExact(result)); 2008} 2009 2010TEST_F(UnicodeExtensionApiTest, SplitlinesWithSubClassReturnsList) { 2011 PyRun_SimpleString(R"( 2012class SubStr(str): pass 2013 2014str_val = SubStr('hello\nworld') 2015)"); 2016 PyObjectPtr str(mainModuleGet("str_val")); 2017 PyObjectPtr result(PyUnicode_Splitlines(str, 1)); 2018 EXPECT_EQ(PyErr_Occurred(), nullptr); 2019 ASSERT_NE(result, nullptr); 2020 EXPECT_TRUE(PyList_CheckExact(result)); 2021} 2022 2023TEST_F(UnicodeExtensionApiTest, SplitlinesWithNoNewlinesReturnsIdEqualString) { 2024 PyObjectPtr str(PyUnicode_FromString("hello")); 2025 PyObjectPtr result(PyUnicode_Splitlines(str, 1)); 2026 EXPECT_EQ(PyErr_Occurred(), nullptr); 2027 ASSERT_NE(result, nullptr); 2028 ASSERT_TRUE(PyList_CheckExact(result)); 2029 ASSERT_EQ(PyList_Size(result), 1); 2030 PyObject* str_elt = PyList_GetItem(result, 0); 2031 EXPECT_EQ(str, str_elt); 2032} 2033 2034TEST_F(UnicodeExtensionApiTest, 
SplitWithNonStrStrRaisesTypeError) { 2035 PyObjectPtr sep(PyUnicode_FromString(".")); 2036 PyObjectPtr result(PyUnicode_Split(Py_None, sep, 5)); 2037 EXPECT_EQ(result, nullptr); 2038 ASSERT_NE(PyErr_Occurred(), nullptr); 2039 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2040} 2041 2042TEST_F(UnicodeExtensionApiTest, SplitWithNonStrSepRaisesTypeError) { 2043 PyObjectPtr str(PyUnicode_FromString("hello")); 2044 PyObjectPtr sep(PyLong_FromLong(8)); 2045 PyObjectPtr result(PyUnicode_Split(str, sep, 5)); 2046 EXPECT_EQ(result, nullptr); 2047 ASSERT_NE(PyErr_Occurred(), nullptr); 2048 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2049} 2050 2051TEST_F(UnicodeExtensionApiTest, SplitReturnsList) { 2052 PyObjectPtr str(PyUnicode_FromString("a.b")); 2053 PyObjectPtr sep(PyUnicode_FromString(".")); 2054 PyObjectPtr result(PyUnicode_Split(str, sep, 5)); 2055 EXPECT_EQ(PyErr_Occurred(), nullptr); 2056 ASSERT_NE(result, nullptr); 2057 EXPECT_TRUE(PyList_CheckExact(result)); 2058} 2059 2060TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrStrRaisesTypeError) { 2061 PyObjectPtr sep(PyUnicode_FromString(".")); 2062 PyObjectPtr result(PyUnicode_RSplit(Py_None, sep, 5)); 2063 EXPECT_EQ(result, nullptr); 2064 ASSERT_NE(PyErr_Occurred(), nullptr); 2065 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2066} 2067 2068TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrSepRaisesTypeError) { 2069 PyObjectPtr str(PyUnicode_FromString("hello")); 2070 PyObjectPtr sep(PyLong_FromLong(8)); 2071 PyObjectPtr result(PyUnicode_RSplit(str, sep, 5)); 2072 EXPECT_EQ(result, nullptr); 2073 ASSERT_NE(PyErr_Occurred(), nullptr); 2074 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2075} 2076 2077TEST_F(UnicodeExtensionApiTest, RSplitReturnsList) { 2078 PyObjectPtr str(PyUnicode_FromString("a.b")); 2079 PyObjectPtr sep(PyUnicode_FromString(".")); 2080 PyObjectPtr result(PyUnicode_RSplit(str, sep, 5)); 2081 EXPECT_EQ(PyErr_Occurred(), nullptr); 2082 ASSERT_NE(result, nullptr); 2083 
EXPECT_TRUE(PyList_CheckExact(result)); 2084} 2085 2086TEST_F(UnicodeExtensionApiTest, StrlenWithEmptyStrReturnsZero) { 2087 const wchar_t* str = L""; 2088#pragma GCC diagnostic push 2089#pragma GCC diagnostic ignored "-Wdeprecated-declarations" 2090 ASSERT_EQ(Py_UNICODE_strlen(str), 0U); 2091#pragma GCC diagnostic pop 2092} 2093 2094TEST_F(UnicodeExtensionApiTest, StrlenWithStrReturnsNumberOfChars) { 2095 const wchar_t* str = L"hello"; 2096#pragma GCC diagnostic push 2097#pragma GCC diagnostic ignored "-Wdeprecated-declarations" 2098 ASSERT_EQ(Py_UNICODE_strlen(str), 5U); 2099#pragma GCC diagnostic pop 2100} 2101 2102TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeStartRaisesIndexError) { 2103 PyObjectPtr str(PyUnicode_FromString("foo")); 2104 ASSERT_EQ(PyUnicode_Substring(str, -1, 3), nullptr); 2105 ASSERT_NE(PyErr_Occurred(), nullptr); 2106 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); 2107} 2108 2109TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeEndRaisesIndexError) { 2110 PyObjectPtr str(PyUnicode_FromString("foo")); 2111 ASSERT_EQ(PyUnicode_Substring(str, 0, -3), nullptr); 2112 ASSERT_NE(PyErr_Occurred(), nullptr); 2113 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); 2114} 2115 2116TEST_F(UnicodeExtensionApiTest, SubstringWithFullStringReturnsSameObject) { 2117 PyObjectPtr str(PyUnicode_FromString("foo")); 2118 PyObjectPtr result(PyUnicode_Substring(str, 0, 5)); 2119 EXPECT_EQ(PyErr_Occurred(), nullptr); 2120 EXPECT_EQ(result, str); 2121} 2122 2123TEST_F(UnicodeExtensionApiTest, SubstringWithSameStartAndEndReturnsEmpty) { 2124 PyObjectPtr str(PyUnicode_FromString("foo")); 2125 PyObjectPtr result(PyUnicode_Substring(str, 2, 2)); 2126 EXPECT_EQ(PyErr_Occurred(), nullptr); 2127 ASSERT_TRUE(PyUnicode_CheckExact(result)); 2128 EXPECT_STREQ(PyUnicode_AsUTF8(result), ""); 2129} 2130 2131TEST_F(UnicodeExtensionApiTest, SubstringWithASCIIReturnsSubstring) { 2132 PyObjectPtr str(PyUnicode_FromString("Hello world!")); 2133 PyObjectPtr 
result(PyUnicode_Substring(str, 3, 8)); 2134 EXPECT_EQ(PyErr_Occurred(), nullptr); 2135 ASSERT_TRUE(PyUnicode_CheckExact(result)); 2136 EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo"); 2137} 2138 2139TEST_F(UnicodeExtensionApiTest, SubstringWithSubClassReturnsSubstring) { 2140 PyRun_SimpleString(R"( 2141class SubStr(str): pass 2142 2143str_val = SubStr('Hello world!') 2144)"); 2145 PyObjectPtr str(mainModuleGet("str_val")); 2146 PyObjectPtr result(PyUnicode_Substring(str, 3, 8)); 2147 EXPECT_EQ(PyErr_Occurred(), nullptr); 2148 ASSERT_TRUE(PyUnicode_CheckExact(result)); 2149 EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo"); 2150} 2151 2152TEST_F(UnicodeExtensionApiTest, SubstringCountsCodePoints) { 2153 PyObjectPtr str(PyUnicode_FromString("cre\u0300me bru\u0302le\u0301e")); 2154 PyObjectPtr result(PyUnicode_Substring(str, 2, 11)); 2155 EXPECT_EQ(PyErr_Occurred(), nullptr); 2156 ASSERT_TRUE(PyUnicode_CheckExact(result)); 2157 EXPECT_STREQ(PyUnicode_AsUTF8(result), "e\u0300me bru\u0302"); 2158} 2159 2160TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithEmptyStringsReturnsOne) { 2161 PyObjectPtr str(PyUnicode_FromString("")); 2162 PyObjectPtr substr(PyUnicode_FromString("")); 2163 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, 1), 1); 2164 EXPECT_EQ(PyErr_Occurred(), nullptr); 2165} 2166 2167TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithEmptyStringsReturnsOne) { 2168 PyObjectPtr str(PyUnicode_FromString("")); 2169 PyObjectPtr substr(PyUnicode_FromString("")); 2170 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, -1), 1); 2171 EXPECT_EQ(PyErr_Occurred(), nullptr); 2172} 2173 2174TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithMatchReturnsOne) { 2175 PyObjectPtr str(PyUnicode_FromString("abcde")); 2176 PyObjectPtr substr(PyUnicode_FromString("cde")); 2177 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 9, -1), 1); 2178 EXPECT_EQ(PyErr_Occurred(), nullptr); 2179} 2180 2181TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithoutMatchReturnsZero) { 2182 PyObjectPtr 
str(PyUnicode_FromString("abcde")); 2183 PyObjectPtr substr(PyUnicode_FromString("cde")); 2184 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 4, -1), 0); 2185 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, -1), 0); 2186 2187 PyObjectPtr substr2(PyUnicode_FromString("cdf")); 2188 EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 2, 6, -1), 0); 2189 EXPECT_EQ(PyErr_Occurred(), nullptr); 2190} 2191 2192TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithMatchReturnsOne) { 2193 PyObjectPtr str(PyUnicode_FromString("abcde")); 2194 PyObjectPtr substr(PyUnicode_FromString("cde")); 2195 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 5, 1), 1); 2196 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, 1), 1); 2197 EXPECT_EQ(PyErr_Occurred(), nullptr); 2198} 2199 2200TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithoutMatchReturnsZero) { 2201 PyObjectPtr str(PyUnicode_FromString("abcde")); 2202 PyObjectPtr substr(PyUnicode_FromString("cde")); 2203 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0); 2204 PyObjectPtr substr2(PyUnicode_FromString("bde")); 2205 EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 1, 5, 1), 0); 2206 EXPECT_EQ(PyErr_Occurred(), nullptr); 2207} 2208 2209TEST_F(UnicodeExtensionApiTest, TailmatchWithLargerNeedleReturnsZero) { 2210 PyObjectPtr str(PyUnicode_FromString("abcde")); 2211 PyObjectPtr substr(PyUnicode_FromString("bananas")); 2212 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0); 2213 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, -1), 0); 2214 EXPECT_EQ(PyErr_Occurred(), nullptr); 2215} 2216 2217TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrHaystackRaisesTypeError) { 2218 PyObjectPtr str(PyUnicode_FromString("abcde")); 2219 PyObjectPtr num(PyLong_FromLong(7)); 2220 EXPECT_EQ(PyUnicode_Tailmatch(num, str, 1, 6, 1), -1); 2221 ASSERT_NE(PyErr_Occurred(), nullptr); 2222 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2223} 2224 2225TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrNeedleRaisesTypeError) { 2226 PyObjectPtr 
str(PyUnicode_FromString("abcde")); 2227 PyObjectPtr num(PyLong_FromLong(7)); 2228 EXPECT_EQ(PyUnicode_Tailmatch(str, num, 1, 6, 1), -1); 2229 ASSERT_NE(PyErr_Occurred(), nullptr); 2230 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2231} 2232 2233TEST_F(UnicodeExtensionApiTest, NewWithInvalidSizeReturnsError) { 2234 EXPECT_EQ(PyUnicode_New(-1, 0), nullptr); 2235 ASSERT_NE(PyErr_Occurred(), nullptr); 2236 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 2237} 2238 2239TEST_F(UnicodeExtensionApiTest, NewWithInvalidMaxCharReturnsError) { 2240 EXPECT_EQ(PyUnicode_New(1, 0x11FFFF), nullptr); 2241 ASSERT_NE(PyErr_Occurred(), nullptr); 2242 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 2243} 2244 2245TEST_F(UnicodeExtensionApiTest, NewWithZeroSizeAndInvalidMaxCharReturnsStr) { 2246 PyObjectPtr empty(PyUnicode_New(0, 0x11FFFF)); 2247 ASSERT_EQ(PyErr_Occurred(), nullptr); 2248 EXPECT_TRUE(PyUnicode_CheckExact(empty)); 2249 EXPECT_TRUE(isUnicodeEqualsCStr(empty, "")); 2250} 2251 2252TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithNegativeOneRaiseError) { 2253 char c = 'a'; 2254 PyObjectPtr empty(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, &c, -1)); 2255 EXPECT_EQ(empty, nullptr); 2256 ASSERT_NE(PyErr_Occurred(), nullptr); 2257 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2258} 2259 2260TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithInvalidKindRaiseError) { 2261 char c = 'a'; 2262 PyObjectPtr empty(PyUnicode_FromKindAndData(100, &c, 1)); 2263 EXPECT_EQ(empty, nullptr); 2264 ASSERT_NE(PyErr_Occurred(), nullptr); 2265 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); 2266} 2267 2268TEST_F(UnicodeExtensionApiTest, 2269 FromKindAndDataWithOneByteKindAndASCIICodePointsReturnsStr) { 2270 Py_UCS1 buffer[] = {'h', 'e', 'l', 'l', 'o'}; 2271 PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, buffer, 2272 Py_ARRAY_LENGTH(buffer))); 2273 ASSERT_EQ(PyErr_Occurred(), nullptr); 2274 ASSERT_TRUE(PyUnicode_CheckExact(str)); 2275 
EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "hello"));
}

// One-byte code units above ASCII (0xe4) come back as two UTF-8 bytes.
TEST_F(UnicodeExtensionApiTest,
       FromKindAndDataWithOneByteKindAndLatin1CodePointsReturnsStr) {
  Py_UCS1 code_units[] = {'h', 0xe4, 'l', 'l', 'o'};
  PyObjectPtr built(PyUnicode_FromKindAndData(
      PyUnicode_1BYTE_KIND, code_units, Py_ARRAY_LENGTH(code_units)));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(built));
  EXPECT_STREQ(PyUnicode_AsUTF8(built), "h\xc3\xa4llo");
}

// Two-byte code units: 0x2cc0 comes back as three UTF-8 bytes.
TEST_F(UnicodeExtensionApiTest,
       FromKindAndDataWithTwoByteKindAndBMPCodePointsReturnsStr) {
  Py_UCS2 code_units[] = {'h', 0xe4, 'l', 0x2cc0, 'o'};
  PyObjectPtr built(PyUnicode_FromKindAndData(
      PyUnicode_2BYTE_KIND, code_units, Py_ARRAY_LENGTH(code_units)));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(built));
  EXPECT_STREQ(PyUnicode_AsUTF8(built), "h\xc3\xa4l\xe2\xb3\x80o");
}

// Four-byte code units: 0x1f192 comes back as four UTF-8 bytes.
TEST_F(UnicodeExtensionApiTest,
       FromKindAndDataWithFourByteKindAndNonBMPCodePointsReturnsStr) {
  Py_UCS4 code_units[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0};
  PyObjectPtr built(PyUnicode_FromKindAndData(
      PyUnicode_4BYTE_KIND, code_units, Py_ARRAY_LENGTH(code_units)));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(built));
  EXPECT_STREQ(PyUnicode_AsUTF8(built),
               "\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80");
}

// An int container yields -1 and TypeError.
TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrSelfRaisesTypeError) {
  PyObjectPtr container(PyLong_FromLong(7));
  PyObjectPtr element(PyUnicode_FromString("hello"));
  EXPECT_EQ(PyUnicode_Contains(container, element), -1);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrOtherRaisesTypeError) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  PyObjectPtr other(PyLong_FromLong(7));
  EXPECT_EQ(PyUnicode_Contains(self, other), -1);
  ASSERT_NE(PyErr_Occurred(), nullptr);
EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

// "f" occurs in "foo", so the result is 1.
TEST_F(UnicodeExtensionApiTest, ContainsWithPresentSubstrReturnsTrue) {
  PyObjectPtr container(PyUnicode_FromString("foo"));
  PyObjectPtr element(PyUnicode_FromString("f"));
  EXPECT_EQ(PyUnicode_Contains(container, element), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// "q" does not occur in "foo", so the result is 0.
// NOTE(review): the test name says "ReturnsTrue" but the assertion expects 0.
TEST_F(UnicodeExtensionApiTest, ContainsWithNotPresentSubstrReturnsTrue) {
  PyObjectPtr container(PyUnicode_FromString("foo"));
  PyObjectPtr element(PyUnicode_FromString("q"));
  EXPECT_EQ(PyUnicode_Contains(container, element), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// Punctuation in the middle of the name ('-' or '}') becomes '_'.
TEST_F(UnicodeExtensionApiTest, NormalizeEncodingEscapesMidStringPunctuation) {
  char normalized[11] = {0};
  EXPECT_EQ(_Py_normalize_encoding("utf-8", normalized, sizeof(normalized)),
            1);
  EXPECT_STREQ(normalized, "utf_8");
  EXPECT_EQ(_Py_normalize_encoding("utf}8", normalized, sizeof(normalized)),
            1);
  EXPECT_STREQ(normalized, "utf_8");
}

// Leading and trailing underscores are dropped.
TEST_F(UnicodeExtensionApiTest,
       NormalizeEncodingIgnoresEndOfStringPunctuation) {
  char normalized[11] = {0};
  EXPECT_EQ(_Py_normalize_encoding("_utf8", normalized, sizeof(normalized)),
            1);
  EXPECT_STREQ(normalized, "utf8");
  EXPECT_EQ(_Py_normalize_encoding("utf8_", normalized, sizeof(normalized)),
            1);
  EXPECT_STREQ(normalized, "utf8");
}

// Upper-case letters are folded to lower case.
TEST_F(UnicodeExtensionApiTest, NormalizeEncodingProperlyLowercases) {
  char normalized[11] = {0};
  EXPECT_EQ(_Py_normalize_encoding("ASCII", normalized, sizeof(normalized)),
            1);
  EXPECT_STREQ(normalized, "ascii");
}

// A five-character name does not fit a five-byte buffer: the call returns 0
// and the buffer holds the first four characters.
// NOTE(review): the test name says "ReturnsEmptyString" but the assertion
// expects "1234".
TEST_F(UnicodeExtensionApiTest,
       NormalizeEncodingWithTooLongStringReturnsEmptyString) {
  char normalized[5] = {0};
  EXPECT_EQ(_Py_normalize_encoding("12345", normalized, sizeof(normalized)),
            0);
  EXPECT_STREQ(normalized, "1234");
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithNullErrorValueEmbeddedNulRaisesValueError) {
  PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, nullptr);
ASSERT_NE(PyErr_Occurred(), nullptr); 2372 EXPECT_EQ(self, nullptr); 2373 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2374} 2375 2376TEST_F( 2377 UnicodeExtensionApiTest, 2378 DecodeLocaleAndSizeWithNullErrorValueNonNulTerminatedStrRaisesValueError) { 2379 const char data[] = {'a', 'b'}; 2380 PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, nullptr); 2381 ASSERT_NE(PyErr_Occurred(), nullptr); 2382 EXPECT_EQ(self, nullptr); 2383 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2384} 2385 2386TEST_F(UnicodeExtensionApiTest, 2387 DecodeLocaleAndSizeWithNullErrorValueReturnsStr) { 2388 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, nullptr)); 2389 ASSERT_EQ(PyErr_Occurred(), nullptr); 2390 ASSERT_TRUE(PyUnicode_CheckExact(str)); 2391 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc")); 2392} 2393 2394TEST_F(UnicodeExtensionApiTest, 2395 DecodeLocaleAndSizeWithNullErrorValueStrictAndSurrogatesRaisesError) { 2396 PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, nullptr); 2397 ASSERT_EQ(str, nullptr); 2398 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)); 2399} 2400 2401TEST_F(UnicodeExtensionApiTest, 2402 DecodeLocaleAndSizeWithEmbeddedNulRaisesValueError) { 2403 PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, "strict"); 2404 ASSERT_NE(PyErr_Occurred(), nullptr); 2405 EXPECT_EQ(self, nullptr); 2406 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2407} 2408 2409TEST_F(UnicodeExtensionApiTest, 2410 DecodeLocaleAndSizeWithNonNulTerminatedStrRaisesValueError) { 2411 const char data[] = {'a', 'b'}; 2412 PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, "strict"); 2413 ASSERT_NE(PyErr_Occurred(), nullptr); 2414 EXPECT_EQ(self, nullptr); 2415 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2416} 2417 2418TEST_F(UnicodeExtensionApiTest, 2419 DecodeLocaleAndSizeWithUnknownErrorHandlerNameRaisesValueError) { 2420 PyObject* self = PyUnicode_DecodeLocaleAndSize("abc", 3, "nonexistant"); 2421 
ASSERT_NE(PyErr_Occurred(), nullptr); 2422 EXPECT_EQ(self, nullptr); 2423 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); 2424} 2425 2426TEST_F(UnicodeExtensionApiTest, DecodeLocaleAndSizeWithStrictReturnsStr) { 2427 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "strict")); 2428 ASSERT_EQ(PyErr_Occurred(), nullptr); 2429 ASSERT_TRUE(PyUnicode_CheckExact(str)); 2430 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc")); 2431} 2432 2433TEST_F(UnicodeExtensionApiTest, 2434 DecodeLocaleAndSizeWithSurrogateescapeReturnsStr) { 2435 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "surrogateescape")); 2436 ASSERT_EQ(PyErr_Occurred(), nullptr); 2437 ASSERT_TRUE(PyUnicode_CheckExact(str)); 2438 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc")); 2439} 2440 2441TEST_F(UnicodeExtensionApiTest, 2442 DecodeLocaleAndSizeWithSurrogateescapeAndSurrogatesReturnsStr) { 2443 PyObjectPtr str( 2444 PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "surrogateescape")); 2445 ASSERT_EQ(PyErr_Occurred(), nullptr); 2446 ASSERT_TRUE(PyUnicode_CheckExact(str)); 2447 Py_UCS4 data[] = {'a', 'b', 'c', 0xDC80}; 2448 PyObjectPtr test(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, data, 4)); 2449 EXPECT_TRUE(_PyUnicode_EQ(str, test)); 2450} 2451 2452TEST_F(UnicodeExtensionApiTest, 2453 DecodeLocaleAndSizeWithStrictAndSurrogatesRaisesError) { 2454 PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "strict"); 2455 ASSERT_EQ(str, nullptr); 2456 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)); 2457} 2458 2459TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithNonStringReturnsNull) { 2460 PyObjectPtr bytes(_PyUnicode_AsASCIIString(Py_None, nullptr)); 2461 ASSERT_EQ(bytes, nullptr); 2462 ASSERT_NE(PyErr_Occurred(), nullptr); 2463 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2464} 2465 2466TEST_F(UnicodeExtensionApiTest, AsASCIIStringReturnsBytes) { 2467 PyObjectPtr unicode(PyUnicode_FromString("foo")); 2468 PyObjectPtr 
bytes(_PyUnicode_AsASCIIString(unicode, nullptr)); 2469 ASSERT_EQ(PyErr_Occurred(), nullptr); 2470 ASSERT_TRUE(PyBytes_Check(bytes)); 2471 EXPECT_EQ(PyBytes_Size(bytes), 3); 2472 EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); 2473} 2474 2475TEST_F(UnicodeExtensionApiTest, 2476 AsASCIIStringWithInvalidCodepointRaisesEncodeError) { 2477 PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF")); 2478 PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, nullptr)); 2479 ASSERT_NE(PyErr_Occurred(), nullptr); 2480 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); 2481 EXPECT_EQ(bytes, nullptr); 2482} 2483 2484TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithReplaceErrorsReturnsBytes) { 2485 PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF")); 2486 PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, "replace")); 2487 ASSERT_EQ(PyErr_Occurred(), nullptr); 2488 2489 ASSERT_TRUE(PyBytes_Check(bytes)); 2490 EXPECT_EQ(PyBytes_Size(bytes), 4); 2491 EXPECT_STREQ(PyBytes_AsString(bytes), "foo?"); 2492} 2493 2494TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithNonStringReturnsNull) { 2495 PyObjectPtr bytes(_PyUnicode_AsLatin1String(Py_None, nullptr)); 2496 ASSERT_EQ(bytes, nullptr); 2497 ASSERT_NE(PyErr_Occurred(), nullptr); 2498 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2499} 2500 2501TEST_F(UnicodeExtensionApiTest, AsLatin1StringReturnsBytes) { 2502 PyObjectPtr unicode(PyUnicode_FromString("foo")); 2503 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, nullptr)); 2504 ASSERT_EQ(PyErr_Occurred(), nullptr); 2505 ASSERT_TRUE(PyBytes_Check(bytes)); 2506 EXPECT_EQ(PyBytes_Size(bytes), 3); 2507 EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); 2508} 2509 2510TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithLatin1ReturnsBytes) { 2511 PyObjectPtr unicode(PyUnicode_FromString("foo\u00E4")); 2512 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, "replace")); 2513 ASSERT_EQ(PyErr_Occurred(), nullptr); 2514 2515 ASSERT_TRUE(PyBytes_Check(bytes)); 2516 
EXPECT_EQ(PyBytes_Size(bytes), 4);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo\xE4");
}

// U+01EF is outside Latin-1; with a null handler the call fails with
// UnicodeEncodeError.
TEST_F(UnicodeExtensionApiTest,
       AsLatin1StringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr text(PyUnicode_FromString("foo\u01EF"));
  PyObjectPtr encoded(_PyUnicode_AsLatin1String(text, nullptr));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
  EXPECT_EQ(encoded, nullptr);
}

// With the "replace" handler, U+0AE4 becomes '?'.
TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithReplaceErrorsReturnsBytes) {
  PyObjectPtr text(PyUnicode_FromString("foo\u0AE4"));
  PyObjectPtr encoded(_PyUnicode_AsLatin1String(text, "replace"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);

  ASSERT_TRUE(PyBytes_Check(encoded));
  EXPECT_EQ(PyBytes_Size(encoded), 4);
  EXPECT_STREQ(PyBytes_AsString(encoded), "foo?");
}

// UTF-16-encoding None raises TypeError.
TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithNonStringReturnsNull) {
  PyObjectPtr encoded(PyUnicode_AsUTF16String(Py_None));
  ASSERT_EQ(encoded, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

// "hi" encodes to six bytes: 0xff 0xfe followed by two two-byte units, low
// byte first.
TEST_F(UnicodeExtensionApiTest, AsUTF16StringReturnsBytes) {
  PyObjectPtr text(PyUnicode_FromString("hi"));
  PyObjectPtr encoded(PyUnicode_AsUTF16String(text));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(encoded));
  EXPECT_EQ(PyBytes_Size(encoded), 6);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(encoded), "\xff\xfeh\x00i\x00", 6),
            0);
}

TEST_F(UnicodeExtensionApiTest,
       AsUTF16StringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(unicode));
  PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
2563 EXPECT_EQ(bytes, nullptr); 2564} 2565 2566TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithUTF16ReturnsBytes) { 2567 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2568 PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode)); 2569 ASSERT_EQ(PyErr_Occurred(), nullptr); 2570 2571 ASSERT_TRUE(PyBytes_Check(bytes)); 2572 EXPECT_EQ(PyBytes_Size(bytes), 10); 2573 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), 2574 "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10), 2575 0); 2576} 2577 2578TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithUTF16ReturnsBytes) { 2579 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2580 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0)); 2581 ASSERT_EQ(PyErr_Occurred(), nullptr); 2582 2583 ASSERT_TRUE(PyBytes_Check(bytes)); 2584 EXPECT_EQ(PyBytes_Size(bytes), 10); 2585 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), 2586 "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10), 2587 0); 2588} 2589 2590TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16LeWithUTF16ReturnsBytes) { 2591 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2592 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", -1)); 2593 ASSERT_EQ(PyErr_Occurred(), nullptr); 2594 2595 ASSERT_TRUE(PyBytes_Check(bytes)); 2596 EXPECT_EQ(PyBytes_Size(bytes), 8); 2597 EXPECT_EQ( 2598 std::memcmp(PyBytes_AsString(bytes), "h\x00\x34\xd8\xf0\xddi\x00", 8), 0); 2599} 2600 2601TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16BeWithUTF16ReturnsBytes) { 2602 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2603 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 1)); 2604 ASSERT_EQ(PyErr_Occurred(), nullptr); 2605 2606 ASSERT_TRUE(PyBytes_Check(bytes)); 2607 EXPECT_EQ(PyBytes_Size(bytes), 8); 2608 EXPECT_EQ( 2609 std::memcmp(PyBytes_AsString(bytes), "\x00h\xd8\x34\xdd\xf0\x00i", 8), 0); 2610} 2611 2612TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithReplaceReturnsBytes) { 2613 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, 
"surrogateescape")); 2614 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0)); 2615 ASSERT_EQ(PyErr_Occurred(), nullptr); 2616 2617 ASSERT_TRUE(PyBytes_Check(bytes)); 2618 EXPECT_EQ(PyBytes_Size(bytes), 8); 2619 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8), 2620 0); 2621} 2622 2623TEST_F(UnicodeExtensionApiTest, EncodeUTF16WithReplaceReturnsBytes) { 2624 PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3)); 2625 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0)); 2626 ASSERT_EQ(PyErr_Occurred(), nullptr); 2627 2628 ASSERT_TRUE(PyBytes_Check(bytes)); 2629 EXPECT_EQ(PyBytes_Size(bytes), 8); 2630 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8), 2631 0); 2632} 2633 2634TEST_F(UnicodeExtensionApiTest, AsUTF32StringWithNonStringReturnsNull) { 2635 PyObjectPtr bytes(PyUnicode_AsUTF32String(Py_None)); 2636 ASSERT_EQ(bytes, nullptr); 2637 ASSERT_NE(PyErr_Occurred(), nullptr); 2638 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); 2639} 2640 2641TEST_F(UnicodeExtensionApiTest, AsUTF32StringReturnsBytes) { 2642 PyObjectPtr unicode(PyUnicode_FromString("hi")); 2643 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode)); 2644 ASSERT_EQ(PyErr_Occurred(), nullptr); 2645 ASSERT_TRUE(PyBytes_Check(bytes)); 2646 EXPECT_EQ(PyBytes_Size(bytes), 12); 2647 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), 2648 "\xff\xfe\x00\x00h\x00\x00\x00i\x00\x00\x00", 12), 2649 0); 2650} 2651 2652TEST_F(UnicodeExtensionApiTest, 2653 AsUTF32StringWithInvalidCodepointRaisesEncodeError) { 2654 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape")); 2655 ASSERT_EQ(PyErr_Occurred(), nullptr); 2656 ASSERT_TRUE(PyUnicode_CheckExact(unicode)); 2657 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode)); 2658 ASSERT_NE(PyErr_Occurred(), nullptr); 2659 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); 2660 EXPECT_EQ(bytes, nullptr); 2661} 2662 2663TEST_F(UnicodeExtensionApiTest, 
AsUTF32StringWithUTF32ReturnsBytes) { 2664 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2665 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode)); 2666 ASSERT_EQ(PyErr_Occurred(), nullptr); 2667 2668 ASSERT_TRUE(PyBytes_Check(bytes)); 2669 EXPECT_EQ(PyBytes_Size(bytes), 16); 2670 EXPECT_EQ( 2671 std::memcmp(PyBytes_AsString(bytes), 2672 "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00", 2673 16), 2674 0); 2675} 2676 2677TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithUTF32ReturnsBytes) { 2678 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2679 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0)); 2680 ASSERT_EQ(PyErr_Occurred(), nullptr); 2681 2682 ASSERT_TRUE(PyBytes_Check(bytes)); 2683 EXPECT_EQ(PyBytes_Size(bytes), 16); 2684 EXPECT_EQ( 2685 std::memcmp(PyBytes_AsString(bytes), 2686 "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00", 2687 16), 2688 0); 2689} 2690 2691TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32LeWithUTF32ReturnsBytes) { 2692 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2693 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", -1)); 2694 ASSERT_EQ(PyErr_Occurred(), nullptr); 2695 2696 ASSERT_TRUE(PyBytes_Check(bytes)); 2697 EXPECT_EQ(PyBytes_Size(bytes), 12); 2698 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), 2699 "h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00", 12), 2700 0); 2701} 2702 2703TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32BeWithUTF32ReturnsBytes) { 2704 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i")); 2705 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 1)); 2706 ASSERT_EQ(PyErr_Occurred(), nullptr); 2707 2708 ASSERT_TRUE(PyBytes_Check(bytes)); 2709 EXPECT_EQ(PyBytes_Size(bytes), 12); 2710 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), 2711 "\x00\x00\x00h\x00\x01\xd1\xf0\x00\x00\x00i", 12), 2712 0); 2713} 2714 2715TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithReplaceReturnsBytes) { 2716 PyObjectPtr 
unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape")); 2717 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0)); 2718 ASSERT_EQ(PyErr_Occurred(), nullptr); 2719 2720 ASSERT_TRUE(PyBytes_Check(bytes)); 2721 EXPECT_EQ(PyBytes_Size(bytes), 16); 2722 EXPECT_EQ(std::memcmp( 2723 PyBytes_AsString(bytes), 2724 "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16), 2725 0); 2726} 2727 2728TEST_F(UnicodeExtensionApiTest, EncodeUTF32WithReplaceReturnsBytes) { 2729 PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3)); 2730 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0)); 2731 ASSERT_EQ(PyErr_Occurred(), nullptr); 2732 2733 ASSERT_TRUE(PyBytes_Check(bytes)); 2734 EXPECT_EQ(PyBytes_Size(bytes), 16); 2735 EXPECT_EQ(std::memcmp( 2736 PyBytes_AsString(bytes), 2737 "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16), 2738 0); 2739} 2740 2741TEST_F(UnicodeExtensionApiTest, IsAsciiWithAsciiOnlyCharsReturnsOne) { 2742 PyObjectPtr unicode(PyUnicode_FromString("foo")); 2743 EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 1); 2744} 2745 2746TEST_F(UnicodeExtensionApiTest, IsAsciiWithNonAsciiCharsReturnsZero) { 2747 PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o")); 2748 EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 0); 2749} 2750 2751TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithAsciiOnlyCharsReturnsOne) { 2752 PyObjectPtr unicode(PyUnicode_FromString("foo")); 2753 EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 1); 2754} 2755 2756TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithNonAsciiCharsReturnsZero) { 2757 PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o")); 2758 EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 0); 2759} 2760 2761TEST_F(UnicodeExtensionApiTest, IsIdentifierWithEmptyStringReturnsFalse) { 2762 PyObjectPtr unicode(PyUnicode_FromString("")); 2763 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0); 2764 EXPECT_EQ(PyErr_Occurred(), nullptr); 2765} 2766 2767TEST_F(UnicodeExtensionApiTest, 
IsIdentifierWithValidIdentifierReturnsTrue) { 2768 PyObjectPtr unicode(PyUnicode_FromString("foo")); 2769 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 1); 2770 EXPECT_EQ(PyErr_Occurred(), nullptr); 2771} 2772 2773TEST_F(UnicodeExtensionApiTest, IsIdentifierWithInvalidIdentifierReturnsFalse) { 2774 PyObjectPtr unicode(PyUnicode_FromString("b$ar")); 2775 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0); 2776 EXPECT_EQ(PyErr_Occurred(), nullptr); 2777} 2778 2779TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithEmptyStrReturnsZero) { 2780 const char* str = ""; 2781 wchar_t* result = nullptr; 2782 EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/0, /*result=*/&result, 2783 /*wlen=*/nullptr, 2784 /*reason=*/nullptr, _Py_ERROR_STRICT)); 2785 ASSERT_NE(result, nullptr); 2786 EXPECT_STREQ(result, L""); 2787 PyMem_RawFree(result); 2788} 2789 2790TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrReturnsZero) { 2791 const char* str = "hello"; 2792 wchar_t* result = nullptr; 2793 EXPECT_EQ(0, 2794 _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str), /*result=*/&result, 2795 /*wlen=*/nullptr, 2796 /*reason=*/nullptr, _Py_ERROR_STRICT)); 2797 ASSERT_NE(result, nullptr); 2798 EXPECT_EQ(std::wcslen(result), size_t{5}); 2799 EXPECT_EQ('h', result[0]); 2800 EXPECT_EQ('e', result[1]); 2801 EXPECT_EQ('l', result[2]); 2802 EXPECT_EQ('l', result[3]); 2803 EXPECT_EQ('o', result[4]); 2804 PyMem_RawFree(result); 2805} 2806 2807TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExDecodesUpToSizeBytes) { 2808 const char* str = "hello"; 2809 wchar_t* result = nullptr; 2810 EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/3, /*result=*/&result, 2811 /*wlen=*/nullptr, 2812 /*reason=*/nullptr, _Py_ERROR_STRICT)); 2813 ASSERT_NE(result, nullptr); 2814 EXPECT_EQ(std::wcslen(result), size_t{3}); 2815 EXPECT_EQ('h', result[0]); 2816 EXPECT_EQ('e', result[1]); 2817 EXPECT_EQ('l', result[2]); 2818 PyMem_RawFree(result); 2819} 2820 2821TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrSetsWlen) { 2822 const char* str = 
"hello"; 2823 wchar_t* result = nullptr; 2824 size_t wlen = 0; 2825 EXPECT_EQ(0, 2826 _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str), /*result=*/&result, 2827 /*wlen=*/&wlen, 2828 /*reason=*/nullptr, _Py_ERROR_STRICT)); 2829 ASSERT_NE(result, nullptr); 2830 EXPECT_EQ(std::wcslen(result), size_t{5}); 2831 EXPECT_EQ('h', result[0]); 2832 EXPECT_EQ('e', result[1]); 2833 EXPECT_EQ('l', result[2]); 2834 EXPECT_EQ('l', result[3]); 2835 EXPECT_EQ('o', result[4]); 2836 EXPECT_EQ(wlen, size_t{5}); 2837 PyMem_RawFree(result); 2838} 2839 2840TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithEmptyStrReturnsZero) { 2841 const wchar_t* str = L""; 2842 char* result = nullptr; 2843 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2844 /*reason=*/nullptr, /*raw_malloc=*/0, 2845 _Py_ERROR_STRICT)); 2846 ASSERT_NE(result, nullptr); 2847 EXPECT_STREQ(result, ""); 2848 PyMem_Free(result); 2849} 2850 2851TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithASCIIStrReturnsZero) { 2852 const wchar_t* str = L"hello"; 2853 char* result = nullptr; 2854 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2855 /*reason=*/nullptr, /*raw_malloc=*/0, 2856 _Py_ERROR_STRICT)); 2857 ASSERT_NE(result, nullptr); 2858 EXPECT_STREQ(result, "hello"); 2859 PyMem_Free(result); 2860} 2861 2862TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithRawMallocReturnsZero) { 2863 const wchar_t* str = L"hello"; 2864 char* result = nullptr; 2865 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2866 /*reason=*/nullptr, /*raw_malloc=*/1, 2867 _Py_ERROR_STRICT)); 2868 ASSERT_NE(result, nullptr); 2869 EXPECT_STREQ(result, "hello"); 2870 PyMem_RawFree(result); 2871} 2872 2873TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithLatin1ReturnsZero) { 2874 const wchar_t* str = L"cr\xe8me br\xfbl\xe9e"; 2875 char* result = nullptr; 2876 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2877 /*reason=*/nullptr, /*raw_malloc=*/0, 2878 _Py_ERROR_STRICT)); 2879 ASSERT_NE(result, 
nullptr); 2880 EXPECT_STREQ(result, u8"cr\xC3\xA8me br\xC3\xBBl\xE0\xBA\x9E"); 2881 PyMem_Free(result); 2882} 2883 2884TEST_F(UnicodeExtensionApiTest, 2885 EncodeUTF8ExWithoutSurrogateEscapeReturnsNegativeTwo) { 2886 const wchar_t* str = L"\x0000dc80"; 2887 char* result = reinterpret_cast<char*>(0xdeadbeef); 2888 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2889 /*reason=*/nullptr, /*raw_malloc=*/0, 2890 _Py_ERROR_STRICT)); 2891 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef)); 2892} 2893 2894TEST_F(UnicodeExtensionApiTest, 2895 EncodeUTF8ExWithoutSurrogateEscapeAndErrorPosSetsErrorPos) { 2896 const wchar_t* str = L"foo\x0000dc80zip"; 2897 char* result = reinterpret_cast<char*>(0xdeadbeef); 2898 size_t error_pos = 1337; 2899 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos, 2900 /*reason=*/nullptr, /*raw_malloc=*/0, 2901 _Py_ERROR_STRICT)); 2902 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef)); 2903 EXPECT_EQ(error_pos, size_t{3}); 2904} 2905 2906TEST_F(UnicodeExtensionApiTest, 2907 EncodeUTF8ExWithoutSurrogateEscapeAndReasonSetsReason) { 2908 const wchar_t* str = L"\x0000dc80"; 2909 char* result = reinterpret_cast<char*>(0xdeadbeef); 2910 const char* reason = nullptr; 2911 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr, 2912 /*reason=*/&reason, /*raw_malloc=*/0, 2913 _Py_ERROR_STRICT)); 2914 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef)); 2915 ASSERT_NE(reason, nullptr); 2916 EXPECT_STREQ(reason, "encoding error"); 2917} 2918 2919TEST_F(UnicodeExtensionApiTest, 2920 EncodeUTF8ExWithSurrogateEscapeEscapesSurrogate) { 2921 const wchar_t* str = L"\x0000dc80"; 2922 char* result = nullptr; 2923 size_t error_pos = 1337; 2924 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337)); 2925 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos, 2926 /*reason=*/&reason, /*raw_malloc=*/0, 2927 _Py_ERROR_SURROGATEESCAPE)); 2928 EXPECT_EQ(error_pos, 
size_t{1337}); 2929 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337)); 2930 ASSERT_NE(result, nullptr); 2931 EXPECT_STREQ(result, u8"\x80"); 2932 PyMem_Free(result); 2933} 2934 2935TEST_F(UnicodeExtensionApiTest, 2936 EncodeUTF8ExWithThreeByteCodePointEncodesCodePoint) { 2937 const wchar_t* str = L"\x0000efff"; 2938 char* result = nullptr; 2939 size_t error_pos = 1337; 2940 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337)); 2941 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos, 2942 /*reason=*/nullptr, /*raw_malloc=*/0, 2943 _Py_ERROR_SURROGATEESCAPE)); 2944 EXPECT_EQ(error_pos, size_t{1337}); 2945 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337)); 2946 ASSERT_NE(result, nullptr); 2947 EXPECT_STREQ(result, u8"\xee\xbf\xbf"); 2948 PyMem_Free(result); 2949} 2950 2951TEST_F(UnicodeExtensionApiTest, 2952 EncodeUTF8ExWithFourByteCodePointEncodesCodePoint) { 2953 const wchar_t* str = L"\x10000"; 2954 char* result = nullptr; 2955 size_t error_pos = 1337; 2956 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337)); 2957 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos, 2958 /*reason=*/nullptr, /*raw_malloc=*/0, 2959 _Py_ERROR_SURROGATEESCAPE)); 2960 EXPECT_EQ(error_pos, size_t{1337}); 2961 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337)); 2962 ASSERT_NE(result, nullptr); 2963 EXPECT_STREQ(result, u8"\xf0\x90\x80\x80"); 2964 PyMem_Free(result); 2965} 2966 2967TEST_F(UnicodeExtensionApiTest, 2968 FileSystemDefaultEncodeErrorsMatchesSysGetfilesystemencodeerrors) { 2969 PyRun_SimpleString(R"( 2970import sys 2971errors = sys.getfilesystemencodeerrors() 2972)"); 2973 PyObjectPtr errors(mainModuleGet("errors")); 2974 EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncodeErrors)); 2975} 2976 2977TEST_F(UnicodeExtensionApiTest, 2978 FileSystemDefaultEncodingMatchesSysGetfilesystemencoding) { 2979 PyRun_SimpleString(R"( 2980import sys 2981encoding = sys.getfilesystemencoding() 
2982)"); 2983 PyObjectPtr errors(mainModuleGet("encoding")); 2984 EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncoding)); 2985} 2986 2987TEST_F(UnicodeExtensionApiTest, 2988 GetDefaultEncodingMatchesSysGetdefaultencoding) { 2989 PyRun_SimpleString(R"( 2990import sys 2991sys_default = sys.getdefaultencoding() 2992)"); 2993 PyObjectPtr sys_default(mainModuleGet("sys_default")); 2994 EXPECT_TRUE(isUnicodeEqualsCStr(sys_default, PyUnicode_GetDefaultEncoding())); 2995} 2996 2997TEST_F(UnicodeExtensionApiTest, 2998 DecodeUTF8SurrogateEscapeWithEmptyStringReturnsEmptyString) { 2999 size_t wlen; 3000 wchar_t* wpath = _Py_DecodeUTF8_surrogateescape("", 0, &wlen); 3001 EXPECT_STREQ(wpath, L""); 3002 EXPECT_EQ(wlen, size_t{0}); 3003 PyMem_RawFree(wpath); 3004} 3005 3006TEST_F(UnicodeExtensionApiTest, DecodeUTF8SurrogateEscapeReturnsWideString) { 3007 const char* path = "/foo/bar/bat"; 3008 size_t len = std::strlen(path); 3009 size_t wlen; 3010 wchar_t* wpath = _Py_DecodeUTF8_surrogateescape(path, len, &wlen); 3011 EXPECT_STREQ(wpath, L"/foo/bar/bat"); 3012 EXPECT_EQ(wlen, len); 3013 PyMem_RawFree(wpath); 3014} 3015 3016} // namespace testing 3017} // namespace py