this repo has no description
1// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
2#include <cstring>
3
4#include "Python.h"
5#include "gtest/gtest.h"
6
7#include "capi-fixture.h"
8#include "capi-testing.h"
9
10extern "C" int _Py_EncodeUTF8Ex(const wchar_t*, char**, size_t*, const char**,
11 int, _Py_error_handler);
12extern "C" wchar_t* _Py_DecodeUTF8_surrogateescape(const char*, Py_ssize_t,
13 size_t*);
14extern "C" int _Py_DecodeUTF8Ex(const char*, Py_ssize_t, wchar_t**, size_t*,
15 const char**, _Py_error_handler);
16extern "C" int _Py_normalize_encoding(const char*, char*, size_t);
17
18namespace py {
19namespace testing {
20
21using UnicodeExtensionApiTest = ExtensionApi;
22
23TEST_F(UnicodeExtensionApiTest, AsEncodedStringFromNonStringReturnsNull) {
24 EXPECT_EQ(PyUnicode_AsEncodedString(Py_None, nullptr, nullptr), nullptr);
25 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
26}
27
28TEST_F(UnicodeExtensionApiTest, AsEncodedStringWithNullSizeReturnsUTF8) {
29 const char* str = "utf-8 \xc3\xa8";
30 PyObjectPtr pyunicode(PyUnicode_FromString(str));
31
32 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, nullptr, nullptr));
33 EXPECT_TRUE(isBytesEqualsCStr(bytes, str));
34}
35
36TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIUsesErrorHandler) {
37 PyObjectPtr pyunicode(PyUnicode_FromString("non\xc3\xa8-ascii"));
38
39 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "ascii", "ignore"));
40 EXPECT_TRUE(isBytesEqualsCStr(bytes, "non-ascii"));
41}
42
43TEST_F(UnicodeExtensionApiTest, AsEncodedStringLatin1ReturnsLatin1) {
44 PyObjectPtr pyunicode(PyUnicode_FromString("latin-1 \xc3\xa8"));
45
46 PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "latin-1", nullptr));
47 EXPECT_TRUE(isBytesEqualsCStr(bytes, "latin-1 \xe8"));
48}
49
50TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIWithSubClassReturnsASCII) {
51 PyRun_SimpleString(R"(
52class SubStr(str): pass
53
54substr = SubStr("some string")
55)");
56 PyObjectPtr substr(mainModuleGet("substr"));
57 const char* expected = "some string";
58
59 PyObjectPtr bytes(PyUnicode_AsEncodedString(substr, "ascii", nullptr));
60 EXPECT_TRUE(isBytesEqualsCStr(bytes, expected));
61}
62
63TEST_F(UnicodeExtensionApiTest,
64 AsEncodedStringWithBytearrayReturnRaisesWarning) {
65 CaptureStdStreams streams;
66 PyRun_SimpleString(R"(
67import _codecs
68
69def encoder(s):
70 return bytearray(b"expected"), "two"
71
72def lookup_function(encoding):
73 if encoding == "encode-with-bytearray-return":
74 return encoder, 0, 0, 0
75
76_codecs.register(lookup_function)
77substr = "some test"
78)");
79 PyObjectPtr substr(mainModuleGet("substr"));
80 PyObjectPtr bytes(PyUnicode_AsEncodedString(
81 substr, "encode-with-bytearray-return", nullptr));
82 EXPECT_TRUE(isBytesEqualsCStr(bytes, "expected"));
83 EXPECT_EQ(PyErr_Occurred(), nullptr);
84 EXPECT_NE(streams.err().find(
85 "RuntimeWarning: encoder encode-with-bytearray-return "
86 "returned bytearray instead of bytes; use codecs.encode() to "
87 "encode to arbitrary types\n"),
88 std::string::npos);
89}
90
91TEST_F(UnicodeExtensionApiTest,
92 AsEncodedStringWithNonBytelikeReturnRaisesError) {
93 PyRun_SimpleString(R"(
94import _codecs
95
96def encoder(s):
97 return "not-byteslike", "two"
98
99def lookup_function(encoding):
100 if encoding == "encode-with-non-bytelike-return":
101 return encoder, 0, 0, 0
102
103_codecs.register(lookup_function)
104substr = "some test"
105)");
106 PyObjectPtr substr(mainModuleGet("substr"));
107 EXPECT_EQ(PyUnicode_AsEncodedString(substr, "encode-with-non-bytelike-return",
108 nullptr),
109 nullptr);
110 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
111}
112
113TEST_F(UnicodeExtensionApiTest, AsUTF8FromNonStringReturnsNull) {
114 // Pass a non string object
115 const char* cstring = PyUnicode_AsUTF8AndSize(Py_None, nullptr);
116 EXPECT_EQ(nullptr, cstring);
117}
118
119TEST_F(UnicodeExtensionApiTest, AsUTF8WithNullSizeReturnsCString) {
120 const char* str = "Some C String";
121 PyObjectPtr pyunicode(PyUnicode_FromString(str));
122
123 // Pass a nullptr size
124 const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, nullptr);
125 ASSERT_NE(nullptr, cstring);
126 EXPECT_STREQ(str, cstring);
127}
128
129TEST_F(UnicodeExtensionApiTest, AsUTF8WithSubClassReturnsCString) {
130 PyRun_SimpleString(R"(
131class SubStr(str): pass
132
133substr = SubStr("some string")
134)");
135 PyObjectPtr substr(mainModuleGet("substr"));
136 Py_ssize_t size = 0;
137 const char* expected = "some string";
138
139 const char* c_str = PyUnicode_AsUTF8AndSize(substr, &size);
140 ASSERT_NE(c_str, nullptr);
141 EXPECT_STREQ(c_str, expected);
142}
143
144TEST_F(UnicodeExtensionApiTest, AsUTF8WithReferencedSizeReturnsCString) {
145 const char* str = "Some C String";
146 PyObjectPtr pyunicode(PyUnicode_FromString(str));
147
148 // Pass a size reference
149 Py_ssize_t size = 0;
150 const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, &size);
151 ASSERT_NE(nullptr, cstring);
152 EXPECT_STREQ(str, cstring);
153 EXPECT_EQ(size, static_cast<Py_ssize_t>(std::strlen(str)));
154
155 // Repeated calls should return the same buffer and still set the size.
156 size = 0;
157 const char* cstring2 = PyUnicode_AsUTF8AndSize(pyunicode, &size);
158 ASSERT_NE(cstring2, nullptr);
159 EXPECT_EQ(cstring2, cstring);
160}
161
162TEST_F(UnicodeExtensionApiTest, AsUTF8ReturnsCString) {
163 const char* str = "Some other C String";
164 PyObjectPtr pyobj(PyUnicode_FromString(str));
165
166 const char* cstring = PyUnicode_AsUTF8(pyobj);
167 ASSERT_NE(cstring, nullptr);
168 EXPECT_STREQ(cstring, str);
169
170 // Make sure repeated calls on the same object return the same buffer.
171 const char* cstring2 = PyUnicode_AsUTF8(pyobj);
172 ASSERT_NE(cstring2, nullptr);
173 EXPECT_EQ(cstring2, cstring);
174}
175
176TEST_F(UnicodeExtensionApiTest, AsUTF8WithSurrogatesRaisesUnicodeEncodeError) {
177 PyObjectPtr str(PyUnicode_DecodeLocale("hello\x80world", "surrogateescape"));
178
179 EXPECT_EQ(PyUnicode_AsUTF8(str), nullptr);
180 PyObject *exc, *value, *tb;
181 PyErr_Fetch(&exc, &value, &tb);
182 ASSERT_NE(exc, nullptr);
183 ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeEncodeError));
184 Py_ssize_t temp;
185 PyObjectPtr msg(PyUnicodeEncodeError_GetReason(value));
186 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "surrogates not allowed"));
187 PyUnicodeEncodeError_GetStart(value, &temp);
188 EXPECT_EQ(temp, 5);
189 PyUnicodeEncodeError_GetEnd(value, &temp);
190 EXPECT_EQ(temp, 6);
191 Py_DECREF(exc);
192 Py_DECREF(value);
193 Py_XDECREF(tb);
194}
195
196TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithNonStringReturnsNull) {
197 PyObjectPtr bytes(_PyUnicode_AsUTF8String(Py_None, nullptr));
198 ASSERT_EQ(bytes, nullptr);
199 ASSERT_NE(PyErr_Occurred(), nullptr);
200}
201
202TEST_F(UnicodeExtensionApiTest, AsUTF8StringReturnsBytes) {
203 PyObjectPtr unicode(PyUnicode_FromString("foo"));
204 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr));
205 ASSERT_EQ(PyErr_Occurred(), nullptr);
206 ASSERT_TRUE(PyBytes_Check(bytes));
207 EXPECT_EQ(PyBytes_Size(bytes), 3);
208 EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
209}
210
211TEST_F(UnicodeExtensionApiTest,
212 AsUTF8StringWithInvalidCodepointRaisesEncodeError) {
213 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
214 ASSERT_EQ(PyErr_Occurred(), nullptr);
215 ASSERT_TRUE(PyUnicode_CheckExact(unicode));
216 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr));
217 ASSERT_NE(PyErr_Occurred(), nullptr);
218 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
219 EXPECT_EQ(bytes, nullptr);
220}
221
222TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithReplaceErrorsReturnsBytes) {
223 PyObjectPtr unicode(PyUnicode_DecodeASCII("foo\x80", 4, "surrogateescape"));
224 ASSERT_EQ(PyErr_Occurred(), nullptr);
225 ASSERT_TRUE(PyUnicode_CheckExact(unicode));
226 PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, "replace"));
227 ASSERT_EQ(PyErr_Occurred(), nullptr);
228
229 ASSERT_TRUE(PyBytes_Check(bytes));
230 EXPECT_EQ(PyBytes_Size(bytes), 4);
231 EXPECT_STREQ(PyBytes_AsString(bytes), "foo?");
232}
233
234TEST_F(UnicodeExtensionApiTest, AsUCS4WithNonStringReturnsNull) {
235 // Pass a non string object.
236 Py_UCS4* ucs4_string = PyUnicode_AsUCS4(Py_None, nullptr, 0, 0);
237 EXPECT_EQ(nullptr, ucs4_string);
238}
239
240TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithSubClassReturnsBytes) {
241 PyRun_SimpleString(R"(
242class SubStr(str): pass
243
244substr = SubStr("foo")
245)");
246 PyObjectPtr substr(mainModuleGet("substr"));
247 PyObjectPtr bytes(_PyUnicode_AsUTF8String(substr, nullptr));
248 ASSERT_EQ(PyErr_Occurred(), nullptr);
249 ASSERT_TRUE(PyBytes_Check(bytes));
250 EXPECT_EQ(PyBytes_Size(bytes), 3);
251 EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
252}
253
254TEST_F(UnicodeExtensionApiTest, AsUCS4WithNullBufferReturnsNull) {
255 PyObjectPtr unicode(PyUnicode_FromString("foo"));
256 Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, nullptr, 0, 0);
257 EXPECT_EQ(nullptr, ucs4_string);
258}
259
260TEST_F(UnicodeExtensionApiTest,
261 AsUCS4WithShortBufferWithoutCopyNullReturnsNotNullTerminated) {
262 PyObjectPtr unicode(PyUnicode_FromString("abc"));
263 Py_UCS4 target[4];
264 target[0] = 1;
265 Py_UCS4* ucs4_string =
266 PyUnicode_AsUCS4(unicode, target, 2, 0 /* copy_null */);
267 EXPECT_EQ(nullptr, ucs4_string);
268 EXPECT_EQ(Py_UCS4{1}, target[0]);
269}
270
271TEST_F(UnicodeExtensionApiTest,
272 AsUCS4WithShortBufferWithCopyNullReturnsNullTerminated) {
273 PyObjectPtr unicode(PyUnicode_FromString("abc"));
274 Py_UCS4 target[4];
275 target[0] = 1;
276 Py_UCS4* ucs4_string =
277 PyUnicode_AsUCS4(unicode, target, 2, 1 /* copy_null */);
278 EXPECT_EQ(nullptr, ucs4_string);
279 EXPECT_EQ(Py_UCS4{0}, target[0]);
280}
281
282TEST_F(UnicodeExtensionApiTest, AsUCS4WithoutCopyNullReturnsNotNullTerminated) {
283 Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0};
284 PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer,
285 Py_ARRAY_LENGTH(buffer)));
286 Py_UCS4 target[6];
287 target[5] = 1;
288 Py_UCS4* ucs4_string =
289 PyUnicode_AsUCS4(unicode, target, 5, 0 /* copy_null */);
290 EXPECT_EQ(target, ucs4_string);
291 EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]);
292 EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]);
293 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]);
294 EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]);
295 EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]);
296 EXPECT_EQ(Py_UCS4{1}, ucs4_string[5]);
297}
298
299TEST_F(UnicodeExtensionApiTest, AsUCS4WithCopyNullReturnsNullTerminated) {
300 Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0};
301 PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer,
302 Py_ARRAY_LENGTH(buffer)));
303 Py_UCS4 target[6];
304 target[5] = 1;
305 Py_UCS4* ucs4_string =
306 PyUnicode_AsUCS4(unicode, target, 6, 1 /* copy_null */);
307 EXPECT_EQ(target, ucs4_string);
308 EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]);
309 EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]);
310 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]);
311 EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]);
312 EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]);
313 EXPECT_EQ(Py_UCS4{0}, ucs4_string[5]);
314}
315
316TEST_F(UnicodeExtensionApiTest,
317 AsUCS4WithSubClassAndCopyNullReturnsNullTerminatedString) {
318 PyRun_SimpleString(R"(
319class SubStr(str): pass
320
321substr = SubStr("foo")
322)");
323 PyObjectPtr unicode(mainModuleGet("substr"));
324 Py_UCS4 target[4];
325 Py_UCS4* ucs4_string =
326 PyUnicode_AsUCS4(unicode, target, 4, 1 /* copy_null */);
327 EXPECT_EQ(Py_UCS4{'f'}, ucs4_string[0]);
328 EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[1]);
329 EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[2]);
330 EXPECT_EQ(Py_UCS4{0}, ucs4_string[3]);
331}
332
333// Delegating testing to AsUCS4.
334TEST_F(UnicodeExtensionApiTest,
335 AsUCS4WithNonAsciiReturnsCodePointsNullTerminated) {
336 PyObjectPtr unicode(PyUnicode_FromString("ab\u00e4p"));
337 Py_UCS4* ucs4_string = PyUnicode_AsUCS4Copy(unicode);
338 EXPECT_EQ(Py_UCS4{'a'}, ucs4_string[0]);
339 EXPECT_EQ(Py_UCS4{'b'}, ucs4_string[1]);
340 EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]);
341 EXPECT_EQ(Py_UCS4{'p'}, ucs4_string[3]);
342 EXPECT_EQ(Py_UCS4{0}, ucs4_string[4]);
343 PyMem_Free(ucs4_string);
344}
345
346TEST_F(UnicodeExtensionApiTest, AsWideCharWithNullptrRaisesSystemError) {
347 wchar_t wide_string[1];
348 EXPECT_EQ(
349 PyUnicode_AsWideChar(nullptr, wide_string, Py_ARRAY_LENGTH(wide_string)),
350 -1);
351 ASSERT_NE(PyErr_Occurred(), nullptr);
352 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
353}
354
355TEST_F(UnicodeExtensionApiTest, AsWideCharWithNonStringRaisesTypeError) {
356 PyObjectPtr not_string(PyTuple_New(0));
357 wchar_t wide_string[1];
358 EXPECT_EQ(PyUnicode_AsWideChar(not_string, wide_string,
359 Py_ARRAY_LENGTH(wide_string)),
360 -1);
361 ASSERT_NE(PyErr_Occurred(), nullptr);
362 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
363}
364
365TEST_F(UnicodeExtensionApiTest,
366 AsWideCharWithNonASCIICodePointReturnsNullTerminatedWideCharString) {
367 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
368 wchar_t wide_string[4];
369 EXPECT_EQ(Py_ssize_t{3}, PyUnicode_AsWideChar(unicode, wide_string,
370 Py_ARRAY_LENGTH(wide_string)));
371 ASSERT_EQ(PyErr_Occurred(), nullptr);
372 EXPECT_EQ('a', wide_string[0]);
373 EXPECT_EQ(0xe5, wide_string[1]);
374 EXPECT_EQ('z', wide_string[2]);
375 EXPECT_EQ(0, wide_string[3]);
376}
377
378TEST_F(UnicodeExtensionApiTest, AsWideCharCopiesUpToSizeElements) {
379 PyObjectPtr unicode(PyUnicode_FromString("abcdef"));
380 wchar_t wide_string[5] = {'x', 'x', 'x', 'x', 'x'};
381 EXPECT_EQ(Py_ssize_t{3}, PyUnicode_AsWideChar(unicode, wide_string, 3));
382 ASSERT_EQ(PyErr_Occurred(), nullptr);
383 EXPECT_EQ('a', wide_string[0]);
384 EXPECT_EQ('b', wide_string[1]);
385 EXPECT_EQ('c', wide_string[2]);
386 EXPECT_EQ('x', wide_string[3]);
387 EXPECT_EQ('x', wide_string[4]);
388}
389
390TEST_F(UnicodeExtensionApiTest, AsWideCharWithEmbeddedNullWritesNullChar) {
391 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
392 wchar_t wide_string[5];
393 EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string,
394 Py_ARRAY_LENGTH(wide_string)));
395 EXPECT_EQ(PyErr_Occurred(), nullptr);
396 EXPECT_EQ('a', wide_string[0]);
397 EXPECT_EQ('b', wide_string[1]);
398 EXPECT_EQ('\0', wide_string[2]);
399 EXPECT_EQ('c', wide_string[3]);
400 EXPECT_EQ('\0', wide_string[4]);
401}
402
403TEST_F(UnicodeExtensionApiTest,
404 AsWideCharWithSizeEqualsBufferSizeDoesNotWriteNul) {
405 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
406 wchar_t wide_string[4];
407 EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string, 4));
408 EXPECT_EQ(PyErr_Occurred(), nullptr);
409 EXPECT_EQ('a', wide_string[0]);
410 EXPECT_EQ('b', wide_string[1]);
411 EXPECT_EQ('\0', wide_string[2]);
412 EXPECT_EQ('c', wide_string[3]);
413}
414
415TEST_F(UnicodeExtensionApiTest,
416 AsWideCharWithBufferSizeLessThanStringSizeWritesUpToBufferSize) {
417 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
418 wchar_t wide_string[2];
419 EXPECT_EQ(2, PyUnicode_AsWideChar(unicode, wide_string,
420 Py_ARRAY_LENGTH(wide_string)));
421 EXPECT_EQ(PyErr_Occurred(), nullptr);
422 EXPECT_EQ('a', wide_string[0]);
423 EXPECT_EQ('b', wide_string[1]);
424}
425
426TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNullptrRaisesSystemError) {
427 EXPECT_EQ(PyUnicode_AsWideCharString(nullptr, nullptr), nullptr);
428 ASSERT_NE(PyErr_Occurred(), nullptr);
429 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
430}
431
432TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonStringRaisesTypeError) {
433 PyObjectPtr not_string(PyTuple_New(0));
434 EXPECT_EQ(PyUnicode_AsWideCharString(not_string, nullptr), nullptr);
435 ASSERT_NE(PyErr_Occurred(), nullptr);
436 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
437}
438
439TEST_F(
440 UnicodeExtensionApiTest,
441 AsWideCharStringWithNonASCIICodePointReturnsNullTerminatedWideCharString) {
442 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
443 wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, nullptr);
444 ASSERT_EQ(PyErr_Occurred(), nullptr);
445 EXPECT_EQ('a', wide_string[0]);
446 EXPECT_EQ(0xe5, wide_string[1]);
447 EXPECT_EQ('z', wide_string[2]);
448 EXPECT_EQ(0, wide_string[3]);
449 PyMem_Free(wide_string);
450}
451
452TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonNullSizeSetsSize) {
453 PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
454 Py_ssize_t size = 0xdeadbeef;
455 wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, &size);
456 ASSERT_EQ(PyErr_Occurred(), nullptr);
457 EXPECT_EQ(size, 3);
458 EXPECT_EQ('a', wide_string[0]);
459 EXPECT_EQ(0xe5, wide_string[1]);
460 EXPECT_EQ('z', wide_string[2]);
461 EXPECT_EQ(0, wide_string[3]);
462 PyMem_Free(wide_string);
463}
464
465TEST_F(UnicodeExtensionApiTest,
466 AsWideCharStringWithEmbeddedNullRaisesValueError) {
467 PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
468 EXPECT_EQ(PyUnicode_AsWideCharString(unicode, nullptr), nullptr);
469 ASSERT_NE(PyErr_Occurred(), nullptr);
470 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
471}
472
473TEST_F(UnicodeExtensionApiTest, CheckWithStrReturnsTrue) {
474 PyObjectPtr str(PyUnicode_FromString("ab\u00e4p"));
475 EXPECT_TRUE(PyUnicode_Check(str));
476 EXPECT_TRUE(PyUnicode_CheckExact(str));
477}
478
479TEST_F(UnicodeExtensionApiTest, CheckWithSubClassIsNotExact) {
480 PyRun_SimpleString(R"(
481class SubStr(str): pass
482
483substr = SubStr('ok')
484)");
485 PyObjectPtr substr(mainModuleGet("substr"));
486 ASSERT_EQ(PyErr_Occurred(), nullptr);
487 EXPECT_TRUE(PyUnicode_Check(substr));
488 EXPECT_FALSE(PyUnicode_CheckExact(substr));
489}
490
491TEST_F(UnicodeExtensionApiTest, CheckWithUnrelatedTypeReturnsFalse) {
492 PyObjectPtr pylong(PyLong_FromLong(10));
493 EXPECT_FALSE(PyUnicode_Check(pylong));
494 EXPECT_FALSE(PyUnicode_CheckExact(pylong));
495}
496
497TEST_F(UnicodeExtensionApiTest, DATAReturnsCStringContainingStrContents) {
498 const char* cstr = "hello";
499 PyObjectPtr str(PyUnicode_FromString(cstr));
500 void* data = PyUnicode_DATA(str.get());
501 EXPECT_STREQ(reinterpret_cast<char*>(data), cstr);
502}
503
504TEST_F(UnicodeExtensionApiTest, DATAReturnsSamePointer) {
505 PyObjectPtr str(PyUnicode_FromString("hello"));
506 void* p1 = PyUnicode_DATA(str.get());
507 void* p2 = PyUnicode_DATA(str.get());
508 EXPECT_EQ(p1, p2);
509}
510
511TEST_F(UnicodeExtensionApiTest, FormatWithNullFormatRaisesBadInternalCall) {
512 PyObjectPtr str(PyUnicode_FromString("foo"));
513 EXPECT_EQ(nullptr, PyUnicode_Format(nullptr, str));
514 ASSERT_NE(nullptr, PyErr_Occurred());
515 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
516}
517
518TEST_F(UnicodeExtensionApiTest, FormatWithNullArgsRaisesBadInternalCall) {
519 PyObjectPtr str(PyUnicode_FromString("foo"));
520 EXPECT_EQ(nullptr, PyUnicode_Format(str, nullptr));
521 ASSERT_NE(nullptr, PyErr_Occurred());
522 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
523}
524
525TEST_F(UnicodeExtensionApiTest, FormatWithNonStrFormatRaisesTypeError) {
526 PyObjectPtr format(PyLong_FromLong(10));
527 PyObjectPtr str(PyUnicode_FromString("foo"));
528 EXPECT_EQ(nullptr, PyUnicode_Format(format, str));
529 ASSERT_NE(nullptr, PyErr_Occurred());
530 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
531}
532
533TEST_F(UnicodeExtensionApiTest,
534 FormatWithMismatchedFormatAndArgsRaisesTypeError) {
535 PyObjectPtr str(PyUnicode_FromString("foo%s%s"));
536 PyObjectPtr args(PyUnicode_FromString("bar"));
537 EXPECT_EQ(nullptr, PyUnicode_Format(str, args));
538 ASSERT_NE(nullptr, PyErr_Occurred());
539 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
540}
541
542TEST_F(UnicodeExtensionApiTest, FormatWithStrArgsReturnsStr) {
543 PyObjectPtr str(PyUnicode_FromString("foo%s"));
544 PyObjectPtr args(PyUnicode_FromString("bar"));
545 PyObjectPtr result(PyUnicode_Format(str, args));
546 EXPECT_NE(nullptr, result);
547 EXPECT_EQ(nullptr, PyErr_Occurred());
548 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobar"));
549}
550
551TEST_F(UnicodeExtensionApiTest, FormatWithTupleArgsReturnsStr) {
552 PyObjectPtr str(PyUnicode_FromString("foo%s%s"));
553 PyObjectPtr args(PyTuple_Pack(2, PyUnicode_FromString("bar"),
554 PyUnicode_FromString("baz")));
555 PyObjectPtr result(PyUnicode_Format(str, args));
556 EXPECT_NE(nullptr, result);
557 EXPECT_EQ(nullptr, PyErr_Occurred());
558 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobarbaz"));
559}
560
561TEST_F(UnicodeExtensionApiTest, FSDecoderWithStrSetsString) {
562 PyObjectPtr str(PyUnicode_FromString("foo"));
563 PyObject* result;
564 EXPECT_EQ(PyUnicode_FSDecoder(str, &result), Py_CLEANUP_SUPPORTED);
565
566 EXPECT_TRUE(isUnicodeEqualsCStr(result, "foo"));
567
568 EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1);
569 EXPECT_EQ(result, nullptr);
570}
571
572TEST_F(UnicodeExtensionApiTest, FSDecoderWithBytesSetsString) {
573 const char bytes[] = "bar";
574 PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1));
575 PyObject* result;
576 EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), Py_CLEANUP_SUPPORTED);
577
578 EXPECT_TRUE(isUnicodeEqualsCStr(result, bytes));
579
580 EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1);
581 EXPECT_EQ(result, nullptr);
582}
583
584TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesValueError) {
585 const char bytes[] = "foo\0bar";
586 PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1));
587 PyObject* result;
588 EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), 0);
589 EXPECT_NE(PyErr_Occurred(), nullptr);
590 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
591}
592
593TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesTypeError) {
594 PyObjectPtr pyint(PyLong_FromLong(42));
595 PyObject* result;
596 EXPECT_EQ(PyUnicode_FSDecoder(pyint, &result), 0);
597 EXPECT_NE(PyErr_Occurred(), nullptr);
598 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
599}
600
601TEST_F(UnicodeExtensionApiTest, FindWithNonStrSelfRaisesTypeError) {
602 PyObject* self = Py_None;
603 PyObjectPtr sub(PyUnicode_FromString("ll"));
604 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2);
605 ASSERT_NE(PyErr_Occurred(), nullptr);
606 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
607}
608
609TEST_F(UnicodeExtensionApiTest, FindWithNonStrSubRaisesTypeError) {
610 PyObjectPtr self(PyUnicode_FromString("hello"));
611 PyObject* sub = Py_None;
612 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2);
613 ASSERT_NE(PyErr_Occurred(), nullptr);
614 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
615}
616
617TEST_F(UnicodeExtensionApiTest, FindForwardReturnsLeftmostStartIndex) {
618 PyObjectPtr self(PyUnicode_FromString("hello"));
619 PyObjectPtr sub(PyUnicode_FromString("ll"));
620 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2);
621 EXPECT_EQ(PyErr_Occurred(), nullptr);
622}
623
624TEST_F(UnicodeExtensionApiTest,
625 FindForwardWithSubClassReturnsLeftmostStartIndex) {
626 PyRun_SimpleString(R"(
627class SubStr(str): pass
628
629substr = SubStr('hello')
630)");
631 PyObjectPtr self(mainModuleGet("substr"));
632 PyObjectPtr sub(PyUnicode_FromString("ll"));
633 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2);
634 EXPECT_EQ(PyErr_Occurred(), nullptr);
635}
636
637TEST_F(UnicodeExtensionApiTest, FindReturnsNegativeOneWithNonexistentSubstr) {
638 PyObjectPtr self(PyUnicode_FromString("hello"));
639 PyObjectPtr sub(PyUnicode_FromString("xx"));
640 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -1);
641 EXPECT_EQ(PyErr_Occurred(), nullptr);
642}
643
644TEST_F(UnicodeExtensionApiTest,
645 FindReverseReturnsNegativeOneWithNonexistentSubstr) {
646 PyObjectPtr self(PyUnicode_FromString("hello"));
647 PyObjectPtr sub(PyUnicode_FromString("xx"));
648 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, -1), -1);
649 EXPECT_EQ(PyErr_Occurred(), nullptr);
650}
651
652TEST_F(UnicodeExtensionApiTest, FindReverseReturnsRightmostStartIndex) {
653 PyObjectPtr self(PyUnicode_FromString("helloll"));
654 PyObjectPtr sub(PyUnicode_FromString("ll"));
655 EXPECT_EQ(PyUnicode_Find(self, sub, 0, 7, -1), 5);
656 EXPECT_EQ(PyErr_Occurred(), nullptr);
657}
658
659TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeStartSearchesFromEnd) {
660 PyObjectPtr self(PyUnicode_FromString("hello"));
661 EXPECT_EQ(4, PyUnicode_FindChar(self, Py_UCS4{'o'}, -2, 5, 1));
662 EXPECT_EQ(PyErr_Occurred(), nullptr);
663}
664
665TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeEndSearchesFromEnd) {
666 PyObjectPtr self(PyUnicode_FromString("hello"));
667 EXPECT_EQ(1, PyUnicode_FindChar(self, Py_UCS4{'e'}, 0, -3, 1));
668 EXPECT_EQ(PyErr_Occurred(), nullptr);
669}
670
671TEST_F(UnicodeExtensionApiTest,
672 FindCharWithExistentCharEndGreaterThanLengthClipsEnd) {
673 PyObjectPtr self(PyUnicode_FromString("hello"));
674 Py_UCS4 ch = 'h';
675 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), 0);
676 EXPECT_EQ(PyErr_Occurred(), nullptr);
677}
678
679TEST_F(UnicodeExtensionApiTest,
680 FindCharWithNonExistentCharEndGreaterThanLengthClipsEnd) {
681 PyObjectPtr self(PyUnicode_FromString("hello"));
682 Py_UCS4 ch = 'q';
683 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), -1);
684 EXPECT_EQ(PyErr_Occurred(), nullptr);
685}
686
687TEST_F(UnicodeExtensionApiTest, FindCharFindsChar) {
688 PyObjectPtr self(PyUnicode_FromString("hello"));
689 Py_UCS4 ch = 'h';
690 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0);
691 EXPECT_EQ(PyErr_Occurred(), nullptr);
692}
693
694TEST_F(UnicodeExtensionApiTest, FindCharWithStrSubClassReturnsLeftmostIndex) {
695 PyRun_SimpleString(R"(
696class SubStr(str): pass
697
698substr = SubStr('hello')
699)");
700 PyObjectPtr self(mainModuleGet("substr"));
701 Py_UCS4 ch = 'h';
702 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0);
703 EXPECT_EQ(PyErr_Occurred(), nullptr);
704}
705
706TEST_F(UnicodeExtensionApiTest, FindCharFindsCharInMiddleOfString) {
707 PyObjectPtr self(PyUnicode_FromString("hello"));
708 Py_UCS4 ch = 'l';
709 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 2);
710 EXPECT_EQ(PyErr_Occurred(), nullptr);
711}
712
713TEST_F(UnicodeExtensionApiTest, FindCharReverseFindsCharInMiddleOfString) {
714 PyObjectPtr self(PyUnicode_FromString("hello"));
715 Py_UCS4 ch = 'l';
716 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, -1), 3);
717 EXPECT_EQ(PyErr_Occurred(), nullptr);
718}
719
720TEST_F(UnicodeExtensionApiTest, FindCharWithNonExistentCharDoesNotFindChar) {
721 PyObjectPtr self(PyUnicode_FromString("hello"));
722 Py_UCS4 ch = 'q';
723 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), -1);
724 EXPECT_EQ(PyErr_Occurred(), nullptr);
725}
726
727TEST_F(UnicodeExtensionApiTest, FindCharWithCharBeforeWindowDoesNotFindChar) {
728 PyObjectPtr self(PyUnicode_FromString("hello"));
729 Py_UCS4 ch = 'h';
730 EXPECT_EQ(PyUnicode_FindChar(self, ch, 2, 5, 1), -1);
731 EXPECT_EQ(PyErr_Occurred(), nullptr);
732}
733
734TEST_F(UnicodeExtensionApiTest, FindCharWithCharAfterWindowDoesNotFindChar) {
735 PyObjectPtr self(PyUnicode_FromString("hello"));
736 Py_UCS4 ch = 'o';
737 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), -1);
738 EXPECT_EQ(PyErr_Occurred(), nullptr);
739}
740
741TEST_F(UnicodeExtensionApiTest, FindCharWithUnicodeCharFindsChar) {
742 PyObjectPtr self(PyUnicode_FromString("h\u00e9llo"));
743 Py_UCS4 ch = 0xE9;
744 EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), 1);
745 EXPECT_EQ(PyErr_Occurred(), nullptr);
746}
747
748TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesEmptyString) {
749 PyObjectPtr pyuni(PyUnicode_FromStringAndSize(nullptr, 0));
750 EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, ""));
751 EXPECT_EQ(PyErr_Occurred(), nullptr);
752}
753
754TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSizedString) {
755 const char* str = "Some string";
756 PyObjectPtr pyuni(PyUnicode_FromStringAndSize(str, 11));
757 EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, str));
758 EXPECT_EQ(PyErr_Occurred(), nullptr);
759}
760
761TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSmallerString) {
762 PyObjectPtr str(PyUnicode_FromStringAndSize("1234567890", 5));
763 EXPECT_TRUE(isUnicodeEqualsCStr(str, "12345"));
764 EXPECT_EQ(PyErr_Occurred(), nullptr);
765}
766
767TEST_F(UnicodeExtensionApiTest, FromStringAndSizeFailsNegSize) {
768 PyObjectPtr pyuni(PyUnicode_FromStringAndSize("a", -1));
769 ASSERT_EQ(pyuni, nullptr);
770
771 ASSERT_NE(PyErr_Occurred(), nullptr);
772 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
773}
774
775TEST_F(UnicodeExtensionApiTest, FromStringAndSizeIncrementsRefCount) {
776 PyObject* pyuni = PyUnicode_FromStringAndSize("Some string", 11);
777 ASSERT_NE(pyuni, nullptr);
778 EXPECT_GE(Py_REFCNT(pyuni), 1);
779 Py_DECREF(pyuni);
780 EXPECT_EQ(PyErr_Occurred(), nullptr);
781}
782
783TEST_F(UnicodeExtensionApiTest, READWithOneByteKindReturnsCharAtIndex) {
784 const char* str = "foo";
785 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 0), Py_UCS4{'f'});
786 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 1), Py_UCS4{'o'});
787 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 2), Py_UCS4{'o'});
788}
789
790TEST_F(UnicodeExtensionApiTest, READWithTwoByteKindReturnsCharAtIndex) {
791 const char* str = "quux";
792 // This assumes little-endian architecture. No static assert because we can't
793 // include that enum and macro in these tests.
794 EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 0),
795 Py_UCS4{0x7571}); // qu
796 EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 1),
797 Py_UCS4{0x7875}); // ux
798}
799
800TEST_F(UnicodeExtensionApiTest, READWithFourByteKindReturnsCharAtIndex) {
801 const char* str = "quux";
802 // This assumes little-endian architecture. No static assert because we can't
803 // include that enum and macro in these tests.
804 EXPECT_EQ(PyUnicode_READ(PyUnicode_4BYTE_KIND, str, 0), Py_UCS4{0x78757571});
805}
806
807TEST_F(UnicodeExtensionApiTest, READCHARReturnsCharAtIndex) {
808 PyObjectPtr str(PyUnicode_FromString("foo"));
809 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{'f'});
810 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'o'});
811 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 2), Py_UCS4{'o'});
812 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 3), Py_UCS4{'\0'});
813}
814
815TEST_F(UnicodeExtensionApiTest, READCHARReturnsUnicodeCodePointAtIndex) {
816 PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88"));
817 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1);
818 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{0x10348});
819 EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'\0'});
820
821 PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me"));
822 EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5);
823 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 0), Py_UCS4{'c'});
824 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 1), Py_UCS4{'r'});
825 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 2), Py_UCS4{0xE9});
826 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 3), Py_UCS4{'m'});
827 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 4), Py_UCS4{'e'});
828 EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 5), Py_UCS4{'\0'});
829}
830
831TEST_F(UnicodeExtensionApiTest, READReadsCharsFromDATA) {
832 PyObjectPtr str(PyUnicode_FromString("foo"));
833 void* data = PyUnicode_DATA(str.get());
834 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 0), Py_UCS4{'f'});
835 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 1), Py_UCS4{'o'});
836 EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 2), Py_UCS4{'o'});
837}
838
839TEST_F(UnicodeExtensionApiTest, ReadCharReturnsCharAtIndex) {
840 PyObjectPtr str(PyUnicode_FromString("foo"));
841 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{'f'});
842 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{'o'});
843 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 2), Py_UCS4{'o'});
844}
845
846TEST_F(UnicodeExtensionApiTest, ReadCharReturnsUnicodeCodePointAtIndex) {
847 PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88"));
848 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1);
849 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{0x10348});
850 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{0xFFFFFFFF});
851 ASSERT_NE(PyErr_Occurred(), nullptr);
852 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
853 PyErr_Clear();
854
855 PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me"));
856 EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5);
857 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 0), Py_UCS4{'c'});
858 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 1), Py_UCS4{'r'});
859 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 2), Py_UCS4{0xE9});
860 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 3), Py_UCS4{'m'});
861 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 4), Py_UCS4{'e'});
862 EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 5), Py_UCS4{0xFFFFFFFF});
863 ASSERT_NE(PyErr_Occurred(), nullptr);
864 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
865 PyErr_Clear();
866}
867
868TEST_F(UnicodeExtensionApiTest, ReadCharWithNonStrRaisesTypeError) {
869 PyObjectPtr list(PyList_New(3));
870 EXPECT_EQ(PyUnicode_ReadChar(list.get(), 0), Py_UCS4{0xFFFFFFFF});
871 ASSERT_NE(PyErr_Occurred(), nullptr);
872 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
873}
874
875TEST_F(UnicodeExtensionApiTest, ReadCharWithOutOfBoundIndexRaisesIndexError) {
876 PyObjectPtr str(PyUnicode_FromString("foo"));
877 EXPECT_EQ(PyUnicode_ReadChar(str.get(), 3), Py_UCS4{0xFFFFFFFF});
878 ASSERT_NE(PyErr_Occurred(), nullptr);
879 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
880}
881
882TEST_F(UnicodeExtensionApiTest, ReadyReturnsZero) {
883 PyObject* pyunicode = PyUnicode_FromString("some string");
884 int is_ready = PyUnicode_READY(pyunicode);
885 EXPECT_EQ(0, is_ready);
886 Py_DECREF(pyunicode);
887}
888
889TEST_F(UnicodeExtensionApiTest, ReplaceWithStrOfNonStringTypeReturnsNull) {
890 PyObjectPtr non_str(PyBool_FromLong(1));
891 PyObjectPtr substr(PyUnicode_FromString("some string"));
892 PyObjectPtr replstr(PyUnicode_FromString("some string"));
893 EXPECT_EQ(PyUnicode_Replace(non_str, substr, replstr, -1), nullptr);
894 EXPECT_NE(PyErr_Occurred(), nullptr);
895}
896
897TEST_F(UnicodeExtensionApiTest, ReplaceWithSubstrOfNonStringTypeReturnsNull) {
898 PyObjectPtr non_str(PyBool_FromLong(1));
899 PyObjectPtr str(PyUnicode_FromString("some string"));
900 PyObjectPtr replstr(PyUnicode_FromString("some string"));
901 EXPECT_EQ(PyUnicode_Replace(str, non_str, replstr, -1), nullptr);
902 EXPECT_NE(PyErr_Occurred(), nullptr);
903}
904
905TEST_F(UnicodeExtensionApiTest, ReplaceWithReplstrOfNonStringTypeReturnsNull) {
906 PyObjectPtr non_str(PyBool_FromLong(1));
907 PyObjectPtr str(PyUnicode_FromString("some string"));
908 PyObjectPtr substr(PyUnicode_FromString("some string"));
909 EXPECT_EQ(PyUnicode_Replace(str, substr, non_str, -1), nullptr);
910 EXPECT_NE(PyErr_Occurred(), nullptr);
911}
912
913TEST_F(UnicodeExtensionApiTest,
914 ReplaceWithStrSubclassReturnStrWithSameContent) {
915 PyRun_SimpleString(R"(
916class SubStr(str): pass
917
918subclass_instance = SubStr("hello world!")
919)");
920 PyObjectPtr subclass_instance(mainModuleGet("subclass_instance"));
921 PyObjectPtr substr(PyUnicode_FromString("some string"));
922 PyObjectPtr replstr(PyUnicode_FromString("some string"));
923 PyObjectPtr result(PyUnicode_Replace(subclass_instance, substr, replstr, -1));
924 ASSERT_EQ(PyErr_Occurred(), nullptr);
925 EXPECT_TRUE(PyUnicode_CheckExact(result));
926 EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world!"));
927}
928
929TEST_F(UnicodeExtensionApiTest,
930 ReplaceWithNegativeMaxcountReturnsResultReplacingAllSubstr) {
931 PyObjectPtr str(PyUnicode_FromString("22122122122122122"));
932 PyObjectPtr substr(PyUnicode_FromString("22"));
933 PyObjectPtr replstr(PyUnicode_FromString("*"));
934 PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*"));
935 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1));
936 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
937 EXPECT_EQ(PyErr_Occurred(), nullptr);
938}
939
940TEST_F(UnicodeExtensionApiTest,
941 ReplaceWithSubClassAndNegativeMaxcountReturnsResultReplacingAllSubstr) {
942 PyRun_SimpleString(R"(
943class SubStr(str): pass
944
945str_val = SubStr("22122122122122122")
946substr = SubStr("22")
947replstr = SubStr("*")
948)");
949 PyObjectPtr str(mainModuleGet("str_val"));
950 PyObjectPtr substr(mainModuleGet("substr"));
951 PyObjectPtr replstr(mainModuleGet("replstr"));
952 PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*"));
953 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1));
954 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
955 EXPECT_EQ(PyErr_Occurred(), nullptr);
956}
957
958TEST_F(UnicodeExtensionApiTest,
959 ReplaceWithLimitedMaxcountReturnsResultReplacingUpToMaxcount) {
960 PyObjectPtr str(PyUnicode_FromString("22122122122122122"));
961 PyObjectPtr substr(PyUnicode_FromString("22"));
962 PyObjectPtr replstr(PyUnicode_FromString("*"));
963 PyObjectPtr expected(PyUnicode_FromString("*1*1*122122122"));
964 PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, 3));
965 EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
966 EXPECT_EQ(PyErr_Occurred(), nullptr);
967}
968
969TEST_F(UnicodeExtensionApiTest, Compare) {
970 PyObject* s1 = PyUnicode_FromString("some string");
971 PyObject* s2 = PyUnicode_FromString("some longer string");
972 PyObject* s22 = PyUnicode_FromString("some longer string");
973
974 int result = PyUnicode_Compare(s1, s2);
975 EXPECT_EQ(result, 1);
976 EXPECT_EQ(PyErr_Occurred(), nullptr);
977
978 result = PyUnicode_Compare(s2, s1);
979 EXPECT_EQ(result, -1);
980 EXPECT_EQ(PyErr_Occurred(), nullptr);
981
982 result = PyUnicode_Compare(s2, s22);
983 EXPECT_EQ(result, 0);
984 EXPECT_EQ(PyErr_Occurred(), nullptr);
985
986 Py_DECREF(s22);
987 Py_DECREF(s2);
988 Py_DECREF(s1);
989}
990
991TEST_F(UnicodeExtensionApiTest, CompareWithSubClass) {
992 PyRun_SimpleString(R"(
993class SubStr(str): pass
994
995substr = SubStr("some string")
996)");
997 PyObjectPtr s1(mainModuleGet("substr"));
998 PyObjectPtr s2(PyUnicode_FromString("some longer string"));
999 PyObjectPtr s22(PyUnicode_FromString("some longer string"));
1000
1001 int result = PyUnicode_Compare(s1, s2);
1002 EXPECT_EQ(result, 1);
1003 EXPECT_EQ(PyErr_Occurred(), nullptr);
1004
1005 result = PyUnicode_Compare(s2, s1);
1006 EXPECT_EQ(result, -1);
1007 EXPECT_EQ(PyErr_Occurred(), nullptr);
1008
1009 result = PyUnicode_Compare(s2, s22);
1010 EXPECT_EQ(result, 0);
1011 EXPECT_EQ(PyErr_Occurred(), nullptr);
1012}
1013
1014TEST_F(UnicodeExtensionApiTest, CompareBadInput) {
1015 PyObject* str_obj = PyUnicode_FromString("this is a string");
1016 PyObject* int_obj = PyLong_FromLong(1234);
1017
1018 PyUnicode_Compare(str_obj, int_obj);
1019 ASSERT_NE(PyErr_Occurred(), nullptr);
1020 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1021 PyErr_Clear();
1022
1023 PyUnicode_Compare(int_obj, str_obj);
1024 ASSERT_NE(PyErr_Occurred(), nullptr);
1025 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1026 PyErr_Clear();
1027
1028 PyUnicode_Compare(int_obj, int_obj);
1029 ASSERT_NE(PyErr_Occurred(), nullptr);
1030 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1031 PyErr_Clear();
1032
1033 Py_DECREF(int_obj);
1034 Py_DECREF(str_obj);
1035}
1036
1037TEST_F(UnicodeExtensionApiTest, EqSameLength) {
1038 PyObject* str1 = PyUnicode_FromString("some string");
1039
1040 PyObject* str2 = PyUnicode_FromString("some other string");
1041 EXPECT_EQ(_PyUnicode_EQ(str1, str2), 0);
1042 EXPECT_EQ(_PyUnicode_EQ(str2, str1), 0);
1043 Py_DECREF(str2);
1044
1045 PyObject* str3 = PyUnicode_FromString("some string");
1046 EXPECT_EQ(_PyUnicode_EQ(str1, str3), 1);
1047 EXPECT_EQ(_PyUnicode_EQ(str3, str1), 1);
1048 Py_DECREF(str3);
1049
1050 Py_DECREF(str1);
1051}
1052
1053TEST_F(UnicodeExtensionApiTest, EqWithSubClassSameLength) {
1054 PyRun_SimpleString(R"(
1055class SubStr(str): pass
1056
1057substr = SubStr("some string")
1058)");
1059 PyObjectPtr str(mainModuleGet("substr"));
1060 PyObjectPtr str1(PyUnicode_FromString("some string"));
1061 EXPECT_EQ(_PyUnicode_EQ(str1.get(), str.get()), 1);
1062
1063 PyObjectPtr str2(PyUnicode_FromString("some other string"));
1064 EXPECT_EQ(_PyUnicode_EQ(str2.get(), str.get()), 0);
1065}
1066
1067TEST_F(UnicodeExtensionApiTest, EqDifferentLength) {
1068 PyObject* small = PyUnicode_FromString("123");
1069 PyObject* large = PyUnicode_FromString("1234567890");
1070 EXPECT_EQ(_PyUnicode_EQ(small, large), 0);
1071 EXPECT_EQ(_PyUnicode_EQ(large, small), 0);
1072 Py_DECREF(large);
1073 Py_DECREF(small);
1074}
1075
1076TEST_F(UnicodeExtensionApiTest, EqualToASCIIString) {
1077 PyObject* unicode = PyUnicode_FromString("here's another string");
1078
1079 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string"));
1080 EXPECT_FALSE(
1081 _PyUnicode_EqualToASCIIString(unicode, "here is another string"));
1082
1083 Py_DECREF(unicode);
1084}
1085
1086TEST_F(UnicodeExtensionApiTest, EqualToASCIIStringWithSubClass) {
1087 PyRun_SimpleString(R"(
1088class SubStr(str): pass
1089
1090substr = SubStr("here's another string")
1091)");
1092 PyObjectPtr unicode(mainModuleGet("substr"));
1093 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string"));
1094 EXPECT_FALSE(
1095 _PyUnicode_EqualToASCIIString(unicode, "here is another string"));
1096}
1097
1098TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCIINul) {
1099 PyObjectPtr pyunicode(PyUnicode_FromStringAndSize("large\0st", 8));
1100
1101 // Less
1102 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "largz"), -1);
1103
1104 // Greater
1105 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large"), 1);
1106}
1107
1108TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCII) {
1109 PyObjectPtr pyunicode(PyUnicode_FromString("large string"));
1110
1111 // Equal
1112 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large string"), 0);
1113
1114 // Less
1115 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strings"), -1);
1116 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large tbigger"), -1);
1117
1118 // Greater
1119 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strin"), 1);
1120 EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large smaller"), 1);
1121}
1122
1123TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringWithSubClass) {
1124 PyRun_SimpleString(R"(
1125class SubStr(str): pass
1126
1127substr = SubStr("large string")
1128)");
1129 PyObjectPtr substr(mainModuleGet("substr"));
1130
1131 // Equal
1132 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large string"), 0);
1133
1134 // Less
1135 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strings"), -1);
1136 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large tbigger"), -1);
1137
1138 // Greater
1139 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strin"), 1);
1140 EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large smaller"), 1);
1141}
1142
1143TEST_F(UnicodeExtensionApiTest, GetLengthWithEmptyStrReturnsZero) {
1144 PyObjectPtr str(PyUnicode_FromString(""));
1145 Py_ssize_t expected = 0;
1146 EXPECT_EQ(PyUnicode_GetLength(str), expected);
1147 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected);
1148 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected);
1149}
1150
1151TEST_F(UnicodeExtensionApiTest, GetLengthWithNonEmptyString) {
1152 PyObjectPtr str(PyUnicode_FromString("foo"));
1153 Py_ssize_t expected = 3;
1154 EXPECT_EQ(PyUnicode_GetLength(str), expected);
1155 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected);
1156 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected);
1157}
1158
1159TEST_F(UnicodeExtensionApiTest, GetLengthWithSubClassOfNonEmptyString) {
1160 PyRun_SimpleString(R"(
1161class SubStr(str): pass
1162
1163substr = SubStr('foo')
1164)");
1165 PyObjectPtr str(mainModuleGet("substr"));
1166 Py_ssize_t expected = 3;
1167 EXPECT_EQ(PyUnicode_GetLength(str), expected);
1168 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected);
1169 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected);
1170}
1171
1172TEST_F(UnicodeExtensionApiTest, GetLengthWithUTF8ReturnsCodePointLength) {
1173 PyObjectPtr str(PyUnicode_FromString("\xc3\xa9"));
1174 Py_ssize_t expected = 1;
1175 EXPECT_EQ(PyUnicode_GetLength(str), expected);
1176 EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected);
1177 EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected);
1178}
1179
1180TEST_F(UnicodeExtensionApiTest, GetLengthWithNonStrRaisesTypeError) {
1181 PyObjectPtr list(PyList_New(3));
1182 EXPECT_EQ(PyUnicode_GetLength(list), -1);
1183 ASSERT_NE(PyErr_Occurred(), nullptr);
1184 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1185}
1186
1187TEST_F(UnicodeExtensionApiTest, GetSizeWithNonStrRaisesTypeError) {
1188 PyObjectPtr list(PyList_New(3));
1189#pragma GCC diagnostic push
1190#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1191 EXPECT_EQ(PyUnicode_GetSize(list), -1);
1192#pragma GCC diagnostic pop
1193 ASSERT_NE(PyErr_Occurred(), nullptr);
1194 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1195}
1196
1197TEST_F(UnicodeExtensionApiTest, GetSizeWithStrReturnsLength) {
1198 PyObjectPtr unicode(PyUnicode_FromString("abc"));
1199#pragma GCC diagnostic push
1200#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1201 EXPECT_EQ(PyUnicode_GetSize(unicode), 3);
1202#pragma GCC diagnostic pop
1203 EXPECT_EQ(PyErr_Occurred(), nullptr);
1204}
1205
1206TEST_F(UnicodeExtensionApiTest, FromUnicodeWithASCIIReturnsString) {
1207 PyObjectPtr unicode(PyUnicode_FromUnicode(L"abc123-", 7));
1208 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-"));
1209}
1210
1211TEST_F(UnicodeExtensionApiTest, FromUnicodeWithNullBufferAbortsPyro) {
1212 EXPECT_DEATH(PyUnicode_FromUnicode(nullptr, 2),
1213 "unimplemented: _PyUnicode_New");
1214}
1215
1216TEST_F(UnicodeExtensionApiTest,
1217 FromOrdinalWithNegativeCodePointRaisesValueError) {
1218 EXPECT_EQ(PyUnicode_FromOrdinal(-1), nullptr);
1219 ASSERT_NE(PyErr_Occurred(), nullptr);
1220 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
1221}
1222
1223TEST_F(UnicodeExtensionApiTest, FromOrdinalWithHugeCodePointRaisesValueError) {
1224 EXPECT_EQ(PyUnicode_FromOrdinal(0xFFFFFFFF), nullptr);
1225 ASSERT_NE(PyErr_Occurred(), nullptr);
1226 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
1227}
1228
1229TEST_F(UnicodeExtensionApiTest, FromOrdinalWithValidCodePointReturnsString) {
1230 PyObjectPtr str(PyUnicode_FromOrdinal(1488));
1231 EXPECT_EQ(PyErr_Occurred(), nullptr);
1232 EXPECT_NE(str, nullptr);
1233 ASSERT_TRUE(PyUnicode_Check(str));
1234 EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xD7\x90");
1235}
1236
1237TEST_F(UnicodeExtensionApiTest,
1238 FromWideCharWithNullBufferAndZeroSizeReturnsEmpty) {
1239 PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 0));
1240 ASSERT_EQ(PyErr_Occurred(), nullptr);
1241 ASSERT_TRUE(PyUnicode_Check(empty));
1242 EXPECT_EQ(PyUnicode_GetLength(empty), 0);
1243}
1244
1245TEST_F(UnicodeExtensionApiTest, FromWideCharWithNullBufferReturnsError) {
1246 PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 1));
1247 ASSERT_EQ(empty, nullptr);
1248 ASSERT_NE(PyErr_Occurred(), nullptr);
1249 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
1250}
1251
1252TEST_F(UnicodeExtensionApiTest, FromWideCharWithUnknownSizeReturnsString) {
1253 PyObjectPtr unicode(PyUnicode_FromWideChar(L"abc123-", -1));
1254 ASSERT_EQ(PyErr_Occurred(), nullptr);
1255 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-"));
1256}
1257
1258TEST_F(UnicodeExtensionApiTest, FromWideCharWithGivenSizeReturnsString) {
1259 PyObjectPtr unicode(PyUnicode_FromWideChar(L"abc123-", 6));
1260 ASSERT_EQ(PyErr_Occurred(), nullptr);
1261 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123"));
1262}
1263
1264TEST_F(UnicodeExtensionApiTest, FromWideCharWithBufferAndZeroSizeReturnsEmpty) {
1265 PyObjectPtr empty(PyUnicode_FromWideChar(L"abc", 0));
1266 ASSERT_EQ(PyErr_Occurred(), nullptr);
1267 ASSERT_TRUE(PyUnicode_Check(empty));
1268 EXPECT_EQ(PyUnicode_GetLength(empty), 0);
1269}
1270
1271TEST_F(UnicodeExtensionApiTest, DecodeWithNullEncodingReturnsUTF8) {
1272 const char* str = "utf-8 \xc3\xa8";
1273 PyObjectPtr uni(PyUnicode_Decode(str, 8, nullptr, nullptr));
1274 ASSERT_TRUE(PyUnicode_CheckExact(uni));
1275 EXPECT_STREQ(PyUnicode_AsUTF8(uni), str);
1276}
1277
1278TEST_F(UnicodeExtensionApiTest, DecodeASCIIUsesErrorHandler) {
1279 PyObjectPtr uni(PyUnicode_Decode("non\xc3\xa8-ascii", 11, "ascii", "ignore"));
1280 ASSERT_TRUE(PyUnicode_CheckExact(uni));
1281 EXPECT_STREQ(PyUnicode_AsUTF8(uni), "non-ascii");
1282}
1283
1284TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsLatin1) {
1285 PyObjectPtr uni(PyUnicode_Decode("latin-1 \xe8", 9, "latin-1", nullptr));
1286 ASSERT_TRUE(PyUnicode_CheckExact(uni));
1287 EXPECT_STREQ(PyUnicode_AsUTF8(uni), "latin-1 \xc3\xa8");
1288}
1289
1290TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultCreatesString) {
1291 PyObjectPtr unicode(PyUnicode_DecodeFSDefault("hello"));
1292 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello"));
1293 EXPECT_EQ(PyErr_Occurred(), nullptr);
1294}
1295
1296TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultAndSizeReturnsString) {
1297 PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 5));
1298 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello"));
1299 EXPECT_EQ(PyErr_Occurred(), nullptr);
1300}
1301
1302TEST_F(UnicodeExtensionApiTest,
1303 DecodeFSDefaultAndSizeWithSmallerSizeReturnsString) {
1304 PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 2));
1305 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "he"));
1306 EXPECT_EQ(PyErr_Occurred(), nullptr);
1307}
1308
1309TEST_F(UnicodeExtensionApiTest, DecodeASCIIReturnsString) {
1310 PyObjectPtr str(PyUnicode_DecodeASCII("hello world", 11, nullptr));
1311 ASSERT_EQ(PyErr_Occurred(), nullptr);
1312 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1313}
1314
1315TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsString) {
1316 const char* c_str = "\xBFhello world?";
1317 PyObjectPtr str(PyUnicode_DecodeLatin1(c_str, std::strlen(c_str), nullptr));
1318 ASSERT_EQ(PyErr_Occurred(), nullptr);
1319 EXPECT_EQ(PyUnicode_CheckExact(str), 1);
1320 EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xC2\xBFhello world?");
1321}
1322
1323TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterPrepareWithLenZeroReturnsZero) {
1324 _PyUnicodeWriter writer;
1325 _PyUnicodeWriter_Init(&writer);
1326 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 0, 127), 0);
1327 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1328
1329 ASSERT_EQ(PyErr_Occurred(), nullptr);
1330 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, ""));
1331}
1332
1333TEST_F(UnicodeExtensionApiTest,
1334 PyUnicodeWriterWithOverallocateSetOverallocates) {
1335 _PyUnicodeWriter writer;
1336 _PyUnicodeWriter_Init(&writer);
1337 writer.overallocate = 0;
1338 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0);
1339 ASSERT_EQ(writer.size, 5);
1340 _PyUnicodeWriter_Dealloc(&writer);
1341
1342 _PyUnicodeWriter_Init(&writer);
1343 writer.overallocate = 1;
1344 ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0);
1345 ASSERT_GT(writer.size, 5);
1346 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1347
1348 ASSERT_EQ(PyErr_Occurred(), nullptr);
1349 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, ""));
1350}
1351
1352TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterCreatesEmptyString) {
1353 _PyUnicodeWriter writer;
1354 _PyUnicodeWriter_Init(&writer);
1355 PyObjectPtr empty(_PyUnicodeWriter_Finish(&writer));
1356 ASSERT_EQ(PyErr_Occurred(), nullptr);
1357 ASSERT_TRUE(PyUnicode_Check(empty));
1358 EXPECT_EQ(PyUnicode_GetLength(empty), 0);
1359}
1360
1361TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesASCIIStrings) {
1362 _PyUnicodeWriter writer;
1363 _PyUnicodeWriter_Init(&writer);
1364 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello", 5), 0);
1365 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, " world", 6), 0);
1366 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1367
1368 ASSERT_EQ(PyErr_Occurred(), nullptr);
1369 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world"));
1370}
1371
1372TEST_F(UnicodeExtensionApiTest,
1373 WriteASCIIStringWithNegativeLengthReturnsString) {
1374 _PyUnicodeWriter writer;
1375 _PyUnicodeWriter_Init(&writer);
1376 ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello world", -1), 0);
1377 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1378
1379 ASSERT_EQ(PyErr_Occurred(), nullptr);
1380 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world"));
1381}
1382
1383TEST_F(UnicodeExtensionApiTest, WriteASCIIStringWithNonASCIIDeathTestPyro) {
1384 _PyUnicodeWriter writer;
1385 _PyUnicodeWriter_Init(&writer);
1386 EXPECT_DEATH(_PyUnicodeWriter_WriteASCIIString(&writer, "\xA0", 1),
1387 "_PyUnicodeWriter_WriteASCIIString only takes ASCII");
1388 _PyUnicodeWriter_Dealloc(&writer);
1389}
1390
1391TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesChars) {
1392 _PyUnicodeWriter writer;
1393 _PyUnicodeWriter_Init(&writer);
1394 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'a'), 0);
1395 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 0xA0), 0);
1396 ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'g'), 0);
1397 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1398
1399 ASSERT_EQ(PyErr_Occurred(), nullptr);
1400 PyObjectPtr test(PyUnicode_FromString("a\xc2\xa0g"));
1401 EXPECT_TRUE(_PyUnicode_EQ(unicode, test));
1402}
1403
1404TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesLatin1String) {
1405 _PyUnicodeWriter writer;
1406 _PyUnicodeWriter_Init(&writer);
1407 ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, "hello\xA0", 6), 0);
1408 ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, " world", 6), 0);
1409 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1410
1411 ASSERT_EQ(PyErr_Occurred(), nullptr);
1412 PyObjectPtr test(PyUnicode_FromString("hello\xc2\xa0 world"));
1413 EXPECT_TRUE(_PyUnicode_EQ(unicode, test));
1414}
1415
1416TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteStrWritesStringObject) {
1417 _PyUnicodeWriter writer;
1418 _PyUnicodeWriter_Init(&writer);
1419 PyObjectPtr hello_str(PyUnicode_FromString("hello"));
1420 PyObjectPtr world_str(PyUnicode_FromString(" \xf0\x9f\x90\x8d world"));
1421 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0);
1422 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0);
1423 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1424
1425 ASSERT_EQ(PyErr_Occurred(), nullptr);
1426 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello \xf0\x9f\x90\x8d world"));
1427}
1428
1429TEST_F(UnicodeExtensionApiTest,
1430 PyUnicodeWriterWriteStrWithSubClassWritesStringObject) {
1431 PyRun_SimpleString(R"(
1432class SubStr(str): pass
1433
1434hello_str = SubStr("hello")
1435world_str = SubStr(" world")
1436)");
1437 _PyUnicodeWriter writer;
1438 _PyUnicodeWriter_Init(&writer);
1439 PyObjectPtr hello_str(mainModuleGet("hello_str"));
1440 PyObjectPtr world_str(mainModuleGet("world_str"));
1441 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0);
1442 ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0);
1443 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1444
1445 ASSERT_EQ(PyErr_Occurred(), nullptr);
1446 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world"));
1447}
1448
1449TEST_F(UnicodeExtensionApiTest,
1450 PyUnicodeWriterWriteSubstringWritesSubStringObject) {
1451 _PyUnicodeWriter writer;
1452 _PyUnicodeWriter_Init(&writer);
1453 PyObjectPtr str(PyUnicode_FromString("hello \xf0\x9f\x90\x8d world"));
1454 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 8, 13), 0);
1455 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 8), 0);
1456 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0);
1457 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1458
1459 ASSERT_EQ(PyErr_Occurred(), nullptr);
1460 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "world \xf0\x9f\x90\x8d hello"));
1461}
1462
1463TEST_F(UnicodeExtensionApiTest,
1464 PyUnicodeWriterWriteSubstringWithSubClassWritesSubStringObject) {
1465 PyRun_SimpleString(R"(
1466class SubStr(str): pass
1467
1468str_value = SubStr("hello world")
1469)");
1470 _PyUnicodeWriter writer;
1471 _PyUnicodeWriter_Init(&writer);
1472 PyObjectPtr str(mainModuleGet("str_value"));
1473 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0);
1474 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 11), 0);
1475 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1476
1477 ASSERT_EQ(PyErr_Occurred(), nullptr);
1478 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world"));
1479}
1480
1481TEST_F(UnicodeExtensionApiTest, WriteSubstringWithZeroEndReturnsString) {
1482 _PyUnicodeWriter writer;
1483 _PyUnicodeWriter_Init(&writer);
1484 PyObjectPtr str(PyUnicode_FromString("hello"));
1485 ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 0), 0);
1486 PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer));
1487
1488 ASSERT_EQ(PyErr_Occurred(), nullptr);
1489 EXPECT_TRUE(isUnicodeEqualsCStr(unicode, ""));
1490}
1491
1492TEST_F(UnicodeExtensionApiTest, DecodeUTF8ReturnsString) {
1493 PyObjectPtr str(PyUnicode_DecodeUTF8("hello world", 11, nullptr));
1494 ASSERT_EQ(PyErr_Occurred(), nullptr);
1495 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1496}
1497
1498TEST_F(UnicodeExtensionApiTest,
1499 DecodeUTF8WithUnfinishedBytesRaisesUnicodeDecodeError) {
1500 EXPECT_EQ(PyUnicode_DecodeUTF8("hello world\xC3", 12, nullptr), nullptr);
1501 PyObject *exc, *value, *tb;
1502 PyErr_Fetch(&exc, &value, &tb);
1503 ASSERT_NE(exc, nullptr);
1504 ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeDecodeError));
1505 Py_ssize_t temp;
1506 PyObjectPtr msg(PyUnicodeDecodeError_GetReason(value));
1507 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "unexpected end of data"));
1508 PyUnicodeDecodeError_GetStart(value, &temp);
1509 EXPECT_EQ(temp, 11);
1510 PyUnicodeDecodeError_GetEnd(value, &temp);
1511 EXPECT_EQ(temp, 12);
1512 Py_XDECREF(exc);
1513 Py_XDECREF(value);
1514 Py_XDECREF(tb);
1515}
1516
1517TEST_F(UnicodeExtensionApiTest, DecodeUTF8StatefulReturnsString) {
1518 Py_ssize_t consumed;
1519 PyObjectPtr str(
1520 PyUnicode_DecodeUTF8Stateful("hello world", 11, nullptr, &consumed));
1521 ASSERT_EQ(PyErr_Occurred(), nullptr);
1522 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1523 EXPECT_EQ(consumed, 11);
1524}
1525
1526TEST_F(UnicodeExtensionApiTest,
1527 DecodeUTF8StatefulWithUnfinishedBytesReturnsString) {
1528 Py_ssize_t consumed;
1529 PyObjectPtr str(
1530 PyUnicode_DecodeUTF8Stateful("hello world\xC3", 12, nullptr, &consumed));
1531 ASSERT_EQ(PyErr_Occurred(), nullptr);
1532 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1533 EXPECT_EQ(consumed, 11);
1534}
1535
1536TEST_F(UnicodeExtensionApiTest, DecodeUnicodeEscapeReturnsString) {
1537 PyObjectPtr str(
1538 PyUnicode_DecodeUnicodeEscape("hello \\\nworld", 13, nullptr));
1539 ASSERT_EQ(PyErr_Occurred(), nullptr);
1540 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1541}
1542
1543TEST_F(UnicodeExtensionApiTest, UnderDecodeUnicodeEscapeReturnsFirstInvalid) {
1544 const char* invalid;
1545 PyObjectPtr str(
1546 _PyUnicode_DecodeUnicodeEscape("hello \\yworld", 13, nullptr, &invalid));
1547 ASSERT_EQ(PyErr_Occurred(), nullptr);
1548 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello \\yworld"));
1549 EXPECT_EQ(*invalid, 'y');
1550}
1551
1552TEST_F(UnicodeExtensionApiTest,
1553 UnderDecodeUnicodeEscapeSetsFirstInvalidEscapeToNull) {
1554 const char* invalid = reinterpret_cast<const char*>(0x100);
1555 PyObjectPtr result(
1556 _PyUnicode_DecodeUnicodeEscape("hello", 5, nullptr, &invalid));
1557 EXPECT_NE(result, nullptr);
1558 EXPECT_EQ(PyErr_Occurred(), nullptr);
1559 EXPECT_EQ(invalid, nullptr);
1560}
1561
1562TEST_F(UnicodeExtensionApiTest, FromFormatWithNoArgsReturnsString) {
1563 PyObjectPtr str(PyUnicode_FromFormat("hello world"));
1564 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world"));
1565}
1566
1567TEST_F(UnicodeExtensionApiTest, FromFormatWithManyArgsReturnsString) {
1568 PyObjectPtr str(PyUnicode_FromFormat("h%c%s%%%2.i", 'e', "llo world", 2));
1569 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world% 2"));
1570}
1571
1572TEST_F(UnicodeExtensionApiTest, FromFormatParsesNumberTypes) {
1573 {
1574 PyObjectPtr str(PyUnicode_FromFormat("%x", 123));
1575 EXPECT_TRUE(isUnicodeEqualsCStr(str, "7b"));
1576 }
1577
1578 {
1579 PyObjectPtr str(PyUnicode_FromFormat("%d", 124));
1580 EXPECT_TRUE(isUnicodeEqualsCStr(str, "124"));
1581 }
1582
1583 {
1584 PyObjectPtr str(PyUnicode_FromFormat("%i", 125));
1585 EXPECT_TRUE(isUnicodeEqualsCStr(str, "125"));
1586 }
1587
1588 {
1589 PyObjectPtr str(PyUnicode_FromFormat("%ld", 126));
1590 EXPECT_TRUE(isUnicodeEqualsCStr(str, "126"));
1591 }
1592
1593 {
1594 PyObjectPtr str(PyUnicode_FromFormat("%li", 127));
1595 EXPECT_TRUE(isUnicodeEqualsCStr(str, "127"));
1596 }
1597
1598 {
1599 PyObjectPtr str(PyUnicode_FromFormat("%lld", 128));
1600 EXPECT_TRUE(isUnicodeEqualsCStr(str, "128"));
1601 }
1602
1603 {
1604 PyObjectPtr str(PyUnicode_FromFormat("%lli", 129));
1605 EXPECT_TRUE(isUnicodeEqualsCStr(str, "129"));
1606 }
1607
1608 {
1609 PyObjectPtr str(PyUnicode_FromFormat("%u", 130));
1610 EXPECT_TRUE(isUnicodeEqualsCStr(str, "130"));
1611 }
1612
1613 {
1614 PyObjectPtr str(PyUnicode_FromFormat("%lu", 131));
1615 EXPECT_TRUE(isUnicodeEqualsCStr(str, "131"));
1616 }
1617
1618 {
1619 PyObjectPtr str(PyUnicode_FromFormat("%llu", 132));
1620 EXPECT_TRUE(isUnicodeEqualsCStr(str, "132"));
1621 }
1622
1623 {
1624 PyObjectPtr str(PyUnicode_FromFormat("%zd", 133));
1625 EXPECT_TRUE(isUnicodeEqualsCStr(str, "133"));
1626 }
1627
1628 {
1629 PyObjectPtr str(PyUnicode_FromFormat("%zu", 134));
1630 EXPECT_TRUE(isUnicodeEqualsCStr(str, "134"));
1631 }
1632
1633 {
1634 PyObjectPtr str(PyUnicode_FromFormat("%zi", 135));
1635 EXPECT_TRUE(isUnicodeEqualsCStr(str, "135"));
1636 }
1637}
1638
1639TEST_F(UnicodeExtensionApiTest, FromFormatParsesCharacters) {
1640 PyObjectPtr str(PyUnicode_FromFormat("%c%c", 'h', 'w'));
1641 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hw"));
1642}
1643
1644TEST_F(UnicodeExtensionApiTest, FromFormatParsesPointer) {
1645 long value = 0;
1646 void* test = &value;
1647 char buff[18];
1648 std::snprintf(buff, 18, "%p", test);
1649 PyObjectPtr str(PyUnicode_FromFormat("%p", test));
1650 EXPECT_TRUE(isUnicodeEqualsCStr(str, buff));
1651}
1652
1653TEST_F(UnicodeExtensionApiTest, FromFormatParsesString) {
1654 PyObjectPtr str(PyUnicode_FromFormat("%s", "UTF-8"));
1655 EXPECT_TRUE(isUnicodeEqualsCStr(str, "UTF-8"));
1656}
1657
1658TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObject) {
1659 PyObjectPtr unicode(PyUnicode_FromString("hello"));
1660 PyObjectPtr str(PyUnicode_FromFormat("%U", static_cast<PyObject*>(unicode)));
1661 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello"));
1662}
1663
1664TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObjectAndString) {
1665 PyObjectPtr unicode(PyUnicode_FromString("hello"));
1666 PyObjectPtr str(
1667 PyUnicode_FromFormat("%V", static_cast<PyObject*>(unicode), "world"));
1668 EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello"));
1669}
1670
1671TEST_F(UnicodeExtensionApiTest, FromFormatParsesNullAndString) {
1672 PyObjectPtr str(PyUnicode_FromFormat("%V", nullptr, "world"));
1673 EXPECT_TRUE(isUnicodeEqualsCStr(str, "world"));
1674}
1675
1676TEST_F(UnicodeExtensionApiTest, ConcatWithNonStringFails) {
1677 PyObjectPtr i(PyLong_FromLong(1));
1678 EXPECT_EQ(PyUnicode_Concat(i, i), nullptr);
1679 ASSERT_NE(PyErr_Occurred(), nullptr);
1680 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1681}
1682
1683TEST_F(UnicodeExtensionApiTest, ConcatWithEmptyArgumentReturnsString) {
1684 PyObjectPtr hello(PyUnicode_FromString("hello"));
1685 PyObjectPtr empty(PyUnicode_FromString(""));
1686 PyObjectPtr empty_right(PyUnicode_Concat(hello, empty));
1687 ASSERT_EQ(PyErr_Occurred(), nullptr);
1688 EXPECT_TRUE(isUnicodeEqualsCStr(empty_right, "hello"));
1689
1690 PyObjectPtr empty_left(PyUnicode_Concat(empty, hello));
1691 ASSERT_EQ(PyErr_Occurred(), nullptr);
1692 EXPECT_TRUE(isUnicodeEqualsCStr(empty_left, "hello"));
1693}
1694
1695TEST_F(UnicodeExtensionApiTest, ConcatWithTwoStringsReturnsString) {
1696 PyObjectPtr hello(PyUnicode_FromString("hello "));
1697 PyObjectPtr world(PyUnicode_FromString("world"));
1698 PyObjectPtr result(PyUnicode_Concat(hello, world));
1699 ASSERT_EQ(PyErr_Occurred(), nullptr);
1700 EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world"));
1701}
1702
1703TEST_F(UnicodeExtensionApiTest, AppendWithNullFails) {
1704 PyUnicode_Append(nullptr, nullptr);
1705 ASSERT_NE(PyErr_Occurred(), nullptr);
1706 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
1707}
1708
1709TEST_F(UnicodeExtensionApiTest, AppendWithNonStringFails) {
1710 PyObject* not_str = PyLong_FromLong(1);
1711 PyUnicode_Append(¬_str, not_str);
1712 ASSERT_NE(PyErr_Occurred(), nullptr);
1713 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
1714}
1715
1716TEST_F(UnicodeExtensionApiTest, AppendWithEmptyArgumentReturnsString) {
1717 PyObject* hello(PyUnicode_FromString("hello"));
1718 PyObject* empty(PyUnicode_FromString(""));
1719 PyUnicode_Append(&hello, empty);
1720 ASSERT_EQ(PyErr_Occurred(), nullptr);
1721 EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello"));
1722
1723 PyUnicode_Append(&empty, hello);
1724 ASSERT_EQ(PyErr_Occurred(), nullptr);
1725 EXPECT_TRUE(isUnicodeEqualsCStr(empty, "hello"));
1726 Py_DECREF(hello);
1727 Py_DECREF(empty);
1728}
1729
1730TEST_F(UnicodeExtensionApiTest, AppendWithTwoStringsReturnsString) {
1731 PyObject* hello = PyUnicode_FromString("hello ");
1732 PyObjectPtr world(PyUnicode_FromString("world"));
1733 PyUnicode_Append(&hello, world);
1734 ASSERT_EQ(PyErr_Occurred(), nullptr);
1735 EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world"));
1736 Py_DECREF(hello);
1737}
1738
1739TEST_F(UnicodeExtensionApiTest, AppendAndDelWithStringDecreasesRefcnt) {
1740 PyObject* hello = PyUnicode_FromString("hello ");
1741 PyObject* world = PyUnicode_FromString("world");
1742 Py_INCREF(world);
1743 Py_ssize_t original_refcnt = Py_REFCNT(world);
1744 PyUnicode_AppendAndDel(&hello, world);
1745
1746 ASSERT_EQ(PyErr_Occurred(), nullptr);
1747 EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world"));
1748 Py_DECREF(hello);
1749
1750 EXPECT_LT(Py_REFCNT(world), original_refcnt);
1751 Py_DECREF(world);
1752}
1753
1754TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultWithNonStringReturnsNull) {
1755 PyObjectPtr bytes(PyUnicode_EncodeFSDefault(Py_None));
1756 EXPECT_EQ(bytes, nullptr);
1757 ASSERT_NE(PyErr_Occurred(), nullptr);
1758 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1759}
1760
1761TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultReturnsBytes) {
1762 PyObjectPtr unicode(PyUnicode_FromString("foo"));
1763 PyObjectPtr bytes(PyUnicode_EncodeFSDefault(unicode));
1764 EXPECT_EQ(PyErr_Occurred(), nullptr);
1765 ASSERT_TRUE(PyBytes_Check(bytes));
1766 EXPECT_EQ(PyBytes_Size(bytes), 3);
1767 EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
1768}
1769
1770TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithEmbeddedNulRaisesValueError) {
1771 PyObjectPtr nul_str(PyUnicode_FromStringAndSize("a\0b", 3));
1772 PyObject* bytes = PyUnicode_EncodeLocale(nul_str, nullptr);
1773 ASSERT_NE(PyErr_Occurred(), nullptr);
1774 EXPECT_EQ(bytes, nullptr);
1775 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
1776}
1777
1778TEST_F(UnicodeExtensionApiTest,
1779 EncodeLocaleWithUnknownErrorHandlerNameRaisesValueError) {
1780 PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3));
1781 PyObject* bytes = PyUnicode_EncodeLocale(str, "nonexistant");
1782 ASSERT_NE(PyErr_Occurred(), nullptr);
1783 EXPECT_EQ(bytes, nullptr);
1784 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
1785}
1786
1787TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithStrReturnsBytes) {
1788 PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3));
1789 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, nullptr));
1790 ASSERT_EQ(PyErr_Occurred(), nullptr);
1791 ASSERT_TRUE(PyBytes_Check(bytes));
1792 EXPECT_STREQ(PyBytes_AsString(bytes), "abc");
1793}
1794
1795TEST_F(UnicodeExtensionApiTest,
1796 EncodeLocaleWithStrictErrorsAndSurrogatesRaisesError) {
1797 PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape"));
1798 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "strict"));
1799 ASSERT_NE(PyErr_Occurred(), nullptr);
1800 ASSERT_EQ(bytes, nullptr);
1801 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
1802}
1803
1804TEST_F(UnicodeExtensionApiTest,
1805 EncodeLocaleWithSurrogateescapeAndSurrogatesReturnsBytes) {
1806 PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape"));
1807 PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "surrogateescape"));
1808 ASSERT_EQ(PyErr_Occurred(), nullptr);
1809 ASSERT_TRUE(PyBytes_Check(bytes));
1810 EXPECT_STREQ(PyBytes_AsString(bytes), "abc\x80");
1811}
1812
1813TEST_F(UnicodeExtensionApiTest, FSConverterWithNullSetAddrToNull) {
1814 PyObject* result = PyLong_FromLong(1);
1815 ASSERT_EQ(PyUnicode_FSConverter(nullptr, &result), 1);
1816 ASSERT_EQ(PyErr_Occurred(), nullptr);
1817 EXPECT_EQ(result, nullptr);
1818}
1819
1820TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesReturnsBytes) {
1821 PyObjectPtr bytes(PyBytes_FromString("foo"));
1822 PyObject* result = nullptr;
1823 ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), Py_CLEANUP_SUPPORTED);
1824 ASSERT_EQ(PyErr_Occurred(), nullptr);
1825 ASSERT_NE(result, nullptr);
1826 EXPECT_TRUE(PyBytes_Check(result));
1827 Py_DECREF(result);
1828}
1829
1830TEST_F(UnicodeExtensionApiTest, FSConverterWithUnicodeReturnsBytes) {
1831 PyObjectPtr unicode(PyUnicode_FromString("foo"));
1832 PyObject* result = nullptr;
1833 ASSERT_EQ(PyUnicode_FSConverter(unicode, &result), Py_CLEANUP_SUPPORTED);
1834 ASSERT_EQ(PyErr_Occurred(), nullptr);
1835 ASSERT_NE(result, nullptr);
1836 EXPECT_TRUE(PyBytes_Check(result));
1837 Py_DECREF(result);
1838}
1839
1840TEST_F(UnicodeExtensionApiTest, FSConverterCallsDunderFspath) {
1841 PyRun_SimpleString(R"(
1842class C:
1843 def __fspath__(self):
1844 return "foo"
1845
1846foo = C()
1847)");
1848 PyObjectPtr path(mainModuleGet("foo"));
1849 PyObject* result = nullptr;
1850 ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED);
1851 ASSERT_EQ(PyErr_Occurred(), nullptr);
1852 ASSERT_NE(result, nullptr);
1853 EXPECT_TRUE(PyBytes_Check(result));
1854 Py_DECREF(result);
1855}
1856
1857TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesSubclassReturnsSubclass) {
1858 PyRun_SimpleString(R"(
1859class C(bytes):
1860 pass
1861
1862foo = C()
1863)");
1864 PyObjectPtr path(mainModuleGet("foo"));
1865 PyObject* result = nullptr;
1866 ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED);
1867 ASSERT_EQ(PyErr_Occurred(), nullptr);
1868 ASSERT_NE(result, nullptr);
1869 EXPECT_TRUE(PyBytes_Check(result));
1870 EXPECT_EQ(result, path);
1871 Py_DECREF(result);
1872}
1873
1874TEST_F(UnicodeExtensionApiTest, FSConverterWithEmbeddedNullRaisesValueError) {
1875 PyObjectPtr bytes(PyBytes_FromStringAndSize("foo \0 bar", 9));
1876 PyObject* result = nullptr;
1877 ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), 0);
1878 ASSERT_NE(PyErr_Occurred(), nullptr);
1879 ASSERT_EQ(result, nullptr);
1880 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
1881}
1882
1883TEST_F(UnicodeExtensionApiTest, InternInPlaceWritesNewHandleBack) {
1884 PyObject* a = PyUnicode_FromString("hello world aaaaaaaaaa");
1885 PyObject* b = PyUnicode_FromString("hello world aaaaaaaaaa");
1886 PyObject* b_addr = b;
1887 EXPECT_NE(a, b);
1888 PyUnicode_InternInPlace(&a);
1889 EXPECT_EQ(PyErr_Occurred(), nullptr);
1890 PyUnicode_InternInPlace(&b);
1891 EXPECT_EQ(PyErr_Occurred(), nullptr);
1892 EXPECT_NE(b, b_addr);
1893 Py_DECREF(a);
1894 Py_DECREF(b);
1895}
1896
1897TEST_F(UnicodeExtensionApiTest, InternFromStringReturnsStr) {
1898 PyObjectPtr result(PyUnicode_InternFromString("szechuan broccoli"));
1899 ASSERT_NE(result, nullptr);
1900 EXPECT_EQ(PyErr_Occurred(), nullptr);
1901 EXPECT_TRUE(PyUnicode_CheckExact(result));
1902}
1903
1904TEST_F(UnicodeExtensionApiTest, JoinWithEmptySeqReturnsEmptyStr) {
1905 PyObjectPtr sep(PyUnicode_FromString("."));
1906 PyObjectPtr seq(PyList_New(0));
1907 PyObjectPtr result(PyUnicode_Join(sep, seq));
1908 EXPECT_EQ(PyErr_Occurred(), nullptr);
1909 EXPECT_TRUE(isUnicodeEqualsCStr(result, ""));
1910}
1911
1912TEST_F(UnicodeExtensionApiTest, JoinWithSeqJoinsElements) {
1913 PyObjectPtr sep(PyUnicode_FromString("."));
1914 PyObjectPtr seq(PyList_New(0));
1915 PyObjectPtr elt0(PyUnicode_FromString("a"));
1916 PyList_Append(seq, elt0);
1917 PyObjectPtr elt1(PyUnicode_FromString("b"));
1918 PyList_Append(seq, elt1);
1919 PyObjectPtr result(PyUnicode_Join(sep, seq));
1920 EXPECT_EQ(PyErr_Occurred(), nullptr);
1921 EXPECT_TRUE(isUnicodeEqualsCStr(result, "a.b"));
1922}
1923
1924TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingNonStrRaisesTypeError) {
1925 PyObjectPtr sep(PyUnicode_FromString("."));
1926 PyObjectPtr seq(PyList_New(0));
1927 PyList_Append(seq, Py_None);
1928 PyObjectPtr result(PyUnicode_Join(sep, seq));
1929 EXPECT_EQ(result, nullptr);
1930 ASSERT_NE(PyErr_Occurred(), nullptr);
1931 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1932}
1933
1934TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingBytesRaisesTypeError) {
1935 PyObjectPtr sep(PyUnicode_FromString("."));
1936 PyObjectPtr seq(PyList_New(0));
1937 PyObjectPtr elt0(PyBytes_FromString("a"));
1938 PyList_Append(seq, elt0);
1939 PyObjectPtr result(PyUnicode_Join(sep, seq));
1940 EXPECT_EQ(result, nullptr);
1941 ASSERT_NE(PyErr_Occurred(), nullptr);
1942 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1943}
1944
1945TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrStrRaisesTypeError) {
1946 PyObjectPtr sep(PyUnicode_FromString("."));
1947 PyObjectPtr result(PyUnicode_Partition(Py_None, sep));
1948 EXPECT_EQ(result, nullptr);
1949 ASSERT_NE(PyErr_Occurred(), nullptr);
1950 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1951}
1952
1953TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrSepRaisesTypeError) {
1954 PyObjectPtr str(PyUnicode_FromString("hello"));
1955 PyObjectPtr result(PyUnicode_Partition(str, Py_None));
1956 EXPECT_EQ(result, nullptr);
1957 ASSERT_NE(PyErr_Occurred(), nullptr);
1958 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1959}
1960
1961TEST_F(UnicodeExtensionApiTest, PartitionReturnsTuple) {
1962 PyObjectPtr str(PyUnicode_FromString("a.b"));
1963 PyObjectPtr sep(PyUnicode_FromString("."));
1964 PyObjectPtr result(PyUnicode_Partition(str, sep));
1965 EXPECT_EQ(PyErr_Occurred(), nullptr);
1966 ASSERT_NE(result, nullptr);
1967 EXPECT_TRUE(PyTuple_CheckExact(result));
1968}
1969
1970TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrStrRaisesTypeError) {
1971 PyObjectPtr sep(PyUnicode_FromString("."));
1972 PyObjectPtr result(PyUnicode_RPartition(Py_None, sep));
1973 EXPECT_EQ(result, nullptr);
1974 ASSERT_NE(PyErr_Occurred(), nullptr);
1975 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1976}
1977
1978TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrSepRaisesTypeError) {
1979 PyObjectPtr str(PyUnicode_FromString("hello"));
1980 PyObjectPtr result(PyUnicode_RPartition(str, Py_None));
1981 EXPECT_EQ(result, nullptr);
1982 ASSERT_NE(PyErr_Occurred(), nullptr);
1983 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
1984}
1985
1986TEST_F(UnicodeExtensionApiTest, RPartitionReturnsTuple) {
1987 PyObjectPtr str(PyUnicode_FromString("a.b"));
1988 PyObjectPtr sep(PyUnicode_FromString("."));
1989 PyObjectPtr result(PyUnicode_RPartition(str, sep));
1990 EXPECT_EQ(PyErr_Occurred(), nullptr);
1991 ASSERT_NE(result, nullptr);
1992 EXPECT_TRUE(PyTuple_CheckExact(result));
1993}
1994
1995TEST_F(UnicodeExtensionApiTest, SplitlinesWithNonStrStrRaisesTypeError) {
1996 PyObjectPtr result(PyUnicode_Splitlines(Py_None, 0));
1997 EXPECT_EQ(result, nullptr);
1998 ASSERT_NE(PyErr_Occurred(), nullptr);
1999 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2000}
2001
2002TEST_F(UnicodeExtensionApiTest, SplitlinesReturnsList) {
2003 PyObjectPtr str(PyUnicode_FromString("hello\nworld"));
2004 PyObjectPtr result(PyUnicode_Splitlines(str, 1));
2005 EXPECT_EQ(PyErr_Occurred(), nullptr);
2006 ASSERT_NE(result, nullptr);
2007 EXPECT_TRUE(PyList_CheckExact(result));
2008}
2009
2010TEST_F(UnicodeExtensionApiTest, SplitlinesWithSubClassReturnsList) {
2011 PyRun_SimpleString(R"(
2012class SubStr(str): pass
2013
2014str_val = SubStr('hello\nworld')
2015)");
2016 PyObjectPtr str(mainModuleGet("str_val"));
2017 PyObjectPtr result(PyUnicode_Splitlines(str, 1));
2018 EXPECT_EQ(PyErr_Occurred(), nullptr);
2019 ASSERT_NE(result, nullptr);
2020 EXPECT_TRUE(PyList_CheckExact(result));
2021}
2022
2023TEST_F(UnicodeExtensionApiTest, SplitlinesWithNoNewlinesReturnsIdEqualString) {
2024 PyObjectPtr str(PyUnicode_FromString("hello"));
2025 PyObjectPtr result(PyUnicode_Splitlines(str, 1));
2026 EXPECT_EQ(PyErr_Occurred(), nullptr);
2027 ASSERT_NE(result, nullptr);
2028 ASSERT_TRUE(PyList_CheckExact(result));
2029 ASSERT_EQ(PyList_Size(result), 1);
2030 PyObject* str_elt = PyList_GetItem(result, 0);
2031 EXPECT_EQ(str, str_elt);
2032}
2033
2034TEST_F(UnicodeExtensionApiTest, SplitWithNonStrStrRaisesTypeError) {
2035 PyObjectPtr sep(PyUnicode_FromString("."));
2036 PyObjectPtr result(PyUnicode_Split(Py_None, sep, 5));
2037 EXPECT_EQ(result, nullptr);
2038 ASSERT_NE(PyErr_Occurred(), nullptr);
2039 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2040}
2041
2042TEST_F(UnicodeExtensionApiTest, SplitWithNonStrSepRaisesTypeError) {
2043 PyObjectPtr str(PyUnicode_FromString("hello"));
2044 PyObjectPtr sep(PyLong_FromLong(8));
2045 PyObjectPtr result(PyUnicode_Split(str, sep, 5));
2046 EXPECT_EQ(result, nullptr);
2047 ASSERT_NE(PyErr_Occurred(), nullptr);
2048 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2049}
2050
2051TEST_F(UnicodeExtensionApiTest, SplitReturnsList) {
2052 PyObjectPtr str(PyUnicode_FromString("a.b"));
2053 PyObjectPtr sep(PyUnicode_FromString("."));
2054 PyObjectPtr result(PyUnicode_Split(str, sep, 5));
2055 EXPECT_EQ(PyErr_Occurred(), nullptr);
2056 ASSERT_NE(result, nullptr);
2057 EXPECT_TRUE(PyList_CheckExact(result));
2058}
2059
2060TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrStrRaisesTypeError) {
2061 PyObjectPtr sep(PyUnicode_FromString("."));
2062 PyObjectPtr result(PyUnicode_RSplit(Py_None, sep, 5));
2063 EXPECT_EQ(result, nullptr);
2064 ASSERT_NE(PyErr_Occurred(), nullptr);
2065 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2066}
2067
2068TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrSepRaisesTypeError) {
2069 PyObjectPtr str(PyUnicode_FromString("hello"));
2070 PyObjectPtr sep(PyLong_FromLong(8));
2071 PyObjectPtr result(PyUnicode_RSplit(str, sep, 5));
2072 EXPECT_EQ(result, nullptr);
2073 ASSERT_NE(PyErr_Occurred(), nullptr);
2074 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2075}
2076
2077TEST_F(UnicodeExtensionApiTest, RSplitReturnsList) {
2078 PyObjectPtr str(PyUnicode_FromString("a.b"));
2079 PyObjectPtr sep(PyUnicode_FromString("."));
2080 PyObjectPtr result(PyUnicode_RSplit(str, sep, 5));
2081 EXPECT_EQ(PyErr_Occurred(), nullptr);
2082 ASSERT_NE(result, nullptr);
2083 EXPECT_TRUE(PyList_CheckExact(result));
2084}
2085
2086TEST_F(UnicodeExtensionApiTest, StrlenWithEmptyStrReturnsZero) {
2087 const wchar_t* str = L"";
2088#pragma GCC diagnostic push
2089#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
2090 ASSERT_EQ(Py_UNICODE_strlen(str), 0U);
2091#pragma GCC diagnostic pop
2092}
2093
2094TEST_F(UnicodeExtensionApiTest, StrlenWithStrReturnsNumberOfChars) {
2095 const wchar_t* str = L"hello";
2096#pragma GCC diagnostic push
2097#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
2098 ASSERT_EQ(Py_UNICODE_strlen(str), 5U);
2099#pragma GCC diagnostic pop
2100}
2101
2102TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeStartRaisesIndexError) {
2103 PyObjectPtr str(PyUnicode_FromString("foo"));
2104 ASSERT_EQ(PyUnicode_Substring(str, -1, 3), nullptr);
2105 ASSERT_NE(PyErr_Occurred(), nullptr);
2106 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
2107}
2108
2109TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeEndRaisesIndexError) {
2110 PyObjectPtr str(PyUnicode_FromString("foo"));
2111 ASSERT_EQ(PyUnicode_Substring(str, 0, -3), nullptr);
2112 ASSERT_NE(PyErr_Occurred(), nullptr);
2113 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
2114}
2115
2116TEST_F(UnicodeExtensionApiTest, SubstringWithFullStringReturnsSameObject) {
2117 PyObjectPtr str(PyUnicode_FromString("foo"));
2118 PyObjectPtr result(PyUnicode_Substring(str, 0, 5));
2119 EXPECT_EQ(PyErr_Occurred(), nullptr);
2120 EXPECT_EQ(result, str);
2121}
2122
2123TEST_F(UnicodeExtensionApiTest, SubstringWithSameStartAndEndReturnsEmpty) {
2124 PyObjectPtr str(PyUnicode_FromString("foo"));
2125 PyObjectPtr result(PyUnicode_Substring(str, 2, 2));
2126 EXPECT_EQ(PyErr_Occurred(), nullptr);
2127 ASSERT_TRUE(PyUnicode_CheckExact(result));
2128 EXPECT_STREQ(PyUnicode_AsUTF8(result), "");
2129}
2130
2131TEST_F(UnicodeExtensionApiTest, SubstringWithASCIIReturnsSubstring) {
2132 PyObjectPtr str(PyUnicode_FromString("Hello world!"));
2133 PyObjectPtr result(PyUnicode_Substring(str, 3, 8));
2134 EXPECT_EQ(PyErr_Occurred(), nullptr);
2135 ASSERT_TRUE(PyUnicode_CheckExact(result));
2136 EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo");
2137}
2138
2139TEST_F(UnicodeExtensionApiTest, SubstringWithSubClassReturnsSubstring) {
2140 PyRun_SimpleString(R"(
2141class SubStr(str): pass
2142
2143str_val = SubStr('Hello world!')
2144)");
2145 PyObjectPtr str(mainModuleGet("str_val"));
2146 PyObjectPtr result(PyUnicode_Substring(str, 3, 8));
2147 EXPECT_EQ(PyErr_Occurred(), nullptr);
2148 ASSERT_TRUE(PyUnicode_CheckExact(result));
2149 EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo");
2150}
2151
2152TEST_F(UnicodeExtensionApiTest, SubstringCountsCodePoints) {
2153 PyObjectPtr str(PyUnicode_FromString("cre\u0300me bru\u0302le\u0301e"));
2154 PyObjectPtr result(PyUnicode_Substring(str, 2, 11));
2155 EXPECT_EQ(PyErr_Occurred(), nullptr);
2156 ASSERT_TRUE(PyUnicode_CheckExact(result));
2157 EXPECT_STREQ(PyUnicode_AsUTF8(result), "e\u0300me bru\u0302");
2158}
2159
2160TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithEmptyStringsReturnsOne) {
2161 PyObjectPtr str(PyUnicode_FromString(""));
2162 PyObjectPtr substr(PyUnicode_FromString(""));
2163 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, 1), 1);
2164 EXPECT_EQ(PyErr_Occurred(), nullptr);
2165}
2166
2167TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithEmptyStringsReturnsOne) {
2168 PyObjectPtr str(PyUnicode_FromString(""));
2169 PyObjectPtr substr(PyUnicode_FromString(""));
2170 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, -1), 1);
2171 EXPECT_EQ(PyErr_Occurred(), nullptr);
2172}
2173
2174TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithMatchReturnsOne) {
2175 PyObjectPtr str(PyUnicode_FromString("abcde"));
2176 PyObjectPtr substr(PyUnicode_FromString("cde"));
2177 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 9, -1), 1);
2178 EXPECT_EQ(PyErr_Occurred(), nullptr);
2179}
2180
2181TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithoutMatchReturnsZero) {
2182 PyObjectPtr str(PyUnicode_FromString("abcde"));
2183 PyObjectPtr substr(PyUnicode_FromString("cde"));
2184 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 4, -1), 0);
2185 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, -1), 0);
2186
2187 PyObjectPtr substr2(PyUnicode_FromString("cdf"));
2188 EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 2, 6, -1), 0);
2189 EXPECT_EQ(PyErr_Occurred(), nullptr);
2190}
2191
2192TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithMatchReturnsOne) {
2193 PyObjectPtr str(PyUnicode_FromString("abcde"));
2194 PyObjectPtr substr(PyUnicode_FromString("cde"));
2195 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 5, 1), 1);
2196 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, 1), 1);
2197 EXPECT_EQ(PyErr_Occurred(), nullptr);
2198}
2199
2200TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithoutMatchReturnsZero) {
2201 PyObjectPtr str(PyUnicode_FromString("abcde"));
2202 PyObjectPtr substr(PyUnicode_FromString("cde"));
2203 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0);
2204 PyObjectPtr substr2(PyUnicode_FromString("bde"));
2205 EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 1, 5, 1), 0);
2206 EXPECT_EQ(PyErr_Occurred(), nullptr);
2207}
2208
2209TEST_F(UnicodeExtensionApiTest, TailmatchWithLargerNeedleReturnsZero) {
2210 PyObjectPtr str(PyUnicode_FromString("abcde"));
2211 PyObjectPtr substr(PyUnicode_FromString("bananas"));
2212 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0);
2213 EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, -1), 0);
2214 EXPECT_EQ(PyErr_Occurred(), nullptr);
2215}
2216
2217TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrHaystackRaisesTypeError) {
2218 PyObjectPtr str(PyUnicode_FromString("abcde"));
2219 PyObjectPtr num(PyLong_FromLong(7));
2220 EXPECT_EQ(PyUnicode_Tailmatch(num, str, 1, 6, 1), -1);
2221 ASSERT_NE(PyErr_Occurred(), nullptr);
2222 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2223}
2224
2225TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrNeedleRaisesTypeError) {
2226 PyObjectPtr str(PyUnicode_FromString("abcde"));
2227 PyObjectPtr num(PyLong_FromLong(7));
2228 EXPECT_EQ(PyUnicode_Tailmatch(str, num, 1, 6, 1), -1);
2229 ASSERT_NE(PyErr_Occurred(), nullptr);
2230 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2231}
2232
2233TEST_F(UnicodeExtensionApiTest, NewWithInvalidSizeReturnsError) {
2234 EXPECT_EQ(PyUnicode_New(-1, 0), nullptr);
2235 ASSERT_NE(PyErr_Occurred(), nullptr);
2236 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
2237}
2238
2239TEST_F(UnicodeExtensionApiTest, NewWithInvalidMaxCharReturnsError) {
2240 EXPECT_EQ(PyUnicode_New(1, 0x11FFFF), nullptr);
2241 ASSERT_NE(PyErr_Occurred(), nullptr);
2242 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
2243}
2244
2245TEST_F(UnicodeExtensionApiTest, NewWithZeroSizeAndInvalidMaxCharReturnsStr) {
2246 PyObjectPtr empty(PyUnicode_New(0, 0x11FFFF));
2247 ASSERT_EQ(PyErr_Occurred(), nullptr);
2248 EXPECT_TRUE(PyUnicode_CheckExact(empty));
2249 EXPECT_TRUE(isUnicodeEqualsCStr(empty, ""));
2250}
2251
2252TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithNegativeOneRaiseError) {
2253 char c = 'a';
2254 PyObjectPtr empty(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, &c, -1));
2255 EXPECT_EQ(empty, nullptr);
2256 ASSERT_NE(PyErr_Occurred(), nullptr);
2257 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2258}
2259
2260TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithInvalidKindRaiseError) {
2261 char c = 'a';
2262 PyObjectPtr empty(PyUnicode_FromKindAndData(100, &c, 1));
2263 EXPECT_EQ(empty, nullptr);
2264 ASSERT_NE(PyErr_Occurred(), nullptr);
2265 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
2266}
2267
2268TEST_F(UnicodeExtensionApiTest,
2269 FromKindAndDataWithOneByteKindAndASCIICodePointsReturnsStr) {
2270 Py_UCS1 buffer[] = {'h', 'e', 'l', 'l', 'o'};
2271 PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, buffer,
2272 Py_ARRAY_LENGTH(buffer)));
2273 ASSERT_EQ(PyErr_Occurred(), nullptr);
2274 ASSERT_TRUE(PyUnicode_CheckExact(str));
2275 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "hello"));
2276}
2277
2278TEST_F(UnicodeExtensionApiTest,
2279 FromKindAndDataWithOneByteKindAndLatin1CodePointsReturnsStr) {
2280 Py_UCS1 buffer[] = {'h', 0xe4, 'l', 'l', 'o'};
2281 PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, buffer,
2282 Py_ARRAY_LENGTH(buffer)));
2283 ASSERT_EQ(PyErr_Occurred(), nullptr);
2284 ASSERT_TRUE(PyUnicode_CheckExact(str));
2285 EXPECT_STREQ(PyUnicode_AsUTF8(str), "h\xc3\xa4llo");
2286}
2287
2288TEST_F(UnicodeExtensionApiTest,
2289 FromKindAndDataWithTwoByteKindAndBMPCodePointsReturnsStr) {
2290 Py_UCS2 buffer[] = {'h', 0xe4, 'l', 0x2cc0, 'o'};
2291 PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buffer,
2292 Py_ARRAY_LENGTH(buffer)));
2293 ASSERT_EQ(PyErr_Occurred(), nullptr);
2294 ASSERT_TRUE(PyUnicode_CheckExact(str));
2295 EXPECT_STREQ(PyUnicode_AsUTF8(str), "h\xc3\xa4l\xe2\xb3\x80o");
2296}
2297
2298TEST_F(UnicodeExtensionApiTest,
2299 FromKindAndDataWithFourByteKindAndNonBMPCodePointsReturnsStr) {
2300 Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0};
2301 PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer,
2302 Py_ARRAY_LENGTH(buffer)));
2303 ASSERT_EQ(PyErr_Occurred(), nullptr);
2304 ASSERT_TRUE(PyUnicode_CheckExact(str));
2305 EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80");
2306}
2307
2308TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrSelfRaisesTypeError) {
2309 PyObjectPtr self(PyLong_FromLong(7));
2310 PyObjectPtr other(PyUnicode_FromString("hello"));
2311 EXPECT_EQ(PyUnicode_Contains(self, other), -1);
2312 ASSERT_NE(PyErr_Occurred(), nullptr);
2313 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2314}
2315
2316TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrOtherRaisesTypeError) {
2317 PyObjectPtr self(PyUnicode_FromString("hello"));
2318 PyObjectPtr other(PyLong_FromLong(7));
2319 EXPECT_EQ(PyUnicode_Contains(self, other), -1);
2320 ASSERT_NE(PyErr_Occurred(), nullptr);
2321 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2322}
2323
2324TEST_F(UnicodeExtensionApiTest, ContainsWithPresentSubstrReturnsTrue) {
2325 PyObjectPtr self(PyUnicode_FromString("foo"));
2326 PyObjectPtr other(PyUnicode_FromString("f"));
2327 EXPECT_EQ(PyUnicode_Contains(self, other), 1);
2328 EXPECT_EQ(PyErr_Occurred(), nullptr);
2329}
2330
2331TEST_F(UnicodeExtensionApiTest, ContainsWithNotPresentSubstrReturnsTrue) {
2332 PyObjectPtr self(PyUnicode_FromString("foo"));
2333 PyObjectPtr other(PyUnicode_FromString("q"));
2334 EXPECT_EQ(PyUnicode_Contains(self, other), 0);
2335 EXPECT_EQ(PyErr_Occurred(), nullptr);
2336}
2337
2338TEST_F(UnicodeExtensionApiTest, NormalizeEncodingEscapesMidStringPunctuation) {
2339 char buffer[11] = {0};
2340 EXPECT_EQ(_Py_normalize_encoding("utf-8", buffer, sizeof(buffer)), 1);
2341 EXPECT_STREQ(buffer, "utf_8");
2342 EXPECT_EQ(_Py_normalize_encoding("utf}8", buffer, sizeof(buffer)), 1);
2343 EXPECT_STREQ(buffer, "utf_8");
2344}
2345
2346TEST_F(UnicodeExtensionApiTest,
2347 NormalizeEncodingIgnoresEndOfStringPunctuation) {
2348 char buffer[11] = {0};
2349 EXPECT_EQ(_Py_normalize_encoding("_utf8", buffer, sizeof(buffer)), 1);
2350 EXPECT_STREQ(buffer, "utf8");
2351 EXPECT_EQ(_Py_normalize_encoding("utf8_", buffer, sizeof(buffer)), 1);
2352 EXPECT_STREQ(buffer, "utf8");
2353}
2354
2355TEST_F(UnicodeExtensionApiTest, NormalizeEncodingProperlyLowercases) {
2356 char buffer[11] = {0};
2357 EXPECT_EQ(_Py_normalize_encoding("ASCII", buffer, sizeof(buffer)), 1);
2358 EXPECT_STREQ(buffer, "ascii");
2359}
2360
2361TEST_F(UnicodeExtensionApiTest,
2362 NormalizeEncodingWithTooLongStringReturnsEmptyString) {
2363 char buffer[5] = {0};
2364 EXPECT_EQ(_Py_normalize_encoding("12345", buffer, sizeof(buffer)), 0);
2365 EXPECT_STREQ(buffer, "1234");
2366}
2367
2368TEST_F(UnicodeExtensionApiTest,
2369 DecodeLocaleAndSizeWithNullErrorValueEmbeddedNulRaisesValueError) {
2370 PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, nullptr);
2371 ASSERT_NE(PyErr_Occurred(), nullptr);
2372 EXPECT_EQ(self, nullptr);
2373 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2374}
2375
2376TEST_F(
2377 UnicodeExtensionApiTest,
2378 DecodeLocaleAndSizeWithNullErrorValueNonNulTerminatedStrRaisesValueError) {
2379 const char data[] = {'a', 'b'};
2380 PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, nullptr);
2381 ASSERT_NE(PyErr_Occurred(), nullptr);
2382 EXPECT_EQ(self, nullptr);
2383 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2384}
2385
2386TEST_F(UnicodeExtensionApiTest,
2387 DecodeLocaleAndSizeWithNullErrorValueReturnsStr) {
2388 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, nullptr));
2389 ASSERT_EQ(PyErr_Occurred(), nullptr);
2390 ASSERT_TRUE(PyUnicode_CheckExact(str));
2391 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
2392}
2393
2394TEST_F(UnicodeExtensionApiTest,
2395 DecodeLocaleAndSizeWithNullErrorValueStrictAndSurrogatesRaisesError) {
2396 PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, nullptr);
2397 ASSERT_EQ(str, nullptr);
2398 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
2399}
2400
2401TEST_F(UnicodeExtensionApiTest,
2402 DecodeLocaleAndSizeWithEmbeddedNulRaisesValueError) {
2403 PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, "strict");
2404 ASSERT_NE(PyErr_Occurred(), nullptr);
2405 EXPECT_EQ(self, nullptr);
2406 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2407}
2408
2409TEST_F(UnicodeExtensionApiTest,
2410 DecodeLocaleAndSizeWithNonNulTerminatedStrRaisesValueError) {
2411 const char data[] = {'a', 'b'};
2412 PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, "strict");
2413 ASSERT_NE(PyErr_Occurred(), nullptr);
2414 EXPECT_EQ(self, nullptr);
2415 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2416}
2417
2418TEST_F(UnicodeExtensionApiTest,
2419 DecodeLocaleAndSizeWithUnknownErrorHandlerNameRaisesValueError) {
2420 PyObject* self = PyUnicode_DecodeLocaleAndSize("abc", 3, "nonexistant");
2421 ASSERT_NE(PyErr_Occurred(), nullptr);
2422 EXPECT_EQ(self, nullptr);
2423 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
2424}
2425
2426TEST_F(UnicodeExtensionApiTest, DecodeLocaleAndSizeWithStrictReturnsStr) {
2427 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "strict"));
2428 ASSERT_EQ(PyErr_Occurred(), nullptr);
2429 ASSERT_TRUE(PyUnicode_CheckExact(str));
2430 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
2431}
2432
2433TEST_F(UnicodeExtensionApiTest,
2434 DecodeLocaleAndSizeWithSurrogateescapeReturnsStr) {
2435 PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "surrogateescape"));
2436 ASSERT_EQ(PyErr_Occurred(), nullptr);
2437 ASSERT_TRUE(PyUnicode_CheckExact(str));
2438 EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
2439}
2440
2441TEST_F(UnicodeExtensionApiTest,
2442 DecodeLocaleAndSizeWithSurrogateescapeAndSurrogatesReturnsStr) {
2443 PyObjectPtr str(
2444 PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "surrogateescape"));
2445 ASSERT_EQ(PyErr_Occurred(), nullptr);
2446 ASSERT_TRUE(PyUnicode_CheckExact(str));
2447 Py_UCS4 data[] = {'a', 'b', 'c', 0xDC80};
2448 PyObjectPtr test(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, data, 4));
2449 EXPECT_TRUE(_PyUnicode_EQ(str, test));
2450}
2451
2452TEST_F(UnicodeExtensionApiTest,
2453 DecodeLocaleAndSizeWithStrictAndSurrogatesRaisesError) {
2454 PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "strict");
2455 ASSERT_EQ(str, nullptr);
2456 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
2457}
2458
2459TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithNonStringReturnsNull) {
2460 PyObjectPtr bytes(_PyUnicode_AsASCIIString(Py_None, nullptr));
2461 ASSERT_EQ(bytes, nullptr);
2462 ASSERT_NE(PyErr_Occurred(), nullptr);
2463 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2464}
2465
2466TEST_F(UnicodeExtensionApiTest, AsASCIIStringReturnsBytes) {
2467 PyObjectPtr unicode(PyUnicode_FromString("foo"));
2468 PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, nullptr));
2469 ASSERT_EQ(PyErr_Occurred(), nullptr);
2470 ASSERT_TRUE(PyBytes_Check(bytes));
2471 EXPECT_EQ(PyBytes_Size(bytes), 3);
2472 EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
2473}
2474
2475TEST_F(UnicodeExtensionApiTest,
2476 AsASCIIStringWithInvalidCodepointRaisesEncodeError) {
2477 PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF"));
2478 PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, nullptr));
2479 ASSERT_NE(PyErr_Occurred(), nullptr);
2480 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
2481 EXPECT_EQ(bytes, nullptr);
2482}
2483
2484TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithReplaceErrorsReturnsBytes) {
2485 PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF"));
2486 PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, "replace"));
2487 ASSERT_EQ(PyErr_Occurred(), nullptr);
2488
2489 ASSERT_TRUE(PyBytes_Check(bytes));
2490 EXPECT_EQ(PyBytes_Size(bytes), 4);
2491 EXPECT_STREQ(PyBytes_AsString(bytes), "foo?");
2492}
2493
2494TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithNonStringReturnsNull) {
2495 PyObjectPtr bytes(_PyUnicode_AsLatin1String(Py_None, nullptr));
2496 ASSERT_EQ(bytes, nullptr);
2497 ASSERT_NE(PyErr_Occurred(), nullptr);
2498 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2499}
2500
2501TEST_F(UnicodeExtensionApiTest, AsLatin1StringReturnsBytes) {
2502 PyObjectPtr unicode(PyUnicode_FromString("foo"));
2503 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, nullptr));
2504 ASSERT_EQ(PyErr_Occurred(), nullptr);
2505 ASSERT_TRUE(PyBytes_Check(bytes));
2506 EXPECT_EQ(PyBytes_Size(bytes), 3);
2507 EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
2508}
2509
2510TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithLatin1ReturnsBytes) {
2511 PyObjectPtr unicode(PyUnicode_FromString("foo\u00E4"));
2512 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, "replace"));
2513 ASSERT_EQ(PyErr_Occurred(), nullptr);
2514
2515 ASSERT_TRUE(PyBytes_Check(bytes));
2516 EXPECT_EQ(PyBytes_Size(bytes), 4);
2517 EXPECT_STREQ(PyBytes_AsString(bytes), "foo\xE4");
2518}
2519
2520TEST_F(UnicodeExtensionApiTest,
2521 AsLatin1StringWithInvalidCodepointRaisesEncodeError) {
2522 PyObjectPtr unicode(PyUnicode_FromString("foo\u01EF"));
2523 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, nullptr));
2524 ASSERT_NE(PyErr_Occurred(), nullptr);
2525 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
2526 EXPECT_EQ(bytes, nullptr);
2527}
2528
2529TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithReplaceErrorsReturnsBytes) {
2530 PyObjectPtr unicode(PyUnicode_FromString("foo\u0AE4"));
2531 PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, "replace"));
2532 ASSERT_EQ(PyErr_Occurred(), nullptr);
2533
2534 ASSERT_TRUE(PyBytes_Check(bytes));
2535 EXPECT_EQ(PyBytes_Size(bytes), 4);
2536 EXPECT_STREQ(PyBytes_AsString(bytes), "foo?");
2537}
2538
2539TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithNonStringReturnsNull) {
2540 PyObjectPtr bytes(PyUnicode_AsUTF16String(Py_None));
2541 ASSERT_EQ(bytes, nullptr);
2542 ASSERT_NE(PyErr_Occurred(), nullptr);
2543 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2544}
2545
2546TEST_F(UnicodeExtensionApiTest, AsUTF16StringReturnsBytes) {
2547 PyObjectPtr unicode(PyUnicode_FromString("hi"));
2548 PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
2549 ASSERT_EQ(PyErr_Occurred(), nullptr);
2550 ASSERT_TRUE(PyBytes_Check(bytes));
2551 EXPECT_EQ(PyBytes_Size(bytes), 6);
2552 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00i\x00", 6), 0);
2553}
2554
2555TEST_F(UnicodeExtensionApiTest,
2556 AsUTF16StringWithInvalidCodepointRaisesEncodeError) {
2557 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
2558 ASSERT_EQ(PyErr_Occurred(), nullptr);
2559 ASSERT_TRUE(PyUnicode_CheckExact(unicode));
2560 PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
2561 ASSERT_NE(PyErr_Occurred(), nullptr);
2562 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
2563 EXPECT_EQ(bytes, nullptr);
2564}
2565
2566TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithUTF16ReturnsBytes) {
2567 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2568 PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
2569 ASSERT_EQ(PyErr_Occurred(), nullptr);
2570
2571 ASSERT_TRUE(PyBytes_Check(bytes));
2572 EXPECT_EQ(PyBytes_Size(bytes), 10);
2573 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
2574 "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10),
2575 0);
2576}
2577
2578TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithUTF16ReturnsBytes) {
2579 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2580 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
2581 ASSERT_EQ(PyErr_Occurred(), nullptr);
2582
2583 ASSERT_TRUE(PyBytes_Check(bytes));
2584 EXPECT_EQ(PyBytes_Size(bytes), 10);
2585 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
2586 "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10),
2587 0);
2588}
2589
2590TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16LeWithUTF16ReturnsBytes) {
2591 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2592 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", -1));
2593 ASSERT_EQ(PyErr_Occurred(), nullptr);
2594
2595 ASSERT_TRUE(PyBytes_Check(bytes));
2596 EXPECT_EQ(PyBytes_Size(bytes), 8);
2597 EXPECT_EQ(
2598 std::memcmp(PyBytes_AsString(bytes), "h\x00\x34\xd8\xf0\xddi\x00", 8), 0);
2599}
2600
2601TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16BeWithUTF16ReturnsBytes) {
2602 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2603 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 1));
2604 ASSERT_EQ(PyErr_Occurred(), nullptr);
2605
2606 ASSERT_TRUE(PyBytes_Check(bytes));
2607 EXPECT_EQ(PyBytes_Size(bytes), 8);
2608 EXPECT_EQ(
2609 std::memcmp(PyBytes_AsString(bytes), "\x00h\xd8\x34\xdd\xf0\x00i", 8), 0);
2610}
2611
2612TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithReplaceReturnsBytes) {
2613 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
2614 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
2615 ASSERT_EQ(PyErr_Occurred(), nullptr);
2616
2617 ASSERT_TRUE(PyBytes_Check(bytes));
2618 EXPECT_EQ(PyBytes_Size(bytes), 8);
2619 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8),
2620 0);
2621}
2622
2623TEST_F(UnicodeExtensionApiTest, EncodeUTF16WithReplaceReturnsBytes) {
2624 PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3));
2625 PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
2626 ASSERT_EQ(PyErr_Occurred(), nullptr);
2627
2628 ASSERT_TRUE(PyBytes_Check(bytes));
2629 EXPECT_EQ(PyBytes_Size(bytes), 8);
2630 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8),
2631 0);
2632}
2633
2634TEST_F(UnicodeExtensionApiTest, AsUTF32StringWithNonStringReturnsNull) {
2635 PyObjectPtr bytes(PyUnicode_AsUTF32String(Py_None));
2636 ASSERT_EQ(bytes, nullptr);
2637 ASSERT_NE(PyErr_Occurred(), nullptr);
2638 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
2639}
2640
2641TEST_F(UnicodeExtensionApiTest, AsUTF32StringReturnsBytes) {
2642 PyObjectPtr unicode(PyUnicode_FromString("hi"));
2643 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
2644 ASSERT_EQ(PyErr_Occurred(), nullptr);
2645 ASSERT_TRUE(PyBytes_Check(bytes));
2646 EXPECT_EQ(PyBytes_Size(bytes), 12);
2647 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
2648 "\xff\xfe\x00\x00h\x00\x00\x00i\x00\x00\x00", 12),
2649 0);
2650}
2651
2652TEST_F(UnicodeExtensionApiTest,
2653 AsUTF32StringWithInvalidCodepointRaisesEncodeError) {
2654 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
2655 ASSERT_EQ(PyErr_Occurred(), nullptr);
2656 ASSERT_TRUE(PyUnicode_CheckExact(unicode));
2657 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
2658 ASSERT_NE(PyErr_Occurred(), nullptr);
2659 EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
2660 EXPECT_EQ(bytes, nullptr);
2661}
2662
2663TEST_F(UnicodeExtensionApiTest, AsUTF32StringWithUTF32ReturnsBytes) {
2664 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2665 PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
2666 ASSERT_EQ(PyErr_Occurred(), nullptr);
2667
2668 ASSERT_TRUE(PyBytes_Check(bytes));
2669 EXPECT_EQ(PyBytes_Size(bytes), 16);
2670 EXPECT_EQ(
2671 std::memcmp(PyBytes_AsString(bytes),
2672 "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00",
2673 16),
2674 0);
2675}
2676
2677TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithUTF32ReturnsBytes) {
2678 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2679 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
2680 ASSERT_EQ(PyErr_Occurred(), nullptr);
2681
2682 ASSERT_TRUE(PyBytes_Check(bytes));
2683 EXPECT_EQ(PyBytes_Size(bytes), 16);
2684 EXPECT_EQ(
2685 std::memcmp(PyBytes_AsString(bytes),
2686 "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00",
2687 16),
2688 0);
2689}
2690
2691TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32LeWithUTF32ReturnsBytes) {
2692 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2693 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", -1));
2694 ASSERT_EQ(PyErr_Occurred(), nullptr);
2695
2696 ASSERT_TRUE(PyBytes_Check(bytes));
2697 EXPECT_EQ(PyBytes_Size(bytes), 12);
2698 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
2699 "h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00", 12),
2700 0);
2701}
2702
2703TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32BeWithUTF32ReturnsBytes) {
2704 PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
2705 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 1));
2706 ASSERT_EQ(PyErr_Occurred(), nullptr);
2707
2708 ASSERT_TRUE(PyBytes_Check(bytes));
2709 EXPECT_EQ(PyBytes_Size(bytes), 12);
2710 EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
2711 "\x00\x00\x00h\x00\x01\xd1\xf0\x00\x00\x00i", 12),
2712 0);
2713}
2714
2715TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithReplaceReturnsBytes) {
2716 PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
2717 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
2718 ASSERT_EQ(PyErr_Occurred(), nullptr);
2719
2720 ASSERT_TRUE(PyBytes_Check(bytes));
2721 EXPECT_EQ(PyBytes_Size(bytes), 16);
2722 EXPECT_EQ(std::memcmp(
2723 PyBytes_AsString(bytes),
2724 "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16),
2725 0);
2726}
2727
2728TEST_F(UnicodeExtensionApiTest, EncodeUTF32WithReplaceReturnsBytes) {
2729 PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3));
2730 PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
2731 ASSERT_EQ(PyErr_Occurred(), nullptr);
2732
2733 ASSERT_TRUE(PyBytes_Check(bytes));
2734 EXPECT_EQ(PyBytes_Size(bytes), 16);
2735 EXPECT_EQ(std::memcmp(
2736 PyBytes_AsString(bytes),
2737 "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16),
2738 0);
2739}
2740
2741TEST_F(UnicodeExtensionApiTest, IsAsciiWithAsciiOnlyCharsReturnsOne) {
2742 PyObjectPtr unicode(PyUnicode_FromString("foo"));
2743 EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 1);
2744}
2745
2746TEST_F(UnicodeExtensionApiTest, IsAsciiWithNonAsciiCharsReturnsZero) {
2747 PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o"));
2748 EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 0);
2749}
2750
2751TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithAsciiOnlyCharsReturnsOne) {
2752 PyObjectPtr unicode(PyUnicode_FromString("foo"));
2753 EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 1);
2754}
2755
2756TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithNonAsciiCharsReturnsZero) {
2757 PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o"));
2758 EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 0);
2759}
2760
2761TEST_F(UnicodeExtensionApiTest, IsIdentifierWithEmptyStringReturnsFalse) {
2762 PyObjectPtr unicode(PyUnicode_FromString(""));
2763 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0);
2764 EXPECT_EQ(PyErr_Occurred(), nullptr);
2765}
2766
2767TEST_F(UnicodeExtensionApiTest, IsIdentifierWithValidIdentifierReturnsTrue) {
2768 PyObjectPtr unicode(PyUnicode_FromString("foo"));
2769 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 1);
2770 EXPECT_EQ(PyErr_Occurred(), nullptr);
2771}
2772
2773TEST_F(UnicodeExtensionApiTest, IsIdentifierWithInvalidIdentifierReturnsFalse) {
2774 PyObjectPtr unicode(PyUnicode_FromString("b$ar"));
2775 EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0);
2776 EXPECT_EQ(PyErr_Occurred(), nullptr);
2777}
2778
2779TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithEmptyStrReturnsZero) {
2780 const char* str = "";
2781 wchar_t* result = nullptr;
2782 EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/0, /*result=*/&result,
2783 /*wlen=*/nullptr,
2784 /*reason=*/nullptr, _Py_ERROR_STRICT));
2785 ASSERT_NE(result, nullptr);
2786 EXPECT_STREQ(result, L"");
2787 PyMem_RawFree(result);
2788}
2789
2790TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrReturnsZero) {
2791 const char* str = "hello";
2792 wchar_t* result = nullptr;
2793 EXPECT_EQ(0,
2794 _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str), /*result=*/&result,
2795 /*wlen=*/nullptr,
2796 /*reason=*/nullptr, _Py_ERROR_STRICT));
2797 ASSERT_NE(result, nullptr);
2798 EXPECT_EQ(std::wcslen(result), size_t{5});
2799 EXPECT_EQ('h', result[0]);
2800 EXPECT_EQ('e', result[1]);
2801 EXPECT_EQ('l', result[2]);
2802 EXPECT_EQ('l', result[3]);
2803 EXPECT_EQ('o', result[4]);
2804 PyMem_RawFree(result);
2805}
2806
2807TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExDecodesUpToSizeBytes) {
2808 const char* str = "hello";
2809 wchar_t* result = nullptr;
2810 EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/3, /*result=*/&result,
2811 /*wlen=*/nullptr,
2812 /*reason=*/nullptr, _Py_ERROR_STRICT));
2813 ASSERT_NE(result, nullptr);
2814 EXPECT_EQ(std::wcslen(result), size_t{3});
2815 EXPECT_EQ('h', result[0]);
2816 EXPECT_EQ('e', result[1]);
2817 EXPECT_EQ('l', result[2]);
2818 PyMem_RawFree(result);
2819}
2820
2821TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrSetsWlen) {
2822 const char* str = "hello";
2823 wchar_t* result = nullptr;
2824 size_t wlen = 0;
2825 EXPECT_EQ(0,
2826 _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str), /*result=*/&result,
2827 /*wlen=*/&wlen,
2828 /*reason=*/nullptr, _Py_ERROR_STRICT));
2829 ASSERT_NE(result, nullptr);
2830 EXPECT_EQ(std::wcslen(result), size_t{5});
2831 EXPECT_EQ('h', result[0]);
2832 EXPECT_EQ('e', result[1]);
2833 EXPECT_EQ('l', result[2]);
2834 EXPECT_EQ('l', result[3]);
2835 EXPECT_EQ('o', result[4]);
2836 EXPECT_EQ(wlen, size_t{5});
2837 PyMem_RawFree(result);
2838}
2839
2840TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithEmptyStrReturnsZero) {
2841 const wchar_t* str = L"";
2842 char* result = nullptr;
2843 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2844 /*reason=*/nullptr, /*raw_malloc=*/0,
2845 _Py_ERROR_STRICT));
2846 ASSERT_NE(result, nullptr);
2847 EXPECT_STREQ(result, "");
2848 PyMem_Free(result);
2849}
2850
2851TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithASCIIStrReturnsZero) {
2852 const wchar_t* str = L"hello";
2853 char* result = nullptr;
2854 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2855 /*reason=*/nullptr, /*raw_malloc=*/0,
2856 _Py_ERROR_STRICT));
2857 ASSERT_NE(result, nullptr);
2858 EXPECT_STREQ(result, "hello");
2859 PyMem_Free(result);
2860}
2861
2862TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithRawMallocReturnsZero) {
2863 const wchar_t* str = L"hello";
2864 char* result = nullptr;
2865 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2866 /*reason=*/nullptr, /*raw_malloc=*/1,
2867 _Py_ERROR_STRICT));
2868 ASSERT_NE(result, nullptr);
2869 EXPECT_STREQ(result, "hello");
2870 PyMem_RawFree(result);
2871}
2872
2873TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithLatin1ReturnsZero) {
2874 const wchar_t* str = L"cr\xe8me br\xfbl\xe9e";
2875 char* result = nullptr;
2876 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2877 /*reason=*/nullptr, /*raw_malloc=*/0,
2878 _Py_ERROR_STRICT));
2879 ASSERT_NE(result, nullptr);
2880 EXPECT_STREQ(result, u8"cr\xC3\xA8me br\xC3\xBBl\xE0\xBA\x9E");
2881 PyMem_Free(result);
2882}
2883
2884TEST_F(UnicodeExtensionApiTest,
2885 EncodeUTF8ExWithoutSurrogateEscapeReturnsNegativeTwo) {
2886 const wchar_t* str = L"\x0000dc80";
2887 char* result = reinterpret_cast<char*>(0xdeadbeef);
2888 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2889 /*reason=*/nullptr, /*raw_malloc=*/0,
2890 _Py_ERROR_STRICT));
2891 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
2892}
2893
2894TEST_F(UnicodeExtensionApiTest,
2895 EncodeUTF8ExWithoutSurrogateEscapeAndErrorPosSetsErrorPos) {
2896 const wchar_t* str = L"foo\x0000dc80zip";
2897 char* result = reinterpret_cast<char*>(0xdeadbeef);
2898 size_t error_pos = 1337;
2899 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
2900 /*reason=*/nullptr, /*raw_malloc=*/0,
2901 _Py_ERROR_STRICT));
2902 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
2903 EXPECT_EQ(error_pos, size_t{3});
2904}
2905
2906TEST_F(UnicodeExtensionApiTest,
2907 EncodeUTF8ExWithoutSurrogateEscapeAndReasonSetsReason) {
2908 const wchar_t* str = L"\x0000dc80";
2909 char* result = reinterpret_cast<char*>(0xdeadbeef);
2910 const char* reason = nullptr;
2911 EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
2912 /*reason=*/&reason, /*raw_malloc=*/0,
2913 _Py_ERROR_STRICT));
2914 EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
2915 ASSERT_NE(reason, nullptr);
2916 EXPECT_STREQ(reason, "encoding error");
2917}
2918
2919TEST_F(UnicodeExtensionApiTest,
2920 EncodeUTF8ExWithSurrogateEscapeEscapesSurrogate) {
2921 const wchar_t* str = L"\x0000dc80";
2922 char* result = nullptr;
2923 size_t error_pos = 1337;
2924 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
2925 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
2926 /*reason=*/&reason, /*raw_malloc=*/0,
2927 _Py_ERROR_SURROGATEESCAPE));
2928 EXPECT_EQ(error_pos, size_t{1337});
2929 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
2930 ASSERT_NE(result, nullptr);
2931 EXPECT_STREQ(result, u8"\x80");
2932 PyMem_Free(result);
2933}
2934
2935TEST_F(UnicodeExtensionApiTest,
2936 EncodeUTF8ExWithThreeByteCodePointEncodesCodePoint) {
2937 const wchar_t* str = L"\x0000efff";
2938 char* result = nullptr;
2939 size_t error_pos = 1337;
2940 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
2941 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
2942 /*reason=*/nullptr, /*raw_malloc=*/0,
2943 _Py_ERROR_SURROGATEESCAPE));
2944 EXPECT_EQ(error_pos, size_t{1337});
2945 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
2946 ASSERT_NE(result, nullptr);
2947 EXPECT_STREQ(result, u8"\xee\xbf\xbf");
2948 PyMem_Free(result);
2949}
2950
2951TEST_F(UnicodeExtensionApiTest,
2952 EncodeUTF8ExWithFourByteCodePointEncodesCodePoint) {
2953 const wchar_t* str = L"\x10000";
2954 char* result = nullptr;
2955 size_t error_pos = 1337;
2956 const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
2957 EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
2958 /*reason=*/nullptr, /*raw_malloc=*/0,
2959 _Py_ERROR_SURROGATEESCAPE));
2960 EXPECT_EQ(error_pos, size_t{1337});
2961 EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
2962 ASSERT_NE(result, nullptr);
2963 EXPECT_STREQ(result, u8"\xf0\x90\x80\x80");
2964 PyMem_Free(result);
2965}
2966
2967TEST_F(UnicodeExtensionApiTest,
2968 FileSystemDefaultEncodeErrorsMatchesSysGetfilesystemencodeerrors) {
2969 PyRun_SimpleString(R"(
2970import sys
2971errors = sys.getfilesystemencodeerrors()
2972)");
2973 PyObjectPtr errors(mainModuleGet("errors"));
2974 EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncodeErrors));
2975}
2976
2977TEST_F(UnicodeExtensionApiTest,
2978 FileSystemDefaultEncodingMatchesSysGetfilesystemencoding) {
2979 PyRun_SimpleString(R"(
2980import sys
2981encoding = sys.getfilesystemencoding()
2982)");
2983 PyObjectPtr errors(mainModuleGet("encoding"));
2984 EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncoding));
2985}
2986
2987TEST_F(UnicodeExtensionApiTest,
2988 GetDefaultEncodingMatchesSysGetdefaultencoding) {
2989 PyRun_SimpleString(R"(
2990import sys
2991sys_default = sys.getdefaultencoding()
2992)");
2993 PyObjectPtr sys_default(mainModuleGet("sys_default"));
2994 EXPECT_TRUE(isUnicodeEqualsCStr(sys_default, PyUnicode_GetDefaultEncoding()));
2995}
2996
2997TEST_F(UnicodeExtensionApiTest,
2998 DecodeUTF8SurrogateEscapeWithEmptyStringReturnsEmptyString) {
2999 size_t wlen;
3000 wchar_t* wpath = _Py_DecodeUTF8_surrogateescape("", 0, &wlen);
3001 EXPECT_STREQ(wpath, L"");
3002 EXPECT_EQ(wlen, size_t{0});
3003 PyMem_RawFree(wpath);
3004}
3005
3006TEST_F(UnicodeExtensionApiTest, DecodeUTF8SurrogateEscapeReturnsWideString) {
3007 const char* path = "/foo/bar/bat";
3008 size_t len = std::strlen(path);
3009 size_t wlen;
3010 wchar_t* wpath = _Py_DecodeUTF8_surrogateescape(path, len, &wlen);
3011 EXPECT_STREQ(wpath, L"/foo/bar/bat");
3012 EXPECT_EQ(wlen, len);
3013 PyMem_RawFree(wpath);
3014}
3015
3016} // namespace testing
3017} // namespace py