Reactos
at master 610 lines 18 kB view raw
1/*** 2*mbrtowc.c - Convert multibyte char to wide char. 3* 4* Copyright (c) Microsoft Corporation. All rights reserved. 5* 6*Purpose: 7* Convert a multibyte character into the equivalent wide character. 8* 9*******************************************************************************/ 10#include <corecrt_internal_mbstring.h> 11#include <corecrt_internal_ptd_propagation.h> 12#include <corecrt_internal_securecrt.h> 13#include <limits.h> 14#include <locale.h> 15#include <stdio.h> 16#include <uchar.h> 17#include <wchar.h> 18 19using namespace __crt_mbstring; 20 21/*** 22*errno_t _mbrtowc_internal() - Helper function to convert multibyte char to wide character. 23* 24*Purpose: 25* Convert a multi-byte character into the equivalent wide character, 26* according to the specified LC_CTYPE category, or the current locale. 27* [ANSI]. 28* 29* NOTE: Currently, the C libraries support the "C" locale only. 30* Non-C locale support now available under _INTL switch. 31*Entry: 32* wchar_t *dst = pointer to (single) destination wide character 33* const char *s = pointer to multibyte character 34* size_t n = maximum length of multibyte character to consider 35* mbstate_t *pmbst = pointer to state (must be not nullptr) 36* _locale_t plocinfo = locale info 37* 38*Exit: 39* returns, in *pRetValue: 40* If s = nullptr, 0, indicating we only use state-independent 41* character encodings. 42* If s != nullptr: 0 (if *s = null char) 43* -1 (if the next n or fewer bytes not valid mbc) 44* number of bytes comprising converted mbc 45* 46*Exceptions: 47* 48*******************************************************************************/ 49 50_Success_(return != 0) 51_Post_satisfies_(*pRetValue <= _String_length_(s)) 52static errno_t __cdecl _mbrtowc_internal( 53 _Inout_ _Out_range_(<=, 1) int * pRetValue, 54 _Pre_maybenull_ _Out_writes_opt_z_(1) wchar_t * dst, 55 _In_opt_z_ const char * s, 56 _In_ size_t n, 57 _Inout_ mbstate_t * pmbst, 58 _Inout_ __crt_cached_ptd_host& ptd 59 ) throw() 60{ 61 _ASSERTE(pmbst != nullptr); 62 _ASSIGN_IF_NOT_NULL(dst, 0); 63 64 if (!s || n == 0) 65 { 66 /* indicate do not have state-dependent encodings, 67 handle zero length string */ 68 _ASSIGN_IF_NOT_NULL(pRetValue, 0); 69 return 0; 70 } 71 72 if (!*s) 73 { 74 /* handle nullptr char */ 75 _ASSIGN_IF_NOT_NULL(pRetValue, 0); 76 return 0; 77 } 78 79 const _locale_t locale = ptd.get_locale(); 80 81 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) 82 { 83 const size_t retval = __mbrtowc_utf8(dst, s, n, pmbst, ptd); 84 _ASSIGN_IF_NOT_NULL(pRetValue, static_cast<int>(retval)); 85 return ptd.get_errno().value_or(0); 86 } 87 88 const int locale_mb_cur_max = locale->locinfo->_public._locale_mb_cur_max; 89 _ASSERTE(locale_mb_cur_max == 1 || locale_mb_cur_max == 2); 90 91 if (locale->locinfo->locale_name[LC_CTYPE] == nullptr) 92 { 93 _ASSIGN_IF_NOT_NULL(dst, (wchar_t) (unsigned char) *s); 94 _ASSIGN_IF_NOT_NULL(pRetValue, 1); 95 return 0; 96 } 97 98 if (pmbst->_Wchar != 0) 99 { 100 /* complete two-byte multibyte character */ 101 ((char *) pmbst)[1] = *s; 102 if (locale_mb_cur_max <= 1 || 103 (__acrt_MultiByteToWideChar( 104 locale->locinfo->_public._locale_lc_codepage, 105 MB_PRECOMPOSED | MB_ERR_INVALID_CHARS, 106 (char *) pmbst, 107 2, 108 dst, 109 (dst != nullptr ? 1 : 0)) == 0)) 110 { 111 /* translation failed */ 112 pmbst->_Wchar = 0; 113 _ASSIGN_IF_NOT_NULL(dst, 0); 114 _ASSIGN_IF_NOT_NULL(pRetValue, -1); 115 return ptd.get_errno().set(EILSEQ); 116 } 117 pmbst->_Wchar = 0; 118 _ASSIGN_IF_NOT_NULL(pRetValue, locale_mb_cur_max); 119 return 0; 120 } 121 else if (_isleadbyte_fast_internal((unsigned char) *s, locale)) 122 { 123 /* multi-byte char */ 124 if (n < (size_t) locale_mb_cur_max) 125 { 126 /* save partial multibyte character */ 127 ((char *) pmbst)[0] = *s; 128 _ASSIGN_IF_NOT_NULL(pRetValue, -2); 129 return 0; 130 } 131 else if (locale_mb_cur_max <= 1 || 132 (__acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage, 133 MB_PRECOMPOSED | MB_ERR_INVALID_CHARS, 134 s, 135 static_cast<int>(__min(strlen(s), INT_MAX)), 136 dst, 137 (dst != nullptr ? 1 : 0)) == 0)) 138 { 139 /* validate high byte of mbcs char */ 140 if (!*(s + 1)) 141 { 142 pmbst->_Wchar = 0; 143 _ASSIGN_IF_NOT_NULL(dst, 0); 144 _ASSIGN_IF_NOT_NULL(pRetValue, -1); 145 return ptd.get_errno().set(EILSEQ); 146 } 147 } 148 _ASSIGN_IF_NOT_NULL(pRetValue, locale_mb_cur_max); 149 return 0; 150 } 151 else { 152 /* single byte char */ 153 if (__acrt_MultiByteToWideChar( 154 locale->locinfo->_public._locale_lc_codepage, 155 MB_PRECOMPOSED | MB_ERR_INVALID_CHARS, 156 s, 157 1, 158 dst, 159 (dst != nullptr ? 1 : 0)) == 0) 160 { 161 _ASSIGN_IF_NOT_NULL(dst, 0); 162 _ASSIGN_IF_NOT_NULL(pRetValue, -1); 163 return ptd.get_errno().set(EILSEQ); 164 } 165 166 _ASSIGN_IF_NOT_NULL(pRetValue, sizeof(char) ); 167 return 0; 168 } 169} 170 171 172/*** 173*wint_t btowc(c) - translate single byte to wide char 174* 175*Purpose: 176* 177*Entry: 178* 179*Exit: 180* 181*Exceptions: 182* 183*******************************************************************************/ 184 185extern "C" wint_t __cdecl btowc( 186 int c 187 ) 188{ 189 if (c == EOF) 190 { 191 return WEOF; 192 } 193 else 194 { 195 /* convert as one-byte string */ 196 char ch = (char) c; 197 mbstate_t mbst = {}; 198 wchar_t wc = 0; 199 int retValue = -1; 200 201 __crt_cached_ptd_host ptd; 202 _mbrtowc_internal(&retValue, &wc, &ch, 1, &mbst, ptd); 203 return (retValue < 0 ? WEOF : wc); 204 } 205} 206 207 208/*** 209*size_t mbrlen(s, n, pst) - determine next multibyte code, restartably 210* 211*Purpose: 212* 213*Entry: 214* 215*Exit: 216* 217*Exceptions: 218* 219*******************************************************************************/ 220 221extern "C" size_t __cdecl mbrlen( 222 const char *s, 223 size_t n, 224 mbstate_t *pst 225 ) 226{ 227 static mbstate_t mbst = {}; 228 int retValue = -1; 229 230 __crt_cached_ptd_host ptd; 231 _mbrtowc_internal(&retValue, nullptr, s, n, (pst != nullptr ? pst : &mbst), ptd); 232 return retValue; 233} 234 235 236/*** 237*size_t mbrtowc(pwc, s, n, pst) - translate multibyte to wchar_t, restartably 238* 239*Purpose: 240* 241*Entry: 242* 243*Exit: 244* 245*Exceptions: 246* 247*******************************************************************************/ 248 249extern "C" size_t __cdecl mbrtowc( 250 wchar_t *dst, 251 const char *s, 252 size_t n, 253 mbstate_t *pst 254 ) 255{ 256 static mbstate_t mbst = {}; 257 int retValue = -1; 258 259 __crt_cached_ptd_host ptd; 260 261 if (s != nullptr) 262 { 263 _mbrtowc_internal(&retValue, dst, s, n, (pst != nullptr ? pst : &mbst), ptd); 264 } 265 else 266 { 267 _mbrtowc_internal(&retValue, nullptr, "", 1, (pst != nullptr ? pst : &mbst), ptd); 268 } 269 return retValue; 270} 271 272 273/*** 274*size_t mbsrtowcs(wcs, ps, n, pst) - translate multibyte string to wide, 275* restartably 276* 277*Purpose: 278* 279*Entry: 280* 281*Exit: 282* 283*Exceptions: 284* 285*******************************************************************************/ 286 287/* Helper function shared by the secure and non-secure versions. */ 288 289_Success_(return == 0) 290static size_t __cdecl _mbsrtowcs_helper( 291 _Out_writes_opt_z_(n) wchar_t * wcs, 292 _Deref_pre_opt_z_ const char ** ps, 293 _In_ size_t n, 294 _Inout_ mbstate_t * pst, 295 _Inout_ __crt_cached_ptd_host& ptd 296 ) throw() 297{ 298 /* validation section */ 299 _UCRT_VALIDATE_RETURN(ptd, ps != nullptr, EINVAL, (size_t) - 1); 300 301 static mbstate_t mbst = {}; 302 const char *s = *ps; 303 int i = 0; 304 size_t nwc = 0; 305 306 // Use the static cached state if necessary 307 if (pst == nullptr) 308 { 309 pst = &mbst; 310 } 311 312 const _locale_t locale = ptd.get_locale(); 313 314 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) 315 { 316 return __mbsrtowcs_utf8(wcs, ps, n, pst, ptd); 317 } 318 319 if (wcs == nullptr) 320 { 321 for (;; ++nwc, s += i) 322 { 323 /* translate but don't store */ 324 wchar_t wc; 325 _mbrtowc_internal(&i, &wc, s, INT_MAX, pst, ptd); 326 if (i < 0) 327 { 328 return (size_t) - 1; 329 } 330 else if (i == 0) 331 { 332 return nwc; 333 } 334 } 335 } 336 337 for (; 0 < n; ++nwc, s += i, ++wcs, --n) 338 { 339 /* translate and store */ 340 _mbrtowc_internal(&i, wcs, s, INT_MAX, pst, ptd); 341 if (i < 0) 342 { 343 /* encountered invalid sequence */ 344 nwc = (size_t) - 1; 345 break; 346 } 347 else if (i == 0) 348 { 349 /* encountered terminating null */ 350 s = 0; 351 break; 352 } 353 } 354 355 *ps = s; 356 return nwc; 357} 358 359/*** 360*size_t mbsrtowcs() - Convert multibyte char string to wide char string. 361* 362*Purpose: 363* Convert a multi-byte char string into the equivalent wide char string, 364* according to the LC_CTYPE category of the current locale. 365* Same as mbsrtowcs_s(), but the destination may not be null terminated. 366* If there's not enough space, we return EINVAL. 367* 368*Entry: 369* wchar_t *pwcs = pointer to destination wide character string buffer 370* const char **s = pointer to source multibyte character string 371* size_t n = maximum number of wide characters to store (not including the terminating null character) 372* mbstate_t *pst = pointer to the conversion state 373* 374*Exit: 375* The nunber if wide characters written to *wcs, not including any terminating null character) 376* 377*Exceptions: 378* Input parameters are validated. Refer to the validation section of the function. 379* 380*******************************************************************************/ 381extern "C" size_t __cdecl mbsrtowcs( 382 wchar_t * wcs, 383 const char ** ps, 384 size_t n, 385 mbstate_t * pst 386 ) 387{ 388 /* Call a non-deprecated helper to do the work. */ 389 __crt_cached_ptd_host ptd; 390 return _mbsrtowcs_helper(wcs, ps, n, pst, ptd); 391} 392 393 394/*** 395*errno_t mbsrtowcs_s() - Convert multibyte char string to wide char string. 396* 397*Purpose: 398* Convert a multi-byte char string into the equivalent wide char string, 399* according to the LC_CTYPE category of the current locale. 400* Same as mbsrtowcs(), but the destination is ensured to be null terminated. 401* If there's not enough space, we return EINVAL. 402* 403*Entry: 404* size_t *pRetValue = Number of bytes modified including the terminating nullptr 405* This pointer can be nullptr. 406* wchar_t *pwcs = pointer to destination wide character string buffer 407* size_t sizeInWords = size of the destination buffer 408* const char **s = pointer to source multibyte character string 409* size_t n = maximum number of wide characters to store (not including the terminating null character) 410* mbstate_t *pst = pointer to the conversion state 411* 412*Exit: 413* The error code. 414* 415*Exceptions: 416* Input parameters are validated. Refer to the validation section of the function. 417* 418*******************************************************************************/ 419 420static errno_t __cdecl mbsrtowcs_s_internal( 421 size_t * pRetValue, 422 wchar_t * dst, 423 size_t sizeInWords, 424 const char ** ps, 425 size_t n, 426 mbstate_t * pmbst, 427 __crt_cached_ptd_host& ptd 428 ) 429{ 430 size_t retsize; 431 432 /* validation section */ 433 _ASSIGN_IF_NOT_NULL(pRetValue, (size_t) - 1); 434 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (dst == nullptr && sizeInWords == 0) || (dst != nullptr && sizeInWords > 0), EINVAL); 435 if (dst != nullptr) 436 { 437 _RESET_STRING(dst, sizeInWords); 438 } 439 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, ps != nullptr, EINVAL); 440 441 /* Call a non-deprecated helper to do the work. */ 442 443 retsize = _mbsrtowcs_helper(dst, ps, (n > sizeInWords ? sizeInWords : n), pmbst, ptd); 444 445 if (retsize == (size_t) - 1) 446 { 447 if (dst != nullptr) 448 { 449 _RESET_STRING(dst, sizeInWords); 450 } 451 return ptd.get_errno().value_or(0); 452 } 453 454 /* count the null terminator */ 455 retsize++; 456 457 if (dst != nullptr) 458 { 459 /* return error if the string does not fit */ 460 if (retsize > sizeInWords) 461 { 462 _RESET_STRING(dst, sizeInWords); 463 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, sizeInWords <= retsize, ERANGE); 464 } 465 else 466 { 467 /* ensure the string is null terminated */ 468 dst[retsize - 1] = '\0'; 469 } 470 } 471 472 _ASSIGN_IF_NOT_NULL(pRetValue, retsize); 473 474 return 0; 475} 476 477extern "C" errno_t __cdecl mbsrtowcs_s( 478 size_t * pRetValue, 479 wchar_t * dst, 480 size_t sizeInWords, 481 const char ** ps, 482 size_t n, 483 mbstate_t * pmbst 484 ) 485{ 486 __crt_cached_ptd_host ptd; 487 return mbsrtowcs_s_internal(pRetValue, dst, sizeInWords, ps, n, pmbst, ptd); 488} 489 490size_t __cdecl __crt_mbstring::__mbrtowc_utf8(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd) 491{ 492 static_assert(sizeof(wchar_t) == 2, "wchar_t is assumed to be 16 bits"); 493 char32_t c32; 494 const size_t retval = __mbrtoc32_utf8(&c32, s, n, ps, ptd); 495 // If we succesfully consumed a character, write the result after a quick range check 496 if (retval <= 4) 497 { 498 if (c32 > 0xffff) 499 { 500 // A 4-byte UTF-8 character won't fit into a single UTF-16 wchar 501 // So return the "replacement char" 502 c32 = 0xfffd; 503 } 504 _ASSIGN_IF_NOT_NULL(pwc, static_cast<wchar_t>(c32)); 505 } 506 return retval; 507} 508 509size_t __cdecl __crt_mbstring::__mbsrtowcs_utf8(wchar_t* dst, const char** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd) 510{ 511 const char* current_src = *src; 512 513 auto compute_available = [](const char* s) -> size_t 514 { 515 // We shouldn't just blindly request to read 4 bytes, because there might not be 4 bytes left to read. 516 if (s[0] == '\0') 517 { 518 return 1; 519 } 520 else if (s[1] == '\0') 521 { 522 return 2; 523 } 524 else if (s[2] == '\0') 525 { 526 return 3; 527 } 528 return 4; 529 }; 530 531 if (dst != nullptr) 532 { 533 wchar_t* current_dest = dst; 534 for (; len > 0; --len) 535 { 536 const size_t avail = compute_available(current_src); 537 char32_t c32; 538 const size_t retval = __mbrtoc32_utf8(&c32, current_src, avail, ps, ptd); 539 if (retval == __crt_mbstring::INVALID) 540 { 541 // Set src to the beginning of the invalid char 542 *src = current_src; 543 ptd.get_errno().set(EILSEQ); 544 return retval; 545 } 546 else if (retval == 0) 547 { 548 current_src = nullptr; 549 *current_dest = L'\0'; 550 break; 551 } 552 else if (c32 > 0xffff) 553 { 554 // This is going to take two output wchars. Make sure we have enough room for this output. 555 if (len > 1) 556 { 557 --len; 558 c32 -= 0x10000; 559 const char16_t high_surrogate = static_cast<char16_t>((c32 >> 10) | 0xd800); 560 const char16_t low_surrogate = static_cast<char16_t>((c32 & 0x03ff) | 0xdc00); 561 *current_dest++ = high_surrogate; 562 *current_dest++ = low_surrogate; 563 } 564 else 565 { 566 break; 567 } 568 } 569 else 570 { 571 *current_dest++ = static_cast<wchar_t>(c32); 572 } 573 current_src += retval; 574 } 575 *src = current_src; 576 return current_dest - dst; 577 } 578 else 579 { 580 size_t total_count = 0; 581 for (;; ++total_count) 582 { 583 const size_t avail = compute_available(current_src); 584 585 const size_t retval = __mbrtoc32_utf8(nullptr, current_src, avail, ps, ptd); 586 if (retval == __crt_mbstring::INVALID) 587 { 588 ptd.get_errno().set(EILSEQ); 589 return retval; 590 } 591 else if (retval == 0) 592 { 593 break; 594 } 595 else if (retval == 4) 596 { 597 // SMP characters take two UTF-16 wide chars 598 ++total_count; 599 } 600 else 601 { 602 // This should be impossible. Means we encountered a multibyte char 603 // that extended past the null terminator, or is more than 4 bytes long 604 _ASSERTE(retval != __crt_mbstring::INCOMPLETE); 605 } 606 current_src += retval; 607 } 608 return total_count; 609 } 610}