Reactos
at listview 93 lines 2.3 kB view raw
1// 2// c32rtomb.cpp 3// 4// Copyright (c) Microsoft Corporation. All rights reserved. 5// 6 7#include <corecrt_internal_mbstring.h> 8#include <corecrt_internal_ptd_propagation.h> 9#include <stdint.h> 10#include <uchar.h> 11 12using namespace __crt_mbstring; 13 14extern "C" size_t __cdecl c32rtomb(char* s, char32_t c32, mbstate_t* ps) 15{ 16 // TODO: Bug 13307590 says this is always assuming UTF-8. 17 __crt_cached_ptd_host ptd; 18 return __c32rtomb_utf8(s, c32, ps, ptd); 19} 20 21size_t __cdecl __crt_mbstring::__c32rtomb_utf8(char* s, char32_t c32, mbstate_t* ps, __crt_cached_ptd_host& ptd) 22{ 23 if (!s) 24 { 25 // Equivalent to c32rtomb(buf, U'\0', ps) for some internal buffer buf 26 *ps = {}; 27 return 1; 28 } 29 30 if (c32 == U'\0') 31 { 32 *s = '\0'; 33 *ps = {}; 34 return 1; 35 } 36 37 // Fast path for ASCII 38 if ((c32 & ~0x7f) == 0) 39 { 40 *s = static_cast<char>(c32); 41 return 1; 42 } 43 44 // Figure out how many trail bytes we need 45 size_t trail_bytes; 46 uint8_t lead_byte; 47 if ((c32 & ~0x7ff) == 0) 48 { 49 trail_bytes = 1; 50 lead_byte = 0xc0; 51 } 52 else if ((c32 & ~0xffff) == 0) 53 { 54 // high/low surrogates are only valid in UTF-16 encoded data 55 if (0xd800 <= c32 && c32 <= 0xdfff) 56 { 57 return return_illegal_sequence(ps, ptd); 58 } 59 trail_bytes = 2; 60 lead_byte = 0xe0; 61 } 62 else if ((c32 & ~0x001fffff) == 0) 63 { 64 // Unicode's max code point is 0x10ffff 65 if (0x10ffff < c32) 66 { 67 return return_illegal_sequence(ps, ptd); 68 } 69 trail_bytes = 3; 70 lead_byte = 0xf0; 71 } 72 else 73 { 74 return return_illegal_sequence(ps, ptd); 75 } 76 _ASSERTE(1 <= trail_bytes && trail_bytes <= 3); 77 78 // Put six bits into each of the trail bytes 79 // Lowest bits are in the last UTF-8 byte. 80 // Filling back to front. 81 for (size_t i = trail_bytes; i > 0; --i) 82 { 83 s[i] = (c32 & 0x3f) | 0x80; 84 c32 >>= 6; 85 } 86 87 // The first byte needs the upper (trail_bytes + 1) bits to store the length 88 // And the lower (7 - trail_bytes) to store the upper bits of the code point 89 _ASSERTE(c32 < (1u << (7 - trail_bytes))); 90 s[0] = static_cast<uint8_t>(c32) | lead_byte; 91 92 return reset_and_return(trail_bytes + 1, ps); 93}