Reactos
1/***
2*wcrtomb.cpp - Convert wide character to multibyte character, with locale.
3*
4* Copyright (c) Microsoft Corporation. All rights reserved.
5*
6*Purpose:
7* Convert a wide character into the equivalent multibyte character.
8*
9*******************************************************************************/
10#include <corecrt_internal_mbstring.h>
11#include <corecrt_internal_ptd_propagation.h>
12#include <corecrt_internal_securecrt.h>
13#include <limits.h>
14#include <locale.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <wchar.h>
19
20using namespace __crt_mbstring;
21
22/***
23*errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character.
24*
25*Purpose:
26* Convert a wide character into the equivalent multi-byte character,
27* according to the specified LC_CTYPE category, or the current locale.
28* [ANSI].
29*
30* NOTE: Currently, the C libraries support the "C" locale only.
31* Non-C locale support now available under _INTL switch.
32*Entry:
33* int *return_value = the number of chars written (-1 in error case)
34* char *destination = pointer to multibyte character
35* size_t destination_count = size of the destinarion buffer
36* wchar_t wchar = source wide character
37* mbstate_t *state = pointer to state (not used)
38* _locale_t locale = locale info
39*
40*Exit:
41* Returns:
42* Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case.
43*
44*Exceptions:
45*
46*******************************************************************************/
47
48_Success_(return == 0)
49static errno_t __cdecl _wcrtomb_internal(
50 int* const return_value,
51 __out_bcount_z_opt(destination_count) char* const destination,
52 size_t const destination_count,
53 wchar_t const wchar,
54 mbstate_t* const state,
55 _Inout_ __crt_cached_ptd_host& ptd
56 )
57{
58 _ASSERTE(destination != nullptr && destination_count > 0);
59
60 _locale_t const locale = ptd.get_locale();
61
62 _ASSERTE(
63 locale->locinfo->_public._locale_mb_cur_max == 1 ||
64 locale->locinfo->_public._locale_mb_cur_max == 2 ||
65 locale->locinfo->_public._locale_lc_codepage == CP_UTF8);
66
67 if (state)
68 {
69 state->_Wchar = 0;
70 }
71
72 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
73 {
74 // Unlike c16rtomb. wctomb/wcrtomb have no ability to process a partial code point.
75 // So, we could call c16rtomb and check for a lone surrogate or other error, or for simplicity
76 // We can instead just call c32rtomb and check for any error. I choose the latter.
77 static mbstate_t local_state{};
78 int result = static_cast<int>(__crt_mbstring::__c32rtomb_utf8(destination, static_cast<char32_t>(wchar), (state != nullptr ? state : &local_state), ptd));
79 if (return_value != nullptr)
80 {
81 *return_value = result;
82 }
83 if (result <= 4)
84 {
85 return 0;
86 }
87 else
88 {
89 return ptd.get_errno().value_or(0);
90 }
91 }
92
93 if (!locale->locinfo->locale_name[LC_CTYPE])
94 {
95 if (wchar > 255) // Validate high byte
96 {
97 if (return_value)
98 *return_value = -1;
99
100 return ptd.get_errno().set(EILSEQ);
101 }
102
103 *destination = static_cast<char>(wchar);
104 if (return_value)
105 {
106 *return_value = 1;
107 }
108
109 return 0;
110 }
111
112 BOOL default_used{};
113 int const size = __acrt_WideCharToMultiByte(
114 locale->locinfo->_public._locale_lc_codepage,
115 0,
116 &wchar,
117 1,
118 destination,
119 static_cast<int>(destination_count),
120 nullptr,
121 &default_used);
122
123 if (size == 0 || default_used)
124 {
125 if (return_value)
126 {
127 *return_value = -1;
128 }
129
130 return ptd.get_errno().set(EILSEQ);
131 }
132
133 if (return_value)
134 {
135 *return_value = size;
136 }
137
138 return 0;
139}
140
141/***
142*errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably
143*
144*Purpose:
145*
146*Entry:
147*
148*Exit:
149*
150*Exceptions:
151*
152*******************************************************************************/
153
154static errno_t __cdecl wcrtomb_s_internal(
155 size_t* const return_value,
156 char* const destination,
157 size_t const destination_count,
158 wchar_t const wchar,
159 mbstate_t* const state,
160 __crt_cached_ptd_host& ptd
161 )
162{
163 // Note that we do not force destination_count > 0 in the destination !=
164 // nullptr case because we do not need to add a null terminator, due to
165 // the fact that the destination will receive a character and not a string.
166 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL);
167
168 errno_t e = 0;
169 int int_return_value = -1;
170 if (destination == nullptr)
171 {
172 char buf[MB_LEN_MAX];
173 e = _wcrtomb_internal(&int_return_value, buf, MB_LEN_MAX, wchar, state, ptd);
174 }
175 else
176 {
177 e = _wcrtomb_internal(&int_return_value, destination, destination_count, wchar, state, ptd);
178 }
179
180 if (return_value != nullptr)
181 {
182 *return_value = static_cast<size_t>(int_return_value);
183 }
184
185 return e;
186}
187
188extern "C" errno_t __cdecl wcrtomb_s(
189 size_t* const return_value,
190 char* const destination,
191 size_t const destination_count,
192 wchar_t const wchar,
193 mbstate_t* const state
194 )
195{
196 __crt_cached_ptd_host ptd;
197 return wcrtomb_s_internal(return_value, destination, destination_count, wchar, state, ptd);
198}
199
200extern "C" size_t __cdecl wcrtomb(
201 char* const destination,
202 wchar_t const wchar,
203 mbstate_t* const state
204 )
205{
206 size_t return_value = static_cast<size_t>(-1);
207 wcrtomb_s(&return_value, destination, (destination == nullptr ? 0 : MB_LEN_MAX), wchar, state);
208 return return_value;
209}
210
211/***
212*errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte
213* string
214*
215*Purpose:
216*
217*Entry:
218*
219*Exit:
220*
221*Exceptions:
222*
223*******************************************************************************/
224
225/* Helper shared by secure and non-secure functions. */
226
227static size_t __cdecl _wcsrtombs_internal(
228 _Pre_maybenull_ _Post_z_ char* destination,
229 _Inout_ _Deref_prepost_z_ wchar_t const** const source,
230 _In_ size_t n,
231 _Out_opt_ mbstate_t* const state,
232 _Inout_ __crt_cached_ptd_host& ptd
233 ) throw()
234{
235 /* validation section */
236 _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1);
237
238 _locale_t const locale = ptd.get_locale();
239
240 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
241 {
242 return __wcsrtombs_utf8(destination, source, n, state, ptd);
243 }
244
245 char buf[MB_LEN_MAX];
246 int i = 0;
247 size_t nc = 0;
248 wchar_t const* wcs = *source;
249
250 if (!destination)
251 {
252 for (; ; nc += i, ++wcs)
253 {
254 /* translate but don't store */
255 _wcrtomb_internal(&i, buf, MB_LEN_MAX, *wcs, state, ptd);
256 if (i <= 0)
257 {
258 return static_cast<size_t>(-1);
259 }
260 else if (buf[i - 1] == '\0')
261 {
262 return nc + i - 1;
263 }
264 }
265 }
266
267 for (; 0 < n; nc += i, ++wcs, destination += i, n -= i)
268 {
269 /* translate and store */
270 char *t = nullptr;
271
272 if (n < (size_t)locale->locinfo->_public._locale_mb_cur_max)
273 {
274 t = buf;
275 }
276 else
277 {
278 t = destination;
279 }
280
281 _wcrtomb_internal(&i, t, MB_LEN_MAX, *wcs, state, ptd);
282 if (i <= 0)
283 {
284 /* encountered invalid sequence */
285 nc = (size_t)-1;
286 break;
287 }
288
289 if (destination == t)
290 {
291 /* do nothing */
292 }
293 else if (n < static_cast<size_t>(i))
294 {
295 break; // Won't all fit
296 }
297 else
298 {
299 memcpy_s(destination, n, buf, i);
300 }
301
302 if (destination[i - 1] == '\0')
303 {
304 // Encountered terminating null
305 *source = 0;
306 return nc + i - 1;
307 }
308 }
309
310 *source = wcs;
311 return nc;
312}
313
314extern "C" size_t __cdecl wcsrtombs(
315 char* const destination,
316 wchar_t const** const source,
317 size_t const n,
318 mbstate_t* const state
319 )
320{
321 __crt_cached_ptd_host ptd;
322 return _wcsrtombs_internal(destination, source, n, state, ptd);
323}
324
325/***
326*errno_t wcstombs_s() - Convert wide char string to multibyte char string.
327*
328*Purpose:
329* Convert a wide char string into the equivalent multibyte char string,
330* according to the LC_CTYPE category of the current locale.
331*
332* The destination string is always null terminated.
333*
334*Entry:
335* size_t *return_value = Number of bytes modified including the terminating nullptr
336* This pointer can be nullptr.
337* char *destination = pointer to destination multibyte char string
338* size_t destination_count = size of the destination buffer
339* const wchar_t *source = pointer to source wide character string
340* size_t n = maximum number of bytes to store in s (not including the terminating nullptr)
341* mbstate_t *state = pointer to state
342*
343*Exit:
344* The error code.
345*
346*Exceptions:
347* Input parameters are validated. Refer to the validation section of the function.
348*
349*******************************************************************************/
350
351extern "C" errno_t __cdecl wcsrtombs_s(
352 size_t* const return_value,
353 char* const destination,
354 size_t const destination_count,
355 wchar_t const** const source,
356 size_t const n,
357 mbstate_t* const state
358 )
359{
360 __crt_cached_ptd_host ptd;
361
362 if (return_value != nullptr)
363 {
364 *return_value = static_cast<size_t>(-1);
365 }
366
367 _UCRT_VALIDATE_RETURN_ERRCODE(
368 ptd,
369 (destination == nullptr && destination_count == 0) ||
370 (destination != nullptr && destination_count > 0),
371 EINVAL);
372
373 if (destination != nullptr)
374 {
375 _RESET_STRING(destination, destination_count);
376 }
377
378 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL);
379
380 size_t retsize = _wcsrtombs_internal(destination, source, (n > destination_count ? destination_count : n), state, ptd);
381 if (retsize == static_cast<size_t>(-1))
382 {
383 if (destination != nullptr)
384 {
385 _RESET_STRING(destination, destination_count);
386 }
387
388 return ptd.get_errno().value_or(0);
389 }
390
391 ++retsize; // Account for the null terminator
392
393 if (destination != nullptr)
394 {
395 // Return error if the string does not fit:
396 if (retsize > destination_count)
397 {
398 _RESET_STRING(destination, destination_count);
399 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE);
400 }
401
402 // Ensure the string is null terminated:
403 destination[retsize - 1] = '\0';
404 }
405
406 if (return_value != nullptr)
407 {
408 *return_value = retsize;
409 }
410
411 return 0;
412}
413
414
415
416// Converts a wide character into a one-byte character
417extern "C" int __cdecl wctob(wint_t const wchar)
418{
419 __crt_cached_ptd_host ptd;
420
421 if (wchar == WEOF)
422 {
423 return EOF;
424 }
425
426 int return_value = -1;
427 char local_buffer[MB_LEN_MAX];
428
429 mbstate_t state{};
430 errno_t const e = _wcrtomb_internal(&return_value, local_buffer, MB_LEN_MAX, wchar, &state, ptd);
431 if (e == 0 && return_value == 1)
432 {
433 return local_buffer[0];
434 }
435
436 return EOF;
437}
438
439size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd)
440{
441 const wchar_t* current_src = *src;
442 char buf[MB_LEN_MAX];
443
444 if (dst != nullptr)
445 {
446 char* current_dest = dst;
447
448 // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair)
449 // In case of a failure, keep track of where the current code point began, which might be the previous
450 // wchar for a surrogate pair
451 const wchar_t* start_of_code_point = current_src;
452 for (;;)
453 {
454 // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer
455 // the next char isn't guaranteed to fit, so put it into a temp buffer
456 char* temp;
457 if (len < 4)
458 {
459 temp = buf;
460 }
461 else
462 {
463 temp = current_dest;
464 }
465 const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd);
466
467 if (retval == __crt_mbstring::INVALID)
468 {
469 // Set src to the beginning of the invalid char
470 // If this was the second half of a surrogate pair, return the beginning of the surrogate pair
471 *src = start_of_code_point;
472 return retval;
473 }
474
475 if (temp == current_dest)
476 {
477 // We wrote in-place. Nothing to do.
478 }
479 else if (len < retval)
480 {
481 // Won't fit, so bail out
482 // If this was the second half of a surrogate pair, make sure we return that location
483 current_src = start_of_code_point;
484 break;
485 }
486 else
487 {
488 // Will fit in remaining buffer, so let's copy it over
489 memcpy(current_dest, temp, retval);
490 }
491
492 if (retval > 0 && current_dest[retval - 1] == '\0')
493 {
494 // Reached null terminator, so break out, but don't count that last terminating byte
495 current_src = nullptr;
496 current_dest += retval - 1;
497 break;
498 }
499
500 ++current_src;
501 if (retval > 0)
502 {
503 start_of_code_point = current_src;
504 }
505
506 len -= retval;
507 current_dest += retval;
508 }
509 *src = current_src;
510 return current_dest - dst;
511 }
512 else
513 {
514 size_t total_count = 0;
515 for (;;)
516 {
517 const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd);
518 if (retval == __crt_mbstring::INVALID)
519 {
520 return retval;
521 }
522 else if (retval > 0 && buf[retval - 1] == '\0')
523 {
524 // Hit null terminator. Don't count it in the return value.
525 total_count += retval - 1;
526 break;
527 }
528 total_count += retval;
529 ++current_src;
530 }
531 return total_count;
532 }
533}