Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2022, Tim Schumacher <timschumi@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Assertions.h>
9#include <AK/Format.h>
10#include <AK/ScopeGuard.h>
11#include <AK/UnicodeUtils.h>
12#include <errno.h>
13#include <string.h>
14#include <time.h>
15#include <wchar.h>
16
17static unsigned int mbstate_expected_bytes(mbstate_t* state)
18{
19 if (state->stored_bytes == 0) {
20 return 0;
21 }
22
23 unsigned char first = state->bytes[0];
24
25 // Single-byte sequences have their first bit unset
26 if ((first & 0b10000000) == 0) {
27 return 1;
28 }
29
30 // Two-byte sequences start with 0b110xxxxx
31 if ((first & 0b11100000) == 0b11000000) {
32 return 2;
33 }
34
35 // Three-byte sequences start with 0b1110xxxx
36 if ((first & 0b11110000) == 0b11100000) {
37 return 3;
38 }
39
40 // Four-byte sequences start with 0b11110xxx
41 if ((first & 0b11111000) == 0b11110000) {
42 return 4;
43 }
44
45 // Everything else is invalid
46 return 0;
47}
48
49extern "C" {
50
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcslen.html
// Returns the number of wide characters in `str`, not counting the terminating null.
size_t wcslen(wchar_t const* str)
{
    wchar_t const* cursor = str;
    for (; *cursor != L'\0'; ++cursor) {
    }
    return static_cast<size_t>(cursor - str);
}
59
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscpy.html
// Copies `src`, including its terminating null, into `dest` and returns `dest`.
wchar_t* wcscpy(wchar_t* dest, wchar_t const* src)
{
    for (size_t index = 0;; ++index) {
        wchar_t const current = src[index];
        dest[index] = current;
        // The terminator is copied as well before we stop.
        if (current == L'\0')
            break;
    }
    return dest;
}
68
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsdup.html
// Returns a malloc()ed copy of `str`, or nullptr with errno set to ENOMEM
// if the allocation fails.
wchar_t* wcsdup(wchar_t const* str)
{
    // Reserve room for every character plus the terminating null.
    size_t const character_count = wcslen(str) + 1;
    auto* duplicate = static_cast<wchar_t*>(malloc(sizeof(wchar_t) * character_count));

    if (duplicate == nullptr) {
        errno = ENOMEM;
        return nullptr;
    }

    return wcscpy(duplicate, str);
}
82
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncpy.html
// Copies at most `num` wide characters from `src` to `dest` and returns `dest`.
// If `src` is shorter than `num`, the remainder of `dest` is filled with null
// wide characters; if it is `num` characters or longer, `dest` is NOT null-terminated.
// Exactly `num` elements of `dest` are written, so `num == 0` writes nothing.
wchar_t* wcsncpy(wchar_t* dest, wchar_t const* src, size_t num)
{
    size_t index = 0;

    // Copy characters until we either run out of room or hit the end of src.
    for (; index < num && src[index] != L'\0'; ++index)
        dest[index] = src[index];

    // Pad whatever is left (including the terminator position) with nulls.
    for (; index < num; ++index)
        dest[index] = L'\0';

    return dest;
}
91
// Copies `src` into `dest`, which has room for `n` wide characters, truncating
// if necessary. `dest` is always null-terminated unless `n` is 0.
// Returns the length of `src`, so a result >= `n` signals truncation.
size_t wcslcpy(wchar_t* dest, wchar_t const* src, size_t n)
{
    size_t copied = 0;

    if (n != 0) {
        // Leave one slot free for the terminator.
        while (copied + 1 < n && src[copied] != L'\0') {
            dest[copied] = src[copied];
            ++copied;
        }
        dest[copied] = L'\0';
    }

    // Keep walking src (without copying) to determine its full length.
    size_t source_length = copied;
    while (src[source_length] != L'\0')
        ++source_length;

    return source_length;
}
103
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscmp.html
// Compares two wide strings. Returns 0 if they are equal, otherwise the
// difference between the first pair of wide characters that differ.
int wcscmp(wchar_t const* s1, wchar_t const* s2)
{
    for (;; ++s1, ++s2) {
        if (*s1 != *s2)
            return *s1 - *s2;

        // Both characters are equal here, so one terminator means both strings ended.
        if (*s1 == L'\0')
            return 0;
    }
}
112
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncmp.html
// Compares at most `n` wide characters of two strings. Returns 0 if the compared
// prefixes are equal, otherwise the difference between the first differing pair.
int wcsncmp(wchar_t const* s1, wchar_t const* s2, size_t n)
{
    for (size_t index = 0; index < n; ++index) {
        if (s1[index] != s2[index])
            return s1[index] - s2[index];

        // Equal characters: a terminator ends both strings at once.
        if (s1[index] == L'\0')
            break;
    }
    return 0;
}
126
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcschr.html
// Returns a pointer to the first occurrence of `c` in `str`, or nullptr if there
// is none. The terminating null counts as part of the string, so it can be found.
wchar_t* wcschr(wchar_t const* str, int c)
{
    wchar_t const target = c;

    wchar_t const* cursor = str;
    while (*cursor != target) {
        // Reached the end without a match (and the target is not the terminator).
        if (*cursor == L'\0')
            return nullptr;
        ++cursor;
    }

    return const_cast<wchar_t*>(cursor);
}
138
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsrchr.html
// Returns a pointer to the last occurrence of `wc` in `str`, or nullptr if there
// is none. The terminating null is considered part of the string, so searching
// for L'\0' yields a pointer to the terminator.
wchar_t* wcsrchr(wchar_t const* str, wchar_t wc)
{
    wchar_t const* last = nullptr;

    for (;; ++str) {
        // Check for a match before checking for the end, so that the
        // terminator itself can be matched.
        if (*str == wc)
            last = str;
        if (*str == L'\0')
            break;
    }

    return const_cast<wchar_t*>(last);
}
150
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscat.html
// Appends `src` (including its terminating null) to the end of `dest` and returns `dest`.
wchar_t* wcscat(wchar_t* dest, wchar_t const* src)
{
    // Start writing where the current terminator of dest lives.
    wchar_t* tail = dest + wcslen(dest);

    while (*src != L'\0')
        *tail++ = *src++;
    *tail = L'\0';

    return dest;
}
161
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncat.html
// Appends at most `n` wide characters of `src` to the end of `dest`, then writes
// a terminating null (so up to `n + 1` elements are written). Returns `dest`.
wchar_t* wcsncat(wchar_t* dest, wchar_t const* src, size_t n)
{
    // Start writing where the current terminator of dest lives.
    wchar_t* tail = dest + wcslen(dest);

    for (size_t remaining = n; remaining > 0 && *src != L'\0'; --remaining)
        *tail++ = *src++;
    *tail = L'\0';

    return dest;
}
172
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstok.html
// Splits a wide string into tokens separated by any of the characters in `delim`.
//
// On the first call `str` is the string to tokenize; on subsequent calls `str`
// is nullptr and scanning resumes from the position saved in `*ptr`.
// The string is modified in place: the delimiter that ends a token is
// overwritten with a null wide character.
//
// Returns a pointer to the next token, or nullptr once no tokens remain.
// `*ptr` is updated on every call so that the following call can continue.
wchar_t* wcstok(wchar_t* str, wchar_t const* delim, wchar_t** ptr)
{
    auto is_delimiter = [delim](wchar_t wc) {
        for (wchar_t const* candidate = delim; *candidate != L'\0'; ++candidate) {
            if (*candidate == wc)
                return true;
        }
        return false;
    };

    wchar_t* cursor = str ? str : *ptr;

    // Nothing to resume from (e.g. a continuation call without a prior first call).
    if (!cursor)
        return nullptr;

    // Skip delimiters in front of the token.
    while (*cursor != L'\0' && is_delimiter(*cursor))
        ++cursor;

    // Only delimiters (or nothing) were left: there are no more tokens.
    if (*cursor == L'\0') {
        *ptr = cursor;
        return nullptr;
    }

    wchar_t* token = cursor;

    // Find the end of the token.
    while (*cursor != L'\0' && !is_delimiter(*cursor))
        ++cursor;

    // Terminate the token and continue after the delimiter next time. If the token
    // ran up to the end of the string, stay on the terminator so that the next
    // call reports that no tokens remain.
    if (*cursor != L'\0') {
        *cursor = L'\0';
        ++cursor;
    }

    *ptr = cursor;
    return token;
}
218
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstol.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
long wcstol(wchar_t const*, wchar_t**, int)
{
    dbgln("FIXME: Implement wcstol()");
    TODO();
}
225
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoll.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
long long wcstoll(wchar_t const*, wchar_t**, int)
{
    dbgln("FIXME: Implement wcstoll()");
    TODO();
}
232
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/btowc.html
// Converts a single byte to its wide character equivalent.
// Returns WEOF for EOF and for any value with bit 7 set, since such a byte
// cannot form a complete UTF-8 character on its own.
wint_t btowc(int c)
{
    bool const is_eof = (c == EOF);

    // Multi-byte sequences in UTF-8 have their highest bit set
    bool const is_multibyte_part = (c & 0b10000000) != 0;

    if (is_eof || is_multibyte_part)
        return WEOF;

    return c;
}
247
248// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html
249size_t mbrtowc(wchar_t* pwc, char const* s, size_t n, mbstate_t* state)
250{
251 static mbstate_t _anonymous_state = {};
252
253 if (state == nullptr) {
254 state = &_anonymous_state;
255 }
256
257 // s being a null pointer is a shorthand for reading a single null byte.
258 if (s == nullptr) {
259 pwc = nullptr;
260 s = "";
261 n = 1;
262 }
263
264 // Stop early if we can't read anything
265 if (n == 0) {
266 return 0;
267 }
268
269 size_t consumed_bytes = 0;
270
271 // Fill the first byte if we haven't done that yet
272 if (state->stored_bytes == 0) {
273 state->bytes[state->stored_bytes++] = s[0];
274 consumed_bytes++;
275 }
276
277 size_t expected_bytes = mbstate_expected_bytes(state);
278
279 // Check if the first byte is invalid
280 if (expected_bytes == 0) {
281 *state = {};
282 errno = EILSEQ;
283 return -1;
284 }
285
286 while (state->stored_bytes < expected_bytes) {
287 if (consumed_bytes == n) {
288 // No complete multibyte character
289 return -2;
290 }
291
292 unsigned char c = s[consumed_bytes];
293
294 // Continuation bytes have to start with 0b10xxxxxx
295 if ((c & 0b11000000) != 0b10000000) {
296 // Invalid multibyte character
297 *state = {};
298 errno = EILSEQ;
299 return -1;
300 }
301
302 state->bytes[state->stored_bytes++] = c;
303 consumed_bytes++;
304 }
305
306 wchar_t codepoint = state->bytes[0];
307
308 // Mask out the "length" bits if necessary
309 if (expected_bytes > 1) {
310 codepoint &= (1 << (7 - expected_bytes)) - 1;
311 }
312
313 for (unsigned int i = 1; i < expected_bytes; i++) {
314 // Each continuation byte contains 6 bits of data
315 codepoint = codepoint << 6;
316 codepoint |= state->bytes[i] & 0b111111;
317 }
318
319 if (pwc) {
320 *pwc = codepoint;
321 }
322
323 // We want to read the next multibyte character, but keep all other properties.
324 state->stored_bytes = 0;
325
326 if (codepoint == 0) {
327 *state = {};
328 return 0;
329 }
330
331 return consumed_bytes;
332}
333
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrlen.html
// Determines the length of the next multibyte character by calling mbrtowc()
// without storing a result. Uses its own function-local state when `ps` is nullptr.
size_t mbrlen(char const* s, size_t n, mbstate_t* ps)
{
    static mbstate_t anonymous_state = {};

    mbstate_t* const effective_state = (ps != nullptr) ? ps : &anonymous_state;
    return mbrtowc(nullptr, s, n, effective_state);
}
344
345// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcrtomb.html
346size_t wcrtomb(char* s, wchar_t wc, mbstate_t*)
347{
348 if (s == nullptr)
349 wc = L'\0';
350
351 auto nwritten = AK::UnicodeUtils::code_point_to_utf8(wc, [&s](char byte) {
352 if (s != nullptr)
353 *s++ = byte;
354 });
355
356 if (nwritten < 0) {
357 errno = EILSEQ;
358 return (size_t)-1;
359 } else {
360 return nwritten;
361 }
362}
363
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscoll.html
// Compares two wide strings for collation purposes.
// Currently this simply forwards to wcscmp() and returns its result.
int wcscoll(wchar_t const* ws1, wchar_t const* ws2)
{
    // TODO: Actually implement a sensible sort order for this,
    // because right now we are doing what LC_COLLATE=C would do.
    return wcscmp(ws1, ws2);
}
371
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsxfrm.html
// Transforms `src` so that wcscmp() on transformed strings matches wcscoll().
// Since wcscoll() is currently plain wcscmp(), the "transformation" is a
// truncating copy via wcslcpy(), whose return value is passed through.
size_t wcsxfrm(wchar_t* dest, wchar_t const* src, size_t n)
{
    // TODO: This needs to be changed when wcscoll is not just doing wcscmp
    return wcslcpy(dest, src, n);
}
378
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wctob.html
// Converts a wide character to its single-byte representation.
// Only values up to 0x7f are converted; everything else yields EOF.
int wctob(wint_t c)
{
    bool const fits_in_single_byte = (c <= 0x7f);
    return fits_in_single_byte ? static_cast<unsigned char>(c) : EOF;
}
387
388// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsinit.html
389int mbsinit(mbstate_t const* state)
390{
391 if (!state) {
392 return 1;
393 }
394
395 if (state->stored_bytes != 0) {
396 return 0;
397 }
398
399 return 1;
400}
401
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcspbrk.html
// Returns a pointer to the first wide character in `wcs` that matches any
// wide character in `accept`, or nullptr if there is no such character.
wchar_t* wcspbrk(wchar_t const* wcs, wchar_t const* accept)
{
    // Walk wcs (not accept) in the outer loop, so that the earliest position
    // in wcs wins regardless of the order of the characters in accept.
    for (wchar_t const* cursor = wcs; *cursor != L'\0'; ++cursor) {
        for (wchar_t const* candidate = accept; *candidate != L'\0'; ++candidate) {
            if (*cursor == *candidate)
                return const_cast<wchar_t*>(cursor);
        }
    }

    return nullptr;
}
413
414// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsstr.html
415wchar_t* wcsstr(wchar_t const* haystack, wchar_t const* needle)
416{
417 size_t nlen = wcslen(needle);
418
419 if (nlen == 0)
420 return const_cast<wchar_t*>(haystack);
421
422 size_t hlen = wcslen(haystack);
423
424 while (hlen >= nlen) {
425 if (wcsncmp(haystack, needle, nlen) == 0)
426 return const_cast<wchar_t*>(haystack);
427
428 haystack++;
429 hlen--;
430 }
431
432 return nullptr;
433}
434
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemchr.html
// Returns a pointer to the first of the `n` wide characters at `s` that equals `c`,
// or nullptr if none does. Null wide characters get no special treatment.
wchar_t* wmemchr(wchar_t const* s, wchar_t c, size_t n)
{
    for (; n > 0; --n, ++s) {
        if (*s == c)
            return const_cast<wchar_t*>(s);
    }

    return nullptr;
}
445
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemcpy.html
// Copies `n` wide characters from `src` to `dest` front to back and returns `dest`.
wchar_t* wmemcpy(wchar_t* dest, wchar_t const* src, size_t n)
{
    wchar_t* output = dest;
    while (n-- > 0)
        *output++ = *src++;

    return dest;
}
454
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemset.html
// Sets the first `n` wide characters at `wcs` to `wc` and returns `wcs`.
wchar_t* wmemset(wchar_t* wcs, wchar_t wc, size_t n)
{
    for (wchar_t* cursor = wcs; n > 0; --n)
        *cursor++ = wc;

    return wcs;
}
464
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemmove.html
// Copies `n` wide characters from `src` to `dest` and returns `dest`.
// The copy direction is chosen so that overlapping ranges are handled correctly.
wchar_t* wmemmove(wchar_t* dest, wchar_t const* src, size_t n)
{
    // Moving a range onto itself is a no-op.
    if (dest == src)
        return dest;

    if (dest < src) {
        // The destination starts first: copy front to back.
        for (size_t index = 0; index != n; ++index)
            dest[index] = src[index];
        return dest;
    }

    // The destination starts later: copy back to front so that source
    // elements are read before they get overwritten.
    for (size_t index = n; index-- > 0;)
        dest[index] = src[index];

    return dest;
}
480
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoul.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
unsigned long wcstoul(wchar_t const*, wchar_t**, int)
{
    dbgln("TODO: Implement wcstoul()");
    TODO();
}
487
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoull.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
unsigned long long wcstoull(wchar_t const*, wchar_t**, int)
{
    dbgln("TODO: Implement wcstoull()");
    TODO();
}
494
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstof.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
float wcstof(wchar_t const*, wchar_t**)
{
    dbgln("TODO: Implement wcstof()");
    TODO();
}
501
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstod.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
double wcstod(wchar_t const*, wchar_t**)
{
    dbgln("TODO: Implement wcstod()");
    TODO();
}
508
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstold.html
// Not implemented yet: all arguments are ignored, a debug message is logged
// via dbgln() and TODO() is invoked.
// NOTE(review): TODO() presumably aborts the process - confirm against AK/Assertions.h.
long double wcstold(wchar_t const*, wchar_t**)
{
    dbgln("TODO: Implement wcstold()");
    TODO();
}
515
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcwidth.html
// Returns the number of columns needed to display `wc`:
// 0 for the null wide character, 1 for printable ASCII, -1 for every other
// value up to 0x7f, and (for now) 1 for everything above 0x7f.
int wcwidth(wchar_t wc)
{
    if (wc == L'\0')
        return 0;

    // TODO: Implement wcwidth for non-ASCII characters.
    if (wc > 0x7f)
        return 1;

    // Everything that is left is at most 0x7f: 0x20 through 0x7e is printable,
    // the rest (control characters and DEL) is not.
    bool const is_printable = (wc >= 0x20) && (wc != 0x7f);
    return is_printable ? 1 : -1;
}
533
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcswidth.html
// Sums up the wcwidth() of at most `n` wide characters of `pwcs`, stopping at
// the terminating null. Returns -1 if wcwidth() returns -1 for any of them.
int wcswidth(wchar_t const* pwcs, size_t n)
{
    int total_width = 0;

    size_t index = 0;
    while (index < n && pwcs[index] != L'\0') {
        int const character_width = wcwidth(pwcs[index++]);

        // A single character without a width makes the whole result invalid.
        if (character_width == -1)
            return -1;

        total_width += character_width;
    }

    return total_width;
}
550
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsnrtombs.html
// Converts at most `nwc` wide characters from `*src` into multibyte characters.
//
// If `dest` is not nullptr, the bytes are stored there and at most `len` bytes
// are written; if `dest` is nullptr, `len` is ignored and only the required
// number of bytes is computed.
// `*src` is advanced past every converted wide character and set to nullptr
// once the terminating null has been converted. Note that this implementation
// updates `*src` regardless of whether `dest` is nullptr.
//
// Returns the number of bytes produced, not counting the terminating null byte,
// or (size_t)-1 with errno set to EILSEQ if a wide character can't be converted
// (in which case `*src` points at the offending wide character).
size_t wcsnrtombs(char* dest, wchar_t const** src, size_t nwc, size_t len, mbstate_t* ps)
{
    static mbstate_t _anonymous_state = {};

    mbstate_t* const state = (ps != nullptr) ? ps : &_anonymous_state;

    size_t bytes_written = 0;
    for (size_t wchars_read = 0; wchars_read < nwc; ++wchars_read) {
        wchar_t const current = **src;

        // Convert next wchar to multibyte.
        char encoded[MB_LEN_MAX];
        size_t const encoded_length = wcrtomb(encoded, current, state);

        // wchar can't be represented as multibyte.
        if (encoded_length == static_cast<size_t>(-1)) {
            errno = EILSEQ;
            return static_cast<size_t>(-1);
        }

        if (dest != nullptr) {
            // New bytes don't fit the buffer.
            if (bytes_written + encoded_length > len)
                return bytes_written;

            memcpy(dest, encoded, encoded_length);
            dest += encoded_length;
        }

        // Null character has been reached. Its byte has been stored above,
        // but it is not part of the returned count.
        if (current == L'\0') {
            *src = nullptr;
            return bytes_written;
        }

        ++*src;
        bytes_written += encoded_length;
    }

    return bytes_written;
}
597
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsnrtowcs.html
// Converts multibyte characters from at most `nms` bytes at `*src` into wide characters.
//
// If `dst` is not nullptr, at most `len` wide characters are stored there;
// if `dst` is nullptr, `len` is ignored and the characters are only counted.
// `*src` is advanced past every converted character and set to nullptr once
// the terminating null byte has been converted. Note that this implementation
// updates `*src` regardless of whether `dst` is nullptr.
//
// Returns the number of wide characters converted, not counting the terminating
// null, or (size_t)-1 with errno set to EILSEQ on an invalid multibyte sequence.
size_t mbsnrtowcs(wchar_t* dst, char const** src, size_t nms, size_t len, mbstate_t* ps)
{
    static mbstate_t _anonymous_state = {};

    mbstate_t* const state = (ps != nullptr) ? ps : &_anonymous_state;

    size_t wchars_written = 0;
    // Without a destination there is no limit on the number of characters.
    while (dst == nullptr || wchars_written < len) {
        // End of source buffer, no incomplete character.
        // src continues to point to the next byte.
        if (nms == 0)
            return wchars_written;

        // Convert next multibyte to wchar.
        size_t const consumed = mbrtowc(dst, *src, nms, state);

        // Multibyte sequence is incomplete.
        if (consumed == -2ul) {
            // Point just past the last processed byte.
            *src += nms;
            return wchars_written;
        }

        // Multibyte sequence is invalid.
        if (consumed == -1ul) {
            errno = EILSEQ;
            return static_cast<size_t>(-1);
        }

        // Null byte has been reached.
        if (**src == '\0') {
            *src = nullptr;
            return wchars_written;
        }

        *src += consumed;
        nms -= consumed;
        ++wchars_written;
        if (dst != nullptr)
            ++dst;
    }

    // If we are here, we have written `len` wchars, but not reached the null byte.
    return wchars_written;
}
646
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemcmp.html
// Compares the first `n` wide characters of `s1` and `s2`.
// Returns 0 if they are all equal, otherwise -1 or 1 depending on whether the
// first differing wide character of `s1` is smaller or larger than that of `s2`.
int wmemcmp(wchar_t const* s1, wchar_t const* s2, size_t n)
{
    for (size_t index = 0; index < n; ++index) {
        if (s1[index] == s2[index])
            continue;
        return s1[index] < s2[index] ? -1 : 1;
    }
    return 0;
}
656
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsrtombs.html
// Converts a null-terminated wide string to multibyte characters by forwarding
// to wcsnrtombs() without a limit on the number of wide characters read.
// Uses its own function-local state when `ps` is nullptr.
size_t wcsrtombs(char* dest, wchar_t const** src, size_t len, mbstate_t* ps)
{
    static mbstate_t anonymous_state = {};

    mbstate_t* const effective_state = (ps != nullptr) ? ps : &anonymous_state;

    // SIZE_MAX is as close as we are going to get to "unlimited".
    return wcsnrtombs(dest, src, SIZE_MAX, len, effective_state);
}
668
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsrtowcs.html
// Converts a null-terminated multibyte string to wide characters by forwarding
// to mbsnrtowcs() without a limit on the number of bytes read.
// Uses its own function-local state when `ps` is nullptr.
size_t mbsrtowcs(wchar_t* dst, char const** src, size_t len, mbstate_t* ps)
{
    static mbstate_t anonymous_state = {};

    mbstate_t* const effective_state = (ps != nullptr) ? ps : &anonymous_state;

    // SIZE_MAX is as close as we are going to get to "unlimited".
    return mbsnrtowcs(dst, src, SIZE_MAX, len, effective_state);
}
680
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscspn.html
// Returns the length of the longest prefix of `wcs` that contains none of the
// wide characters in `reject`.
size_t wcscspn(wchar_t const* wcs, wchar_t const* reject)
{
    for (size_t length = 0;; ++length) {
        wchar_t const current = wcs[length];

        // The terminator of reject takes part in the comparison, so reaching
        // the end of wcs ends the prefix as well.
        wchar_t const* candidate = reject;
        for (;;) {
            wchar_t const rejected = *candidate++;
            if (rejected == current)
                return length;
            if (rejected == 0)
                break;
        }
    }
}
694
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsspn.html
// Returns the length of the longest prefix of `wcs` that consists entirely of
// wide characters contained in `accept`.
size_t wcsspn(wchar_t const* wcs, wchar_t const* accept)
{
    size_t length = 0;

    for (; wcs[length] != L'\0'; ++length) {
        // The current character extends the prefix only if it matches
        // at least one of the accepted characters.
        bool is_accepted = false;
        for (wchar_t const* candidate = accept; *candidate != L'\0'; ++candidate) {
            if (*candidate == wcs[length]) {
                is_accepted = true;
                break;
            }
        }

        if (!is_accepted)
            break;
    }

    return length;
}
708
709// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsftime.html
710size_t wcsftime(wchar_t* destination, size_t maxsize, wchar_t const* format, const struct tm* tm)
711{
712 // FIXME: Add actual wide char support for this.
713 char* ascii_format = static_cast<char*>(malloc(wcslen(format) + 1));
714 char* ascii_destination = static_cast<char*>(malloc(maxsize));
715
716 VERIFY(ascii_format && ascii_destination);
717
718 // These are copied by value because we will change the pointers without rolling them back.
719 ScopeGuard free_ascii = [ascii_format, ascii_destination] {
720 free(ascii_format);
721 free(ascii_destination);
722 };
723
724 char* ascii_format_copy = ascii_format;
725 do {
726 VERIFY(*format <= 0x7f);
727 *ascii_format_copy++ = static_cast<char>(*format);
728 } while (*format++ != L'\0');
729
730#pragma GCC diagnostic push
731#pragma GCC diagnostic ignored "-Wformat-nonliteral"
732 size_t ret = strftime(ascii_destination, maxsize, ascii_format, tm);
733#pragma GCC diagnostic pop
734
735 if (ret == 0)
736 return 0;
737
738 do {
739 *destination++ = *ascii_destination;
740 } while (*ascii_destination++ != '\0');
741
742 return ret;
743}
744}