Serenity Operating System
at master 744 lines 19 kB view raw
1/* 2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> 3 * Copyright (c) 2022, Tim Schumacher <timschumi@serenityos.org> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/Assertions.h> 9#include <AK/Format.h> 10#include <AK/ScopeGuard.h> 11#include <AK/UnicodeUtils.h> 12#include <errno.h> 13#include <string.h> 14#include <time.h> 15#include <wchar.h> 16 17static unsigned int mbstate_expected_bytes(mbstate_t* state) 18{ 19 if (state->stored_bytes == 0) { 20 return 0; 21 } 22 23 unsigned char first = state->bytes[0]; 24 25 // Single-byte sequences have their first bit unset 26 if ((first & 0b10000000) == 0) { 27 return 1; 28 } 29 30 // Two-byte sequences start with 0b110xxxxx 31 if ((first & 0b11100000) == 0b11000000) { 32 return 2; 33 } 34 35 // Three-byte sequences start with 0b1110xxxx 36 if ((first & 0b11110000) == 0b11100000) { 37 return 3; 38 } 39 40 // Four-byte sequences start with 0b11110xxx 41 if ((first & 0b11111000) == 0b11110000) { 42 return 4; 43 } 44 45 // Everything else is invalid 46 return 0; 47} 48 49extern "C" { 50 51// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcslen.html 52size_t wcslen(wchar_t const* str) 53{ 54 size_t len = 0; 55 while (*(str++)) 56 ++len; 57 return len; 58} 59 60// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscpy.html 61wchar_t* wcscpy(wchar_t* dest, wchar_t const* src) 62{ 63 wchar_t* original_dest = dest; 64 while ((*dest++ = *src++) != '\0') 65 ; 66 return original_dest; 67} 68 69// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsdup.html 70wchar_t* wcsdup(wchar_t const* str) 71{ 72 size_t length = wcslen(str); 73 wchar_t* new_str = (wchar_t*)malloc(sizeof(wchar_t) * (length + 1)); 74 75 if (!new_str) { 76 errno = ENOMEM; 77 return nullptr; 78 } 79 80 return wcscpy(new_str, str); 81} 82 83// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncpy.html 84wchar_t* wcsncpy(wchar_t* dest, wchar_t const* src, size_t num) 85{ 86 wchar_t* original_dest = dest; 87 while (((*dest++ = *src++) != '\0') && ((size_t)(dest - original_dest) < num)) 88 ; 89 return original_dest; 90} 91 92size_t wcslcpy(wchar_t* dest, wchar_t const* src, size_t n) 93{ 94 size_t i; 95 for (i = 0; i + 1 < n && src[i] != L'\0'; ++i) 96 dest[i] = src[i]; 97 if (n) 98 dest[i] = L'\0'; 99 for (; src[i] != L'\0'; ++i) 100 ; // Determine the length of src, don't copy. 101 return i; 102} 103 104// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscmp.html 105int wcscmp(wchar_t const* s1, wchar_t const* s2) 106{ 107 while (*s1 == *s2++) 108 if (*s1++ == 0) 109 return 0; 110 return *(wchar_t const*)s1 - *(wchar_t const*)--s2; 111} 112 113// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncmp.html 114int wcsncmp(wchar_t const* s1, wchar_t const* s2, size_t n) 115{ 116 if (!n) 117 return 0; 118 do { 119 if (*s1 != *s2++) 120 return *(wchar_t const*)s1 - *(wchar_t const*)--s2; 121 if (*s1++ == 0) 122 break; 123 } while (--n); 124 return 0; 125} 126 127// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcschr.html 128wchar_t* wcschr(wchar_t const* str, int c) 129{ 130 wchar_t ch = c; 131 for (;; ++str) { 132 if (*str == ch) 133 return const_cast<wchar_t*>(str); 134 if (!*str) 135 return nullptr; 136 } 137} 138 139// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsrchr.html 140wchar_t* wcsrchr(wchar_t const* str, wchar_t wc) 141{ 142 wchar_t* last = nullptr; 143 wchar_t c; 144 for (; (c = *str); ++str) { 145 if (c == wc) 146 last = const_cast<wchar_t*>(str); 147 } 148 return last; 149} 150 151// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscat.html 152wchar_t* wcscat(wchar_t* dest, wchar_t const* src) 153{ 154 size_t dest_length = wcslen(dest); 155 size_t i; 156 for (i = 0; src[i] != '\0'; i++) 157 dest[dest_length + i] = src[i]; 158 dest[dest_length + i] = '\0'; 159 return dest; 160} 161 162// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsncat.html 163wchar_t* wcsncat(wchar_t* dest, wchar_t const* src, size_t n) 164{ 165 size_t dest_length = wcslen(dest); 166 size_t i; 167 for (i = 0; i < n && src[i] != '\0'; i++) 168 dest[dest_length + i] = src[i]; 169 dest[dest_length + i] = '\0'; 170 return dest; 171} 172 173// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstok.html 174wchar_t* wcstok(wchar_t* str, wchar_t const* delim, wchar_t** ptr) 175{ 176 wchar_t* used_str = str; 177 if (!used_str) { 178 used_str = *ptr; 179 } 180 181 size_t token_start = 0; 182 size_t token_end = 0; 183 size_t str_len = wcslen(used_str); 184 size_t delim_len = wcslen(delim); 185 186 for (size_t i = 0; i < str_len; ++i) { 187 bool is_proper_delim = false; 188 189 for (size_t j = 0; j < delim_len; ++j) { 190 if (used_str[i] == delim[j]) { 191 // Skip beginning delimiters 192 if (token_end - token_start == 0) { 193 ++token_start; 194 break; 195 } 196 197 is_proper_delim = true; 198 } 199 } 200 201 ++token_end; 202 if (is_proper_delim && token_end > 0) { 203 --token_end; 204 break; 205 } 206 } 207 208 if (used_str[token_start] == '\0') 209 return nullptr; 210 211 if (token_end == 0) { 212 return &used_str[token_start]; 213 } 214 215 used_str[token_end] = '\0'; 216 return &used_str[token_start]; 217} 218 219// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstol.html 220long wcstol(wchar_t const*, wchar_t**, int) 221{ 222 dbgln("FIXME: Implement wcstol()"); 223 TODO(); 224} 225 226// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoll.html 227long long wcstoll(wchar_t const*, wchar_t**, int) 228{ 229 dbgln("FIXME: Implement wcstoll()"); 230 TODO(); 231} 232 233// https://pubs.opengroup.org/onlinepubs/9699919799/functions/btowc.html 234wint_t btowc(int c) 235{ 236 if (c == EOF) { 237 return WEOF; 238 } 239 240 // Multi-byte sequences in UTF-8 have their highest bit set 241 if (c & (1 << 7)) { 242 return WEOF; 243 } 244 245 return c; 246} 247 248// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html 249size_t mbrtowc(wchar_t* pwc, char const* s, size_t n, mbstate_t* state) 250{ 251 static mbstate_t _anonymous_state = {}; 252 253 if (state == nullptr) { 254 state = &_anonymous_state; 255 } 256 257 // s being a null pointer is a shorthand for reading a single null byte. 258 if (s == nullptr) { 259 pwc = nullptr; 260 s = ""; 261 n = 1; 262 } 263 264 // Stop early if we can't read anything 265 if (n == 0) { 266 return 0; 267 } 268 269 size_t consumed_bytes = 0; 270 271 // Fill the first byte if we haven't done that yet 272 if (state->stored_bytes == 0) { 273 state->bytes[state->stored_bytes++] = s[0]; 274 consumed_bytes++; 275 } 276 277 size_t expected_bytes = mbstate_expected_bytes(state); 278 279 // Check if the first byte is invalid 280 if (expected_bytes == 0) { 281 *state = {}; 282 errno = EILSEQ; 283 return -1; 284 } 285 286 while (state->stored_bytes < expected_bytes) { 287 if (consumed_bytes == n) { 288 // No complete multibyte character 289 return -2; 290 } 291 292 unsigned char c = s[consumed_bytes]; 293 294 // Continuation bytes have to start with 0b10xxxxxx 295 if ((c & 0b11000000) != 0b10000000) { 296 // Invalid multibyte character 297 *state = {}; 298 errno = EILSEQ; 299 return -1; 300 } 301 302 state->bytes[state->stored_bytes++] = c; 303 consumed_bytes++; 304 } 305 306 wchar_t codepoint = state->bytes[0]; 307 308 // Mask out the "length" bits if necessary 309 if (expected_bytes > 1) { 310 codepoint &= (1 << (7 - expected_bytes)) - 1; 311 } 312 313 for (unsigned int i = 1; i < expected_bytes; i++) { 314 // Each continuation byte contains 6 bits of data 315 codepoint = codepoint << 6; 316 codepoint |= state->bytes[i] & 0b111111; 317 } 318 319 if (pwc) { 320 *pwc = codepoint; 321 } 322 323 // We want to read the next multibyte character, but keep all other properties. 324 state->stored_bytes = 0; 325 326 if (codepoint == 0) { 327 *state = {}; 328 return 0; 329 } 330 331 return consumed_bytes; 332} 333 334// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrlen.html 335size_t mbrlen(char const* s, size_t n, mbstate_t* ps) 336{ 337 static mbstate_t anonymous_state = {}; 338 339 if (ps == nullptr) 340 ps = &anonymous_state; 341 342 return mbrtowc(nullptr, s, n, ps); 343} 344 345// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcrtomb.html 346size_t wcrtomb(char* s, wchar_t wc, mbstate_t*) 347{ 348 if (s == nullptr) 349 wc = L'\0'; 350 351 auto nwritten = AK::UnicodeUtils::code_point_to_utf8(wc, [&s](char byte) { 352 if (s != nullptr) 353 *s++ = byte; 354 }); 355 356 if (nwritten < 0) { 357 errno = EILSEQ; 358 return (size_t)-1; 359 } else { 360 return nwritten; 361 } 362} 363 364// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscoll.html 365int wcscoll(wchar_t const* ws1, wchar_t const* ws2) 366{ 367 // TODO: Actually implement a sensible sort order for this, 368 // because right now we are doing what LC_COLLATE=C would do. 369 return wcscmp(ws1, ws2); 370} 371 372// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsxfrm.html 373size_t wcsxfrm(wchar_t* dest, wchar_t const* src, size_t n) 374{ 375 // TODO: This needs to be changed when wcscoll is not just doing wcscmp 376 return wcslcpy(dest, src, n); 377} 378 379// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wctob.html 380int wctob(wint_t c) 381{ 382 if (c > 0x7f) 383 return EOF; 384 385 return static_cast<unsigned char>(c); 386} 387 388// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsinit.html 389int mbsinit(mbstate_t const* state) 390{ 391 if (!state) { 392 return 1; 393 } 394 395 if (state->stored_bytes != 0) { 396 return 0; 397 } 398 399 return 1; 400} 401 402// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcspbrk.html 403wchar_t* wcspbrk(wchar_t const* wcs, wchar_t const* accept) 404{ 405 for (wchar_t const* cur = accept; *cur; cur++) { 406 wchar_t* res = wcschr(wcs, *cur); 407 if (res) 408 return res; 409 } 410 411 return nullptr; 412} 413 414// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsstr.html 415wchar_t* wcsstr(wchar_t const* haystack, wchar_t const* needle) 416{ 417 size_t nlen = wcslen(needle); 418 419 if (nlen == 0) 420 return const_cast<wchar_t*>(haystack); 421 422 size_t hlen = wcslen(haystack); 423 424 while (hlen >= nlen) { 425 if (wcsncmp(haystack, needle, nlen) == 0) 426 return const_cast<wchar_t*>(haystack); 427 428 haystack++; 429 hlen--; 430 } 431 432 return nullptr; 433} 434 435// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemchr.html 436wchar_t* wmemchr(wchar_t const* s, wchar_t c, size_t n) 437{ 438 for (size_t i = 0; i < n; i++) { 439 if (s[i] == c) 440 return const_cast<wchar_t*>(&s[i]); 441 } 442 443 return nullptr; 444} 445 446// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemcpy.html 447wchar_t* wmemcpy(wchar_t* dest, wchar_t const* src, size_t n) 448{ 449 for (size_t i = 0; i < n; i++) 450 dest[i] = src[i]; 451 452 return dest; 453} 454 455// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemset.html 456wchar_t* wmemset(wchar_t* wcs, wchar_t wc, size_t n) 457{ 458 for (size_t i = 0; i < n; i++) { 459 wcs[i] = wc; 460 } 461 462 return wcs; 463} 464 465// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemmove.html 466wchar_t* wmemmove(wchar_t* dest, wchar_t const* src, size_t n) 467{ 468 if (dest > src) { 469 for (size_t i = 1; i <= n; i++) { 470 dest[n - i] = src[n - i]; 471 } 472 } else if (dest < src) { 473 for (size_t i = 0; i < n; i++) { 474 dest[i] = src[i]; 475 } 476 } 477 478 return dest; 479} 480 481// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoul.html 482unsigned long wcstoul(wchar_t const*, wchar_t**, int) 483{ 484 dbgln("TODO: Implement wcstoul()"); 485 TODO(); 486} 487 488// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstoull.html 489unsigned long long wcstoull(wchar_t const*, wchar_t**, int) 490{ 491 dbgln("TODO: Implement wcstoull()"); 492 TODO(); 493} 494 495// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstof.html 496float wcstof(wchar_t const*, wchar_t**) 497{ 498 dbgln("TODO: Implement wcstof()"); 499 TODO(); 500} 501 502// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstod.html 503double wcstod(wchar_t const*, wchar_t**) 504{ 505 dbgln("TODO: Implement wcstod()"); 506 TODO(); 507} 508 509// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcstold.html 510long double wcstold(wchar_t const*, wchar_t**) 511{ 512 dbgln("TODO: Implement wcstold()"); 513 TODO(); 514} 515 516// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcwidth.html 517int wcwidth(wchar_t wc) 518{ 519 if (wc == L'\0') 520 return 0; 521 522 // Printable ASCII. 523 if (wc >= 0x20 && wc <= 0x7e) 524 return 1; 525 526 // Non-printable ASCII. 527 if (wc <= 0x7f) 528 return -1; 529 530 // TODO: Implement wcwidth for non-ASCII characters. 531 return 1; 532} 533 534// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcswidth.html 535int wcswidth(wchar_t const* pwcs, size_t n) 536{ 537 int len = 0; 538 539 for (size_t i = 0; i < n && pwcs[i]; i++) { 540 int char_len = wcwidth(pwcs[i]); 541 542 if (char_len == -1) 543 return -1; 544 545 len += char_len; 546 } 547 548 return len; 549} 550 551// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsnrtombs.html 552size_t wcsnrtombs(char* dest, wchar_t const** src, size_t nwc, size_t len, mbstate_t* ps) 553{ 554 static mbstate_t _anonymous_state = {}; 555 556 if (ps == nullptr) 557 ps = &_anonymous_state; 558 559 size_t written = 0; 560 size_t read = 0; 561 while (read < nwc) { 562 size_t ret = 0; 563 char buf[MB_LEN_MAX]; 564 565 // Convert next wchar to multibyte. 566 ret = wcrtomb(buf, **src, ps); 567 568 // wchar can't be represented as multibyte. 569 if (ret == (size_t)-1) { 570 errno = EILSEQ; 571 return (size_t)-1; 572 } 573 574 // New bytes don't fit the buffer. 575 if (dest && len < written + ret) { 576 return written; 577 } 578 579 if (dest) { 580 memcpy(dest, buf, ret); 581 dest += ret; 582 } 583 584 // Null character has been reached 585 if (**src == L'\0') { 586 *src = nullptr; 587 return written; 588 } 589 590 *src += 1; 591 read += 1; 592 written += ret; 593 } 594 595 return written; 596} 597 598// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsnrtowcs.html 599size_t mbsnrtowcs(wchar_t* dst, char const** src, size_t nms, size_t len, mbstate_t* ps) 600{ 601 static mbstate_t _anonymous_state = {}; 602 603 if (ps == nullptr) 604 ps = &_anonymous_state; 605 606 size_t written = 0; 607 while (written < len || !dst) { 608 // End of source buffer, no incomplete character. 609 // src continues to point to the next byte. 610 if (nms == 0) { 611 return written; 612 } 613 614 // Convert next multibyte to wchar. 615 size_t ret = mbrtowc(dst, *src, nms, ps); 616 617 // Multibyte sequence is incomplete. 618 if (ret == -2ul) { 619 // Point just past the last processed byte. 620 *src += nms; 621 return written; 622 } 623 624 // Multibyte sequence is invalid. 625 if (ret == -1ul) { 626 errno = EILSEQ; 627 return (size_t)-1; 628 } 629 630 // Null byte has been reached. 631 if (**src == '\0') { 632 *src = nullptr; 633 return written; 634 } 635 636 *src += ret; 637 nms -= ret; 638 written += 1; 639 if (dst) 640 dst += 1; 641 } 642 643 // If we are here, we have written `len` wchars, but not reached the null byte. 644 return written; 645} 646 647// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wmemcmp.html 648int wmemcmp(wchar_t const* s1, wchar_t const* s2, size_t n) 649{ 650 while (n-- > 0) { 651 if (*s1++ != *s2++) 652 return s1[-1] < s2[-1] ? -1 : 1; 653 } 654 return 0; 655} 656 657// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsrtombs.html 658size_t wcsrtombs(char* dest, wchar_t const** src, size_t len, mbstate_t* ps) 659{ 660 static mbstate_t anonymous_state = {}; 661 662 if (ps == nullptr) 663 ps = &anonymous_state; 664 665 // SIZE_MAX is as close as we are going to get to "unlimited". 666 return wcsnrtombs(dest, src, SIZE_MAX, len, ps); 667} 668 669// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsrtowcs.html 670size_t mbsrtowcs(wchar_t* dst, char const** src, size_t len, mbstate_t* ps) 671{ 672 static mbstate_t anonymous_state = {}; 673 674 if (ps == nullptr) 675 ps = &anonymous_state; 676 677 // SIZE_MAX is as close as we are going to get to "unlimited". 678 return mbsnrtowcs(dst, src, SIZE_MAX, len, ps); 679} 680 681// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcscspn.html 682size_t wcscspn(wchar_t const* wcs, wchar_t const* reject) 683{ 684 for (auto const* wc_pointer = wcs;;) { 685 auto c = *wc_pointer++; 686 wchar_t rc; 687 auto const* reject_copy = reject; 688 do { 689 if ((rc = *reject_copy++) == c) 690 return wc_pointer - 1 - wcs; 691 } while (rc != 0); 692 } 693} 694 695// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsspn.html 696size_t wcsspn(wchar_t const* wcs, wchar_t const* accept) 697{ 698 for (auto const* wc_pointer = wcs;;) { 699 auto c = *wc_pointer++; 700 wchar_t rc; 701 auto const* accept_copy = accept; 702 do { 703 if ((rc = *accept_copy++) != c) 704 return wc_pointer - 1 - wcs; 705 } while (rc != 0); 706 } 707} 708 709// https://pubs.opengroup.org/onlinepubs/9699919799/functions/wcsftime.html 710size_t wcsftime(wchar_t* destination, size_t maxsize, wchar_t const* format, const struct tm* tm) 711{ 712 // FIXME: Add actual wide char support for this. 713 char* ascii_format = static_cast<char*>(malloc(wcslen(format) + 1)); 714 char* ascii_destination = static_cast<char*>(malloc(maxsize)); 715 716 VERIFY(ascii_format && ascii_destination); 717 718 // These are copied by value because we will change the pointers without rolling them back. 719 ScopeGuard free_ascii = [ascii_format, ascii_destination] { 720 free(ascii_format); 721 free(ascii_destination); 722 }; 723 724 char* ascii_format_copy = ascii_format; 725 do { 726 VERIFY(*format <= 0x7f); 727 *ascii_format_copy++ = static_cast<char>(*format); 728 } while (*format++ != L'\0'); 729 730#pragma GCC diagnostic push 731#pragma GCC diagnostic ignored "-Wformat-nonliteral" 732 size_t ret = strftime(ascii_destination, maxsize, ascii_format, tm); 733#pragma GCC diagnostic pop 734 735 if (ret == 0) 736 return 0; 737 738 do { 739 *destination++ = *ascii_destination; 740 } while (*ascii_destination++ != '\0'); 741 742 return ret; 743} 744}