Serenity Operating System
at master 101 lines 2.8 kB view raw
1/* 2 * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/String.h> 8#include <AK/StringBuilder.h> 9#include <AK/Utf32View.h> 10#include <AK/Utf8View.h> 11#include <LibUnicode/UnicodeUtils.h> 12 13// This file contains definitions of AK::String methods which require UCD data. 14 15namespace AK { 16 17ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const 18{ 19 StringBuilder builder; 20 TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale)); 21 return builder.to_string(); 22} 23 24ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const 25{ 26 StringBuilder builder; 27 TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale)); 28 return builder.to_string(); 29} 30 31ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const 32{ 33 StringBuilder builder; 34 TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale)); 35 return builder.to_string(); 36} 37 38ErrorOr<String> String::to_casefold() const 39{ 40 StringBuilder builder; 41 TRY(Unicode::Detail::build_casefold_string(code_points(), builder)); 42 return builder.to_string(); 43} 44 45class CasefoldStringComparator { 46public: 47 explicit CasefoldStringComparator(Utf8View string) 48 : m_string(string) 49 , m_it(m_string.begin()) 50 { 51 } 52 53 bool has_more_data() const 54 { 55 return !m_casefolded_code_points.is_empty() || (m_it != m_string.end()); 56 } 57 58 u32 next_code_point() 59 { 60 VERIFY(has_more_data()); 61 62 if (m_casefolded_code_points.is_empty()) { 63 m_current_code_point = *m_it; 64 ++m_it; 65 66 m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point); 67 VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point. 68 } 69 70 auto code_point = m_casefolded_code_points[0]; 71 m_casefolded_code_points = m_casefolded_code_points.substring_view(1); 72 73 return code_point; 74 } 75 76private: 77 Utf8View m_string; 78 Utf8CodePointIterator m_it; 79 80 u32 m_current_code_point { 0 }; 81 Utf32View m_casefolded_code_points; 82}; 83 84// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145 85bool String::equals_ignoring_case(String const& other) const 86{ 87 // A string X is a caseless match for a string Y if and only if: 88 // toCasefold(X) = toCasefold(Y) 89 90 CasefoldStringComparator lhs { code_points() }; 91 CasefoldStringComparator rhs { other.code_points() }; 92 93 while (lhs.has_more_data() && rhs.has_more_data()) { 94 if (lhs.next_code_point() != rhs.next_code_point()) 95 return false; 96 } 97 98 return !lhs.has_more_data() && !rhs.has_more_data(); 99} 100 101}