Serenity Operating System
1/*
2 * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/String.h>
8#include <AK/StringBuilder.h>
9#include <AK/Utf32View.h>
10#include <AK/Utf8View.h>
11#include <LibUnicode/UnicodeUtils.h>
12
13// This file contains definitions of AK::String methods which require UCD data.
14
15namespace AK {
16
17ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
18{
19 StringBuilder builder;
20 TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale));
21 return builder.to_string();
22}
23
24ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
25{
26 StringBuilder builder;
27 TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale));
28 return builder.to_string();
29}
30
31ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
32{
33 StringBuilder builder;
34 TRY(Unicode::Detail::build_titlecase_string(code_points(), builder, locale));
35 return builder.to_string();
36}
37
38ErrorOr<String> String::to_casefold() const
39{
40 StringBuilder builder;
41 TRY(Unicode::Detail::build_casefold_string(code_points(), builder));
42 return builder.to_string();
43}
44
45class CasefoldStringComparator {
46public:
47 explicit CasefoldStringComparator(Utf8View string)
48 : m_string(string)
49 , m_it(m_string.begin())
50 {
51 }
52
53 bool has_more_data() const
54 {
55 return !m_casefolded_code_points.is_empty() || (m_it != m_string.end());
56 }
57
58 u32 next_code_point()
59 {
60 VERIFY(has_more_data());
61
62 if (m_casefolded_code_points.is_empty()) {
63 m_current_code_point = *m_it;
64 ++m_it;
65
66 m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point);
67 VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point.
68 }
69
70 auto code_point = m_casefolded_code_points[0];
71 m_casefolded_code_points = m_casefolded_code_points.substring_view(1);
72
73 return code_point;
74 }
75
76private:
77 Utf8View m_string;
78 Utf8CodePointIterator m_it;
79
80 u32 m_current_code_point { 0 };
81 Utf32View m_casefolded_code_points;
82};
83
84// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
85bool String::equals_ignoring_case(String const& other) const
86{
87 // A string X is a caseless match for a string Y if and only if:
88 // toCasefold(X) = toCasefold(Y)
89
90 CasefoldStringComparator lhs { code_points() };
91 CasefoldStringComparator rhs { other.code_points() };
92
93 while (lhs.has_more_data() && rhs.has_more_data()) {
94 if (lhs.next_code_point() != rhs.next_code_point())
95 return false;
96 }
97
98 return !lhs.has_more_data() && !rhs.has_more_data();
99}
100
101}