Serenity Operating System
1/*
2 * Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
3 * Copyright (c) 2022, networkException <networkexception@serenityos.org>
4 * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
5 * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
6 *
7 * SPDX-License-Identifier: BSD-2-Clause
8 */
9
10#include <AK/CharacterTypes.h>
11#include <AK/String.h>
12#include <AK/Utf16View.h>
13#include <AK/Utf8View.h>
14#include <LibWeb/Infra/CharacterTypes.h>
15#include <LibWeb/Infra/Strings.h>
16
17namespace Web::Infra {
18
19// https://infra.spec.whatwg.org/#ascii-case-insensitive
20bool is_ascii_case_insensitive_match(StringView a, StringView b)
21{
22 // A string A is an ASCII case-insensitive match for a string B,
23 // if the ASCII lowercase of A is the ASCII lowercase of B.
24 return AK::StringUtils::equals_ignoring_ascii_case(a, b);
25}
26
27// https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
28ErrorOr<String> strip_and_collapse_whitespace(StringView string)
29{
30 // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a single U+0020 SPACE code point.
31 StringBuilder builder;
32 for (auto code_point : Utf8View { string }) {
33 if (Infra::is_ascii_whitespace(code_point)) {
34 if (!builder.string_view().ends_with(' '))
35 builder.append(' ');
36 continue;
37 }
38 TRY(builder.try_append_code_point(code_point));
39 }
40
41 // ...and then remove any leading and trailing ASCII whitespace from that string.
42 return String::from_utf8(builder.string_view().trim(Infra::ASCII_WHITESPACE));
43}
44
45// https://infra.spec.whatwg.org/#code-unit-prefix
46bool is_code_unit_prefix(StringView potential_prefix, StringView input)
47{
48 auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
49 auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
50
51 // 1. Let i be 0.
52 size_t i = 0;
53
54 // 2. While true:
55 while (true) {
56 // 1. If i is greater than or equal to potentialPrefix’s length, then return true.
57 if (i >= potential_prefix.length())
58 return true;
59
60 // 2. If i is greater than or equal to input’s length, then return false.
61 if (i >= input.length())
62 return false;
63
64 // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
65 auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i);
66
67 // 4. Let inputCodeUnit be the ith code unit of input.
68 auto input_code_unit = Utf16View(input_utf16).code_unit_at(i);
69
70 // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
71 if (potential_prefix_code_unit != input_code_unit)
72 return false;
73
74 // 6. Set i to i + 1.
75 ++i;
76 }
77}
78
79// https://infra.spec.whatwg.org/#scalar-value-string
80ErrorOr<String> convert_to_scalar_value_string(StringView string)
81{
82 // To convert a string into a scalar value string, replace any surrogates with U+FFFD.
83 StringBuilder scalar_value_builder;
84 auto utf8_view = Utf8View { string };
85 for (u32 code_point : utf8_view) {
86 if (is_unicode_surrogate(code_point))
87 code_point = 0xFFFD;
88 TRY(scalar_value_builder.try_append(code_point));
89 }
90 return scalar_value_builder.to_string();
91}
92
93// https://infra.spec.whatwg.org/#ascii-lowercase
94ErrorOr<String> to_ascii_lowercase(StringView string)
95{
96 // To ASCII lowercase a string, replace all ASCII upper alphas in the string with their
97 // corresponding code point in ASCII lower alpha.
98 StringBuilder string_builder;
99 auto utf8_view = Utf8View { string };
100 for (u32 code_point : utf8_view) {
101 code_point = AK::to_ascii_lowercase(code_point);
102 TRY(string_builder.try_append(code_point));
103 }
104 return string_builder.to_string();
105}
106
107// https://infra.spec.whatwg.org/#ascii-uppercase
108ErrorOr<String> to_ascii_uppercase(StringView string)
109{
110 // To ASCII uppercase a string, replace all ASCII lower alphas in the string with their
111 // corresponding code point in ASCII upper alpha.
112 StringBuilder string_builder;
113 auto utf8_view = Utf8View { string };
114 for (u32 code_point : utf8_view) {
115 code_point = AK::to_ascii_uppercase(code_point);
116 TRY(string_builder.try_append(code_point));
117 }
118 return string_builder.to_string();
119}
120
121}