Serenity Operating System
at master 121 lines 4.4 kB view raw
1/* 2 * Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org> 3 * Copyright (c) 2022, networkException <networkexception@serenityos.org> 4 * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org> 5 * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org> 6 * 7 * SPDX-License-Identifier: BSD-2-Clause 8 */ 9 10#include <AK/CharacterTypes.h> 11#include <AK/String.h> 12#include <AK/Utf16View.h> 13#include <AK/Utf8View.h> 14#include <LibWeb/Infra/CharacterTypes.h> 15#include <LibWeb/Infra/Strings.h> 16 17namespace Web::Infra { 18 19// https://infra.spec.whatwg.org/#ascii-case-insensitive 20bool is_ascii_case_insensitive_match(StringView a, StringView b) 21{ 22 // A string A is an ASCII case-insensitive match for a string B, 23 // if the ASCII lowercase of A is the ASCII lowercase of B. 24 return AK::StringUtils::equals_ignoring_ascii_case(a, b); 25} 26 27// https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace 28ErrorOr<String> strip_and_collapse_whitespace(StringView string) 29{ 30 // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a single U+0020 SPACE code point. 31 StringBuilder builder; 32 for (auto code_point : Utf8View { string }) { 33 if (Infra::is_ascii_whitespace(code_point)) { 34 if (!builder.string_view().ends_with(' ')) 35 builder.append(' '); 36 continue; 37 } 38 TRY(builder.try_append_code_point(code_point)); 39 } 40 41 // ...and then remove any leading and trailing ASCII whitespace from that string. 42 return String::from_utf8(builder.string_view().trim(Infra::ASCII_WHITESPACE)); 43} 44 45// https://infra.spec.whatwg.org/#code-unit-prefix 46bool is_code_unit_prefix(StringView potential_prefix, StringView input) 47{ 48 auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors(); 49 auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors(); 50 51 // 1. Let i be 0. 52 size_t i = 0; 53 54 // 2. While true: 55 while (true) { 56 // 1. If i is greater than or equal to potentialPrefix’s length, then return true. 57 if (i >= potential_prefix.length()) 58 return true; 59 60 // 2. If i is greater than or equal to input’s length, then return false. 61 if (i >= input.length()) 62 return false; 63 64 // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix. 65 auto potential_prefix_code_unit = Utf16View(potential_prefix_utf16).code_unit_at(i); 66 67 // 4. Let inputCodeUnit be the ith code unit of input. 68 auto input_code_unit = Utf16View(input_utf16).code_unit_at(i); 69 70 // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit. 71 if (potential_prefix_code_unit != input_code_unit) 72 return false; 73 74 // 6. Set i to i + 1. 75 ++i; 76 } 77} 78 79// https://infra.spec.whatwg.org/#scalar-value-string 80ErrorOr<String> convert_to_scalar_value_string(StringView string) 81{ 82 // To convert a string into a scalar value string, replace any surrogates with U+FFFD. 83 StringBuilder scalar_value_builder; 84 auto utf8_view = Utf8View { string }; 85 for (u32 code_point : utf8_view) { 86 if (is_unicode_surrogate(code_point)) 87 code_point = 0xFFFD; 88 TRY(scalar_value_builder.try_append(code_point)); 89 } 90 return scalar_value_builder.to_string(); 91} 92 93// https://infra.spec.whatwg.org/#ascii-lowercase 94ErrorOr<String> to_ascii_lowercase(StringView string) 95{ 96 // To ASCII lowercase a string, replace all ASCII upper alphas in the string with their 97 // corresponding code point in ASCII lower alpha. 98 StringBuilder string_builder; 99 auto utf8_view = Utf8View { string }; 100 for (u32 code_point : utf8_view) { 101 code_point = AK::to_ascii_lowercase(code_point); 102 TRY(string_builder.try_append(code_point)); 103 } 104 return string_builder.to_string(); 105} 106 107// https://infra.spec.whatwg.org/#ascii-uppercase 108ErrorOr<String> to_ascii_uppercase(StringView string) 109{ 110 // To ASCII uppercase a string, replace all ASCII lower alphas in the string with their 111 // corresponding code point in ASCII upper alpha. 112 StringBuilder string_builder; 113 auto utf8_view = Utf8View { string }; 114 for (u32 code_point : utf8_view) { 115 code_point = AK::to_ascii_uppercase(code_point); 116 TRY(string_builder.try_append(code_point)); 117 } 118 return string_builder.to_string(); 119} 120 121}