Serenity Operating System
at master 598 lines 19 kB view raw
1/* 2 * Copyright (c) 2018-2022, Andreas Kling <awesomekling@gmail.com> 3 * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/CharacterTypes.h> 9#include <AK/MemMem.h> 10#include <AK/Optional.h> 11#include <AK/String.h> 12#include <AK/StringBuilder.h> 13#include <AK/StringUtils.h> 14#include <AK/StringView.h> 15#include <AK/Vector.h> 16 17#ifdef KERNEL 18# include <Kernel/StdLib.h> 19#else 20# include <AK/DeprecatedString.h> 21# include <AK/FloatingPointStringConversions.h> 22# include <string.h> 23#endif 24 25namespace AK { 26 27namespace StringUtils { 28 29bool matches(StringView str, StringView mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans) 30{ 31 auto record_span = [&match_spans](size_t start, size_t length) { 32 if (match_spans) 33 match_spans->append({ start, length }); 34 }; 35 36 if (str.is_null() || mask.is_null()) 37 return str.is_null() && mask.is_null(); 38 39 if (mask == "*"sv) { 40 record_span(0, str.length()); 41 return true; 42 } 43 44 char const* string_ptr = str.characters_without_null_termination(); 45 char const* string_start = str.characters_without_null_termination(); 46 char const* string_end = string_ptr + str.length(); 47 char const* mask_ptr = mask.characters_without_null_termination(); 48 char const* mask_end = mask_ptr + mask.length(); 49 50 while (string_ptr < string_end && mask_ptr < mask_end) { 51 auto string_start_ptr = string_ptr; 52 switch (*mask_ptr) { 53 case '*': 54 if (mask_ptr == mask_end - 1) { 55 record_span(string_ptr - string_start, string_end - string_ptr); 56 return true; 57 } 58 while (string_ptr < string_end && !matches({ string_ptr, static_cast<size_t>(string_end - string_ptr) }, { mask_ptr + 1, static_cast<size_t>(mask_end - mask_ptr - 1) }, case_sensitivity)) 59 ++string_ptr; 60 record_span(string_start_ptr - string_start, string_ptr - string_start_ptr); 61 --string_ptr; 62 break; 63 case '?': 64 record_span(string_ptr - string_start, 1); 65 break; 66 case '\\': 67 // if backslash is last character in mask, just treat it as an exact match 68 // otherwise use it as escape for next character 69 if (mask_ptr + 1 < mask_end) 70 ++mask_ptr; 71 [[fallthrough]]; 72 default: 73 auto p = *mask_ptr; 74 auto ch = *string_ptr; 75 if (case_sensitivity == CaseSensitivity::CaseSensitive ? p != ch : to_ascii_lowercase(p) != to_ascii_lowercase(ch)) 76 return false; 77 break; 78 } 79 ++string_ptr; 80 ++mask_ptr; 81 } 82 83 if (string_ptr == string_end) { 84 // Allow ending '*' to contain nothing. 85 while (mask_ptr != mask_end && *mask_ptr == '*') { 86 record_span(string_ptr - string_start, 0); 87 ++mask_ptr; 88 } 89 } 90 91 return string_ptr == string_end && mask_ptr == mask_end; 92} 93 94template<typename T> 95Optional<T> convert_to_int(StringView str, TrimWhitespace trim_whitespace) 96{ 97 auto string = trim_whitespace == TrimWhitespace::Yes 98 ? str.trim_whitespace() 99 : str; 100 if (string.is_empty()) 101 return {}; 102 103 T sign = 1; 104 size_t i = 0; 105 auto const characters = string.characters_without_null_termination(); 106 107 if (characters[0] == '-' || characters[0] == '+') { 108 if (string.length() == 1) 109 return {}; 110 i++; 111 if (characters[0] == '-') 112 sign = -1; 113 } 114 115 T value = 0; 116 for (; i < string.length(); i++) { 117 if (characters[i] < '0' || characters[i] > '9') 118 return {}; 119 120 if (__builtin_mul_overflow(value, 10, &value)) 121 return {}; 122 123 if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value)) 124 return {}; 125 } 126 return value; 127} 128 129template Optional<i8> convert_to_int(StringView str, TrimWhitespace); 130template Optional<i16> convert_to_int(StringView str, TrimWhitespace); 131template Optional<i32> convert_to_int(StringView str, TrimWhitespace); 132template Optional<long> convert_to_int(StringView str, TrimWhitespace); 133template Optional<long long> convert_to_int(StringView str, TrimWhitespace); 134 135template<typename T> 136Optional<T> convert_to_uint(StringView str, TrimWhitespace trim_whitespace) 137{ 138 auto string = trim_whitespace == TrimWhitespace::Yes 139 ? str.trim_whitespace() 140 : str; 141 if (string.is_empty()) 142 return {}; 143 144 T value = 0; 145 auto const characters = string.characters_without_null_termination(); 146 147 for (size_t i = 0; i < string.length(); i++) { 148 if (characters[i] < '0' || characters[i] > '9') 149 return {}; 150 151 if (__builtin_mul_overflow(value, 10, &value)) 152 return {}; 153 154 if (__builtin_add_overflow(value, characters[i] - '0', &value)) 155 return {}; 156 } 157 return value; 158} 159 160template Optional<u8> convert_to_uint(StringView str, TrimWhitespace); 161template Optional<u16> convert_to_uint(StringView str, TrimWhitespace); 162template Optional<u32> convert_to_uint(StringView str, TrimWhitespace); 163template Optional<unsigned long> convert_to_uint(StringView str, TrimWhitespace); 164template Optional<unsigned long long> convert_to_uint(StringView str, TrimWhitespace); 165 166template<typename T> 167Optional<T> convert_to_uint_from_hex(StringView str, TrimWhitespace trim_whitespace) 168{ 169 auto string = trim_whitespace == TrimWhitespace::Yes 170 ? str.trim_whitespace() 171 : str; 172 if (string.is_empty()) 173 return {}; 174 175 T value = 0; 176 auto const count = string.length(); 177 const T upper_bound = NumericLimits<T>::max(); 178 179 for (size_t i = 0; i < count; i++) { 180 char digit = string[i]; 181 u8 digit_val; 182 if (value > (upper_bound >> 4)) 183 return {}; 184 185 if (digit >= '0' && digit <= '9') { 186 digit_val = digit - '0'; 187 } else if (digit >= 'a' && digit <= 'f') { 188 digit_val = 10 + (digit - 'a'); 189 } else if (digit >= 'A' && digit <= 'F') { 190 digit_val = 10 + (digit - 'A'); 191 } else { 192 return {}; 193 } 194 195 value = (value << 4) + digit_val; 196 } 197 return value; 198} 199 200template Optional<u8> convert_to_uint_from_hex(StringView str, TrimWhitespace); 201template Optional<u16> convert_to_uint_from_hex(StringView str, TrimWhitespace); 202template Optional<u32> convert_to_uint_from_hex(StringView str, TrimWhitespace); 203template Optional<u64> convert_to_uint_from_hex(StringView str, TrimWhitespace); 204 205template<typename T> 206Optional<T> convert_to_uint_from_octal(StringView str, TrimWhitespace trim_whitespace) 207{ 208 auto string = trim_whitespace == TrimWhitespace::Yes 209 ? str.trim_whitespace() 210 : str; 211 if (string.is_empty()) 212 return {}; 213 214 T value = 0; 215 auto const count = string.length(); 216 const T upper_bound = NumericLimits<T>::max(); 217 218 for (size_t i = 0; i < count; i++) { 219 char digit = string[i]; 220 u8 digit_val; 221 if (value > (upper_bound >> 3)) 222 return {}; 223 224 if (digit >= '0' && digit <= '7') { 225 digit_val = digit - '0'; 226 } else { 227 return {}; 228 } 229 230 value = (value << 3) + digit_val; 231 } 232 return value; 233} 234 235template Optional<u8> convert_to_uint_from_octal(StringView str, TrimWhitespace); 236template Optional<u16> convert_to_uint_from_octal(StringView str, TrimWhitespace); 237template Optional<u32> convert_to_uint_from_octal(StringView str, TrimWhitespace); 238template Optional<u64> convert_to_uint_from_octal(StringView str, TrimWhitespace); 239 240#ifndef KERNEL 241template<typename T> 242Optional<T> convert_to_floating_point(StringView str, TrimWhitespace trim_whitespace) 243{ 244 static_assert(IsSame<T, double> || IsSame<T, float>); 245 auto string = trim_whitespace == TrimWhitespace::Yes 246 ? str.trim_whitespace() 247 : str; 248 249 char const* start = string.characters_without_null_termination(); 250 return parse_floating_point_completely<T>(start, start + str.length()); 251} 252 253template Optional<double> convert_to_floating_point(StringView str, TrimWhitespace); 254template Optional<float> convert_to_floating_point(StringView str, TrimWhitespace); 255#endif 256 257bool equals_ignoring_ascii_case(StringView a, StringView b) 258{ 259 if (a.length() != b.length()) 260 return false; 261 for (size_t i = 0; i < a.length(); ++i) { 262 if (to_ascii_lowercase(a.characters_without_null_termination()[i]) != to_ascii_lowercase(b.characters_without_null_termination()[i])) 263 return false; 264 } 265 return true; 266} 267 268bool ends_with(StringView str, StringView end, CaseSensitivity case_sensitivity) 269{ 270 if (end.is_empty()) 271 return true; 272 if (str.is_empty()) 273 return false; 274 if (end.length() > str.length()) 275 return false; 276 277 if (case_sensitivity == CaseSensitivity::CaseSensitive) 278 return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length()); 279 280 auto str_chars = str.characters_without_null_termination(); 281 auto end_chars = end.characters_without_null_termination(); 282 283 size_t si = str.length() - end.length(); 284 for (size_t ei = 0; ei < end.length(); ++si, ++ei) { 285 if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(end_chars[ei])) 286 return false; 287 } 288 return true; 289} 290 291bool starts_with(StringView str, StringView start, CaseSensitivity case_sensitivity) 292{ 293 if (start.is_empty()) 294 return true; 295 if (str.is_empty()) 296 return false; 297 if (start.length() > str.length()) 298 return false; 299 if (str.characters_without_null_termination() == start.characters_without_null_termination()) 300 return true; 301 302 if (case_sensitivity == CaseSensitivity::CaseSensitive) 303 return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length()); 304 305 auto str_chars = str.characters_without_null_termination(); 306 auto start_chars = start.characters_without_null_termination(); 307 308 size_t si = 0; 309 for (size_t starti = 0; starti < start.length(); ++si, ++starti) { 310 if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(start_chars[starti])) 311 return false; 312 } 313 return true; 314} 315 316bool contains(StringView str, StringView needle, CaseSensitivity case_sensitivity) 317{ 318 if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length()) 319 return false; 320 if (needle.is_empty()) 321 return true; 322 auto str_chars = str.characters_without_null_termination(); 323 auto needle_chars = needle.characters_without_null_termination(); 324 if (case_sensitivity == CaseSensitivity::CaseSensitive) 325 return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr; 326 327 auto needle_first = to_ascii_lowercase(needle_chars[0]); 328 for (size_t si = 0; si < str.length(); si++) { 329 if (to_ascii_lowercase(str_chars[si]) != needle_first) 330 continue; 331 for (size_t ni = 0; si + ni < str.length(); ni++) { 332 if (to_ascii_lowercase(str_chars[si + ni]) != to_ascii_lowercase(needle_chars[ni])) { 333 if (ni > 0) 334 si += ni - 1; 335 break; 336 } 337 if (ni + 1 == needle.length()) 338 return true; 339 } 340 } 341 return false; 342} 343 344bool is_whitespace(StringView str) 345{ 346 return all_of(str, is_ascii_space); 347} 348 349StringView trim(StringView str, StringView characters, TrimMode mode) 350{ 351 size_t substring_start = 0; 352 size_t substring_length = str.length(); 353 354 if (mode == TrimMode::Left || mode == TrimMode::Both) { 355 for (size_t i = 0; i < str.length(); ++i) { 356 if (substring_length == 0) 357 return ""sv; 358 if (!characters.contains(str[i])) 359 break; 360 ++substring_start; 361 --substring_length; 362 } 363 } 364 365 if (mode == TrimMode::Right || mode == TrimMode::Both) { 366 for (size_t i = str.length(); i > 0; --i) { 367 if (substring_length == 0) 368 return ""sv; 369 if (!characters.contains(str[i - 1])) 370 break; 371 --substring_length; 372 } 373 } 374 375 return str.substring_view(substring_start, substring_length); 376} 377 378StringView trim_whitespace(StringView str, TrimMode mode) 379{ 380 return trim(str, " \n\t\v\f\r"sv, mode); 381} 382 383Optional<size_t> find(StringView haystack, char needle, size_t start) 384{ 385 if (start >= haystack.length()) 386 return {}; 387 for (size_t i = start; i < haystack.length(); ++i) { 388 if (haystack[i] == needle) 389 return i; 390 } 391 return {}; 392} 393 394Optional<size_t> find(StringView haystack, StringView needle, size_t start) 395{ 396 if (start > haystack.length()) 397 return {}; 398 auto index = AK::memmem_optional( 399 haystack.characters_without_null_termination() + start, haystack.length() - start, 400 needle.characters_without_null_termination(), needle.length()); 401 return index.has_value() ? (*index + start) : index; 402} 403 404Optional<size_t> find_last(StringView haystack, char needle) 405{ 406 for (size_t i = haystack.length(); i > 0; --i) { 407 if (haystack[i - 1] == needle) 408 return i - 1; 409 } 410 return {}; 411} 412 413Optional<size_t> find_last(StringView haystack, StringView needle) 414{ 415 for (size_t i = haystack.length(); i > 0; --i) { 416 auto value = StringUtils::find(haystack, needle, i - 1); 417 if (value.has_value()) 418 return value; 419 } 420 421 return {}; 422} 423 424Optional<size_t> find_last_not(StringView haystack, char needle) 425{ 426 for (size_t i = haystack.length(); i > 0; --i) { 427 if (haystack[i - 1] != needle) 428 return i - 1; 429 } 430 return {}; 431} 432 433Vector<size_t> find_all(StringView haystack, StringView needle) 434{ 435 Vector<size_t> positions; 436 size_t current_position = 0; 437 while (current_position <= haystack.length()) { 438 auto maybe_position = AK::memmem_optional( 439 haystack.characters_without_null_termination() + current_position, haystack.length() - current_position, 440 needle.characters_without_null_termination(), needle.length()); 441 if (!maybe_position.has_value()) 442 break; 443 positions.append(current_position + *maybe_position); 444 current_position += *maybe_position + 1; 445 } 446 return positions; 447} 448 449Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection direction) 450{ 451 if (haystack.is_empty() || needles.is_empty()) 452 return {}; 453 if (direction == SearchDirection::Forward) { 454 for (size_t i = 0; i < haystack.length(); ++i) { 455 if (needles.contains(haystack[i])) 456 return i; 457 } 458 } else if (direction == SearchDirection::Backward) { 459 for (size_t i = haystack.length(); i > 0; --i) { 460 if (needles.contains(haystack[i - 1])) 461 return i - 1; 462 } 463 } 464 return {}; 465} 466 467#ifndef KERNEL 468DeprecatedString to_snakecase(StringView str) 469{ 470 auto should_insert_underscore = [&](auto i, auto current_char) { 471 if (i == 0) 472 return false; 473 auto previous_ch = str[i - 1]; 474 if (is_ascii_lower_alpha(previous_ch) && is_ascii_upper_alpha(current_char)) 475 return true; 476 if (i >= str.length() - 1) 477 return false; 478 auto next_ch = str[i + 1]; 479 if (is_ascii_upper_alpha(current_char) && is_ascii_lower_alpha(next_ch)) 480 return true; 481 return false; 482 }; 483 484 StringBuilder builder; 485 for (size_t i = 0; i < str.length(); ++i) { 486 auto ch = str[i]; 487 if (should_insert_underscore(i, ch)) 488 builder.append('_'); 489 builder.append_as_lowercase(ch); 490 } 491 return builder.to_deprecated_string(); 492} 493 494DeprecatedString to_titlecase(StringView str) 495{ 496 StringBuilder builder; 497 bool next_is_upper = true; 498 499 for (auto ch : str) { 500 if (next_is_upper) 501 builder.append(to_ascii_uppercase(ch)); 502 else 503 builder.append(to_ascii_lowercase(ch)); 504 next_is_upper = ch == ' '; 505 } 506 507 return builder.to_deprecated_string(); 508} 509 510DeprecatedString invert_case(StringView str) 511{ 512 StringBuilder builder(str.length()); 513 514 for (auto ch : str) { 515 if (is_ascii_lower_alpha(ch)) 516 builder.append(to_ascii_uppercase(ch)); 517 else 518 builder.append(to_ascii_lowercase(ch)); 519 } 520 521 return builder.to_deprecated_string(); 522} 523 524DeprecatedString replace(StringView str, StringView needle, StringView replacement, ReplaceMode replace_mode) 525{ 526 if (str.is_empty()) 527 return str; 528 529 Vector<size_t> positions; 530 if (replace_mode == ReplaceMode::All) { 531 positions = str.find_all(needle); 532 if (!positions.size()) 533 return str; 534 } else { 535 auto pos = str.find(needle); 536 if (!pos.has_value()) 537 return str; 538 positions.append(pos.value()); 539 } 540 541 StringBuilder replaced_string; 542 size_t last_position = 0; 543 for (auto& position : positions) { 544 replaced_string.append(str.substring_view(last_position, position - last_position)); 545 replaced_string.append(replacement); 546 last_position = position + needle.length(); 547 } 548 replaced_string.append(str.substring_view(last_position, str.length() - last_position)); 549 return replaced_string.to_deprecated_string(); 550} 551 552ErrorOr<String> replace(String const& haystack, StringView needle, StringView replacement, ReplaceMode replace_mode) 553{ 554 if (haystack.is_empty()) 555 return haystack; 556 557 // FIXME: Propagate Vector allocation failures (or do this without putting positions in a vector) 558 Vector<size_t> positions; 559 if (replace_mode == ReplaceMode::All) { 560 positions = haystack.bytes_as_string_view().find_all(needle); 561 if (!positions.size()) 562 return haystack; 563 } else { 564 auto pos = haystack.bytes_as_string_view().find(needle); 565 if (!pos.has_value()) 566 return haystack; 567 positions.append(pos.value()); 568 } 569 570 StringBuilder replaced_string; 571 size_t last_position = 0; 572 for (auto& position : positions) { 573 replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, position - last_position)); 574 replaced_string.append(replacement); 575 last_position = position + needle.length(); 576 } 577 replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, haystack.bytes_as_string_view().length() - last_position)); 578 return replaced_string.to_string(); 579} 580#endif 581 582// TODO: Benchmark against KMP (AK/MemMem.h) and switch over if it's faster for short strings too 583size_t count(StringView str, StringView needle) 584{ 585 if (needle.is_empty()) 586 return str.length(); 587 588 size_t count = 0; 589 for (size_t i = 0; i < str.length() - needle.length() + 1; ++i) { 590 if (str.substring_view(i).starts_with(needle)) 591 count++; 592 } 593 return count; 594} 595 596} 597 598}