Serenity Operating System
1/*
2 * Copyright (c) 2018-2022, Andreas Kling <awesomekling@gmail.com>
3 * Copyright (c) 2020, Fei Wu <f.eiwu@yahoo.com>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/CharacterTypes.h>
9#include <AK/MemMem.h>
10#include <AK/Optional.h>
11#include <AK/String.h>
12#include <AK/StringBuilder.h>
13#include <AK/StringUtils.h>
14#include <AK/StringView.h>
15#include <AK/Vector.h>
16
17#ifdef KERNEL
18# include <Kernel/StdLib.h>
19#else
20# include <AK/DeprecatedString.h>
21# include <AK/FloatingPointStringConversions.h>
22# include <string.h>
23#endif
24
25namespace AK {
26
27namespace StringUtils {
28
29bool matches(StringView str, StringView mask, CaseSensitivity case_sensitivity, Vector<MaskSpan>* match_spans)
30{
31 auto record_span = [&match_spans](size_t start, size_t length) {
32 if (match_spans)
33 match_spans->append({ start, length });
34 };
35
36 if (str.is_null() || mask.is_null())
37 return str.is_null() && mask.is_null();
38
39 if (mask == "*"sv) {
40 record_span(0, str.length());
41 return true;
42 }
43
44 char const* string_ptr = str.characters_without_null_termination();
45 char const* string_start = str.characters_without_null_termination();
46 char const* string_end = string_ptr + str.length();
47 char const* mask_ptr = mask.characters_without_null_termination();
48 char const* mask_end = mask_ptr + mask.length();
49
50 while (string_ptr < string_end && mask_ptr < mask_end) {
51 auto string_start_ptr = string_ptr;
52 switch (*mask_ptr) {
53 case '*':
54 if (mask_ptr == mask_end - 1) {
55 record_span(string_ptr - string_start, string_end - string_ptr);
56 return true;
57 }
58 while (string_ptr < string_end && !matches({ string_ptr, static_cast<size_t>(string_end - string_ptr) }, { mask_ptr + 1, static_cast<size_t>(mask_end - mask_ptr - 1) }, case_sensitivity))
59 ++string_ptr;
60 record_span(string_start_ptr - string_start, string_ptr - string_start_ptr);
61 --string_ptr;
62 break;
63 case '?':
64 record_span(string_ptr - string_start, 1);
65 break;
66 case '\\':
67 // if backslash is last character in mask, just treat it as an exact match
68 // otherwise use it as escape for next character
69 if (mask_ptr + 1 < mask_end)
70 ++mask_ptr;
71 [[fallthrough]];
72 default:
73 auto p = *mask_ptr;
74 auto ch = *string_ptr;
75 if (case_sensitivity == CaseSensitivity::CaseSensitive ? p != ch : to_ascii_lowercase(p) != to_ascii_lowercase(ch))
76 return false;
77 break;
78 }
79 ++string_ptr;
80 ++mask_ptr;
81 }
82
83 if (string_ptr == string_end) {
84 // Allow ending '*' to contain nothing.
85 while (mask_ptr != mask_end && *mask_ptr == '*') {
86 record_span(string_ptr - string_start, 0);
87 ++mask_ptr;
88 }
89 }
90
91 return string_ptr == string_end && mask_ptr == mask_end;
92}
93
94template<typename T>
95Optional<T> convert_to_int(StringView str, TrimWhitespace trim_whitespace)
96{
97 auto string = trim_whitespace == TrimWhitespace::Yes
98 ? str.trim_whitespace()
99 : str;
100 if (string.is_empty())
101 return {};
102
103 T sign = 1;
104 size_t i = 0;
105 auto const characters = string.characters_without_null_termination();
106
107 if (characters[0] == '-' || characters[0] == '+') {
108 if (string.length() == 1)
109 return {};
110 i++;
111 if (characters[0] == '-')
112 sign = -1;
113 }
114
115 T value = 0;
116 for (; i < string.length(); i++) {
117 if (characters[i] < '0' || characters[i] > '9')
118 return {};
119
120 if (__builtin_mul_overflow(value, 10, &value))
121 return {};
122
123 if (__builtin_add_overflow(value, sign * (characters[i] - '0'), &value))
124 return {};
125 }
126 return value;
127}
128
129template Optional<i8> convert_to_int(StringView str, TrimWhitespace);
130template Optional<i16> convert_to_int(StringView str, TrimWhitespace);
131template Optional<i32> convert_to_int(StringView str, TrimWhitespace);
132template Optional<long> convert_to_int(StringView str, TrimWhitespace);
133template Optional<long long> convert_to_int(StringView str, TrimWhitespace);
134
135template<typename T>
136Optional<T> convert_to_uint(StringView str, TrimWhitespace trim_whitespace)
137{
138 auto string = trim_whitespace == TrimWhitespace::Yes
139 ? str.trim_whitespace()
140 : str;
141 if (string.is_empty())
142 return {};
143
144 T value = 0;
145 auto const characters = string.characters_without_null_termination();
146
147 for (size_t i = 0; i < string.length(); i++) {
148 if (characters[i] < '0' || characters[i] > '9')
149 return {};
150
151 if (__builtin_mul_overflow(value, 10, &value))
152 return {};
153
154 if (__builtin_add_overflow(value, characters[i] - '0', &value))
155 return {};
156 }
157 return value;
158}
159
160template Optional<u8> convert_to_uint(StringView str, TrimWhitespace);
161template Optional<u16> convert_to_uint(StringView str, TrimWhitespace);
162template Optional<u32> convert_to_uint(StringView str, TrimWhitespace);
163template Optional<unsigned long> convert_to_uint(StringView str, TrimWhitespace);
164template Optional<unsigned long long> convert_to_uint(StringView str, TrimWhitespace);
165
166template<typename T>
167Optional<T> convert_to_uint_from_hex(StringView str, TrimWhitespace trim_whitespace)
168{
169 auto string = trim_whitespace == TrimWhitespace::Yes
170 ? str.trim_whitespace()
171 : str;
172 if (string.is_empty())
173 return {};
174
175 T value = 0;
176 auto const count = string.length();
177 const T upper_bound = NumericLimits<T>::max();
178
179 for (size_t i = 0; i < count; i++) {
180 char digit = string[i];
181 u8 digit_val;
182 if (value > (upper_bound >> 4))
183 return {};
184
185 if (digit >= '0' && digit <= '9') {
186 digit_val = digit - '0';
187 } else if (digit >= 'a' && digit <= 'f') {
188 digit_val = 10 + (digit - 'a');
189 } else if (digit >= 'A' && digit <= 'F') {
190 digit_val = 10 + (digit - 'A');
191 } else {
192 return {};
193 }
194
195 value = (value << 4) + digit_val;
196 }
197 return value;
198}
199
200template Optional<u8> convert_to_uint_from_hex(StringView str, TrimWhitespace);
201template Optional<u16> convert_to_uint_from_hex(StringView str, TrimWhitespace);
202template Optional<u32> convert_to_uint_from_hex(StringView str, TrimWhitespace);
203template Optional<u64> convert_to_uint_from_hex(StringView str, TrimWhitespace);
204
205template<typename T>
206Optional<T> convert_to_uint_from_octal(StringView str, TrimWhitespace trim_whitespace)
207{
208 auto string = trim_whitespace == TrimWhitespace::Yes
209 ? str.trim_whitespace()
210 : str;
211 if (string.is_empty())
212 return {};
213
214 T value = 0;
215 auto const count = string.length();
216 const T upper_bound = NumericLimits<T>::max();
217
218 for (size_t i = 0; i < count; i++) {
219 char digit = string[i];
220 u8 digit_val;
221 if (value > (upper_bound >> 3))
222 return {};
223
224 if (digit >= '0' && digit <= '7') {
225 digit_val = digit - '0';
226 } else {
227 return {};
228 }
229
230 value = (value << 3) + digit_val;
231 }
232 return value;
233}
234
235template Optional<u8> convert_to_uint_from_octal(StringView str, TrimWhitespace);
236template Optional<u16> convert_to_uint_from_octal(StringView str, TrimWhitespace);
237template Optional<u32> convert_to_uint_from_octal(StringView str, TrimWhitespace);
238template Optional<u64> convert_to_uint_from_octal(StringView str, TrimWhitespace);
239
240#ifndef KERNEL
241template<typename T>
242Optional<T> convert_to_floating_point(StringView str, TrimWhitespace trim_whitespace)
243{
244 static_assert(IsSame<T, double> || IsSame<T, float>);
245 auto string = trim_whitespace == TrimWhitespace::Yes
246 ? str.trim_whitespace()
247 : str;
248
249 char const* start = string.characters_without_null_termination();
250 return parse_floating_point_completely<T>(start, start + str.length());
251}
252
253template Optional<double> convert_to_floating_point(StringView str, TrimWhitespace);
254template Optional<float> convert_to_floating_point(StringView str, TrimWhitespace);
255#endif
256
257bool equals_ignoring_ascii_case(StringView a, StringView b)
258{
259 if (a.length() != b.length())
260 return false;
261 for (size_t i = 0; i < a.length(); ++i) {
262 if (to_ascii_lowercase(a.characters_without_null_termination()[i]) != to_ascii_lowercase(b.characters_without_null_termination()[i]))
263 return false;
264 }
265 return true;
266}
267
268bool ends_with(StringView str, StringView end, CaseSensitivity case_sensitivity)
269{
270 if (end.is_empty())
271 return true;
272 if (str.is_empty())
273 return false;
274 if (end.length() > str.length())
275 return false;
276
277 if (case_sensitivity == CaseSensitivity::CaseSensitive)
278 return !memcmp(str.characters_without_null_termination() + (str.length() - end.length()), end.characters_without_null_termination(), end.length());
279
280 auto str_chars = str.characters_without_null_termination();
281 auto end_chars = end.characters_without_null_termination();
282
283 size_t si = str.length() - end.length();
284 for (size_t ei = 0; ei < end.length(); ++si, ++ei) {
285 if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(end_chars[ei]))
286 return false;
287 }
288 return true;
289}
290
291bool starts_with(StringView str, StringView start, CaseSensitivity case_sensitivity)
292{
293 if (start.is_empty())
294 return true;
295 if (str.is_empty())
296 return false;
297 if (start.length() > str.length())
298 return false;
299 if (str.characters_without_null_termination() == start.characters_without_null_termination())
300 return true;
301
302 if (case_sensitivity == CaseSensitivity::CaseSensitive)
303 return !memcmp(str.characters_without_null_termination(), start.characters_without_null_termination(), start.length());
304
305 auto str_chars = str.characters_without_null_termination();
306 auto start_chars = start.characters_without_null_termination();
307
308 size_t si = 0;
309 for (size_t starti = 0; starti < start.length(); ++si, ++starti) {
310 if (to_ascii_lowercase(str_chars[si]) != to_ascii_lowercase(start_chars[starti]))
311 return false;
312 }
313 return true;
314}
315
316bool contains(StringView str, StringView needle, CaseSensitivity case_sensitivity)
317{
318 if (str.is_null() || needle.is_null() || str.is_empty() || needle.length() > str.length())
319 return false;
320 if (needle.is_empty())
321 return true;
322 auto str_chars = str.characters_without_null_termination();
323 auto needle_chars = needle.characters_without_null_termination();
324 if (case_sensitivity == CaseSensitivity::CaseSensitive)
325 return memmem(str_chars, str.length(), needle_chars, needle.length()) != nullptr;
326
327 auto needle_first = to_ascii_lowercase(needle_chars[0]);
328 for (size_t si = 0; si < str.length(); si++) {
329 if (to_ascii_lowercase(str_chars[si]) != needle_first)
330 continue;
331 for (size_t ni = 0; si + ni < str.length(); ni++) {
332 if (to_ascii_lowercase(str_chars[si + ni]) != to_ascii_lowercase(needle_chars[ni])) {
333 if (ni > 0)
334 si += ni - 1;
335 break;
336 }
337 if (ni + 1 == needle.length())
338 return true;
339 }
340 }
341 return false;
342}
343
344bool is_whitespace(StringView str)
345{
346 return all_of(str, is_ascii_space);
347}
348
349StringView trim(StringView str, StringView characters, TrimMode mode)
350{
351 size_t substring_start = 0;
352 size_t substring_length = str.length();
353
354 if (mode == TrimMode::Left || mode == TrimMode::Both) {
355 for (size_t i = 0; i < str.length(); ++i) {
356 if (substring_length == 0)
357 return ""sv;
358 if (!characters.contains(str[i]))
359 break;
360 ++substring_start;
361 --substring_length;
362 }
363 }
364
365 if (mode == TrimMode::Right || mode == TrimMode::Both) {
366 for (size_t i = str.length(); i > 0; --i) {
367 if (substring_length == 0)
368 return ""sv;
369 if (!characters.contains(str[i - 1]))
370 break;
371 --substring_length;
372 }
373 }
374
375 return str.substring_view(substring_start, substring_length);
376}
377
378StringView trim_whitespace(StringView str, TrimMode mode)
379{
380 return trim(str, " \n\t\v\f\r"sv, mode);
381}
382
383Optional<size_t> find(StringView haystack, char needle, size_t start)
384{
385 if (start >= haystack.length())
386 return {};
387 for (size_t i = start; i < haystack.length(); ++i) {
388 if (haystack[i] == needle)
389 return i;
390 }
391 return {};
392}
393
394Optional<size_t> find(StringView haystack, StringView needle, size_t start)
395{
396 if (start > haystack.length())
397 return {};
398 auto index = AK::memmem_optional(
399 haystack.characters_without_null_termination() + start, haystack.length() - start,
400 needle.characters_without_null_termination(), needle.length());
401 return index.has_value() ? (*index + start) : index;
402}
403
404Optional<size_t> find_last(StringView haystack, char needle)
405{
406 for (size_t i = haystack.length(); i > 0; --i) {
407 if (haystack[i - 1] == needle)
408 return i - 1;
409 }
410 return {};
411}
412
413Optional<size_t> find_last(StringView haystack, StringView needle)
414{
415 for (size_t i = haystack.length(); i > 0; --i) {
416 auto value = StringUtils::find(haystack, needle, i - 1);
417 if (value.has_value())
418 return value;
419 }
420
421 return {};
422}
423
424Optional<size_t> find_last_not(StringView haystack, char needle)
425{
426 for (size_t i = haystack.length(); i > 0; --i) {
427 if (haystack[i - 1] != needle)
428 return i - 1;
429 }
430 return {};
431}
432
433Vector<size_t> find_all(StringView haystack, StringView needle)
434{
435 Vector<size_t> positions;
436 size_t current_position = 0;
437 while (current_position <= haystack.length()) {
438 auto maybe_position = AK::memmem_optional(
439 haystack.characters_without_null_termination() + current_position, haystack.length() - current_position,
440 needle.characters_without_null_termination(), needle.length());
441 if (!maybe_position.has_value())
442 break;
443 positions.append(current_position + *maybe_position);
444 current_position += *maybe_position + 1;
445 }
446 return positions;
447}
448
449Optional<size_t> find_any_of(StringView haystack, StringView needles, SearchDirection direction)
450{
451 if (haystack.is_empty() || needles.is_empty())
452 return {};
453 if (direction == SearchDirection::Forward) {
454 for (size_t i = 0; i < haystack.length(); ++i) {
455 if (needles.contains(haystack[i]))
456 return i;
457 }
458 } else if (direction == SearchDirection::Backward) {
459 for (size_t i = haystack.length(); i > 0; --i) {
460 if (needles.contains(haystack[i - 1]))
461 return i - 1;
462 }
463 }
464 return {};
465}
466
467#ifndef KERNEL
468DeprecatedString to_snakecase(StringView str)
469{
470 auto should_insert_underscore = [&](auto i, auto current_char) {
471 if (i == 0)
472 return false;
473 auto previous_ch = str[i - 1];
474 if (is_ascii_lower_alpha(previous_ch) && is_ascii_upper_alpha(current_char))
475 return true;
476 if (i >= str.length() - 1)
477 return false;
478 auto next_ch = str[i + 1];
479 if (is_ascii_upper_alpha(current_char) && is_ascii_lower_alpha(next_ch))
480 return true;
481 return false;
482 };
483
484 StringBuilder builder;
485 for (size_t i = 0; i < str.length(); ++i) {
486 auto ch = str[i];
487 if (should_insert_underscore(i, ch))
488 builder.append('_');
489 builder.append_as_lowercase(ch);
490 }
491 return builder.to_deprecated_string();
492}
493
494DeprecatedString to_titlecase(StringView str)
495{
496 StringBuilder builder;
497 bool next_is_upper = true;
498
499 for (auto ch : str) {
500 if (next_is_upper)
501 builder.append(to_ascii_uppercase(ch));
502 else
503 builder.append(to_ascii_lowercase(ch));
504 next_is_upper = ch == ' ';
505 }
506
507 return builder.to_deprecated_string();
508}
509
510DeprecatedString invert_case(StringView str)
511{
512 StringBuilder builder(str.length());
513
514 for (auto ch : str) {
515 if (is_ascii_lower_alpha(ch))
516 builder.append(to_ascii_uppercase(ch));
517 else
518 builder.append(to_ascii_lowercase(ch));
519 }
520
521 return builder.to_deprecated_string();
522}
523
524DeprecatedString replace(StringView str, StringView needle, StringView replacement, ReplaceMode replace_mode)
525{
526 if (str.is_empty())
527 return str;
528
529 Vector<size_t> positions;
530 if (replace_mode == ReplaceMode::All) {
531 positions = str.find_all(needle);
532 if (!positions.size())
533 return str;
534 } else {
535 auto pos = str.find(needle);
536 if (!pos.has_value())
537 return str;
538 positions.append(pos.value());
539 }
540
541 StringBuilder replaced_string;
542 size_t last_position = 0;
543 for (auto& position : positions) {
544 replaced_string.append(str.substring_view(last_position, position - last_position));
545 replaced_string.append(replacement);
546 last_position = position + needle.length();
547 }
548 replaced_string.append(str.substring_view(last_position, str.length() - last_position));
549 return replaced_string.to_deprecated_string();
550}
551
552ErrorOr<String> replace(String const& haystack, StringView needle, StringView replacement, ReplaceMode replace_mode)
553{
554 if (haystack.is_empty())
555 return haystack;
556
557 // FIXME: Propagate Vector allocation failures (or do this without putting positions in a vector)
558 Vector<size_t> positions;
559 if (replace_mode == ReplaceMode::All) {
560 positions = haystack.bytes_as_string_view().find_all(needle);
561 if (!positions.size())
562 return haystack;
563 } else {
564 auto pos = haystack.bytes_as_string_view().find(needle);
565 if (!pos.has_value())
566 return haystack;
567 positions.append(pos.value());
568 }
569
570 StringBuilder replaced_string;
571 size_t last_position = 0;
572 for (auto& position : positions) {
573 replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, position - last_position));
574 replaced_string.append(replacement);
575 last_position = position + needle.length();
576 }
577 replaced_string.append(haystack.bytes_as_string_view().substring_view(last_position, haystack.bytes_as_string_view().length() - last_position));
578 return replaced_string.to_string();
579}
580#endif
581
582// TODO: Benchmark against KMP (AK/MemMem.h) and switch over if it's faster for short strings too
583size_t count(StringView str, StringView needle)
584{
585 if (needle.is_empty())
586 return str.length();
587
588 size_t count = 0;
589 for (size_t i = 0; i < str.length() - needle.length() + 1; ++i) {
590 if (str.substring_view(i).starts_with(needle))
591 count++;
592 }
593 return count;
594}
595
596}
597
598}