Serenity Operating System
1/*
2 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#pragma once
8
9#include <AK/Assertions.h>
10#include <AK/Checked.h>
11#include <AK/EnumBits.h>
12#include <AK/Forward.h>
13#include <AK/Optional.h>
14#include <AK/Span.h>
15#include <AK/StdLibExtras.h>
16#include <AK/StringHash.h>
17#include <AK/StringUtils.h>
18
19namespace AK {
20
21class StringView {
22public:
23 ALWAYS_INLINE constexpr StringView() = default;
24 ALWAYS_INLINE constexpr StringView(char const* characters, size_t length)
25 : m_characters(characters)
26 , m_length(length)
27 {
28 if (!is_constant_evaluated())
29 VERIFY(!Checked<uintptr_t>::addition_would_overflow(reinterpret_cast<uintptr_t>(characters), length));
30 }
31 ALWAYS_INLINE StringView(unsigned char const* characters, size_t length)
32 : m_characters(reinterpret_cast<char const*>(characters))
33 , m_length(length)
34 {
35 VERIFY(!Checked<uintptr_t>::addition_would_overflow(reinterpret_cast<uintptr_t>(characters), length));
36 }
37 ALWAYS_INLINE StringView(ReadonlyBytes bytes)
38 : m_characters(reinterpret_cast<char const*>(bytes.data()))
39 , m_length(bytes.size())
40 {
41 }
42
43 // Note: This is here for Jakt.
44 ALWAYS_INLINE static StringView from_string_literal(StringView string)
45 {
46 return string;
47 }
48
49 StringView(ByteBuffer const&);
50#ifndef KERNEL
51 StringView(String const&);
52 StringView(FlyString const&);
53 StringView(DeprecatedString const&);
54 StringView(DeprecatedFlyString const&);
55#endif
56
57 explicit StringView(ByteBuffer&&) = delete;
58#ifndef KERNEL
59 explicit StringView(String&&) = delete;
60 explicit StringView(FlyString&&) = delete;
61 explicit StringView(DeprecatedString&&) = delete;
62 explicit StringView(DeprecatedFlyString&&) = delete;
63#endif
64
65 template<OneOf<String, FlyString, DeprecatedString, DeprecatedFlyString, ByteBuffer> StringType>
66 StringView& operator=(StringType&&) = delete;
67
68 [[nodiscard]] constexpr bool is_null() const
69 {
70 return m_characters == nullptr;
71 }
72 [[nodiscard]] constexpr bool is_empty() const { return m_length == 0; }
73
74 [[nodiscard]] constexpr char const* characters_without_null_termination() const { return m_characters; }
75 [[nodiscard]] constexpr size_t length() const { return m_length; }
76
77 [[nodiscard]] ReadonlyBytes bytes() const { return { m_characters, m_length }; }
78
79 constexpr char const& operator[](size_t index) const
80 {
81 if (!is_constant_evaluated())
82 VERIFY(index < m_length);
83 return m_characters[index];
84 }
85
86 using ConstIterator = SimpleIterator<const StringView, char const>;
87
88 [[nodiscard]] constexpr ConstIterator begin() const { return ConstIterator::begin(*this); }
89 [[nodiscard]] constexpr ConstIterator end() const { return ConstIterator::end(*this); }
90
91 [[nodiscard]] constexpr unsigned hash() const
92 {
93 if (is_empty())
94 return 0;
95 return string_hash(characters_without_null_termination(), length());
96 }
97
98 [[nodiscard]] bool starts_with(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
99 [[nodiscard]] bool ends_with(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
100 [[nodiscard]] bool starts_with(char) const;
101 [[nodiscard]] bool ends_with(char) const;
102 [[nodiscard]] bool matches(StringView mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
103 [[nodiscard]] bool matches(StringView mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
104 [[nodiscard]] bool contains(char) const;
105 [[nodiscard]] bool contains(u32) const;
106 [[nodiscard]] bool contains(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
107 [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
108
109 [[nodiscard]] StringView trim(StringView characters, TrimMode mode = TrimMode::Both) const { return StringUtils::trim(*this, characters, mode); }
110 [[nodiscard]] StringView trim_whitespace(TrimMode mode = TrimMode::Both) const { return StringUtils::trim_whitespace(*this, mode); }
111
112#ifndef KERNEL
113 [[nodiscard]] DeprecatedString to_lowercase_string() const;
114 [[nodiscard]] DeprecatedString to_uppercase_string() const;
115 [[nodiscard]] DeprecatedString to_titlecase_string() const;
116#endif
117
118 [[nodiscard]] Optional<size_t> find(char needle, size_t start = 0) const
119 {
120 return StringUtils::find(*this, needle, start);
121 }
122 [[nodiscard]] Optional<size_t> find(StringView needle, size_t start = 0) const { return StringUtils::find(*this, needle, start); }
123 [[nodiscard]] Optional<size_t> find_last(char needle) const { return StringUtils::find_last(*this, needle); }
124 [[nodiscard]] Optional<size_t> find_last(StringView needle) const { return StringUtils::find_last(*this, needle); }
125 [[nodiscard]] Optional<size_t> find_last_not(char needle) const { return StringUtils::find_last_not(*this, needle); }
126
127 [[nodiscard]] Vector<size_t> find_all(StringView needle) const;
128
129 using SearchDirection = StringUtils::SearchDirection;
130 [[nodiscard]] Optional<size_t> find_any_of(StringView needles, SearchDirection direction = SearchDirection::Forward) const { return StringUtils::find_any_of(*this, needles, direction); }
131
132 [[nodiscard]] constexpr StringView substring_view(size_t start, size_t length) const
133 {
134 if (!is_constant_evaluated())
135 VERIFY(start + length <= m_length);
136 return { m_characters + start, length };
137 }
138
139 [[nodiscard]] constexpr StringView substring_view(size_t start) const
140 {
141 if (!is_constant_evaluated())
142 VERIFY(start <= length());
143 return substring_view(start, length() - start);
144 }
145
146 [[nodiscard]] Vector<StringView> split_view(char, SplitBehavior = SplitBehavior::Nothing) const;
147 [[nodiscard]] Vector<StringView> split_view(StringView, SplitBehavior = SplitBehavior::Nothing) const;
148
149 [[nodiscard]] Vector<StringView> split_view_if(Function<bool(char)> const& predicate, SplitBehavior = SplitBehavior::Nothing) const;
150
151 [[nodiscard]] StringView find_last_split_view(char separator) const
152 {
153 auto begin = find_last(separator);
154 if (!begin.has_value())
155 return *this;
156 return substring_view(begin.release_value() + 1);
157 }
158
159 [[nodiscard]] StringView find_first_split_view(char separator) const
160 {
161 auto needle_begin = find(separator);
162 if (!needle_begin.has_value())
163 return *this;
164 return substring_view(0, needle_begin.release_value());
165 }
166
167 template<typename Callback>
168 auto for_each_split_view(char separator, SplitBehavior split_behavior, Callback callback) const
169 {
170 StringView seperator_view { &separator, 1 };
171 return for_each_split_view(seperator_view, split_behavior, callback);
172 }
173
174 template<typename Callback>
175 auto for_each_split_view(StringView separator, SplitBehavior split_behavior, Callback callback) const
176 {
177 VERIFY(!separator.is_empty());
178 // FIXME: This can't go in the template header since declval won't allow the incomplete StringView type.
179 using CallbackReturn = decltype(declval<Callback>()(StringView {}));
180 constexpr auto ReturnsErrorOr = IsSpecializationOf<CallbackReturn, ErrorOr>;
181 using ReturnType = Conditional<ReturnsErrorOr, ErrorOr<void>, void>;
182 return [&]() -> ReturnType {
183 if (is_empty())
184 return ReturnType();
185
186 StringView view { *this };
187 auto maybe_separator_index = find(separator);
188 bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
189 bool keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator);
190 while (maybe_separator_index.has_value()) {
191 auto separator_index = maybe_separator_index.value();
192 auto part_with_separator = view.substring_view(0, separator_index + separator.length());
193 if (keep_empty || separator_index > 0) {
194 auto part = part_with_separator;
195 if (!keep_separator)
196 part = part_with_separator.substring_view(0, separator_index);
197 if constexpr (ReturnsErrorOr)
198 TRY(callback(part));
199 else
200 callback(part);
201 }
202 view = view.substring_view_starting_after_substring(part_with_separator);
203 maybe_separator_index = view.find(separator);
204 }
205 if (keep_empty || !view.is_empty()) {
206 if constexpr (ReturnsErrorOr)
207 TRY(callback(view));
208 else
209 callback(view);
210 }
211
212 return ReturnType();
213 }();
214 }
215
216 // Create a Vector of StringViews split by line endings. As of CommonMark
217 // 0.29, the spec defines a line ending as "a newline (U+000A), a carriage
218 // return (U+000D) not followed by a newline, or a carriage return and a
219 // following newline.".
220 [[nodiscard]] Vector<StringView> lines(bool consider_cr = true) const;
221
222 template<typename T = int>
223 Optional<T> to_int() const;
224 template<typename T = unsigned>
225 Optional<T> to_uint() const;
226#ifndef KERNEL
227 Optional<double> to_double(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const;
228 Optional<float> to_float(TrimWhitespace trim_whitespace = TrimWhitespace::Yes) const;
229#endif
230
231 // Create a new substring view of this string view, starting either at the beginning of
232 // the given substring view, or after its end, and continuing until the end of this string
233 // view (that is, for the remaining part of its length). For example,
234 //
235 // StringView str { "foobar" };
236 // StringView substr = str.substring_view(1, 2); // "oo"
237 // StringView substr_from = str.substring_view_starting_from_substring(subst); // "oobar"
238 // StringView substr_after = str.substring_view_starting_after_substring(subst); // "bar"
239 //
240 // Note that this only works if the string view passed as an argument is indeed a substring
241 // view of this string view, such as one created by substring_view() and split_view(). It
242 // does not work for arbitrary strings; for example declaring substr in the example above as
243 //
244 // StringView substr { "oo" };
245 //
246 // would not work.
247 [[nodiscard]] StringView substring_view_starting_from_substring(StringView substring) const;
248 [[nodiscard]] StringView substring_view_starting_after_substring(StringView substring) const;
249
250 [[nodiscard]] bool copy_characters_to_buffer(char* buffer, size_t buffer_size) const;
251
252 constexpr bool operator==(char const* cstring) const
253 {
254 if (is_null())
255 return cstring == nullptr;
256 if (!cstring)
257 return false;
258 // NOTE: `m_characters` is not guaranteed to be null-terminated, but `cstring` is.
259 char const* cp = cstring;
260 for (size_t i = 0; i < m_length; ++i) {
261 if (*cp == '\0')
262 return false;
263 if (m_characters[i] != *(cp++))
264 return false;
265 }
266 return *cp == '\0';
267 }
268
269 constexpr bool operator==(char const c) const
270 {
271 return m_length == 1 && *m_characters == c;
272 }
273
274#ifndef KERNEL
275 bool operator==(DeprecatedString const&) const;
276#endif
277
278 [[nodiscard]] constexpr int compare(StringView other) const
279 {
280 if (m_characters == nullptr)
281 return other.m_characters ? -1 : 0;
282
283 if (other.m_characters == nullptr)
284 return 1;
285
286 size_t rlen = min(m_length, other.m_length);
287 int c = __builtin_memcmp(m_characters, other.m_characters, rlen);
288 if (c == 0) {
289 if (length() < other.length())
290 return -1;
291 if (length() == other.length())
292 return 0;
293 return 1;
294 }
295 return c;
296 }
297
298 constexpr bool operator==(StringView other) const
299 {
300 return length() == other.length() && compare(other) == 0;
301 }
302
303 constexpr bool operator!=(StringView other) const
304 {
305 return length() != other.length() || compare(other) != 0;
306 }
307
308 constexpr bool operator<(StringView other) const { return compare(other) < 0; }
309
310 constexpr bool operator<=(StringView other) const { return compare(other) <= 0; }
311
312 constexpr bool operator>(StringView other) const { return compare(other) > 0; }
313
314 constexpr bool operator>=(StringView other) const { return compare(other) >= 0; }
315
316#ifndef KERNEL
317 [[nodiscard]] DeprecatedString to_deprecated_string() const;
318#endif
319
320 [[nodiscard]] bool is_whitespace() const
321 {
322 return StringUtils::is_whitespace(*this);
323 }
324
325#ifndef KERNEL
326 [[nodiscard]] DeprecatedString replace(StringView needle, StringView replacement, ReplaceMode) const;
327#endif
328 [[nodiscard]] size_t count(StringView needle) const
329 {
330 return StringUtils::count(*this, needle);
331 }
332
333 template<typename... Ts>
334 [[nodiscard]] ALWAYS_INLINE constexpr bool is_one_of(Ts&&... strings) const
335 {
336 return (... || this->operator==(forward<Ts>(strings)));
337 }
338
339 template<typename... Ts>
340 [[nodiscard]] ALWAYS_INLINE constexpr bool is_one_of_ignoring_ascii_case(Ts&&... strings) const
341 {
342 return (... ||
343 [this, &strings]() -> bool {
344 if constexpr (requires(Ts a) { a.view()->StringView; })
345 return this->equals_ignoring_ascii_case(forward<Ts>(strings.view()));
346 else
347 return this->equals_ignoring_ascii_case(forward<Ts>(strings));
348 }());
349 }
350
351private:
352 friend class DeprecatedString;
353 char const* m_characters { nullptr };
354 size_t m_length { 0 };
355};
356
357template<>
358struct Traits<StringView> : public GenericTraits<StringView> {
359 static unsigned hash(StringView s) { return s.hash(); }
360};
361
362// FIXME: Rename this to indicate that it's about ASCII-only case insensitivity.
363struct CaseInsensitiveStringViewTraits : public Traits<StringView> {
364 static unsigned hash(StringView s)
365 {
366 if (s.is_empty())
367 return 0;
368 return case_insensitive_string_hash(s.characters_without_null_termination(), s.length());
369 }
370 static bool equals(StringView const& a, StringView const& b) { return a.equals_ignoring_ascii_case(b); }
371};
372
373}
374
// Selects the specifier used for the `sv` literal operator: `constexpr` when AK_COMPILER_CLANG
// is defined, `consteval` otherwise.
// FIXME: Remove this when clang fully supports consteval (specifically in the context of default parameter initialization).
//        See: https://stackoverflow.com/questions/68789984/immediate-function-as-default-function-argument-initializer-in-clang
#ifdef AK_COMPILER_CLANG
#    define AK_STRING_VIEW_LITERAL_CONSTEVAL constexpr
#else
#    define AK_STRING_VIEW_LITERAL_CONSTEVAL consteval
#endif
382
383[[nodiscard]] ALWAYS_INLINE AK_STRING_VIEW_LITERAL_CONSTEVAL AK::StringView operator"" sv(char const* cstring, size_t length)
384{
385 return AK::StringView(cstring, length);
386}
387
// When USING_AK_GLOBALLY evaluates to true, make these AK names usable at file scope
// (outside namespace AK) without qualification.
#if USING_AK_GLOBALLY
using AK::CaseInsensitiveStringViewTraits;
using AK::StringView;
#endif