Serenity Operating System
1/*
2 * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/CharacterTypes.h>
8#include <AK/Utf32View.h>
9#include <AK/Utf8View.h>
10#include <LibUnicode/CharacterTypes.h>
11#include <LibUnicode/Emoji.h>
12
13#if ENABLE_UNICODE_DATA
14# include <LibUnicode/UnicodeData.h>
15#endif
16
17namespace Unicode {
18
19Optional<Emoji> __attribute__((weak)) find_emoji_for_code_points(ReadonlySpan<u32>) { return {}; }
20
21#if ENABLE_UNICODE_DATA
22
23// https://unicode.org/reports/tr51/#def_emoji_core_sequence
24static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32> const& next_code_point, SequenceType type)
25{
26 // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence
27
28 static constexpr auto emoji_presentation_selector = 0xFE0Fu;
29 static constexpr auto combining_enclosing_keycap = 0x20E3u;
30 static constexpr auto zero_width_joiner = 0x200Du;
31
32 // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
33 // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
34 if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*')
35 return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap;
36
37 // A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence.
38 if (is_ascii(code_point))
39 return false;
40
41 // https://unicode.org/reports/tr51/#def_emoji_character
42 switch (type) {
43 case SequenceType::Any:
44 if (code_point_has_property(code_point, Property::Emoji))
45 return true;
46 break;
47 case SequenceType::EmojiPresentation:
48 if (code_point_has_property(code_point, Property::Emoji_Presentation))
49 return true;
50 if (next_code_point == zero_width_joiner && code_point_has_property(code_point, Property::Emoji))
51 return true;
52 break;
53 }
54
55 // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
56 // emoji_presentation_sequence := emoji_character emoji_presentation_selector
57 if (next_code_point == emoji_presentation_selector)
58 return true;
59
60 // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
61 // emoji_modifier_sequence := emoji_modifier_base emoji_modifier
62 if (code_point_has_property(code_point, Property::Emoji_Modifier_Base))
63 return true;
64
65 // https://unicode.org/reports/tr51/#def_emoji_flag_sequence
66 // emoji_flag_sequence := regional_indicator regional_indicator
67 if (code_point_has_property(code_point, Property::Regional_Indicator))
68 return true;
69
70 return false;
71}
72
73static bool could_be_start_of_serenity_emoji(u32 code_point)
74{
75 // We use Supplementary Private Use Area-B for custom Serenity emoji, starting at U+10CD00.
76 static constexpr auto first_custom_serenity_emoji_code_point = 0x10CD00u;
77
78 return code_point >= first_custom_serenity_emoji_code_point;
79}
80
81#endif
82
83// https://unicode.org/reports/tr51/#def_emoji_sequence
84template<typename CodePointIterator>
85static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [[maybe_unused]] SequenceType type)
86{
87 // emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence
88
89 if (it.done())
90 return false;
91
92#if ENABLE_UNICODE_DATA
93 // The purpose of this method is to quickly filter out code points that cannot be the start of
94 // an emoji. The emoji_core_sequence definition alone captures the start of all possible
95 // emoji_zwj_sequence and emoji_tag_sequence emojis, because:
96 //
97 // * emoji_zwj_sequence must begin with emoji_zwj_element, which is:
98 // emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence
99 //
100 // * emoji_tag_sequence must begin with tag_base, which is:
101 // tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence
102 // Note that this is a subset of emoji_core_sequence.
103 auto code_point = *it;
104 auto next_code_point = it.peek(1);
105
106 if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type))
107 return true;
108 if (could_be_start_of_serenity_emoji(code_point))
109 return true;
110 return false;
111#else
112 return true;
113#endif
114}
115
116bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type)
117{
118 return could_be_start_of_emoji_sequence_impl(it, type);
119}
120
121bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type)
122{
123 return could_be_start_of_emoji_sequence_impl(it, type);
124}
125
126}