Serenity Operating System
at master 126 lines 4.7 kB view raw
1/* 2 * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/CharacterTypes.h> 8#include <AK/Utf32View.h> 9#include <AK/Utf8View.h> 10#include <LibUnicode/CharacterTypes.h> 11#include <LibUnicode/Emoji.h> 12 13#if ENABLE_UNICODE_DATA 14# include <LibUnicode/UnicodeData.h> 15#endif 16 17namespace Unicode { 18 19Optional<Emoji> __attribute__((weak)) find_emoji_for_code_points(ReadonlySpan<u32>) { return {}; } 20 21#if ENABLE_UNICODE_DATA 22 23// https://unicode.org/reports/tr51/#def_emoji_core_sequence 24static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional<u32> const& next_code_point, SequenceType type) 25{ 26 // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence 27 28 static constexpr auto emoji_presentation_selector = 0xFE0Fu; 29 static constexpr auto combining_enclosing_keycap = 0x20E3u; 30 static constexpr auto zero_width_joiner = 0x200Du; 31 32 // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence 33 // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3} 34 if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*') 35 return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap; 36 37 // A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence. 38 if (is_ascii(code_point)) 39 return false; 40 41 // https://unicode.org/reports/tr51/#def_emoji_character 42 switch (type) { 43 case SequenceType::Any: 44 if (code_point_has_property(code_point, Property::Emoji)) 45 return true; 46 break; 47 case SequenceType::EmojiPresentation: 48 if (code_point_has_property(code_point, Property::Emoji_Presentation)) 49 return true; 50 if (next_code_point == zero_width_joiner && code_point_has_property(code_point, Property::Emoji)) 51 return true; 52 break; 53 } 54 55 // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence 56 // emoji_presentation_sequence := emoji_character emoji_presentation_selector 57 if (next_code_point == emoji_presentation_selector) 58 return true; 59 60 // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence 61 // emoji_modifier_sequence := emoji_modifier_base emoji_modifier 62 if (code_point_has_property(code_point, Property::Emoji_Modifier_Base)) 63 return true; 64 65 // https://unicode.org/reports/tr51/#def_emoji_flag_sequence 66 // emoji_flag_sequence := regional_indicator regional_indicator 67 if (code_point_has_property(code_point, Property::Regional_Indicator)) 68 return true; 69 70 return false; 71} 72 73static bool could_be_start_of_serenity_emoji(u32 code_point) 74{ 75 // We use Supplementary Private Use Area-B for custom Serenity emoji, starting at U+10CD00. 76 static constexpr auto first_custom_serenity_emoji_code_point = 0x10CD00u; 77 78 return code_point >= first_custom_serenity_emoji_code_point; 79} 80 81#endif 82 83// https://unicode.org/reports/tr51/#def_emoji_sequence 84template<typename CodePointIterator> 85static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, [[maybe_unused]] SequenceType type) 86{ 87 // emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence 88 89 if (it.done()) 90 return false; 91 92#if ENABLE_UNICODE_DATA 93 // The purpose of this method is to quickly filter out code points that cannot be the start of 94 // an emoji. The emoji_core_sequence definition alone captures the start of all possible 95 // emoji_zwj_sequence and emoji_tag_sequence emojis, because: 96 // 97 // * emoji_zwj_sequence must begin with emoji_zwj_element, which is: 98 // emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence 99 // 100 // * emoji_tag_sequence must begin with tag_base, which is: 101 // tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence 102 // Note that this is a subset of emoji_core_sequence. 103 auto code_point = *it; 104 auto next_code_point = it.peek(1); 105 106 if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type)) 107 return true; 108 if (could_be_start_of_serenity_emoji(code_point)) 109 return true; 110 return false; 111#else 112 return true; 113#endif 114} 115 116bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type) 117{ 118 return could_be_start_of_emoji_sequence_impl(it, type); 119} 120 121bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type) 122{ 123 return could_be_start_of_emoji_sequence_impl(it, type); 124} 125 126}