Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v6.18-rc6 251 lines 6.3 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * ucs.c - Universal Character Set processing 4 */ 5 6#include <linux/array_size.h> 7#include <linux/bsearch.h> 8#include <linux/consolemap.h> 9#include <linux/minmax.h> 10 11struct ucs_interval16 { 12 u16 first; 13 u16 last; 14}; 15 16struct ucs_interval32 { 17 u32 first; 18 u32 last; 19}; 20 21#include "ucs_width_table.h" 22 23static int interval16_cmp(const void *key, const void *element) 24{ 25 u16 cp = *(u16 *)key; 26 const struct ucs_interval16 *entry = element; 27 28 if (cp < entry->first) 29 return -1; 30 if (cp > entry->last) 31 return 1; 32 return 0; 33} 34 35static int interval32_cmp(const void *key, const void *element) 36{ 37 u32 cp = *(u32 *)key; 38 const struct ucs_interval32 *entry = element; 39 40 if (cp < entry->first) 41 return -1; 42 if (cp > entry->last) 43 return 1; 44 return 0; 45} 46 47static bool cp_in_range16(u16 cp, const struct ucs_interval16 *ranges, size_t size) 48{ 49 if (cp < ranges[0].first || cp > ranges[size - 1].last) 50 return false; 51 52 return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), 53 interval16_cmp) != NULL; 54} 55 56static bool cp_in_range32(u32 cp, const struct ucs_interval32 *ranges, size_t size) 57{ 58 if (cp < ranges[0].first || cp > ranges[size - 1].last) 59 return false; 60 61 return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), 62 interval32_cmp) != NULL; 63} 64 65#define UCS_IS_BMP(cp) ((cp) <= 0xffff) 66 67/** 68 * ucs_is_zero_width() - Determine if a Unicode code point is zero-width. 69 * @cp: Unicode code point (UCS-4) 70 * 71 * Return: true if the character is zero-width, false otherwise 72 */ 73bool ucs_is_zero_width(u32 cp) 74{ 75 if (UCS_IS_BMP(cp)) 76 return cp_in_range16(cp, ucs_zero_width_bmp_ranges, 77 ARRAY_SIZE(ucs_zero_width_bmp_ranges)); 78 else 79 return cp_in_range32(cp, ucs_zero_width_non_bmp_ranges, 80 ARRAY_SIZE(ucs_zero_width_non_bmp_ranges)); 81} 82 83/** 84 * ucs_is_double_width() - Determine if a Unicode code point is double-width. 85 * @cp: Unicode code point (UCS-4) 86 * 87 * Return: true if the character is double-width, false otherwise 88 */ 89bool ucs_is_double_width(u32 cp) 90{ 91 if (UCS_IS_BMP(cp)) 92 return cp_in_range16(cp, ucs_double_width_bmp_ranges, 93 ARRAY_SIZE(ucs_double_width_bmp_ranges)); 94 else 95 return cp_in_range32(cp, ucs_double_width_non_bmp_ranges, 96 ARRAY_SIZE(ucs_double_width_non_bmp_ranges)); 97} 98 99/* 100 * Structure for base with combining mark pairs and resulting recompositions. 101 * Using u16 to save space since all values are within BMP range. 102 */ 103struct ucs_recomposition { 104 u16 base; /* base character */ 105 u16 mark; /* combining mark */ 106 u16 recomposed; /* corresponding recomposed character */ 107}; 108 109#include "ucs_recompose_table.h" 110 111struct compare_key { 112 u16 base; 113 u16 mark; 114}; 115 116static int recomposition_cmp(const void *key, const void *element) 117{ 118 const struct compare_key *search_key = key; 119 const struct ucs_recomposition *entry = element; 120 121 /* Compare base character first */ 122 if (search_key->base < entry->base) 123 return -1; 124 if (search_key->base > entry->base) 125 return 1; 126 127 /* Base characters match, now compare combining character */ 128 if (search_key->mark < entry->mark) 129 return -1; 130 if (search_key->mark > entry->mark) 131 return 1; 132 133 /* Both match */ 134 return 0; 135} 136 137/** 138 * ucs_recompose() - Attempt to recompose two Unicode characters into a single character. 139 * @base: Base Unicode code point (UCS-4) 140 * @mark: Combining mark Unicode code point (UCS-4) 141 * 142 * Return: Recomposed Unicode code point, or 0 if no recomposition is possible 143 */ 144u32 ucs_recompose(u32 base, u32 mark) 145{ 146 /* Check if characters are within the range of our table */ 147 if (base < UCS_RECOMPOSE_MIN_BASE || base > UCS_RECOMPOSE_MAX_BASE || 148 mark < UCS_RECOMPOSE_MIN_MARK || mark > UCS_RECOMPOSE_MAX_MARK) 149 return 0; 150 151 struct compare_key key = { base, mark }; 152 struct ucs_recomposition *result = 153 __inline_bsearch(&key, ucs_recomposition_table, 154 ARRAY_SIZE(ucs_recomposition_table), 155 sizeof(*ucs_recomposition_table), 156 recomposition_cmp); 157 158 return result ? result->recomposed : 0; 159} 160 161/* 162 * The fallback table structures implement a 2-level lookup. 163 */ 164 165struct ucs_page_desc { 166 u8 page; /* Page index (high byte of code points) */ 167 u8 count; /* Number of entries in this page */ 168 u16 start; /* Start index in entries array */ 169}; 170 171struct ucs_page_entry { 172 u8 offset; /* Offset within page (0-255) */ 173 u8 fallback; /* Fallback character or range start marker */ 174}; 175 176#include "ucs_fallback_table.h" 177 178static int ucs_page_desc_cmp(const void *key, const void *element) 179{ 180 u8 page = *(u8 *)key; 181 const struct ucs_page_desc *entry = element; 182 183 if (page < entry->page) 184 return -1; 185 if (page > entry->page) 186 return 1; 187 return 0; 188} 189 190static int ucs_page_entry_cmp(const void *key, const void *element) 191{ 192 u8 offset = *(u8 *)key; 193 const struct ucs_page_entry *entry = element; 194 195 if (offset < entry->offset) 196 return -1; 197 if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) { 198 if (offset > entry[1].offset) 199 return 1; 200 } else { 201 if (offset > entry->offset) 202 return 1; 203 } 204 return 0; 205} 206 207/** 208 * ucs_get_fallback() - Get a substitution for the provided Unicode character 209 * @cp: Unicode code point (UCS-4) 210 * 211 * Get a simpler fallback character for the provided Unicode character. 212 * This is used for terminal display when corresponding glyph is unavailable. 213 * The substitution may not be as good as the actual glyph for the original 214 * character but still way more helpful than a squared question mark. 215 * 216 * Return: Fallback Unicode code point, or 0 if none is available 217 */ 218u32 ucs_get_fallback(u32 cp) 219{ 220 const struct ucs_page_desc *page; 221 const struct ucs_page_entry *entry; 222 u8 page_idx = cp >> 8, offset = cp; 223 224 if (!UCS_IS_BMP(cp)) 225 return 0; 226 227 /* 228 * Full-width to ASCII mapping (covering all printable ASCII 33-126) 229 * 0xFF01 (!) to 0xFF5E (~) -> ASCII 33 (!) to 126 (~) 230 * We process them programmatically to reduce the table size. 231 */ 232 if (cp >= 0xFF01 && cp <= 0xFF5E) 233 return cp - 0xFF01 + 33; 234 235 page = __inline_bsearch(&page_idx, ucs_fallback_pages, 236 ARRAY_SIZE(ucs_fallback_pages), 237 sizeof(*ucs_fallback_pages), 238 ucs_page_desc_cmp); 239 if (!page) 240 return 0; 241 242 entry = __inline_bsearch(&offset, ucs_fallback_entries + page->start, 243 page->count, sizeof(*ucs_fallback_entries), 244 ucs_page_entry_cmp); 245 if (!entry) 246 return 0; 247 248 if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) 249 entry++; 250 return entry->fallback; 251}