a modern tui library written in zig
at main 8.1 kB view raw
const std = @import("std");
const unicode = std.unicode;
const testing = std.testing;
const uucode = @import("uucode");

/// The method to use when calculating the width of a grapheme.
pub const Method = enum {
    /// Segment the string into grapheme clusters and measure each cluster as a unit.
    unicode,
    /// Sum the width of every code point independently.
    wcwidth,
    /// Split the string on U+200D (zero-width joiner) and measure each piece with `.unicode`.
    no_zwj,
};

/// Calculate the column width of a single code point from its East Asian
/// Width property and a handful of other Unicode properties.
///
/// Returns 0 for NUL and for zero-width code points, -1 for C0/C1 control
/// characters (including DEL), 2 for fullwidth/wide code points and 1 for
/// everything else.
fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
    // Based on wcwidth implementation
    // Control characters
    if (cp == 0) return 0;
    if (cp < 32 or (cp >= 0x7f and cp < 0xa0)) return -1;

    // Use general category for comprehensive zero-width detection
    switch (uucode.get(.general_category, cp)) {
        .mark_nonspacing, .mark_enclosing => return 0,
        else => {},
    }

    // Additional zero-width characters not covered by general category
    switch (cp) {
        0x00ad, // soft hyphen
        0x034f, // combining grapheme joiner
        0x200b, // zero-width space
        0x200c, // zero-width non-joiner
        0x200d, // zero-width joiner
        0x2060, // word joiner
        0xfeff, // zero-width no-break space (BOM)
        0x180b...0x180d, // Mongolian variation selectors
        0xfe00...0xfe0f, // variation selectors
        0xe0100...0xe01ef, // Plane-14 variation selectors
        => return 0,
        else => {},
    }

    // East Asian Width: fullwidth or wide = 2.
    // Everything else (halfwidth, narrow, neutral and also ambiguous) = 1;
    // ambiguous-width code points are always treated as narrow here.
    return switch (eaw) {
        .fullwidth, .wide => 2,
        else => 1,
    };
}

/// Measure a single grapheme cluster given the UTF-8 bytes that make it up.
///
/// The width is the largest positive width of any code point in the cluster,
/// adjusted for variation selectors, emoji presentation and regional
/// indicator (flag) pairs. Never returns a negative value.
fn graphemeWidth(grapheme_bytes: []const u8) u16 {
    var cp_iter = uucode.utf8.Iterator.init(grapheme_bytes);
    var width: i16 = 0;
    var has_emoji_vs = false;
    var has_text_vs = false;
    var has_emoji_presentation = false;
    var ri_count: u8 = 0;

    while (cp_iter.next()) |cp| {
        // Emoji variation selector (U+FE0F)
        if (cp == 0xfe0f) {
            has_emoji_vs = true;
            continue;
        }

        // Text variation selector (U+FE0E)
        if (cp == 0xfe0e) {
            has_text_vs = true;
            continue;
        }

        // Does this code point default to emoji presentation?
        if (uucode.get(.is_emoji_presentation, cp)) {
            has_emoji_presentation = true;
        }

        // Count regional indicators (for flag emojis)
        if (cp >= 0x1F1E6 and cp <= 0x1F1FF) {
            ri_count +|= 1;
        }

        // Take max of non-zero widths; zero-width and control (-1) code
        // points never lower the cluster's width.
        const w = eawToWidth(cp, uucode.get(.east_asian_width, cp));
        if (w > 0 and w > width) width = w;
    }

    if (has_text_vs) {
        // Explicit text presentation: at least one column, no emoji widening.
        width = @max(1, width);
    } else if (has_emoji_vs or has_emoji_presentation or ri_count == 2) {
        // Emoji presentation or flag pair - force width 2
        width = @max(2, width);
    }

    return @intCast(@max(0, width));
}

/// Returns the width of the provided string, in columns, as measured by the
/// method chosen.
///
/// The total saturates at `std.math.maxInt(u16)` rather than overflowing when
/// the string is wider than a `u16` can represent.
pub fn gwidth(str: []const u8, method: Method) u16 {
    switch (method) {
        .unicode => {
            var total: u16 = 0;
            var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str));

            // Byte offset where the grapheme currently being scanned begins.
            // Graphemes are contiguous, so each one starts exactly where the
            // previous one ended; there is no need to back-compute the start
            // from the encoded length of a decoded code point.
            var grapheme_start: usize = 0;

            while (grapheme_iter.next()) |result| {
                if (!result.is_break) continue;

                // End of a grapheme - calculate its width
                const grapheme_end = grapheme_iter.i;
                total +|= graphemeWidth(str[grapheme_start..grapheme_end]);
                grapheme_start = grapheme_end;
            }

            return total;
        },
        .wcwidth => {
            var total: u16 = 0;
            var iter = uucode.utf8.Iterator.init(str);
            while (iter.next()) |cp| {
                const w: i16 = switch (cp) {
                    // Emoji skin tone modifiers always count as 2 columns in
                    // this mode (originally: undo an override in zg for emoji
                    // skintone selectors).
                    0x1f3fb...0x1f3ff => 2,
                    else => eawToWidth(cp, uucode.get(.east_asian_width, cp)),
                };
                // Control characters report -1; they contribute nothing.
                const columns: u16 = @intCast(@max(0, w));
                total +|= columns;
            }
            return total;
        },
        .no_zwj => {
            var total: u16 = 0;
            var pieces = std.mem.splitSequence(u8, str, "\u{200D}");
            while (pieces.next()) |piece| {
                total +|= gwidth(piece, .unicode);
            }
            return total;
        },
    }
}

test "gwidth: a" {
    try testing.expectEqual(1, gwidth("a", .unicode));
    try testing.expectEqual(1, gwidth("a", .wcwidth));
    try testing.expectEqual(1, gwidth("a", .no_zwj));
}

test "gwidth: emoji with ZWJ" {
    try testing.expectEqual(2, gwidth("👩‍🚀", .unicode));
    try testing.expectEqual(4, gwidth("👩‍🚀", .wcwidth));
    try testing.expectEqual(4, gwidth("👩‍🚀", .no_zwj));
}

test "gwidth: emoji with VS16 selector" {
    try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .unicode));
    try testing.expectEqual(1, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .wcwidth));
    try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .no_zwj));
}

test "gwidth: emoji with skin tone selector" {
    try testing.expectEqual(2, gwidth("👋🏿", .unicode));
    try testing.expectEqual(4, gwidth("👋🏿", .wcwidth));
    try testing.expectEqual(2, gwidth("👋🏿", .no_zwj));
}

test "gwidth: zero-width space" {
    try testing.expectEqual(0, gwidth("\u{200B}", .unicode));
    try testing.expectEqual(0, gwidth("\u{200B}", .wcwidth));
}

test "gwidth: zero-width non-joiner" {
    try testing.expectEqual(0, gwidth("\u{200C}", .unicode));
    try testing.expectEqual(0, gwidth("\u{200C}", .wcwidth));
}

test "gwidth: combining marks" {
    // Hebrew combining mark
    try testing.expectEqual(0, gwidth("\u{05B0}", .unicode));
    // Devanagari combining mark
    try testing.expectEqual(0, gwidth("\u{093C}", .unicode));
}

test "gwidth: flag emoji (regional indicators)" {
    // US flag 🇺🇸
    try testing.expectEqual(2, gwidth("🇺🇸", .unicode));
    // UK flag 🇬🇧
    try testing.expectEqual(2, gwidth("🇬🇧", .unicode));
}

test "gwidth: text variation selector" {
    // U+2764 (heavy black heart) + U+FE0E (text variation selector)
    // Should be width 1 with text presentation
    try testing.expectEqual(1, gwidth("❤︎", .unicode));
}

test "gwidth: keycap sequence" {
    // Digit 1 + U+FE0F + U+20E3 (combining enclosing keycap)
    // Should be width 2
    try testing.expectEqual(2, gwidth("1️⃣", .unicode));
}

test "gwidth: base letter with combining mark" {
    // 'a' + combining acute accent (NFD form)
    // Should be width 1 (combining mark is zero-width)
    try testing.expectEqual(1, gwidth("á", .unicode));
}

test "gwidth: saturates instead of overflowing u16" {
    // 70_000 one-column graphemes do not fit in a u16; the total must clamp.
    const long = "a" ** 70_000;
    const max: u16 = std.math.maxInt(u16);
    try testing.expectEqual(max, gwidth(long, .unicode));
    try testing.expectEqual(max, gwidth(long, .wcwidth));
    try testing.expectEqual(max, gwidth(long, .no_zwj));
}