// A modern TUI library written in Zig
1const std = @import("std");
2const unicode = std.unicode;
3const testing = std.testing;
4const uucode = @import("uucode");
5
/// Strategy `gwidth` uses to measure the display width of a string.
pub const Method = enum {
    /// Segment the string into grapheme clusters and measure each cluster.
    unicode,
    /// Add up the width of every code point on its own, without clustering.
    wcwidth,
    /// Split the string on U+200D (zero-width joiner) and measure each piece
    /// the way `unicode` does.
    no_zwj,
};
12
/// Map a code point and its East Asian Width class to a cell width, in the
/// manner of wcwidth.
///
/// Returns:
/// * `0` for NUL, for nonspacing/enclosing marks, and for the explicit list of
///   zero-width joiners, format characters and variation selectors below;
/// * `-1` for the remaining C0 controls, DEL and the C1 controls;
/// * `2` for fullwidth and wide code points;
/// * `1` for everything else (ambiguous is measured as narrow).
fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
    switch (cp) {
        0 => return 0,
        // Non-printable controls: C0 (minus NUL), DEL and C1.
        1...31, 0x7f...0x9f => return -1,
        // Code points that are always measured as zero cells wide.
        0x00ad, // soft hyphen
        0x034f, // combining grapheme joiner
        0x180b...0x180d, // Mongolian variation selectors
        0x200b, // zero-width space
        0x200c, // zero-width non-joiner
        0x200d, // zero-width joiner
        0x2060, // word joiner
        0xfe00...0xfe0f, // variation selectors
        0xfeff, // zero-width no-break space (BOM)
        0xe0100...0xe01ef, // Plane-14 variation selectors
        => return 0,
        else => {},
    }

    // Nonspacing and enclosing marks attach to their base and add no width.
    const category = uucode.get(.general_category, cp);
    if (category == .mark_nonspacing or category == .mark_enclosing) return 0;

    // Only fullwidth and wide take two cells; every other class takes one.
    return if (eaw == .fullwidth or eaw == .wide) 2 else 1;
}
47
/// Returns the display width of one grapheme cluster given as UTF-8 bytes.
///
/// The base width is the largest positive per-code-point width reported by
/// `eawToWidth`. It is then adjusted:
/// * a text variation selector (U+FE0E) makes the cluster at least 1 wide;
/// * otherwise an emoji variation selector (U+FE0F), any code point with
///   emoji presentation, or exactly two regional indicators (a flag) make the
///   cluster at least 2 wide.
fn graphemeClusterWidth(grapheme_bytes: []const u8) u16 {
    var cp_iter = uucode.utf8.Iterator.init(grapheme_bytes);
    var width: i16 = 0;
    var has_emoji_vs = false;
    var has_text_vs = false;
    var has_emoji_presentation = false;
    var ri_count: u8 = 0;

    while (cp_iter.next()) |cp| {
        // Variation selectors only steer presentation; they have no width.
        if (cp == 0xfe0f) {
            has_emoji_vs = true;
            continue;
        }
        if (cp == 0xfe0e) {
            has_text_vs = true;
            continue;
        }

        if (uucode.get(.is_emoji_presentation, cp)) {
            has_emoji_presentation = true;
        }

        // Regional indicators: a pair of them forms a flag.
        if (cp >= 0x1F1E6 and cp <= 0x1F1FF) {
            ri_count += 1;
        }

        // Keep the widest positive width; zero-width and control code points
        // (0 and -1) never lower it.
        const w = eawToWidth(cp, uucode.get(.east_asian_width, cp));
        if (w > 0 and w > width) width = w;
    }

    if (has_text_vs) {
        // Explicit text presentation wins over any emoji signal.
        width = @max(1, width);
    } else if (has_emoji_vs or has_emoji_presentation or ri_count == 2) {
        width = @max(2, width);
    }

    return @intCast(@max(0, width));
}

/// Returns the display width, in terminal cells, of `str` as measured by
/// `method`.
///
/// * `.unicode` walks `str` one grapheme cluster at a time and adds up the
///   width of each cluster.
/// * `.wcwidth` adds up the width of every code point on its own; code points
///   that `eawToWidth` reports as negative contribute 0.
/// * `.no_zwj` splits `str` on U+200D and measures each piece with `.unicode`.
///
/// The sum saturates at the largest `u16` value instead of overflowing.
pub fn gwidth(str: []const u8, method: Method) u16 {
    switch (method) {
        .unicode => {
            var total: u16 = 0;
            var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str));

            // Byte offset in `str` where the cluster being scanned begins.
            // Clusters are contiguous, so this is always the end offset of the
            // previous cluster. Deriving it as `i - encoded length of cp` is
            // unnecessary and can underflow when the decoded code point
            // encodes to more bytes than were consumed from `str`.
            var grapheme_start: usize = 0;

            while (grapheme_iter.next()) |result| {
                // Keep consuming code points until the cluster is complete.
                if (!result.is_break) continue;

                const grapheme_end = grapheme_iter.i;
                total +|= graphemeClusterWidth(str[grapheme_start..grapheme_end]);
                grapheme_start = grapheme_end;
            }

            return total;
        },
        .wcwidth => {
            var total: u16 = 0;
            var iter = uucode.utf8.Iterator.init(str);
            while (iter.next()) |cp| {
                const w: i16 = switch (cp) {
                    // undo an override in zg for emoji skintone selectors
                    0x1f3fb...0x1f3ff => 2,
                    else => eawToWidth(cp, uucode.get(.east_asian_width, cp)),
                };
                // Negative widths (controls) count as 0.
                const cells: u16 = @intCast(@max(0, w));
                total +|= cells;
            }
            return total;
        },
        .no_zwj => {
            var pieces = std.mem.splitSequence(u8, str, "\u{200D}");
            var total: u16 = 0;
            while (pieces.next()) |piece| {
                total +|= gwidth(piece, .unicode);
            }
            return total;
        },
    }
}
151
test "gwidth: a" {
    // A plain ASCII letter is one cell wide under every method.
    const methods = [_]Method{ .unicode, .wcwidth, .no_zwj };
    for (methods) |method| {
        try testing.expectEqual(1, gwidth("a", method));
    }
}
157
test "gwidth: emoji with ZWJ" {
    // U+1F469 WOMAN + U+200D ZERO WIDTH JOINER + U+1F680 ROCKET.
    // Spelled with escapes so the invisible joiner cannot be lost by an
    // editor or by copy/paste.
    const astronaut = "\u{1F469}\u{200D}\u{1F680}";

    // One joined cluster under `.unicode`; two separate emoji otherwise.
    try testing.expectEqual(2, gwidth(astronaut, .unicode));
    try testing.expectEqual(4, gwidth(astronaut, .wcwidth));
    try testing.expectEqual(4, gwidth(astronaut, .no_zwj));
}
163
test "gwidth: emoji with VS16 selector" {
    // U+2764 followed by U+FE0F, written as raw UTF-8 bytes.
    const heart_vs16 = "\xE2\x9D\xA4\xEF\xB8\x8F";

    try testing.expectEqual(2, gwidth(heart_vs16, .unicode));
    try testing.expectEqual(1, gwidth(heart_vs16, .wcwidth));
    try testing.expectEqual(2, gwidth(heart_vs16, .no_zwj));
}
169
test "gwidth: emoji with skin tone selector" {
    // Waving hand followed by a skin tone modifier.
    const waving_hand = "👋🏿";

    try testing.expectEqual(2, gwidth(waving_hand, .unicode));
    try testing.expectEqual(4, gwidth(waving_hand, .wcwidth));
    try testing.expectEqual(2, gwidth(waving_hand, .no_zwj));
}
175
test "gwidth: zero-width space" {
    // U+200B takes no cells with or without grapheme clustering.
    const methods = [_]Method{ .unicode, .wcwidth };
    for (methods) |method| {
        try testing.expectEqual(0, gwidth("\u{200B}", method));
    }
}
180
test "gwidth: zero-width non-joiner" {
    // U+200C takes no cells with or without grapheme clustering.
    const methods = [_]Method{ .unicode, .wcwidth };
    for (methods) |method| {
        try testing.expectEqual(0, gwidth("\u{200C}", method));
    }
}
185
test "gwidth: combining marks" {
    // A combining mark on its own occupies no cells.
    const marks = [_][]const u8{
        "\u{05B0}", // Hebrew combining mark
        "\u{093C}", // Devanagari combining mark
    };
    for (marks) |mark| {
        try testing.expectEqual(0, gwidth(mark, .unicode));
    }
}
192
test "gwidth: flag emoji (regional indicators)" {
    // Each flag is a pair of regional indicators measured as one 2-cell cluster.
    const flags = [_][]const u8{
        "🇺🇸", // US flag
        "🇬🇧", // UK flag
    };
    for (flags) |flag| {
        try testing.expectEqual(2, gwidth(flag, .unicode));
    }
}
199
test "gwidth: text variation selector" {
    // U+2764 (heavy black heart) + U+FE0E (text variation selector).
    // Written with escapes because the selector is invisible in a literal.
    // Text presentation should be width 1.
    try testing.expectEqual(1, gwidth("\u{2764}\u{FE0E}", .unicode));
}
205
test "gwidth: keycap sequence" {
    // Digit 1 + U+FE0F + U+20E3 (combining enclosing keycap).
    // Written with escapes because both trailing code points are easy to
    // lose in a literal. Should be width 2.
    try testing.expectEqual(2, gwidth("1\u{FE0F}\u{20E3}", .unicode));
}
211
test "gwidth: base letter with combining mark" {
    // 'a' + U+0301 combining acute accent (NFD form).
    // Written with an escape so the pair cannot be normalised into the
    // precomposed U+00E1, which would no longer exercise the combining mark.
    // Should be width 1 (combining mark is zero-width).
    try testing.expectEqual(1, gwidth("a\u{0301}", .unicode));
}