commit 81732720221779056739e5d7fca997c69fb8b751 · rockorager.dev/libvaxis

+349

MIGRATION_ZG_TO_UUCODE.md

··· 1 + # Migration Plan: zg → uucode 2 + 3 + ## Overview 4 + 5 + This document outlines the plan to migrate from the `zg` dependency to `uucode` for grapheme segmentation and display width measurement in libvaxis. 6 + 7 + ## Key Advantage 8 + 9 + **No allocation required** - uucode uses compile-time lookup tables instead of runtime-allocated data structures, eliminating the need to initialize, pass around, and deinitialize Unicode data. 10 + 11 + ## Current zg Usage 12 + 13 + ### Dependencies (from build.zig) 14 + - `code_point` - UTF-8 codepoint iteration 15 + - `Graphemes` - Grapheme cluster segmentation 16 + - `DisplayWidth` - Display width calculation 17 + 18 + ### Files Using zg 19 + - `src/main.zig` - Re-exports `Graphemes` and `DisplayWidth` 20 + - `src/Unicode.zig` - Wrapper around zg data (allocates) 21 + - `src/gwidth.zig` - Width calculation using `DisplayWidth` 22 + - `src/Parser.zig` - Uses `code_point` and `Graphemes` 23 + - `src/Loop.zig` - Uses `Graphemes` 24 + - `src/widgets/TextView.zig` - Uses `Graphemes` and `DisplayWidth` 25 + - `src/widgets/terminal/Terminal.zig` - Uses `code_point` and `DisplayWidth` 26 + 27 + ### Allocation Pattern (zg) 28 + ```zig 29 + // Initialize with allocator 30 + const graphemes = try Graphemes.init(alloc); 31 + defer graphemes.deinit(alloc); 32 + 33 + const width_data = try DisplayWidth.init(alloc); 34 + defer width_data.deinit(alloc); 35 + 36 + // Use 37 + var iter = graphemes.iterator(str); 38 + const width = width_data.codePointWidth(cp); 39 + ``` 40 + 41 + ## uucode API 42 + 43 + ### Available Modules 44 + - `uucode.utf8.Iterator` - UTF-8 codepoint iteration (no allocation) 45 + - `uucode.grapheme.Iterator` - Grapheme cluster iteration (no allocation) 46 + - `uucode.get()` - Compile-time Unicode property lookup (no allocation) 47 + 48 + ### Usage Pattern (uucode) 49 + ```zig 50 + // UTF-8 iteration 51 + var cp_iter = uucode.utf8.Iterator.init(str); 52 + while (cp_iter.next()) |cp| { 53 + // process codepoint 54 
+ } 55 + 56 + // Grapheme iteration 57 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 58 + while (grapheme_iter.next()) |result| { 59 + // result.cp is the codepoint 60 + // result.is_break indicates grapheme boundary 61 + } 62 + 63 + // Width lookup (requires wcwidth field in build config) 64 + const width = uucode.get(.wcwidth, cp); 65 + 66 + // Grapheme width (from uucode.x extension) 67 + const g_width = uucode.x.grapheme.unverifiedWcwidth(grapheme_iter); 68 + ``` 69 + 70 + ### Iterator Result Structure 71 + ```zig 72 + pub const IteratorResult = struct { 73 + cp: u21, // The codepoint 74 + is_break: bool, // true if this is a grapheme cluster boundary 75 + }; 76 + ``` 77 + 78 + ## Migration Steps 79 + 80 + ### 1. Update build.zig.zon 81 + 82 + Make sure the `uucode` dependency is declared (the `wcwidth` field itself is requested in build.zig, see step 2): 83 + 84 + ```zig 85 + .uucode = .{ 86 + .url = "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", 87 + .hash = "uucode-0.1.0-ZZjBPj96QADXyt5sqwBJUnhaDYs_qBeeKijZvlRa0eqM", 88 + }, 89 + ``` 90 + 91 + ### 2. Update build.zig 92 + 93 + In the uucode dependency configuration, update the fields array: 94 + 95 + ```zig 96 + const uucode_dep = b.dependency("uucode", .{ 97 + .target = target, 98 + .optimize = optimize, 99 + .fields = @as([]const []const u8, &.{ 100 + "grapheme_break", 101 + "wcwidth", // ADD THIS 102 + }), 103 + }); 104 + ``` 105 + 106 + Remove zg dependency: 107 + - Delete the `zg_dep` declaration 108 + - Remove all `zg_dep.module()` references 109 + - Remove `.zg` from build.zig.zon 110 + 111 + ### 3. Update Module Imports in build.zig 112 + 113 + Remove: 114 + ```zig 115 + vaxis_mod.addImport("code_point", zg_dep.module("code_point")); 116 + vaxis_mod.addImport("Graphemes", zg_dep.module("Graphemes")); 117 + vaxis_mod.addImport("DisplayWidth", zg_dep.module("DisplayWidth")); 118 + ``` 119 + 120 + No replacement lines are needed - uucode is already imported. 121 + 122 + ### 4. 
Update src/main.zig 123 + 124 + Remove: 125 + ```zig 126 + pub const DisplayWidth = @import("DisplayWidth"); 127 + pub const Graphemes = @import("Graphemes"); 128 + ``` 129 + 130 + These become internal implementation details or are removed entirely. 131 + 132 + ### 5. Update src/Unicode.zig 133 + 134 + **Before:** 135 + ```zig 136 + const Graphemes = @import("Graphemes"); 137 + const DisplayWidth = @import("DisplayWidth"); 138 + 139 + const Unicode = @This(); 140 + 141 + width_data: DisplayWidth, 142 + 143 + pub fn init(alloc: std.mem.Allocator) !Unicode { 144 + return .{ 145 + .width_data = try DisplayWidth.init(alloc), 146 + }; 147 + } 148 + 149 + pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 150 + self.width_data.deinit(alloc); 151 + } 152 + 153 + pub fn graphemeIterator(self: *const Unicode, str: []const u8) Graphemes.Iterator { 154 + return self.width_data.graphemes.iterator(str); 155 + } 156 + ``` 157 + 158 + **After:** 159 + ```zig 160 + const uucode = @import("uucode"); 161 + 162 + const Unicode = @This(); 163 + 164 + // No fields needed - all operations are stateless 165 + 166 + pub fn init(alloc: std.mem.Allocator) !Unicode { 167 + _ = alloc; 168 + return .{}; 169 + } 170 + 171 + pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 172 + _ = self; 173 + _ = alloc; 174 + } 175 + 176 + pub fn graphemeIterator(self: *const Unicode, str: []const u8) uucode.grapheme.Iterator(uucode.utf8.Iterator) { 177 + _ = self; 178 + return uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 179 + } 180 + ``` 181 + 182 + Or consider removing the `Unicode` wrapper entirely since it no longer serves a purpose. 183 + 184 + ### 6. 
Update src/gwidth.zig 185 + 186 + **Before:** 187 + ```zig 188 + const DisplayWidth = @import("DisplayWidth"); 189 + const code_point = @import("code_point"); 190 + 191 + pub fn gwidth(str: []const u8, method: Method, data: *const DisplayWidth) u16 { 192 + switch (method) { 193 + .unicode => { 194 + return @intCast(data.strWidth(str)); 195 + }, 196 + .wcwidth => { 197 + var total: u16 = 0; 198 + var iter: code_point.Iterator = .{ .bytes = str }; 199 + while (iter.next()) |cp| { 200 + const w: u16 = switch (cp.code) { 201 + 0x1f3fb...0x1f3ff => 2, 202 + else => @max(0, data.codePointWidth(cp.code)), 203 + }; 204 + total += w; 205 + } 206 + return total; 207 + }, 208 + // ... 209 + } 210 + } 211 + ``` 212 + 213 + **After:** 214 + ```zig 215 + const uucode = @import("uucode"); 216 + 217 + pub fn gwidth(str: []const u8, method: Method) u16 { 218 + switch (method) { 219 + .unicode => { 220 + var total: u16 = 0; 221 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 222 + while (grapheme_iter.next()) |result| { 223 + if (result.is_break) { 224 + // Calculate width for previous grapheme 225 + // This requires buffering the grapheme - may need different approach 226 + } 227 + } 228 + return total; 229 + }, 230 + .wcwidth => { 231 + var total: u16 = 0; 232 + var iter = uucode.utf8.Iterator.init(str); 233 + while (iter.next()) |cp| { 234 + const w: u16 = switch (cp) { 235 + 0x1f3fb...0x1f3ff => 2, 236 + else => @max(0, uucode.get(.wcwidth, cp)), 237 + }; 238 + total += w; 239 + } 240 + return total; 241 + }, 242 + // ... 243 + } 244 + } 245 + ``` 246 + 247 + Note: Remove the `data` parameter entirely. 248 + 249 + ### 7. 
Update src/Parser.zig 250 + 251 + Replace: 252 + ```zig 253 + const code_point = @import("code_point"); 254 + const Graphemes = @import("Graphemes"); 255 + ``` 256 + 257 + With: 258 + ```zig 259 + const uucode = @import("uucode"); 260 + ``` 261 + 262 + Replace: 263 + ```zig 264 + grapheme_data: *const Graphemes, 265 + ``` 266 + 267 + With: 268 + ```zig 269 + // Remove this field entirely if only used for iteration 270 + ``` 271 + 272 + Replace usage: 273 + ```zig 274 + var iter: code_point.Iterator = .{ .bytes = input }; 275 + ``` 276 + 277 + With: 278 + ```zig 279 + var iter = uucode.utf8.Iterator.init(input); 280 + ``` 281 + 282 + ### 8. Update Other Files 283 + 284 + Apply similar transformations to: 285 + - `src/Loop.zig` 286 + - `src/widgets/TextView.zig` 287 + - `src/widgets/terminal/Terminal.zig` 288 + 289 + Pattern: 290 + 1. Replace imports with `const uucode = @import("uucode");` 291 + 2. Remove allocated data fields 292 + 3. Replace `code_point.Iterator` with `uucode.utf8.Iterator` 293 + 4. Replace `graphemes.iterator()` with `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` 294 + 5. Replace `data.codePointWidth(cp)` with `uucode.get(.wcwidth, cp)` 295 + 296 + ### 9. Update Tests 297 + 298 + All test code that does: 299 + ```zig 300 + const data = try DisplayWidth.init(alloc); 301 + defer data.deinit(alloc); 302 + ``` 303 + 304 + Can be removed entirely. 
Width lookups become: 305 + ```zig 306 + const width = uucode.get(.wcwidth, cp); 307 + ``` 308 + 309 + ## API Mapping Reference 310 + 311 + | zg API | uucode API | 312 + |--------|------------| 313 + | `code_point.Iterator{ .bytes = str }` | `uucode.utf8.Iterator.init(str)` | 314 + | `iter.next().code` | `iter.next()` (returns u21 directly) | 315 + | `Graphemes.init(alloc)` | _(no initialization needed)_ | 316 + | `graphemes.iterator(str)` | `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` | 317 + | `DisplayWidth.init(alloc)` | _(no initialization needed)_ | 318 + | `width_data.codePointWidth(cp)` | `uucode.get(.wcwidth, cp)` | 319 + | `width_data.strWidth(str)` | _(implement using iterator + uucode.get)_ | 320 + 321 + ## Benefits 322 + 323 + 1. **No allocations** - All Unicode data is compile-time generated 324 + 2. **Simpler API** - No init/deinit lifecycle 325 + 3. **Less state to manage** - No data structures to pass around 326 + 4. **Smaller binary** - Only requested fields are included 327 + 5. **Type-safe lookups** - Field names are compile-time checked 328 + 329 + ## Potential Challenges 330 + 331 + 1. **String width calculation** - zg's `strWidth()` is convenient; need to implement equivalent using iterator 332 + 2. **Grapheme-aware width** - May need `uucode.x.grapheme.unverifiedWcwidth()` for proper emoji/ZWJ handling 333 + 3. **Iterator API differences** - zg returns struct with `.code`, uucode returns `u21` directly 334 + 4. **Breaking API changes** - Any public APIs exposing `Graphemes` or `DisplayWidth` types will need updates 335 + 336 + ## Testing Strategy 337 + 338 + 1. Run existing tests with uucode implementation 339 + 2. Pay special attention to: 340 + - Emoji with ZWJ sequences 341 + - Skin tone modifiers 342 + - Variation selectors 343 + - Complex grapheme clusters 344 + 3. Compare width calculations with zg implementation 345 + 4. 
Test memory usage (should be lower without allocations) 346 + 347 + ## Rollback Plan 348 + 349 + If issues arise, restore the zg dependency in build.zig.zon, the `zg_dep` declaration and its `addImport` calls in build.zig, and the zg imports in the affected source files. The changes are isolated to a small number of files.

+11 -9

build.zig

··· 6 6 const root_source_file = b.path("src/main.zig"); 7 7 8 8 // Dependencies 9 - const zg_dep = b.dependency("zg", .{ 9 + const zigimg_dep = b.dependency("zigimg", .{ 10 10 .optimize = optimize, 11 11 .target = target, 12 12 }); 13 - const zigimg_dep = b.dependency("zigimg", .{ 14 - .optimize = optimize, 13 + const uucode_dep = b.dependency("uucode", .{ 15 14 .target = target, 15 + .optimize = optimize, 16 + .fields = @as([]const []const u8, &.{ 17 + "east_asian_width", 18 + "grapheme_break", 19 + "general_category", 20 + "is_emoji_presentation", 21 + }), 16 22 }); 17 23 18 24 // Module ··· 21 27 .target = target, 22 28 .optimize = optimize, 23 29 }); 24 - vaxis_mod.addImport("code_point", zg_dep.module("code_point")); 25 - vaxis_mod.addImport("Graphemes", zg_dep.module("Graphemes")); 26 - vaxis_mod.addImport("DisplayWidth", zg_dep.module("DisplayWidth")); 27 30 vaxis_mod.addImport("zigimg", zigimg_dep.module("zigimg")); 31 + vaxis_mod.addImport("uucode", uucode_dep.module("uucode")); 28 32 29 33 // Examples 30 34 const Example = enum { ··· 69 73 .target = target, 70 74 .optimize = optimize, 71 75 .imports = &.{ 72 - .{ .name = "code_point", .module = zg_dep.module("code_point") }, 73 - .{ .name = "Graphemes", .module = zg_dep.module("Graphemes") }, 74 - .{ .name = "DisplayWidth", .module = zg_dep.module("DisplayWidth") }, 75 76 .{ .name = "zigimg", .module = zigimg_dep.module("zigimg") }, 77 + .{ .name = "uucode", .module = uucode_dep.module("uucode") }, 76 78 }, 77 79 }), 78 80 });

+3 -4

build.zig.zon

··· 9 9 .url = "https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz", 10 10 .hash = "zigimg-0.1.0-8_eo2vHnEwCIVW34Q14Ec-xUlzIoVg86-7FU2ypPtxms", 11 11 }, 12 - .zg = .{ 13 - // Upstream PR: https://codeberg.org/atman/zg/pulls/90/ 14 - .url = "https://codeberg.org/chaten/zg/archive/749197a3f9d25e211615960c02380a3d659b20f9.tar.gz", 15 - .hash = "zg-0.15.1-oGqU3M0-tALZCy7boQS86znlBloyKx6--JriGlY0Paa9", 12 + .uucode = .{ 13 + .url = "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", 14 + .hash = "uucode-0.1.0-ZZjBPj96QADXyt5sqwBJUnhaDYs_qBeeKijZvlRa0eqM", 16 15 }, 17 16 }, 18 17 .paths = .{

+2 -10

src/Loop.zig

··· 1 1 const std = @import("std"); 2 2 const builtin = @import("builtin"); 3 3 4 - const Graphemes = @import("Graphemes"); 5 - 6 4 const GraphemeCache = @import("GraphemeCache.zig"); 7 5 const Parser = @import("Parser.zig"); 8 6 const Queue = @import("queue.zig").Queue; ··· 47 45 if (self.thread) |_| return; 48 46 self.thread = try std.Thread.spawn(.{}, Self.ttyRun, .{ 49 47 self, 50 - &self.vaxis.unicode.width_data.graphemes, 51 48 self.vaxis.opts.system_clipboard_allocator, 52 49 }); 53 50 } ··· 107 104 /// read input from the tty. This is run in a separate thread 108 105 fn ttyRun( 109 106 self: *Self, 110 - grapheme_data: *const Graphemes, 111 107 paste_allocator: ?std.mem.Allocator, 112 108 ) !void { 113 109 // Return early if we're in test mode to avoid infinite loops ··· 118 114 119 115 switch (builtin.os.tag) { 120 116 .windows => { 121 - var parser: Parser = .{ 122 - .grapheme_data = grapheme_data, 123 - }; 117 + var parser: Parser = .{}; 124 118 while (!self.should_quit) { 125 119 const event = try self.tty.nextEvent(&parser, paste_allocator); 126 120 try handleEventGeneric(self, self.vaxis, &cache, Event, event, null); ··· 133 127 self.postEvent(.{ .winsize = winsize }); 134 128 } 135 129 136 - var parser: Parser = .{ 137 - .grapheme_data = grapheme_data, 138 - }; 130 + var parser: Parser = .{}; 139 131 140 132 // initialize the read buffer 141 133 var buf: [1024]u8 = undefined;

+124 -87

src/Parser.zig

··· 4 4 const Event = @import("event.zig").Event; 5 5 const Key = @import("Key.zig"); 6 6 const Mouse = @import("Mouse.zig"); 7 - const code_point = @import("code_point"); 8 - const Graphemes = @import("Graphemes"); 7 + const uucode = @import("uucode"); 9 8 const Winsize = @import("main.zig").Winsize; 10 9 11 10 const log = std.log.scoped(.vaxis_parser); ··· 45 44 // a buffer to temporarily store text in. We need this to encode 46 45 // text-as-codepoints 47 46 buf: [128]u8 = undefined, 48 - 49 - grapheme_data: *const Graphemes, 50 47 51 48 /// Parse the first event from the input buffer. If a completion event is not 52 49 /// present, Result.event will be null and Result.n will be 0 ··· 78 75 }; 79 76 }, 80 77 } 81 - } else return parseGround(input, self.grapheme_data); 78 + } else return parseGround(input); 82 79 } 83 80 84 81 /// Parse ground state 85 - inline fn parseGround(input: []const u8, data: *const Graphemes) !Result { 82 + inline fn parseGround(input: []const u8) !Result { 86 83 std.debug.assert(input.len > 0); 87 84 88 85 const b = input[0]; ··· 109 106 }, 110 107 0x7F => .{ .codepoint = Key.backspace }, 111 108 else => blk: { 112 - var iter: code_point.Iterator = .{ .bytes = input }; 109 + var iter = uucode.utf8.Iterator.init(input); 113 110 // return null if we don't have a valid codepoint 114 - const cp = iter.next() orelse return error.InvalidUTF8; 111 + const first_cp = iter.next() orelse return error.InvalidUTF8; 115 112 116 - n = cp.len; 113 + n = std.unicode.utf8CodepointSequenceLength(first_cp) catch return error.InvalidUTF8; 117 114 118 115 // Check if we have a multi-codepoint grapheme 119 - var code = cp.code; 120 - var g_state: Graphemes.IterState = .{}; 121 - var prev_cp = code; 122 - while (iter.next()) |next_cp| { 123 - if (Graphemes.graphemeBreak(prev_cp, next_cp.code, data, &g_state)) { 116 + var code = first_cp; 117 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(input)); 118 + var grapheme_len: usize 
= 0; 119 + var cp_count: usize = 0; 120 + 121 + while (grapheme_iter.next()) |result| { 122 + cp_count += 1; 123 + if (result.is_break) { 124 + // Found the first grapheme boundary 125 + grapheme_len = grapheme_iter.i; 124 126 break; 125 127 } 126 - prev_cp = next_cp.code; 127 - code = Key.multicodepoint; 128 - n += next_cp.len; 128 + } 129 + 130 + if (grapheme_len > 0) { 131 + n = grapheme_len; 132 + if (cp_count > 1) { 133 + code = Key.multicodepoint; 134 + } 129 135 } 130 136 131 137 break :blk .{ .codepoint = code, .text = input[0..n] }; ··· 731 737 732 738 test "parse: single xterm keypress" { 733 739 const alloc = testing.allocator_instance.allocator(); 734 - const grapheme_data = try Graphemes.init(alloc); 735 - defer grapheme_data.deinit(alloc); 736 740 const input = "a"; 737 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 741 + var parser: Parser = .{}; 738 742 const result = try parser.parse(input, alloc); 739 743 const expected_key: Key = .{ 740 744 .codepoint = 'a', ··· 748 752 749 753 test "parse: single xterm keypress backspace" { 750 754 const alloc = testing.allocator_instance.allocator(); 751 - const grapheme_data = try Graphemes.init(alloc); 752 - defer grapheme_data.deinit(alloc); 753 755 const input = "\x08"; 754 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 756 + var parser: Parser = .{}; 755 757 const result = try parser.parse(input, alloc); 756 758 const expected_key: Key = .{ 757 759 .codepoint = Key.backspace, ··· 764 766 765 767 test "parse: single xterm keypress with more buffer" { 766 768 const alloc = testing.allocator_instance.allocator(); 767 - const grapheme_data = try Graphemes.init(alloc); 768 - defer grapheme_data.deinit(alloc); 769 769 const input = "ab"; 770 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 770 + var parser: Parser = .{}; 771 771 const result = try parser.parse(input, alloc); 772 772 const expected_key: Key = .{ 773 773 .codepoint = 'a', ··· 782 782 783 783 test "parse: 
xterm escape keypress" { 784 784 const alloc = testing.allocator_instance.allocator(); 785 - const grapheme_data = try Graphemes.init(alloc); 786 - defer grapheme_data.deinit(alloc); 787 785 const input = "\x1b"; 788 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 786 + var parser: Parser = .{}; 789 787 const result = try parser.parse(input, alloc); 790 788 const expected_key: Key = .{ .codepoint = Key.escape }; 791 789 const expected_event: Event = .{ .key_press = expected_key }; ··· 796 794 797 795 test "parse: xterm ctrl+a" { 798 796 const alloc = testing.allocator_instance.allocator(); 799 - const grapheme_data = try Graphemes.init(alloc); 800 - defer grapheme_data.deinit(alloc); 801 797 const input = "\x01"; 802 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 798 + var parser: Parser = .{}; 803 799 const result = try parser.parse(input, alloc); 804 800 const expected_key: Key = .{ .codepoint = 'a', .mods = .{ .ctrl = true } }; 805 801 const expected_event: Event = .{ .key_press = expected_key }; ··· 810 806 811 807 test "parse: xterm alt+a" { 812 808 const alloc = testing.allocator_instance.allocator(); 813 - const grapheme_data = try Graphemes.init(alloc); 814 - defer grapheme_data.deinit(alloc); 815 809 const input = "\x1ba"; 816 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 810 + var parser: Parser = .{}; 817 811 const result = try parser.parse(input, alloc); 818 812 const expected_key: Key = .{ .codepoint = 'a', .mods = .{ .alt = true } }; 819 813 const expected_event: Event = .{ .key_press = expected_key }; ··· 824 818 825 819 test "parse: xterm key up" { 826 820 const alloc = testing.allocator_instance.allocator(); 827 - const grapheme_data = try Graphemes.init(alloc); 828 - defer grapheme_data.deinit(alloc); 829 821 { 830 822 // normal version 831 823 const input = "\x1b[A"; 832 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 824 + var parser: Parser = .{}; 833 825 const result = try 
parser.parse(input, alloc); 834 826 const expected_key: Key = .{ .codepoint = Key.up }; 835 827 const expected_event: Event = .{ .key_press = expected_key }; ··· 841 833 { 842 834 // application keys version 843 835 const input = "\x1bOA"; 844 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 836 + var parser: Parser = .{}; 845 837 const result = try parser.parse(input, alloc); 846 838 const expected_key: Key = .{ .codepoint = Key.up }; 847 839 const expected_event: Event = .{ .key_press = expected_key }; ··· 853 845 854 846 test "parse: xterm shift+up" { 855 847 const alloc = testing.allocator_instance.allocator(); 856 - const grapheme_data = try Graphemes.init(alloc); 857 - defer grapheme_data.deinit(alloc); 858 848 const input = "\x1b[1;2A"; 859 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 849 + var parser: Parser = .{}; 860 850 const result = try parser.parse(input, alloc); 861 851 const expected_key: Key = .{ .codepoint = Key.up, .mods = .{ .shift = true } }; 862 852 const expected_event: Event = .{ .key_press = expected_key }; ··· 867 857 868 858 test "parse: xterm insert" { 869 859 const alloc = testing.allocator_instance.allocator(); 870 - const grapheme_data = try Graphemes.init(alloc); 871 - defer grapheme_data.deinit(alloc); 872 860 const input = "\x1b[2~"; 873 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 861 + var parser: Parser = .{}; 874 862 const result = try parser.parse(input, alloc); 875 863 const expected_key: Key = .{ .codepoint = Key.insert, .mods = .{} }; 876 864 const expected_event: Event = .{ .key_press = expected_key }; ··· 881 869 882 870 test "parse: paste_start" { 883 871 const alloc = testing.allocator_instance.allocator(); 884 - const grapheme_data = try Graphemes.init(alloc); 885 - defer grapheme_data.deinit(alloc); 886 872 const input = "\x1b[200~"; 887 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 873 + var parser: Parser = .{}; 888 874 const result = try parser.parse(input, 
alloc); 889 875 const expected_event: Event = .paste_start; 890 876 ··· 894 880 895 881 test "parse: paste_end" { 896 882 const alloc = testing.allocator_instance.allocator(); 897 - const grapheme_data = try Graphemes.init(alloc); 898 - defer grapheme_data.deinit(alloc); 899 883 const input = "\x1b[201~"; 900 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 884 + var parser: Parser = .{}; 901 885 const result = try parser.parse(input, alloc); 902 886 const expected_event: Event = .paste_end; 903 887 ··· 907 891 908 892 test "parse: osc52 paste" { 909 893 const alloc = testing.allocator_instance.allocator(); 910 - const grapheme_data = try Graphemes.init(alloc); 911 - defer grapheme_data.deinit(alloc); 912 894 const input = "\x1b]52;c;b3NjNTIgcGFzdGU=\x1b\\"; 913 895 const expected_text = "osc52 paste"; 914 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 896 + var parser: Parser = .{}; 915 897 const result = try parser.parse(input, alloc); 916 898 917 899 try testing.expectEqual(25, result.n); ··· 926 908 927 909 test "parse: focus_in" { 928 910 const alloc = testing.allocator_instance.allocator(); 929 - const grapheme_data = try Graphemes.init(alloc); 930 - defer grapheme_data.deinit(alloc); 931 911 const input = "\x1b[I"; 932 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 912 + var parser: Parser = .{}; 933 913 const result = try parser.parse(input, alloc); 934 914 const expected_event: Event = .focus_in; 935 915 ··· 939 919 940 920 test "parse: focus_out" { 941 921 const alloc = testing.allocator_instance.allocator(); 942 - const grapheme_data = try Graphemes.init(alloc); 943 - defer grapheme_data.deinit(alloc); 944 922 const input = "\x1b[O"; 945 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 923 + var parser: Parser = .{}; 946 924 const result = try parser.parse(input, alloc); 947 925 const expected_event: Event = .focus_out; 948 926 ··· 952 930 953 931 test "parse: kitty: shift+a without text reporting" { 
954 932 const alloc = testing.allocator_instance.allocator(); 955 - const grapheme_data = try Graphemes.init(alloc); 956 - defer grapheme_data.deinit(alloc); 957 933 const input = "\x1b[97:65;2u"; 958 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 934 + var parser: Parser = .{}; 959 935 const result = try parser.parse(input, alloc); 960 936 const expected_key: Key = .{ 961 937 .codepoint = 'a', ··· 971 947 972 948 test "parse: kitty: alt+shift+a without text reporting" { 973 949 const alloc = testing.allocator_instance.allocator(); 974 - const grapheme_data = try Graphemes.init(alloc); 975 - defer grapheme_data.deinit(alloc); 976 950 const input = "\x1b[97:65;4u"; 977 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 951 + var parser: Parser = .{}; 978 952 const result = try parser.parse(input, alloc); 979 953 const expected_key: Key = .{ 980 954 .codepoint = 'a', ··· 989 963 990 964 test "parse: kitty: a without text reporting" { 991 965 const alloc = testing.allocator_instance.allocator(); 992 - const grapheme_data = try Graphemes.init(alloc); 993 - defer grapheme_data.deinit(alloc); 994 966 const input = "\x1b[97u"; 995 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 967 + var parser: Parser = .{}; 996 968 const result = try parser.parse(input, alloc); 997 969 const expected_key: Key = .{ 998 970 .codepoint = 'a', ··· 1005 977 1006 978 test "parse: kitty: release event" { 1007 979 const alloc = testing.allocator_instance.allocator(); 1008 - const grapheme_data = try Graphemes.init(alloc); 1009 - defer grapheme_data.deinit(alloc); 1010 980 const input = "\x1b[97;1:3u"; 1011 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 981 + var parser: Parser = .{}; 1012 982 const result = try parser.parse(input, alloc); 1013 983 const expected_key: Key = .{ 1014 984 .codepoint = 'a', ··· 1021 991 1022 992 test "parse: single codepoint" { 1023 993 const alloc = testing.allocator_instance.allocator(); 1024 - const grapheme_data 
= try Graphemes.init(alloc); 1025 - defer grapheme_data.deinit(alloc); 1026 994 const input = "🙂"; 1027 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 995 + var parser: Parser = .{}; 1028 996 const result = try parser.parse(input, alloc); 1029 997 const expected_key: Key = .{ 1030 998 .codepoint = 0x1F642, ··· 1038 1006 1039 1007 test "parse: single codepoint with more in buffer" { 1040 1008 const alloc = testing.allocator_instance.allocator(); 1041 - const grapheme_data = try Graphemes.init(alloc); 1042 - defer grapheme_data.deinit(alloc); 1043 1009 const input = "🙂a"; 1044 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1010 + var parser: Parser = .{}; 1045 1011 const result = try parser.parse(input, alloc); 1046 1012 const expected_key: Key = .{ 1047 1013 .codepoint = 0x1F642, ··· 1055 1021 1056 1022 test "parse: multiple codepoint grapheme" { 1057 1023 const alloc = testing.allocator_instance.allocator(); 1058 - const grapheme_data = try Graphemes.init(alloc); 1059 - defer grapheme_data.deinit(alloc); 1060 1024 const input = "👩‍🚀"; 1061 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1025 + var parser: Parser = .{}; 1062 1026 const result = try parser.parse(input, alloc); 1063 1027 const expected_key: Key = .{ 1064 1028 .codepoint = Key.multicodepoint, ··· 1072 1036 1073 1037 test "parse: multiple codepoint grapheme with more after" { 1074 1038 const alloc = testing.allocator_instance.allocator(); 1075 - const grapheme_data = try Graphemes.init(alloc); 1076 - defer grapheme_data.deinit(alloc); 1077 1039 const input = "👩‍🚀abc"; 1078 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1040 + var parser: Parser = .{}; 1079 1041 const result = try parser.parse(input, alloc); 1080 1042 const expected_key: Key = .{ 1081 1043 .codepoint = Key.multicodepoint, ··· 1088 1050 try testing.expectEqual(expected_key.codepoint, actual.codepoint); 1089 1051 } 1090 1052 1053 + test "parse: flag emoji" { 1054 + const alloc = 
testing.allocator_instance.allocator(); 1055 + const input = "🇺🇸"; 1056 + var parser: Parser = .{}; 1057 + const result = try parser.parse(input, alloc); 1058 + const expected_key: Key = .{ 1059 + .codepoint = Key.multicodepoint, 1060 + .text = input, 1061 + }; 1062 + const expected_event: Event = .{ .key_press = expected_key }; 1063 + 1064 + try testing.expectEqual(input.len, result.n); 1065 + try testing.expectEqual(expected_event, result.event); 1066 + } 1067 + 1068 + test "parse: combining mark" { 1069 + const alloc = testing.allocator_instance.allocator(); 1070 + // a with combining acute accent (NFD form) 1071 + const input = "a\u{0301}"; 1072 + var parser: Parser = .{}; 1073 + const result = try parser.parse(input, alloc); 1074 + const expected_key: Key = .{ 1075 + .codepoint = Key.multicodepoint, 1076 + .text = input, 1077 + }; 1078 + const expected_event: Event = .{ .key_press = expected_key }; 1079 + 1080 + try testing.expectEqual(input.len, result.n); 1081 + try testing.expectEqual(expected_event, result.event); 1082 + } 1083 + 1084 + test "parse: skin tone emoji" { 1085 + const alloc = testing.allocator_instance.allocator(); 1086 + const input = "👋🏿"; 1087 + var parser: Parser = .{}; 1088 + const result = try parser.parse(input, alloc); 1089 + const expected_key: Key = .{ 1090 + .codepoint = Key.multicodepoint, 1091 + .text = input, 1092 + }; 1093 + const expected_event: Event = .{ .key_press = expected_key }; 1094 + 1095 + try testing.expectEqual(input.len, result.n); 1096 + try testing.expectEqual(expected_event, result.event); 1097 + } 1098 + 1099 + test "parse: text variation selector" { 1100 + const alloc = testing.allocator_instance.allocator(); 1101 + // Heavy black heart with text variation selector 1102 + const input = "❤︎"; 1103 + var parser: Parser = .{}; 1104 + const result = try parser.parse(input, alloc); 1105 + const expected_key: Key = .{ 1106 + .codepoint = Key.multicodepoint, 1107 + .text = input, 1108 + }; 1109 + const expected_event: 
Event = .{ .key_press = expected_key }; 1110 + 1111 + try testing.expectEqual(input.len, result.n); 1112 + try testing.expectEqual(expected_event, result.event); 1113 + } 1114 + 1115 + test "parse: keycap sequence" { 1116 + const alloc = testing.allocator_instance.allocator(); 1117 + const input = "1️⃣"; 1118 + var parser: Parser = .{}; 1119 + const result = try parser.parse(input, alloc); 1120 + const expected_key: Key = .{ 1121 + .codepoint = Key.multicodepoint, 1122 + .text = input, 1123 + }; 1124 + const expected_event: Event = .{ .key_press = expected_key }; 1125 + 1126 + try testing.expectEqual(input.len, result.n); 1127 + try testing.expectEqual(expected_event, result.event); 1128 + } 1129 + 1091 1130 test "parse(csi): kitty multi cursor" { 1092 1131 var buf: [1]u8 = undefined; 1093 1132 { ··· 1230 1269 1231 1270 test "parse: disambiguate shift + space" { 1232 1271 const alloc = testing.allocator_instance.allocator(); 1233 - const grapheme_data = try Graphemes.init(alloc); 1234 - defer grapheme_data.deinit(alloc); 1235 1272 const input = "\x1b[32;2u"; 1236 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1273 + var parser: Parser = .{}; 1237 1274 const result = try parser.parse(input, alloc); 1238 1275 const expected_key: Key = .{ 1239 1276 .codepoint = ' ',

+68 -11

src/Unicode.zig

const std = @import("std");
const uucode = @import("uucode");

/// A thin wrapper around Unicode data. With uucode this type has no fields
/// and needs no allocation; it is kept so existing callers keep compiling.
const Unicode = @This();

/// Initialize all unicode data vaxis may possibly need.
/// With uucode no initialization is needed: `alloc` is ignored and an empty
/// value is returned. Kept for API compatibility.
pub fn init(alloc: std.mem.Allocator) !Unicode {
    _ = alloc;
    return .{};
}

/// Free all data.
/// With uucode there is nothing to release: both arguments are ignored.
/// Kept for API compatibility.
pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void {
    _ = self;
    _ = alloc;
}

/// Old API-compatible grapheme value: a byte range inside the iterated string.
pub const Grapheme = struct {
    /// Byte offset of the first byte of the cluster.
    start: usize,
    /// Length of the cluster in bytes.
    len: usize,

    /// Returns the slice of `str` covered by this grapheme. `str` is expected
    /// to be the same string the iterator was created from.
    pub fn bytes(self: Grapheme, str: []const u8) []const u8 {
        return str[self.start .. self.start + self.len];
    }
};

/// Old API-compatible iterator that yields `Grapheme` values with `.len` and
/// `.bytes()`, built on top of uucode's per-codepoint grapheme iterator.
pub const GraphemeIterator = struct {
    /// The string being segmented.
    str: []const u8,
    /// uucode iterator; each result carries the codepoint and whether a
    /// cluster boundary follows it.
    inner: uucode.grapheme.Iterator(uucode.utf8.Iterator),
    /// Byte offset where the cluster currently being assembled begins.
    start: usize = 0,
    /// True when the last consumed codepoint ended a cluster (or nothing has
    /// been consumed yet).
    prev_break: bool = true,

    /// Creates an iterator over the grapheme clusters of `str`.
    pub fn init(str: []const u8) GraphemeIterator {
        return .{
            .str = str,
            .inner = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)),
        };
    }

    /// Returns the next grapheme cluster, or null when the input is exhausted.
    pub fn next(self: *GraphemeIterator) ?Grapheme {
        while (self.inner.next()) |res| {
            // `self.start` already marks the beginning of the current cluster:
            // it is 0 initially and is moved to the previous cluster's end
            // every time one is emitted. It is deliberately NOT recomputed as
            // `inner.i - encodedLength(res.cp)`: that is redundant for
            // well-formed input and would give a wrong offset (or underflow
            // `usize`) if the decoder ever returned a codepoint whose encoded
            // length differs from the number of bytes it consumed.
            if (res.is_break) {
                // `inner.i` is used as the byte offset just past `res.cp`.
                const end = self.inner.i;
                const s = self.start;
                self.start = end;
                self.prev_break = true;
                return .{ .start = s, .len = end - s };
            }

            self.prev_break = false;
        }

        // Flush the last grapheme if the input ended mid-cluster.
        if (!self.prev_break and self.start < self.str.len) {
            const s = self.start;
            const len = self.str.len - s;
            self.start = self.str.len;
            self.prev_break = true;
            return .{ .start = s, .len = len };
        }

        return null;
    }
};

/// Creates a grapheme iterator based on `str`. `self` is unused.
pub fn graphemeIterator(self: *const Unicode, str: []const u8) GraphemeIterator {
    _ = self;
    return GraphemeIterator.init(str);
}

+2 -2

src/Vaxis.zig

··· 414 414 if (cell.char.width != 0) break :blk cell.char.width; 415 415 416 416 const method: gwidth.Method = self.caps.unicode; 417 - const width: u16 = @intCast(gwidth.gwidth(cell.char.grapheme, method, &self.unicode.width_data)); 417 + const width: u16 = @intCast(gwidth.gwidth(cell.char.grapheme, method)); 418 418 break :blk @max(1, width); 419 419 }; 420 420 defer { ··· 1149 1149 if (cell.char.width != 0) break :blk cell.char.width; 1150 1150 1151 1151 const method: gwidth.Method = self.caps.unicode; 1152 - const width = gwidth.gwidth(cell.char.grapheme, method, &self.unicode.width_data); 1152 + const width = gwidth.gwidth(cell.char.grapheme, method); 1153 1153 break :blk @max(1, width); 1154 1154 }; 1155 1155 defer {

+1 -1

src/Window.zig

··· 207 207 208 208 /// returns the width of the grapheme. This depends on the terminal capabilities 209 209 pub fn gwidth(self: Window, str: []const u8) u16 { 210 - return gw.gwidth(str, self.screen.width_method, &self.unicode.width_data); 210 + return gw.gwidth(str, self.screen.width_method); 211 211 } 212 212 213 213 /// fills the window with the provided cell

+172 -35

src/gwidth.zig

··· 1 1 const std = @import("std"); 2 2 const unicode = std.unicode; 3 3 const testing = std.testing; 4 - const DisplayWidth = @import("DisplayWidth"); 5 - const code_point = @import("code_point"); 4 + const uucode = @import("uucode"); 6 5 7 6 /// the method to use when calculating the width of a grapheme 8 7 pub const Method = enum { ··· 11 10 no_zwj, 12 11 }; 13 12 13 + /// Calculate width from east asian width property and Unicode properties 14 + fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 { 15 + // Based on wcwidth implementation 16 + // Control characters 17 + if (cp == 0) return 0; 18 + if (cp < 32 or (cp >= 0x7f and cp < 0xa0)) return -1; 19 + 20 + // Use general category for comprehensive zero-width detection 21 + const gc = uucode.get(.general_category, cp); 22 + switch (gc) { 23 + .mark_nonspacing, .mark_enclosing => return 0, 24 + else => {}, 25 + } 26 + 27 + // Additional zero-width characters not covered by general category 28 + if (cp == 0x00ad) return 0; // soft hyphen 29 + if (cp == 0x200b) return 0; // zero-width space 30 + if (cp == 0x200c) return 0; // zero-width non-joiner 31 + if (cp == 0x200d) return 0; // zero-width joiner 32 + if (cp == 0x2060) return 0; // word joiner 33 + if (cp == 0x034f) return 0; // combining grapheme joiner 34 + if (cp == 0xfeff) return 0; // zero-width no-break space (BOM) 35 + if (cp >= 0x180b and cp <= 0x180d) return 0; // Mongolian variation selectors 36 + if (cp >= 0xfe00 and cp <= 0xfe0f) return 0; // variation selectors 37 + if (cp >= 0xe0100 and cp <= 0xe01ef) return 0; // Plane-14 variation selectors 38 + 39 + // East Asian Width: fullwidth or wide = 2 40 + // ambiguous in East Asian context = 2, otherwise 1 41 + // halfwidth, narrow, or neutral = 1 42 + return switch (eaw) { 43 + .fullwidth, .wide => 2, 44 + else => 1, 45 + }; 46 + } 47 + 14 48 /// returns the width of the provided string, as measured by the method chosen 15 - pub fn gwidth(str: []const u8, method: Method, data: *const 
DisplayWidth) u16 { 49 + pub fn gwidth(str: []const u8, method: Method) u16 { 16 50 switch (method) { 17 51 .unicode => { 18 - return @intCast(data.strWidth(str)); 52 + var total: u16 = 0; 53 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 54 + 55 + var grapheme_start: usize = 0; 56 + var prev_break: bool = true; 57 + 58 + while (grapheme_iter.next()) |result| { 59 + if (prev_break and !result.is_break) { 60 + // Start of a new grapheme 61 + const cp_len: usize = std.unicode.utf8CodepointSequenceLength(result.cp) catch 1; 62 + grapheme_start = grapheme_iter.i - cp_len; 63 + } 64 + 65 + if (result.is_break) { 66 + // End of a grapheme - calculate its width 67 + const grapheme_end = grapheme_iter.i; 68 + const grapheme_bytes = str[grapheme_start..grapheme_end]; 69 + 70 + // Calculate grapheme width 71 + var g_iter = uucode.utf8.Iterator.init(grapheme_bytes); 72 + var width: i16 = 0; 73 + var has_emoji_vs: bool = false; 74 + var has_text_vs: bool = false; 75 + var has_emoji_presentation: bool = false; 76 + var ri_count: u8 = 0; 77 + 78 + while (g_iter.next()) |cp| { 79 + // Check for emoji variation selector (U+FE0F) 80 + if (cp == 0xfe0f) { 81 + has_emoji_vs = true; 82 + continue; 83 + } 84 + 85 + // Check for text variation selector (U+FE0E) 86 + if (cp == 0xfe0e) { 87 + has_text_vs = true; 88 + continue; 89 + } 90 + 91 + // Check if this codepoint has emoji presentation 92 + if (uucode.get(.is_emoji_presentation, cp)) { 93 + has_emoji_presentation = true; 94 + } 95 + 96 + // Count regional indicators (for flag emojis) 97 + if (cp >= 0x1F1E6 and cp <= 0x1F1FF) { 98 + ri_count += 1; 99 + } 100 + 101 + const eaw = uucode.get(.east_asian_width, cp); 102 + const w = eawToWidth(cp, eaw); 103 + // Take max of non-zero widths 104 + if (w > 0 and w > width) width = w; 105 + } 106 + 107 + // Handle variation selectors and emoji presentation 108 + if (has_text_vs) { 109 + // Text presentation explicit - keep width as-is (usually 1) 110 + 
width = @max(1, width); 111 + } else if (has_emoji_vs or has_emoji_presentation or ri_count == 2) { 112 + // Emoji presentation or flag pair - force width 2 113 + width = @max(2, width); 114 + } 115 + 116 + total += @max(0, width); 117 + 118 + grapheme_start = grapheme_end; 119 + } 120 + prev_break = result.is_break; 121 + } 122 + 123 + return total; 19 124 }, 20 125 .wcwidth => { 21 126 var total: u16 = 0; 22 - var iter: code_point.Iterator = .{ .bytes = str }; 127 + var iter = uucode.utf8.Iterator.init(str); 23 128 while (iter.next()) |cp| { 24 - const w: u16 = switch (cp.code) { 129 + const w: i16 = switch (cp) { 25 130 // undo an override in zg for emoji skintone selectors 26 - 0x1f3fb...0x1f3ff, 27 - => 2, 28 - else => @max(0, data.codePointWidth(cp.code)), 131 + 0x1f3fb...0x1f3ff => 2, 132 + else => blk: { 133 + const eaw = uucode.get(.east_asian_width, cp); 134 + break :blk eawToWidth(cp, eaw); 135 + }, 29 136 }; 30 - total += w; 137 + total += @intCast(@max(0, w)); 31 138 } 32 139 return total; 33 140 }, ··· 35 142 var iter = std.mem.splitSequence(u8, str, "\u{200D}"); 36 143 var result: u16 = 0; 37 144 while (iter.next()) |s| { 38 - result += gwidth(s, .unicode, data); 145 + result += gwidth(s, .unicode); 39 146 } 40 147 return result; 41 148 }, ··· 43 150 } 44 151 45 152 test "gwidth: a" { 46 - const alloc = testing.allocator_instance.allocator(); 47 - const data = try DisplayWidth.init(alloc); 48 - defer data.deinit(alloc); 49 - try testing.expectEqual(1, gwidth("a", .unicode, &data)); 50 - try testing.expectEqual(1, gwidth("a", .wcwidth, &data)); 51 - try testing.expectEqual(1, gwidth("a", .no_zwj, &data)); 153 + try testing.expectEqual(1, gwidth("a", .unicode)); 154 + try testing.expectEqual(1, gwidth("a", .wcwidth)); 155 + try testing.expectEqual(1, gwidth("a", .no_zwj)); 52 156 } 53 157 54 158 test "gwidth: emoji with ZWJ" { 55 - const alloc = testing.allocator_instance.allocator(); 56 - const data = try DisplayWidth.init(alloc); 57 - defer 
data.deinit(alloc); 58 - try testing.expectEqual(2, gwidth("👩‍🚀", .unicode, &data)); 59 - try testing.expectEqual(4, gwidth("👩‍🚀", .wcwidth, &data)); 60 - try testing.expectEqual(4, gwidth("👩‍🚀", .no_zwj, &data)); 159 + try testing.expectEqual(2, gwidth("👩‍🚀", .unicode)); 160 + try testing.expectEqual(4, gwidth("👩‍🚀", .wcwidth)); 161 + try testing.expectEqual(4, gwidth("👩‍🚀", .no_zwj)); 61 162 } 62 163 63 164 test "gwidth: emoji with VS16 selector" { 64 - const alloc = testing.allocator_instance.allocator(); 65 - const data = try DisplayWidth.init(alloc); 66 - defer data.deinit(alloc); 67 - try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .unicode, &data)); 68 - try testing.expectEqual(1, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .wcwidth, &data)); 69 - try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .no_zwj, &data)); 165 + try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .unicode)); 166 + try testing.expectEqual(1, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .wcwidth)); 167 + try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .no_zwj)); 70 168 } 71 169 72 170 test "gwidth: emoji with skin tone selector" { 73 - const alloc = testing.allocator_instance.allocator(); 74 - const data = try DisplayWidth.init(alloc); 75 - defer data.deinit(alloc); 76 - try testing.expectEqual(2, gwidth("👋🏿", .unicode, &data)); 77 - try testing.expectEqual(4, gwidth("👋🏿", .wcwidth, &data)); 78 - try testing.expectEqual(2, gwidth("👋🏿", .no_zwj, &data)); 171 + try testing.expectEqual(2, gwidth("👋🏿", .unicode)); 172 + try testing.expectEqual(4, gwidth("👋🏿", .wcwidth)); 173 + try testing.expectEqual(2, gwidth("👋🏿", .no_zwj)); 174 + } 175 + 176 + test "gwidth: zero-width space" { 177 + try testing.expectEqual(0, gwidth("\u{200B}", .unicode)); 178 + try testing.expectEqual(0, gwidth("\u{200B}", .wcwidth)); 179 + } 180 + 181 + test "gwidth: zero-width non-joiner" { 182 + try testing.expectEqual(0, gwidth("\u{200C}", .unicode)); 183 + try testing.expectEqual(0, 
gwidth("\u{200C}", .wcwidth)); 184 + } 185 + 186 + test "gwidth: combining marks" { 187 + // Hebrew combining mark 188 + try testing.expectEqual(0, gwidth("\u{05B0}", .unicode)); 189 + // Devanagari combining mark 190 + try testing.expectEqual(0, gwidth("\u{093C}", .unicode)); 191 + } 192 + 193 + test "gwidth: flag emoji (regional indicators)" { 194 + // US flag 🇺🇸 195 + try testing.expectEqual(2, gwidth("🇺🇸", .unicode)); 196 + // UK flag 🇬🇧 197 + try testing.expectEqual(2, gwidth("🇬🇧", .unicode)); 198 + } 199 + 200 + test "gwidth: text variation selector" { 201 + // U+2764 (heavy black heart) + U+FE0E (text variation selector) 202 + // Should be width 1 with text presentation 203 + try testing.expectEqual(1, gwidth("❤︎", .unicode)); 204 + } 205 + 206 + test "gwidth: keycap sequence" { 207 + // Digit 1 + U+FE0F + U+20E3 (combining enclosing keycap) 208 + // Should be width 2 209 + try testing.expectEqual(2, gwidth("1️⃣", .unicode)); 210 + } 211 + 212 + test "gwidth: base letter with combining mark" { 213 + // 'a' + combining acute accent (NFD form) 214 + // Should be width 1 (combining mark is zero-width) 215 + try testing.expectEqual(1, gwidth("á", .unicode)); 79 216 }

-2

src/main.zig

··· 26 26 pub const widgets = @import("widgets.zig"); 27 27 pub const gwidth = @import("gwidth.zig"); 28 28 pub const ctlseqs = @import("ctlseqs.zig"); 29 - pub const DisplayWidth = @import("DisplayWidth"); 30 29 pub const GraphemeCache = @import("GraphemeCache.zig"); 31 - pub const Graphemes = @import("Graphemes"); 32 30 pub const Event = @import("event.zig").Event; 33 31 pub const Unicode = @import("Unicode.zig"); 34 32

+2 -4

src/vxfw/vxfw.zig

··· 1 1 const std = @import("std"); 2 2 const vaxis = @import("../main.zig"); 3 + const uucode = @import("uucode"); 3 4 4 - const Graphemes = vaxis.Graphemes; 5 5 const testing = std.testing; 6 6 7 7 const assert = std.debug.assert; ··· 200 200 } 201 201 202 202 pub fn stringWidth(_: DrawContext, str: []const u8) usize { 203 - assert(DrawContext.unicode != null); // DrawContext not initialized 204 203 return vaxis.gwidth.gwidth( 205 204 str, 206 205 DrawContext.width_method, 207 - &DrawContext.unicode.?.width_data, 208 206 ); 209 207 } 210 208 211 - pub fn graphemeIterator(_: DrawContext, str: []const u8) Graphemes.Iterator { 209 + pub fn graphemeIterator(_: DrawContext, str: []const u8) vaxis.Unicode.GraphemeIterator { 212 210 assert(DrawContext.unicode != null); // DrawContext not initialized 213 211 return DrawContext.unicode.?.graphemeIterator(str); 214 212 }

+56 -23

src/widgets/TextView.zig

··· 1 1 const std = @import("std"); 2 2 const vaxis = @import("../main.zig"); 3 - const Graphemes = @import("Graphemes"); 4 - const DisplayWidth = @import("DisplayWidth"); 3 + const uucode = @import("uucode"); 5 4 const ScrollView = vaxis.widgets.ScrollView; 6 5 6 + /// Simple grapheme representation to replace Graphemes.Grapheme 7 + const Grapheme = struct { 8 + len: u16, 9 + offset: u32, 10 + }; 11 + 7 12 pub const BufferWriter = struct { 8 13 pub const Error = error{OutOfMemory}; 9 14 pub const Writer = std.io.GenericWriter(@This(), Error, write); 10 15 11 16 allocator: std.mem.Allocator, 12 17 buffer: *Buffer, 13 - gd: *const Graphemes, 14 - wd: *const DisplayWidth, 15 18 16 19 pub fn write(self: @This(), bytes: []const u8) Error!usize { 17 20 try self.buffer.append(self.allocator, .{ 18 21 .bytes = bytes, 19 - .gd = self.gd, 20 - .wd = self.wd, 21 22 }); 22 23 return bytes.len; 23 24 } ··· 33 34 34 35 pub const Content = struct { 35 36 bytes: []const u8, 36 - gd: *const Graphemes, 37 - wd: *const DisplayWidth, 38 37 }; 39 38 40 39 pub const Style = struct { ··· 45 44 46 45 pub const Error = error{OutOfMemory}; 47 46 48 - grapheme: std.MultiArrayList(Graphemes.Grapheme) = .{}, 47 + grapheme: std.MultiArrayList(Grapheme) = .{}, 49 48 content: std.ArrayListUnmanaged(u8) = .{}, 50 49 style_list: StyleList = .{}, 51 50 style_map: StyleMap = .{}, ··· 78 77 /// Appends content to the buffer. 
79 78 pub fn append(self: *@This(), allocator: std.mem.Allocator, content: Content) Error!void { 80 79 var cols: usize = self.last_cols; 81 - var iter = Graphemes.Iterator.init(content.bytes, content.gd); 82 - while (iter.next()) |g| { 80 + var iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(content.bytes)); 81 + 82 + var grapheme_start: usize = 0; 83 + var prev_break: bool = true; 84 + 85 + while (iter.next()) |result| { 86 + if (prev_break and !result.is_break) { 87 + // Start of a new grapheme 88 + grapheme_start = iter.i - std.unicode.utf8CodepointSequenceLength(result.cp) catch 1; 89 + } 90 + 91 + if (result.is_break) { 92 + // End of a grapheme 93 + const grapheme_end = iter.i; 94 + const grapheme_len = grapheme_end - grapheme_start; 95 + 96 + try self.grapheme.append(allocator, .{ 97 + .len = @intCast(grapheme_len), 98 + .offset = @intCast(self.content.items.len + grapheme_start), 99 + }); 100 + 101 + const cluster = content.bytes[grapheme_start..grapheme_end]; 102 + if (std.mem.eql(u8, cluster, "\n")) { 103 + self.cols = @max(self.cols, cols); 104 + cols = 0; 105 + } else { 106 + // Calculate width using gwidth 107 + const w = vaxis.gwidth.gwidth(cluster, .unicode); 108 + cols +|= w; 109 + } 110 + 111 + grapheme_start = grapheme_end; 112 + } 113 + prev_break = result.is_break; 114 + } 115 + 116 + // Flush the last grapheme if we ended mid-cluster 117 + if (!prev_break and grapheme_start < content.bytes.len) { 118 + const grapheme_len = content.bytes.len - grapheme_start; 119 + 83 120 try self.grapheme.append(allocator, .{ 84 - .len = g.len, 85 - .offset = @as(u32, @intCast(self.content.items.len)) + g.offset, 121 + .len = @intCast(grapheme_len), 122 + .offset = @intCast(self.content.items.len + grapheme_start), 86 123 }); 87 - const cluster = g.bytes(content.bytes); 88 - if (std.mem.eql(u8, cluster, "\n")) { 89 - self.cols = @max(self.cols, cols); 90 - cols = 0; 91 - continue; 124 + 125 + const cluster = content.bytes[grapheme_start..]; 126 
+ if (!std.mem.eql(u8, cluster, "\n")) { 127 + const w = vaxis.gwidth.gwidth(cluster, .unicode); 128 + cols +|= w; 92 129 } 93 - cols +|= content.wd.strWidth(cluster); 94 130 } 131 + 95 132 try self.content.appendSlice(allocator, content.bytes); 96 133 self.last_cols = cols; 97 134 self.cols = @max(self.cols, cols); ··· 123 160 pub fn writer( 124 161 self: *@This(), 125 162 allocator: std.mem.Allocator, 126 - gd: *const Graphemes, 127 - wd: *const DisplayWidth, 128 163 ) BufferWriter.Writer { 129 164 return .{ 130 165 .context = .{ 131 166 .allocator = allocator, 132 167 .buffer = self, 133 - .gd = gd, 134 - .wd = wd, 135 168 }, 136 169 }; 137 170 }

+1 -1

src/widgets/View.zig

··· 141 141 142 142 /// Returns the width of the grapheme. This depends on the terminal capabilities 143 143 pub fn gwidth(self: View, str: []const u8) u16 { 144 - return gw.gwidth(str, self.screen.width_method, &self.unicode.width_data); 144 + return gw.gwidth(str, self.screen.width_method); 145 145 } 146 146 147 147 /// Fills the View with the provided cell

+4 -6

src/widgets/terminal/Terminal.zig

··· 10 10 const vaxis = @import("../../main.zig"); 11 11 const Winsize = vaxis.Winsize; 12 12 const Screen = @import("Screen.zig"); 13 - const DisplayWidth = @import("DisplayWidth"); 14 13 const Key = vaxis.Key; 15 14 const Queue = vaxis.Queue(Event, 16); 16 - const code_point = @import("code_point"); 17 15 const key = @import("key.zig"); 18 16 19 17 pub const Event = union(enum) { ··· 279 277 switch (event) { 280 278 .print => |str| { 281 279 var iter = self.unicode.graphemeIterator(str); 282 - while (iter.next()) |g| { 283 - const gr = g.bytes(str); 280 + while (iter.next()) |grapheme| { 281 + const gr = grapheme.bytes(str); 284 282 // TODO: use actual instead of .unicode 285 - const w = vaxis.gwidth.gwidth(gr, .unicode, &self.unicode.width_data); 283 + const w = vaxis.gwidth.gwidth(gr, .unicode); 286 284 try self.back_screen.print(gr, @truncate(w), self.mode.autowrap); 287 285 } 288 286 }, ··· 498 496 var iter = seq.iterator(u16); 499 497 const n = iter.next() orelse 1; 500 498 // TODO: maybe not .unicode 501 - const w = vaxis.gwidth.gwidth(self.last_printed, .unicode, &self.unicode.width_data); 499 + const w = vaxis.gwidth.gwidth(self.last_printed, .unicode); 502 500 var i: usize = 0; 503 501 while (i < n) : (i += 1) { 504 502 try self.back_screen.print(self.last_printed, @truncate(w), self.mode.autowrap);