a modern tui library written in zig

replace zg with uucode for unicode operations

Replace the zg dependency with uucode for grapheme segmentation and display width measurement. This eliminates runtime allocations by using compile-time lookup tables instead of runtime-allocated data structures.

Key changes:
- Update build.zig.zon to use uucode dependency instead of zg
- Configure the uucode dependency's `fields` list in build.zig (east_asian_width, grapheme_break, general_category, is_emoji_presentation)
- Simplify Unicode.zig by removing allocation requirements
- Update gwidth.zig to use uucode's stateless API
- Migrate Parser.zig from code_point.Iterator to uucode.utf8.Iterator
- Update Loop.zig, TextView.zig, Terminal.zig, and other widgets to use uucode's grapheme iterator
- Remove DisplayWidth and Graphemes public exports from main.zig
- Add MIGRATION_ZG_TO_UUCODE.md documenting the migration

Benefits:
- No allocations required for Unicode operations
- Simpler API without init/deinit lifecycle
- Less state to manage and pass around
- Smaller binary size with selective field inclusion

Amp-Thread-ID: https://ampcode.com/threads/T-4e217d39-617e-4f4f-9ed8-4d6153fd5e2f
Co-authored-by: Amp <amp@ampcode.com>

rockorager.dev 81732720 3c92fc07

verified
+349
MIGRATION_ZG_TO_UUCODE.md
··· 1 + # Migration Plan: zg → uucode 2 + 3 + ## Overview 4 + 5 + This document outlines the plan to migrate from the `zg` dependency to `uucode` for grapheme segmentation and display width measurement in libvaxis. 6 + 7 + ## Key Advantage 8 + 9 + **No allocation required** - uucode uses compile-time lookup tables instead of runtime-allocated data structures, eliminating the need to initialize, pass around, and deinitialize Unicode data. 10 + 11 + ## Current zg Usage 12 + 13 + ### Dependencies (from build.zig) 14 + - `code_point` - UTF-8 codepoint iteration 15 + - `Graphemes` - Grapheme cluster segmentation 16 + - `DisplayWidth` - Display width calculation 17 + 18 + ### Files Using zg 19 + - `src/main.zig` - Re-exports `Graphemes` and `DisplayWidth` 20 + - `src/Unicode.zig` - Wrapper around zg data (allocates) 21 + - `src/gwidth.zig` - Width calculation using `DisplayWidth` 22 + - `src/Parser.zig` - Uses `code_point` and `Graphemes` 23 + - `src/Loop.zig` - Uses `Graphemes` 24 + - `src/widgets/TextView.zig` - Uses `Graphemes` and `DisplayWidth` 25 + - `src/widgets/terminal/Terminal.zig` - Uses `code_point` and `DisplayWidth` 26 + 27 + ### Allocation Pattern (zg) 28 + ```zig 29 + // Initialize with allocator 30 + const graphemes = try Graphemes.init(alloc); 31 + defer graphemes.deinit(alloc); 32 + 33 + const width_data = try DisplayWidth.init(alloc); 34 + defer width_data.deinit(alloc); 35 + 36 + // Use 37 + var iter = graphemes.iterator(str); 38 + const width = width_data.codePointWidth(cp); 39 + ``` 40 + 41 + ## uucode API 42 + 43 + ### Available Modules 44 + - `uucode.utf8.Iterator` - UTF-8 codepoint iteration (no allocation) 45 + - `uucode.grapheme.Iterator` - Grapheme cluster iteration (no allocation) 46 + - `uucode.get()` - Compile-time Unicode property lookup (no allocation) 47 + 48 + ### Usage Pattern (uucode) 49 + ```zig 50 + // UTF-8 iteration 51 + var cp_iter = uucode.utf8.Iterator.init(str); 52 + while (cp_iter.next()) |cp| { 53 + // process codepoint 54 
+ } 55 + 56 + // Grapheme iteration 57 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 58 + while (grapheme_iter.next()) |result| { 59 + // result.cp is the codepoint 60 + // result.is_break indicates grapheme boundary 61 + } 62 + 63 + // Width lookup (requires wcwidth field in build config) 64 + const width = uucode.get(.wcwidth, cp); 65 + 66 + // Grapheme width (from uucode.x extension) 67 + const g_width = uucode.x.grapheme.unverifiedWcwidth(grapheme_iter); 68 + ``` 69 + 70 + ### Iterator Result Structure 71 + ```zig 72 + pub const IteratorResult = struct { 73 + cp: u21, // The codepoint 74 + is_break: bool, // true if this is a grapheme cluster boundary 75 + }; 76 + ``` 77 + 78 + ## Migration Steps 79 + 80 + ### 1. Update build.zig.zon 81 + 82 + Add the uucode dependency entry (the `wcwidth` field itself is configured in build.zig, see step 2): 83 + 84 + ```zig 85 + .uucode = .{ 86 + .url = "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", 87 + .hash = "uucode-0.1.0-ZZjBPj96QADXyt5sqwBJUnhaDYs_qBeeKijZvlRa0eqM", 88 + }, 89 + ``` 90 + 91 + ### 2. Update build.zig 92 + 93 + In the uucode dependency configuration, update the fields array: 94 + 95 + ```zig 96 + const uucode_dep = b.dependency("uucode", .{ 97 + .target = target, 98 + .optimize = optimize, 99 + .fields = @as([]const []const u8, &.{ 100 + "grapheme_break", 101 + "wcwidth", // ADD THIS 102 + }), 103 + }); 104 + ``` 105 + 106 + Remove zg dependency: 107 + - Delete the `zg_dep` declaration 108 + - Remove all `zg_dep.module()` references 109 + - Remove `.zg` from build.zig.zon 110 + 111 + ### 3. Update Module Imports in build.zig 112 + 113 + Remove: 114 + ```zig 115 + vaxis_mod.addImport("code_point", zg_dep.module("code_point")); 116 + vaxis_mod.addImport("Graphemes", zg_dep.module("Graphemes")); 117 + vaxis_mod.addImport("DisplayWidth", zg_dep.module("DisplayWidth")); 118 + ``` 119 + 120 + No replacement needed - uucode is already imported. 121 + 122 + ### 4.
Update src/main.zig 123 + 124 + Remove: 125 + ```zig 126 + pub const DisplayWidth = @import("DisplayWidth"); 127 + pub const Graphemes = @import("Graphemes"); 128 + ``` 129 + 130 + These become internal implementation details or are removed entirely. 131 + 132 + ### 5. Update src/Unicode.zig 133 + 134 + **Before:** 135 + ```zig 136 + const Graphemes = @import("Graphemes"); 137 + const DisplayWidth = @import("DisplayWidth"); 138 + 139 + const Unicode = @This(); 140 + 141 + width_data: DisplayWidth, 142 + 143 + pub fn init(alloc: std.mem.Allocator) !Unicode { 144 + return .{ 145 + .width_data = try DisplayWidth.init(alloc), 146 + }; 147 + } 148 + 149 + pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 150 + self.width_data.deinit(alloc); 151 + } 152 + 153 + pub fn graphemeIterator(self: *const Unicode, str: []const u8) Graphemes.Iterator { 154 + return self.width_data.graphemes.iterator(str); 155 + } 156 + ``` 157 + 158 + **After:** 159 + ```zig 160 + const uucode = @import("uucode"); 161 + 162 + const Unicode = @This(); 163 + 164 + // No fields needed - all operations are stateless 165 + 166 + pub fn init(alloc: std.mem.Allocator) !Unicode { 167 + _ = alloc; 168 + return .{}; 169 + } 170 + 171 + pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void { 172 + _ = self; 173 + _ = alloc; 174 + } 175 + 176 + pub fn graphemeIterator(self: *const Unicode, str: []const u8) uucode.grapheme.Iterator(uucode.utf8.Iterator) { 177 + _ = self; 178 + return uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 179 + } 180 + ``` 181 + 182 + Or consider removing the `Unicode` wrapper entirely since it no longer serves a purpose. 183 + 184 + ### 6. 
Update src/gwidth.zig 185 + 186 + **Before:** 187 + ```zig 188 + const DisplayWidth = @import("DisplayWidth"); 189 + const code_point = @import("code_point"); 190 + 191 + pub fn gwidth(str: []const u8, method: Method, data: *const DisplayWidth) u16 { 192 + switch (method) { 193 + .unicode => { 194 + return @intCast(data.strWidth(str)); 195 + }, 196 + .wcwidth => { 197 + var total: u16 = 0; 198 + var iter: code_point.Iterator = .{ .bytes = str }; 199 + while (iter.next()) |cp| { 200 + const w: u16 = switch (cp.code) { 201 + 0x1f3fb...0x1f3ff => 2, 202 + else => @max(0, data.codePointWidth(cp.code)), 203 + }; 204 + total += w; 205 + } 206 + return total; 207 + }, 208 + // ... 209 + } 210 + } 211 + ``` 212 + 213 + **After:** 214 + ```zig 215 + const uucode = @import("uucode"); 216 + 217 + pub fn gwidth(str: []const u8, method: Method) u16 { 218 + switch (method) { 219 + .unicode => { 220 + var total: u16 = 0; 221 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 222 + while (grapheme_iter.next()) |result| { 223 + if (result.is_break) { 224 + // Calculate width for previous grapheme 225 + // This requires buffering the grapheme - may need different approach 226 + } 227 + } 228 + return total; 229 + }, 230 + .wcwidth => { 231 + var total: u16 = 0; 232 + var iter = uucode.utf8.Iterator.init(str); 233 + while (iter.next()) |cp| { 234 + const w: u16 = switch (cp) { 235 + 0x1f3fb...0x1f3ff => 2, 236 + else => @max(0, uucode.get(.wcwidth, cp)), 237 + }; 238 + total += w; 239 + } 240 + return total; 241 + }, 242 + // ... 243 + } 244 + } 245 + ``` 246 + 247 + Note: Remove the `data` parameter entirely. 248 + 249 + ### 7. 
Update src/Parser.zig 250 + 251 + Replace: 252 + ```zig 253 + const code_point = @import("code_point"); 254 + const Graphemes = @import("Graphemes"); 255 + ``` 256 + 257 + With: 258 + ```zig 259 + const uucode = @import("uucode"); 260 + ``` 261 + 262 + Replace: 263 + ```zig 264 + grapheme_data: *const Graphemes, 265 + ``` 266 + 267 + With: 268 + ```zig 269 + // Remove this field entirely if only used for iteration 270 + ``` 271 + 272 + Replace usage: 273 + ```zig 274 + var iter: code_point.Iterator = .{ .bytes = input }; 275 + ``` 276 + 277 + With: 278 + ```zig 279 + var iter = uucode.utf8.Iterator.init(input); 280 + ``` 281 + 282 + ### 8. Update Other Files 283 + 284 + Apply similar transformations to: 285 + - `src/Loop.zig` 286 + - `src/widgets/TextView.zig` 287 + - `src/widgets/terminal/Terminal.zig` 288 + 289 + Pattern: 290 + 1. Replace imports with `const uucode = @import("uucode");` 291 + 2. Remove allocated data fields 292 + 3. Replace `code_point.Iterator` with `uucode.utf8.Iterator` 293 + 4. Replace `graphemes.iterator()` with `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` 294 + 5. Replace `data.codePointWidth(cp)` with `uucode.get(.wcwidth, cp)` 295 + 296 + ### 9. Update Tests 297 + 298 + All test code that does: 299 + ```zig 300 + const data = try DisplayWidth.init(alloc); 301 + defer data.deinit(alloc); 302 + ``` 303 + 304 + Can be removed entirely. 
Width lookups become: 305 + ```zig 306 + const width = uucode.get(.wcwidth, cp); 307 + ``` 308 + 309 + ## API Mapping Reference 310 + 311 + | zg API | uucode API | 312 + |--------|------------| 313 + | `code_point.Iterator{ .bytes = str }` | `uucode.utf8.Iterator.init(str)` | 314 + | `iter.next().code` | `iter.next()` (returns u21 directly) | 315 + | `Graphemes.init(alloc)` | _(no initialization needed)_ | 316 + | `graphemes.iterator(str)` | `uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str))` | 317 + | `DisplayWidth.init(alloc)` | _(no initialization needed)_ | 318 + | `width_data.codePointWidth(cp)` | `uucode.get(.wcwidth, cp)` | 319 + | `width_data.strWidth(str)` | _(implement using iterator + uucode.get)_ | 320 + 321 + ## Benefits 322 + 323 + 1. **No allocations** - All Unicode data is compile-time generated 324 + 2. **Simpler API** - No init/deinit lifecycle 325 + 3. **Less state to manage** - No data structures to pass around 326 + 4. **Smaller binary** - Only requested fields are included 327 + 5. **Type-safe lookups** - Field names are compile-time checked 328 + 329 + ## Potential Challenges 330 + 331 + 1. **String width calculation** - zg's `strWidth()` is convenient; need to implement equivalent using iterator 332 + 2. **Grapheme-aware width** - May need `uucode.x.grapheme.unverifiedWcwidth()` for proper emoji/ZWJ handling 333 + 3. **Iterator API differences** - zg returns struct with `.code`, uucode returns `u21` directly 334 + 4. **Breaking API changes** - Any public APIs exposing `Graphemes` or `DisplayWidth` types will need updates 335 + 336 + ## Testing Strategy 337 + 338 + 1. Run existing tests with uucode implementation 339 + 2. Pay special attention to: 340 + - Emoji with ZWJ sequences 341 + - Skin tone modifiers 342 + - Variation selectors 343 + - Complex grapheme clusters 344 + 3. Compare width calculations with zg implementation 345 + 4. 
Test memory usage (should be lower without allocations) 346 + 347 + ## Rollback Plan 348 + 349 + If issues arise, the zg dependency can be re-added to build.zig.zon and the imports restored. The changes are isolated to a small number of files.
+11 -9
build.zig
··· 6 6 const root_source_file = b.path("src/main.zig"); 7 7 8 8 // Dependencies 9 - const zg_dep = b.dependency("zg", .{ 9 + const zigimg_dep = b.dependency("zigimg", .{ 10 10 .optimize = optimize, 11 11 .target = target, 12 12 }); 13 - const zigimg_dep = b.dependency("zigimg", .{ 14 - .optimize = optimize, 13 + const uucode_dep = b.dependency("uucode", .{ 15 14 .target = target, 15 + .optimize = optimize, 16 + .fields = @as([]const []const u8, &.{ 17 + "east_asian_width", 18 + "grapheme_break", 19 + "general_category", 20 + "is_emoji_presentation", 21 + }), 16 22 }); 17 23 18 24 // Module ··· 21 27 .target = target, 22 28 .optimize = optimize, 23 29 }); 24 - vaxis_mod.addImport("code_point", zg_dep.module("code_point")); 25 - vaxis_mod.addImport("Graphemes", zg_dep.module("Graphemes")); 26 - vaxis_mod.addImport("DisplayWidth", zg_dep.module("DisplayWidth")); 27 30 vaxis_mod.addImport("zigimg", zigimg_dep.module("zigimg")); 31 + vaxis_mod.addImport("uucode", uucode_dep.module("uucode")); 28 32 29 33 // Examples 30 34 const Example = enum { ··· 69 73 .target = target, 70 74 .optimize = optimize, 71 75 .imports = &.{ 72 - .{ .name = "code_point", .module = zg_dep.module("code_point") }, 73 - .{ .name = "Graphemes", .module = zg_dep.module("Graphemes") }, 74 - .{ .name = "DisplayWidth", .module = zg_dep.module("DisplayWidth") }, 75 76 .{ .name = "zigimg", .module = zigimg_dep.module("zigimg") }, 77 + .{ .name = "uucode", .module = uucode_dep.module("uucode") }, 76 78 }, 77 79 }), 78 80 });
+3 -4
build.zig.zon
··· 9 9 .url = "https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz", 10 10 .hash = "zigimg-0.1.0-8_eo2vHnEwCIVW34Q14Ec-xUlzIoVg86-7FU2ypPtxms", 11 11 }, 12 - .zg = .{ 13 - // Upstream PR: https://codeberg.org/atman/zg/pulls/90/ 14 - .url = "https://codeberg.org/chaten/zg/archive/749197a3f9d25e211615960c02380a3d659b20f9.tar.gz", 15 - .hash = "zg-0.15.1-oGqU3M0-tALZCy7boQS86znlBloyKx6--JriGlY0Paa9", 12 + .uucode = .{ 13 + .url = "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", 14 + .hash = "uucode-0.1.0-ZZjBPj96QADXyt5sqwBJUnhaDYs_qBeeKijZvlRa0eqM", 16 15 }, 17 16 }, 18 17 .paths = .{
+2 -10
src/Loop.zig
··· 1 1 const std = @import("std"); 2 2 const builtin = @import("builtin"); 3 3 4 - const Graphemes = @import("Graphemes"); 5 - 6 4 const GraphemeCache = @import("GraphemeCache.zig"); 7 5 const Parser = @import("Parser.zig"); 8 6 const Queue = @import("queue.zig").Queue; ··· 47 45 if (self.thread) |_| return; 48 46 self.thread = try std.Thread.spawn(.{}, Self.ttyRun, .{ 49 47 self, 50 - &self.vaxis.unicode.width_data.graphemes, 51 48 self.vaxis.opts.system_clipboard_allocator, 52 49 }); 53 50 } ··· 107 104 /// read input from the tty. This is run in a separate thread 108 105 fn ttyRun( 109 106 self: *Self, 110 - grapheme_data: *const Graphemes, 111 107 paste_allocator: ?std.mem.Allocator, 112 108 ) !void { 113 109 // Return early if we're in test mode to avoid infinite loops ··· 118 114 119 115 switch (builtin.os.tag) { 120 116 .windows => { 121 - var parser: Parser = .{ 122 - .grapheme_data = grapheme_data, 123 - }; 117 + var parser: Parser = .{}; 124 118 while (!self.should_quit) { 125 119 const event = try self.tty.nextEvent(&parser, paste_allocator); 126 120 try handleEventGeneric(self, self.vaxis, &cache, Event, event, null); ··· 133 127 self.postEvent(.{ .winsize = winsize }); 134 128 } 135 129 136 - var parser: Parser = .{ 137 - .grapheme_data = grapheme_data, 138 - }; 130 + var parser: Parser = .{}; 139 131 140 132 // initialize the read buffer 141 133 var buf: [1024]u8 = undefined;
+124 -87
src/Parser.zig
··· 4 4 const Event = @import("event.zig").Event; 5 5 const Key = @import("Key.zig"); 6 6 const Mouse = @import("Mouse.zig"); 7 - const code_point = @import("code_point"); 8 - const Graphemes = @import("Graphemes"); 7 + const uucode = @import("uucode"); 9 8 const Winsize = @import("main.zig").Winsize; 10 9 11 10 const log = std.log.scoped(.vaxis_parser); ··· 45 44 // a buffer to temporarily store text in. We need this to encode 46 45 // text-as-codepoints 47 46 buf: [128]u8 = undefined, 48 - 49 - grapheme_data: *const Graphemes, 50 47 51 48 /// Parse the first event from the input buffer. If a completion event is not 52 49 /// present, Result.event will be null and Result.n will be 0 ··· 78 75 }; 79 76 }, 80 77 } 81 - } else return parseGround(input, self.grapheme_data); 78 + } else return parseGround(input); 82 79 } 83 80 84 81 /// Parse ground state 85 - inline fn parseGround(input: []const u8, data: *const Graphemes) !Result { 82 + inline fn parseGround(input: []const u8) !Result { 86 83 std.debug.assert(input.len > 0); 87 84 88 85 const b = input[0]; ··· 109 106 }, 110 107 0x7F => .{ .codepoint = Key.backspace }, 111 108 else => blk: { 112 - var iter: code_point.Iterator = .{ .bytes = input }; 109 + var iter = uucode.utf8.Iterator.init(input); 113 110 // return null if we don't have a valid codepoint 114 - const cp = iter.next() orelse return error.InvalidUTF8; 111 + const first_cp = iter.next() orelse return error.InvalidUTF8; 115 112 116 - n = cp.len; 113 + n = std.unicode.utf8CodepointSequenceLength(first_cp) catch return error.InvalidUTF8; 117 114 118 115 // Check if we have a multi-codepoint grapheme 119 - var code = cp.code; 120 - var g_state: Graphemes.IterState = .{}; 121 - var prev_cp = code; 122 - while (iter.next()) |next_cp| { 123 - if (Graphemes.graphemeBreak(prev_cp, next_cp.code, data, &g_state)) { 116 + var code = first_cp; 117 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(input)); 118 + var grapheme_len: usize 
= 0; 119 + var cp_count: usize = 0; 120 + 121 + while (grapheme_iter.next()) |result| { 122 + cp_count += 1; 123 + if (result.is_break) { 124 + // Found the first grapheme boundary 125 + grapheme_len = grapheme_iter.i; 124 126 break; 125 127 } 126 - prev_cp = next_cp.code; 127 - code = Key.multicodepoint; 128 - n += next_cp.len; 128 + } 129 + 130 + if (grapheme_len > 0) { 131 + n = grapheme_len; 132 + if (cp_count > 1) { 133 + code = Key.multicodepoint; 134 + } 129 135 } 130 136 131 137 break :blk .{ .codepoint = code, .text = input[0..n] }; ··· 731 737 732 738 test "parse: single xterm keypress" { 733 739 const alloc = testing.allocator_instance.allocator(); 734 - const grapheme_data = try Graphemes.init(alloc); 735 - defer grapheme_data.deinit(alloc); 736 740 const input = "a"; 737 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 741 + var parser: Parser = .{}; 738 742 const result = try parser.parse(input, alloc); 739 743 const expected_key: Key = .{ 740 744 .codepoint = 'a', ··· 748 752 749 753 test "parse: single xterm keypress backspace" { 750 754 const alloc = testing.allocator_instance.allocator(); 751 - const grapheme_data = try Graphemes.init(alloc); 752 - defer grapheme_data.deinit(alloc); 753 755 const input = "\x08"; 754 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 756 + var parser: Parser = .{}; 755 757 const result = try parser.parse(input, alloc); 756 758 const expected_key: Key = .{ 757 759 .codepoint = Key.backspace, ··· 764 766 765 767 test "parse: single xterm keypress with more buffer" { 766 768 const alloc = testing.allocator_instance.allocator(); 767 - const grapheme_data = try Graphemes.init(alloc); 768 - defer grapheme_data.deinit(alloc); 769 769 const input = "ab"; 770 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 770 + var parser: Parser = .{}; 771 771 const result = try parser.parse(input, alloc); 772 772 const expected_key: Key = .{ 773 773 .codepoint = 'a', ··· 782 782 783 783 test "parse: 
xterm escape keypress" { 784 784 const alloc = testing.allocator_instance.allocator(); 785 - const grapheme_data = try Graphemes.init(alloc); 786 - defer grapheme_data.deinit(alloc); 787 785 const input = "\x1b"; 788 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 786 + var parser: Parser = .{}; 789 787 const result = try parser.parse(input, alloc); 790 788 const expected_key: Key = .{ .codepoint = Key.escape }; 791 789 const expected_event: Event = .{ .key_press = expected_key }; ··· 796 794 797 795 test "parse: xterm ctrl+a" { 798 796 const alloc = testing.allocator_instance.allocator(); 799 - const grapheme_data = try Graphemes.init(alloc); 800 - defer grapheme_data.deinit(alloc); 801 797 const input = "\x01"; 802 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 798 + var parser: Parser = .{}; 803 799 const result = try parser.parse(input, alloc); 804 800 const expected_key: Key = .{ .codepoint = 'a', .mods = .{ .ctrl = true } }; 805 801 const expected_event: Event = .{ .key_press = expected_key }; ··· 810 806 811 807 test "parse: xterm alt+a" { 812 808 const alloc = testing.allocator_instance.allocator(); 813 - const grapheme_data = try Graphemes.init(alloc); 814 - defer grapheme_data.deinit(alloc); 815 809 const input = "\x1ba"; 816 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 810 + var parser: Parser = .{}; 817 811 const result = try parser.parse(input, alloc); 818 812 const expected_key: Key = .{ .codepoint = 'a', .mods = .{ .alt = true } }; 819 813 const expected_event: Event = .{ .key_press = expected_key }; ··· 824 818 825 819 test "parse: xterm key up" { 826 820 const alloc = testing.allocator_instance.allocator(); 827 - const grapheme_data = try Graphemes.init(alloc); 828 - defer grapheme_data.deinit(alloc); 829 821 { 830 822 // normal version 831 823 const input = "\x1b[A"; 832 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 824 + var parser: Parser = .{}; 833 825 const result = try 
parser.parse(input, alloc); 834 826 const expected_key: Key = .{ .codepoint = Key.up }; 835 827 const expected_event: Event = .{ .key_press = expected_key }; ··· 841 833 { 842 834 // application keys version 843 835 const input = "\x1bOA"; 844 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 836 + var parser: Parser = .{}; 845 837 const result = try parser.parse(input, alloc); 846 838 const expected_key: Key = .{ .codepoint = Key.up }; 847 839 const expected_event: Event = .{ .key_press = expected_key }; ··· 853 845 854 846 test "parse: xterm shift+up" { 855 847 const alloc = testing.allocator_instance.allocator(); 856 - const grapheme_data = try Graphemes.init(alloc); 857 - defer grapheme_data.deinit(alloc); 858 848 const input = "\x1b[1;2A"; 859 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 849 + var parser: Parser = .{}; 860 850 const result = try parser.parse(input, alloc); 861 851 const expected_key: Key = .{ .codepoint = Key.up, .mods = .{ .shift = true } }; 862 852 const expected_event: Event = .{ .key_press = expected_key }; ··· 867 857 868 858 test "parse: xterm insert" { 869 859 const alloc = testing.allocator_instance.allocator(); 870 - const grapheme_data = try Graphemes.init(alloc); 871 - defer grapheme_data.deinit(alloc); 872 860 const input = "\x1b[2~"; 873 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 861 + var parser: Parser = .{}; 874 862 const result = try parser.parse(input, alloc); 875 863 const expected_key: Key = .{ .codepoint = Key.insert, .mods = .{} }; 876 864 const expected_event: Event = .{ .key_press = expected_key }; ··· 881 869 882 870 test "parse: paste_start" { 883 871 const alloc = testing.allocator_instance.allocator(); 884 - const grapheme_data = try Graphemes.init(alloc); 885 - defer grapheme_data.deinit(alloc); 886 872 const input = "\x1b[200~"; 887 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 873 + var parser: Parser = .{}; 888 874 const result = try parser.parse(input, 
alloc); 889 875 const expected_event: Event = .paste_start; 890 876 ··· 894 880 895 881 test "parse: paste_end" { 896 882 const alloc = testing.allocator_instance.allocator(); 897 - const grapheme_data = try Graphemes.init(alloc); 898 - defer grapheme_data.deinit(alloc); 899 883 const input = "\x1b[201~"; 900 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 884 + var parser: Parser = .{}; 901 885 const result = try parser.parse(input, alloc); 902 886 const expected_event: Event = .paste_end; 903 887 ··· 907 891 908 892 test "parse: osc52 paste" { 909 893 const alloc = testing.allocator_instance.allocator(); 910 - const grapheme_data = try Graphemes.init(alloc); 911 - defer grapheme_data.deinit(alloc); 912 894 const input = "\x1b]52;c;b3NjNTIgcGFzdGU=\x1b\\"; 913 895 const expected_text = "osc52 paste"; 914 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 896 + var parser: Parser = .{}; 915 897 const result = try parser.parse(input, alloc); 916 898 917 899 try testing.expectEqual(25, result.n); ··· 926 908 927 909 test "parse: focus_in" { 928 910 const alloc = testing.allocator_instance.allocator(); 929 - const grapheme_data = try Graphemes.init(alloc); 930 - defer grapheme_data.deinit(alloc); 931 911 const input = "\x1b[I"; 932 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 912 + var parser: Parser = .{}; 933 913 const result = try parser.parse(input, alloc); 934 914 const expected_event: Event = .focus_in; 935 915 ··· 939 919 940 920 test "parse: focus_out" { 941 921 const alloc = testing.allocator_instance.allocator(); 942 - const grapheme_data = try Graphemes.init(alloc); 943 - defer grapheme_data.deinit(alloc); 944 922 const input = "\x1b[O"; 945 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 923 + var parser: Parser = .{}; 946 924 const result = try parser.parse(input, alloc); 947 925 const expected_event: Event = .focus_out; 948 926 ··· 952 930 953 931 test "parse: kitty: shift+a without text reporting" { 
954 932 const alloc = testing.allocator_instance.allocator(); 955 - const grapheme_data = try Graphemes.init(alloc); 956 - defer grapheme_data.deinit(alloc); 957 933 const input = "\x1b[97:65;2u"; 958 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 934 + var parser: Parser = .{}; 959 935 const result = try parser.parse(input, alloc); 960 936 const expected_key: Key = .{ 961 937 .codepoint = 'a', ··· 971 947 972 948 test "parse: kitty: alt+shift+a without text reporting" { 973 949 const alloc = testing.allocator_instance.allocator(); 974 - const grapheme_data = try Graphemes.init(alloc); 975 - defer grapheme_data.deinit(alloc); 976 950 const input = "\x1b[97:65;4u"; 977 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 951 + var parser: Parser = .{}; 978 952 const result = try parser.parse(input, alloc); 979 953 const expected_key: Key = .{ 980 954 .codepoint = 'a', ··· 989 963 990 964 test "parse: kitty: a without text reporting" { 991 965 const alloc = testing.allocator_instance.allocator(); 992 - const grapheme_data = try Graphemes.init(alloc); 993 - defer grapheme_data.deinit(alloc); 994 966 const input = "\x1b[97u"; 995 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 967 + var parser: Parser = .{}; 996 968 const result = try parser.parse(input, alloc); 997 969 const expected_key: Key = .{ 998 970 .codepoint = 'a', ··· 1005 977 1006 978 test "parse: kitty: release event" { 1007 979 const alloc = testing.allocator_instance.allocator(); 1008 - const grapheme_data = try Graphemes.init(alloc); 1009 - defer grapheme_data.deinit(alloc); 1010 980 const input = "\x1b[97;1:3u"; 1011 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 981 + var parser: Parser = .{}; 1012 982 const result = try parser.parse(input, alloc); 1013 983 const expected_key: Key = .{ 1014 984 .codepoint = 'a', ··· 1021 991 1022 992 test "parse: single codepoint" { 1023 993 const alloc = testing.allocator_instance.allocator(); 1024 - const grapheme_data 
= try Graphemes.init(alloc); 1025 - defer grapheme_data.deinit(alloc); 1026 994 const input = "🙂"; 1027 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 995 + var parser: Parser = .{}; 1028 996 const result = try parser.parse(input, alloc); 1029 997 const expected_key: Key = .{ 1030 998 .codepoint = 0x1F642, ··· 1038 1006 1039 1007 test "parse: single codepoint with more in buffer" { 1040 1008 const alloc = testing.allocator_instance.allocator(); 1041 - const grapheme_data = try Graphemes.init(alloc); 1042 - defer grapheme_data.deinit(alloc); 1043 1009 const input = "🙂a"; 1044 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1010 + var parser: Parser = .{}; 1045 1011 const result = try parser.parse(input, alloc); 1046 1012 const expected_key: Key = .{ 1047 1013 .codepoint = 0x1F642, ··· 1055 1021 1056 1022 test "parse: multiple codepoint grapheme" { 1057 1023 const alloc = testing.allocator_instance.allocator(); 1058 - const grapheme_data = try Graphemes.init(alloc); 1059 - defer grapheme_data.deinit(alloc); 1060 1024 const input = "👩‍🚀"; 1061 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1025 + var parser: Parser = .{}; 1062 1026 const result = try parser.parse(input, alloc); 1063 1027 const expected_key: Key = .{ 1064 1028 .codepoint = Key.multicodepoint, ··· 1072 1036 1073 1037 test "parse: multiple codepoint grapheme with more after" { 1074 1038 const alloc = testing.allocator_instance.allocator(); 1075 - const grapheme_data = try Graphemes.init(alloc); 1076 - defer grapheme_data.deinit(alloc); 1077 1039 const input = "👩‍🚀abc"; 1078 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1040 + var parser: Parser = .{}; 1079 1041 const result = try parser.parse(input, alloc); 1080 1042 const expected_key: Key = .{ 1081 1043 .codepoint = Key.multicodepoint, ··· 1088 1050 try testing.expectEqual(expected_key.codepoint, actual.codepoint); 1089 1051 } 1090 1052 1053 + test "parse: flag emoji" { 1054 + const alloc = 
testing.allocator_instance.allocator(); 1055 + const input = "🇺🇸"; 1056 + var parser: Parser = .{}; 1057 + const result = try parser.parse(input, alloc); 1058 + const expected_key: Key = .{ 1059 + .codepoint = Key.multicodepoint, 1060 + .text = input, 1061 + }; 1062 + const expected_event: Event = .{ .key_press = expected_key }; 1063 + 1064 + try testing.expectEqual(input.len, result.n); 1065 + try testing.expectEqual(expected_event, result.event); 1066 + } 1067 + 1068 + test "parse: combining mark" { 1069 + const alloc = testing.allocator_instance.allocator(); 1070 + // a with combining acute accent (NFD form) 1071 + const input = "a\u{0301}"; 1072 + var parser: Parser = .{}; 1073 + const result = try parser.parse(input, alloc); 1074 + const expected_key: Key = .{ 1075 + .codepoint = Key.multicodepoint, 1076 + .text = input, 1077 + }; 1078 + const expected_event: Event = .{ .key_press = expected_key }; 1079 + 1080 + try testing.expectEqual(input.len, result.n); 1081 + try testing.expectEqual(expected_event, result.event); 1082 + } 1083 + 1084 + test "parse: skin tone emoji" { 1085 + const alloc = testing.allocator_instance.allocator(); 1086 + const input = "👋🏿"; 1087 + var parser: Parser = .{}; 1088 + const result = try parser.parse(input, alloc); 1089 + const expected_key: Key = .{ 1090 + .codepoint = Key.multicodepoint, 1091 + .text = input, 1092 + }; 1093 + const expected_event: Event = .{ .key_press = expected_key }; 1094 + 1095 + try testing.expectEqual(input.len, result.n); 1096 + try testing.expectEqual(expected_event, result.event); 1097 + } 1098 + 1099 + test "parse: text variation selector" { 1100 + const alloc = testing.allocator_instance.allocator(); 1101 + // Heavy black heart with text variation selector 1102 + const input = "❤︎"; 1103 + var parser: Parser = .{}; 1104 + const result = try parser.parse(input, alloc); 1105 + const expected_key: Key = .{ 1106 + .codepoint = Key.multicodepoint, 1107 + .text = input, 1108 + }; 1109 + const expected_event: 
Event = .{ .key_press = expected_key }; 1110 + 1111 + try testing.expectEqual(input.len, result.n); 1112 + try testing.expectEqual(expected_event, result.event); 1113 + } 1114 + 1115 + test "parse: keycap sequence" { 1116 + const alloc = testing.allocator_instance.allocator(); 1117 + const input = "1️⃣"; 1118 + var parser: Parser = .{}; 1119 + const result = try parser.parse(input, alloc); 1120 + const expected_key: Key = .{ 1121 + .codepoint = Key.multicodepoint, 1122 + .text = input, 1123 + }; 1124 + const expected_event: Event = .{ .key_press = expected_key }; 1125 + 1126 + try testing.expectEqual(input.len, result.n); 1127 + try testing.expectEqual(expected_event, result.event); 1128 + } 1129 + 1091 1130 test "parse(csi): kitty multi cursor" { 1092 1131 var buf: [1]u8 = undefined; 1093 1132 { ··· 1230 1269 1231 1270 test "parse: disambiguate shift + space" { 1232 1271 const alloc = testing.allocator_instance.allocator(); 1233 - const grapheme_data = try Graphemes.init(alloc); 1234 - defer grapheme_data.deinit(alloc); 1235 1272 const input = "\x1b[32;2u"; 1236 - var parser: Parser = .{ .grapheme_data = &grapheme_data }; 1273 + var parser: Parser = .{}; 1237 1274 const result = try parser.parse(input, alloc); 1238 1275 const expected_key: Key = .{ 1239 1276 .codepoint = ' ',
+68 -11
src/Unicode.zig
const std = @import("std");
const uucode = @import("uucode");

/// A thin wrapper around Unicode data - no longer needs allocation with uucode
const Unicode = @This();

/// initialize all unicode data vaxis may possibly need
/// With uucode, no initialization is needed but we keep this for API compatibility
pub fn init(alloc: std.mem.Allocator) !Unicode {
    _ = alloc;
    return .{};
}

/// free all data
/// With uucode, no deinitialization is needed but we keep this for API compatibility
pub fn deinit(self: *const Unicode, alloc: std.mem.Allocator) void {
    _ = self;
    _ = alloc;
}

/// Old API-compatible Grapheme value: a byte range (`start`, `len`) describing
/// one grapheme cluster inside the string that was handed to the iterator.
pub const Grapheme = struct {
    /// Byte offset of the first byte of the cluster.
    start: usize,
    /// Length of the cluster in bytes.
    len: usize,

    /// Returns the slice of `str` covered by this grapheme. `str` is expected
    /// to be the same string the iterator was created from.
    pub fn bytes(self: Grapheme, str: []const u8) []const u8 {
        return str[self.start .. self.start + self.len];
    }
};

/// Old API-compatible iterator that yields Grapheme with .len and .bytes().
/// It drives uucode's per-code-point grapheme iterator and groups the code
/// points between two breaks into a single `Grapheme`.
pub const GraphemeIterator = struct {
    /// The string being segmented.
    str: []const u8,
    /// uucode iterator reporting, per code point, whether a break follows it.
    inner: uucode.grapheme.Iterator(uucode.utf8.Iterator),
    /// Byte offset where the cluster currently being assembled begins.
    start: usize = 0,
    /// True when the most recently consumed code point ended a cluster (or
    /// nothing has been consumed yet).
    prev_break: bool = true,

    /// Creates an iterator over the grapheme clusters of `str`.
    pub fn init(str: []const u8) GraphemeIterator {
        return .{
            .str = str,
            .inner = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)),
        };
    }

    /// Returns the next grapheme cluster as a byte range into `str`, or null
    /// once the input is exhausted.
    pub fn next(self: *GraphemeIterator) ?Grapheme {
        while (self.inner.next()) |res| {
            // A break marks the end of the current grapheme
            if (res.is_break) {
                // `self.start` always holds the end of the previously returned
                // cluster (0 initially), which is exactly where this cluster
                // begins. Using it directly - rather than recomputing the start
                // as `inner.i - utf8CodepointSequenceLength(cp)` - cannot
                // underflow when the decoder reports a code point whose
                // canonical UTF-8 length differs from the number of bytes it
                // actually consumed (e.g. a substitute for malformed input).
                const end = self.inner.i;
                const s = self.start;
                self.start = end;
                self.prev_break = true;
                return .{ .start = s, .len = end - s };
            }

            // Still in the middle of a multi-code-point cluster.
            self.prev_break = false;
        }

        // Flush the last grapheme if we ended mid-cluster
        if (!self.prev_break and self.start < self.str.len) {
            const s = self.start;
            const len = self.str.len - s;
            self.start = self.str.len;
            self.prev_break = true;
            return .{ .start = s, .len = len };
        }

        return null;
    }
};

/// creates a grapheme iterator based on str
pub fn graphemeIterator(self: *const Unicode, str: []const u8) GraphemeIterator {
    _ = self;
    return GraphemeIterator.init(str);
}
+2 -2
src/Vaxis.zig
··· 414 414 if (cell.char.width != 0) break :blk cell.char.width; 415 415 416 416 const method: gwidth.Method = self.caps.unicode; 417 - const width: u16 = @intCast(gwidth.gwidth(cell.char.grapheme, method, &self.unicode.width_data)); 417 + const width: u16 = @intCast(gwidth.gwidth(cell.char.grapheme, method)); 418 418 break :blk @max(1, width); 419 419 }; 420 420 defer { ··· 1149 1149 if (cell.char.width != 0) break :blk cell.char.width; 1150 1150 1151 1151 const method: gwidth.Method = self.caps.unicode; 1152 - const width = gwidth.gwidth(cell.char.grapheme, method, &self.unicode.width_data); 1152 + const width = gwidth.gwidth(cell.char.grapheme, method); 1153 1153 break :blk @max(1, width); 1154 1154 }; 1155 1155 defer {
+1 -1
src/Window.zig
···

/// Measures the display width of `str` by delegating to `gw.gwidth` with the
/// width method stored on this window's screen.
pub fn gwidth(self: Window, str: []const u8) u16 {
    const method = self.screen.width_method;
    return gw.gwidth(str, method);
}

/// fills the window with the provided cell
+172 -35
src/gwidth.zig
··· 1 1 const std = @import("std"); 2 2 const unicode = std.unicode; 3 3 const testing = std.testing; 4 - const DisplayWidth = @import("DisplayWidth"); 5 - const code_point = @import("code_point"); 4 + const uucode = @import("uucode"); 6 5 7 6 /// the method to use when calculating the width of a grapheme 8 7 pub const Method = enum { ··· 11 10 no_zwj, 12 11 }; 13 12 13 + /// Calculate width from east asian width property and Unicode properties 14 + fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 { 15 + // Based on wcwidth implementation 16 + // Control characters 17 + if (cp == 0) return 0; 18 + if (cp < 32 or (cp >= 0x7f and cp < 0xa0)) return -1; 19 + 20 + // Use general category for comprehensive zero-width detection 21 + const gc = uucode.get(.general_category, cp); 22 + switch (gc) { 23 + .mark_nonspacing, .mark_enclosing => return 0, 24 + else => {}, 25 + } 26 + 27 + // Additional zero-width characters not covered by general category 28 + if (cp == 0x00ad) return 0; // soft hyphen 29 + if (cp == 0x200b) return 0; // zero-width space 30 + if (cp == 0x200c) return 0; // zero-width non-joiner 31 + if (cp == 0x200d) return 0; // zero-width joiner 32 + if (cp == 0x2060) return 0; // word joiner 33 + if (cp == 0x034f) return 0; // combining grapheme joiner 34 + if (cp == 0xfeff) return 0; // zero-width no-break space (BOM) 35 + if (cp >= 0x180b and cp <= 0x180d) return 0; // Mongolian variation selectors 36 + if (cp >= 0xfe00 and cp <= 0xfe0f) return 0; // variation selectors 37 + if (cp >= 0xe0100 and cp <= 0xe01ef) return 0; // Plane-14 variation selectors 38 + 39 + // East Asian Width: fullwidth or wide = 2 40 + // ambiguous in East Asian context = 2, otherwise 1 41 + // halfwidth, narrow, or neutral = 1 42 + return switch (eaw) { 43 + .fullwidth, .wide => 2, 44 + else => 1, 45 + }; 46 + } 47 + 14 48 /// returns the width of the provided string, as measured by the method chosen 15 - pub fn gwidth(str: []const u8, method: Method, data: *const 
DisplayWidth) u16 { 49 + pub fn gwidth(str: []const u8, method: Method) u16 { 16 50 switch (method) { 17 51 .unicode => { 18 - return @intCast(data.strWidth(str)); 52 + var total: u16 = 0; 53 + var grapheme_iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(str)); 54 + 55 + var grapheme_start: usize = 0; 56 + var prev_break: bool = true; 57 + 58 + while (grapheme_iter.next()) |result| { 59 + if (prev_break and !result.is_break) { 60 + // Start of a new grapheme 61 + const cp_len: usize = std.unicode.utf8CodepointSequenceLength(result.cp) catch 1; 62 + grapheme_start = grapheme_iter.i - cp_len; 63 + } 64 + 65 + if (result.is_break) { 66 + // End of a grapheme - calculate its width 67 + const grapheme_end = grapheme_iter.i; 68 + const grapheme_bytes = str[grapheme_start..grapheme_end]; 69 + 70 + // Calculate grapheme width 71 + var g_iter = uucode.utf8.Iterator.init(grapheme_bytes); 72 + var width: i16 = 0; 73 + var has_emoji_vs: bool = false; 74 + var has_text_vs: bool = false; 75 + var has_emoji_presentation: bool = false; 76 + var ri_count: u8 = 0; 77 + 78 + while (g_iter.next()) |cp| { 79 + // Check for emoji variation selector (U+FE0F) 80 + if (cp == 0xfe0f) { 81 + has_emoji_vs = true; 82 + continue; 83 + } 84 + 85 + // Check for text variation selector (U+FE0E) 86 + if (cp == 0xfe0e) { 87 + has_text_vs = true; 88 + continue; 89 + } 90 + 91 + // Check if this codepoint has emoji presentation 92 + if (uucode.get(.is_emoji_presentation, cp)) { 93 + has_emoji_presentation = true; 94 + } 95 + 96 + // Count regional indicators (for flag emojis) 97 + if (cp >= 0x1F1E6 and cp <= 0x1F1FF) { 98 + ri_count += 1; 99 + } 100 + 101 + const eaw = uucode.get(.east_asian_width, cp); 102 + const w = eawToWidth(cp, eaw); 103 + // Take max of non-zero widths 104 + if (w > 0 and w > width) width = w; 105 + } 106 + 107 + // Handle variation selectors and emoji presentation 108 + if (has_text_vs) { 109 + // Text presentation explicit - keep width as-is (usually 1) 110 + 
width = @max(1, width); 111 + } else if (has_emoji_vs or has_emoji_presentation or ri_count == 2) { 112 + // Emoji presentation or flag pair - force width 2 113 + width = @max(2, width); 114 + } 115 + 116 + total += @max(0, width); 117 + 118 + grapheme_start = grapheme_end; 119 + } 120 + prev_break = result.is_break; 121 + } 122 + 123 + return total; 19 124 }, 20 125 .wcwidth => { 21 126 var total: u16 = 0; 22 - var iter: code_point.Iterator = .{ .bytes = str }; 127 + var iter = uucode.utf8.Iterator.init(str); 23 128 while (iter.next()) |cp| { 24 - const w: u16 = switch (cp.code) { 129 + const w: i16 = switch (cp) { 25 130 // undo an override in zg for emoji skintone selectors 26 - 0x1f3fb...0x1f3ff, 27 - => 2, 28 - else => @max(0, data.codePointWidth(cp.code)), 131 + 0x1f3fb...0x1f3ff => 2, 132 + else => blk: { 133 + const eaw = uucode.get(.east_asian_width, cp); 134 + break :blk eawToWidth(cp, eaw); 135 + }, 29 136 }; 30 - total += w; 137 + total += @intCast(@max(0, w)); 31 138 } 32 139 return total; 33 140 }, ··· 35 142 var iter = std.mem.splitSequence(u8, str, "\u{200D}"); 36 143 var result: u16 = 0; 37 144 while (iter.next()) |s| { 38 - result += gwidth(s, .unicode, data); 145 + result += gwidth(s, .unicode); 39 146 } 40 147 return result; 41 148 }, ··· 43 150 } 44 151 45 152 test "gwidth: a" { 46 - const alloc = testing.allocator_instance.allocator(); 47 - const data = try DisplayWidth.init(alloc); 48 - defer data.deinit(alloc); 49 - try testing.expectEqual(1, gwidth("a", .unicode, &data)); 50 - try testing.expectEqual(1, gwidth("a", .wcwidth, &data)); 51 - try testing.expectEqual(1, gwidth("a", .no_zwj, &data)); 153 + try testing.expectEqual(1, gwidth("a", .unicode)); 154 + try testing.expectEqual(1, gwidth("a", .wcwidth)); 155 + try testing.expectEqual(1, gwidth("a", .no_zwj)); 52 156 } 53 157 54 158 test "gwidth: emoji with ZWJ" { 55 - const alloc = testing.allocator_instance.allocator(); 56 - const data = try DisplayWidth.init(alloc); 57 - defer 
data.deinit(alloc); 58 - try testing.expectEqual(2, gwidth("👩‍🚀", .unicode, &data)); 59 - try testing.expectEqual(4, gwidth("👩‍🚀", .wcwidth, &data)); 60 - try testing.expectEqual(4, gwidth("👩‍🚀", .no_zwj, &data)); 159 + try testing.expectEqual(2, gwidth("👩‍🚀", .unicode)); 160 + try testing.expectEqual(4, gwidth("👩‍🚀", .wcwidth)); 161 + try testing.expectEqual(4, gwidth("👩‍🚀", .no_zwj)); 61 162 } 62 163 63 164 test "gwidth: emoji with VS16 selector" { 64 - const alloc = testing.allocator_instance.allocator(); 65 - const data = try DisplayWidth.init(alloc); 66 - defer data.deinit(alloc); 67 - try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .unicode, &data)); 68 - try testing.expectEqual(1, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .wcwidth, &data)); 69 - try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .no_zwj, &data)); 165 + try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .unicode)); 166 + try testing.expectEqual(1, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .wcwidth)); 167 + try testing.expectEqual(2, gwidth("\xE2\x9D\xA4\xEF\xB8\x8F", .no_zwj)); 70 168 } 71 169 72 170 test "gwidth: emoji with skin tone selector" { 73 - const alloc = testing.allocator_instance.allocator(); 74 - const data = try DisplayWidth.init(alloc); 75 - defer data.deinit(alloc); 76 - try testing.expectEqual(2, gwidth("👋🏿", .unicode, &data)); 77 - try testing.expectEqual(4, gwidth("👋🏿", .wcwidth, &data)); 78 - try testing.expectEqual(2, gwidth("👋🏿", .no_zwj, &data)); 171 + try testing.expectEqual(2, gwidth("👋🏿", .unicode)); 172 + try testing.expectEqual(4, gwidth("👋🏿", .wcwidth)); 173 + try testing.expectEqual(2, gwidth("👋🏿", .no_zwj)); 174 + } 175 + 176 + test "gwidth: zero-width space" { 177 + try testing.expectEqual(0, gwidth("\u{200B}", .unicode)); 178 + try testing.expectEqual(0, gwidth("\u{200B}", .wcwidth)); 179 + } 180 + 181 + test "gwidth: zero-width non-joiner" { 182 + try testing.expectEqual(0, gwidth("\u{200C}", .unicode)); 183 + try testing.expectEqual(0, 
gwidth("\u{200C}", .wcwidth)); 184 + } 185 + 186 + test "gwidth: combining marks" { 187 + // Hebrew combining mark 188 + try testing.expectEqual(0, gwidth("\u{05B0}", .unicode)); 189 + // Devanagari combining mark 190 + try testing.expectEqual(0, gwidth("\u{093C}", .unicode)); 191 + } 192 + 193 + test "gwidth: flag emoji (regional indicators)" { 194 + // US flag 🇺🇸 195 + try testing.expectEqual(2, gwidth("🇺🇸", .unicode)); 196 + // UK flag 🇬🇧 197 + try testing.expectEqual(2, gwidth("🇬🇧", .unicode)); 198 + } 199 + 200 + test "gwidth: text variation selector" { 201 + // U+2764 (heavy black heart) + U+FE0E (text variation selector) 202 + // Should be width 1 with text presentation 203 + try testing.expectEqual(1, gwidth("❤︎", .unicode)); 204 + } 205 + 206 + test "gwidth: keycap sequence" { 207 + // Digit 1 + U+FE0F + U+20E3 (combining enclosing keycap) 208 + // Should be width 2 209 + try testing.expectEqual(2, gwidth("1️⃣", .unicode)); 210 + } 211 + 212 + test "gwidth: base letter with combining mark" { 213 + // 'a' + combining acute accent (NFD form) 214 + // Should be width 1 (combining mark is zero-width) 215 + try testing.expectEqual(1, gwidth("á", .unicode)); 79 216 }
-2
src/main.zig
··· 26 26 pub const widgets = @import("widgets.zig"); 27 27 pub const gwidth = @import("gwidth.zig"); 28 28 pub const ctlseqs = @import("ctlseqs.zig"); 29 - pub const DisplayWidth = @import("DisplayWidth"); 30 29 pub const GraphemeCache = @import("GraphemeCache.zig"); 31 - pub const Graphemes = @import("Graphemes"); 32 30 pub const Event = @import("event.zig").Event; 33 31 pub const Unicode = @import("Unicode.zig"); 34 32
+2 -4
src/vxfw/vxfw.zig
···
const std = @import("std");
const vaxis = @import("../main.zig");
const uucode = @import("uucode");

const testing = std.testing;

const assert = std.debug.assert;
···
    }

    /// Measures the display width of `str` using the width method stored on
    /// `DrawContext`.
    pub fn stringWidth(_: DrawContext, str: []const u8) usize {
        return vaxis.gwidth.gwidth(str, DrawContext.width_method);
    }

    /// Creates a grapheme iterator over `str` through the Unicode handle
    /// stored on `DrawContext`; that handle must have been set beforehand.
    pub fn graphemeIterator(_: DrawContext, str: []const u8) vaxis.Unicode.GraphemeIterator {
        assert(DrawContext.unicode != null); // DrawContext not initialized
        const handle = DrawContext.unicode.?;
        return handle.graphemeIterator(str);
    }
+56 -23
src/widgets/TextView.zig
··· 1 1 const std = @import("std"); 2 2 const vaxis = @import("../main.zig"); 3 - const Graphemes = @import("Graphemes"); 4 - const DisplayWidth = @import("DisplayWidth"); 3 + const uucode = @import("uucode"); 5 4 const ScrollView = vaxis.widgets.ScrollView; 6 5 6 + /// Simple grapheme representation to replace Graphemes.Grapheme 7 + const Grapheme = struct { 8 + len: u16, 9 + offset: u32, 10 + }; 11 + 7 12 pub const BufferWriter = struct { 8 13 pub const Error = error{OutOfMemory}; 9 14 pub const Writer = std.io.GenericWriter(@This(), Error, write); 10 15 11 16 allocator: std.mem.Allocator, 12 17 buffer: *Buffer, 13 - gd: *const Graphemes, 14 - wd: *const DisplayWidth, 15 18 16 19 pub fn write(self: @This(), bytes: []const u8) Error!usize { 17 20 try self.buffer.append(self.allocator, .{ 18 21 .bytes = bytes, 19 - .gd = self.gd, 20 - .wd = self.wd, 21 22 }); 22 23 return bytes.len; 23 24 } ··· 33 34 34 35 pub const Content = struct { 35 36 bytes: []const u8, 36 - gd: *const Graphemes, 37 - wd: *const DisplayWidth, 38 37 }; 39 38 40 39 pub const Style = struct { ··· 45 44 46 45 pub const Error = error{OutOfMemory}; 47 46 48 - grapheme: std.MultiArrayList(Graphemes.Grapheme) = .{}, 47 + grapheme: std.MultiArrayList(Grapheme) = .{}, 49 48 content: std.ArrayListUnmanaged(u8) = .{}, 50 49 style_list: StyleList = .{}, 51 50 style_map: StyleMap = .{}, ··· 78 77 /// Appends content to the buffer. 
pub fn append(self: *@This(), allocator: std.mem.Allocator, content: Content) Error!void {
    // Column count of the line currently being extended; carried over from
    // the previous append so consecutive appends continue the same line.
    var cols: usize = self.last_cols;
    var iter = uucode.grapheme.Iterator(uucode.utf8.Iterator).init(.init(content.bytes));

    // Byte offset (into content.bytes) where the cluster being assembled
    // begins. It is advanced to the cluster end every time a break is seen,
    // so it is always the start of the next cluster and never needs to be
    // recomputed from the code point length. The previous recomputation,
    //   iter.i - std.unicode.utf8CodepointSequenceLength(result.cp) catch 1
    // was also mis-parenthesized: `catch` binds looser than `-`, so the
    // subtraction was applied to the error union instead of the length.
    var grapheme_start: usize = 0;
    var prev_break: bool = true;

    while (iter.next()) |result| {
        if (result.is_break) {
            // End of a grapheme
            const grapheme_end = iter.i;
            const grapheme_len = grapheme_end - grapheme_start;

            // Offsets are stored relative to the whole buffer, i.e. shifted
            // by the bytes already held in self.content.
            try self.grapheme.append(allocator, .{
                .len = @intCast(grapheme_len),
                .offset = @intCast(self.content.items.len + grapheme_start),
            });

            const cluster = content.bytes[grapheme_start..grapheme_end];
            if (std.mem.eql(u8, cluster, "\n")) {
                // Line finished: record its width and start a new line.
                self.cols = @max(self.cols, cols);
                cols = 0;
            } else {
                // Calculate width using gwidth
                const w = vaxis.gwidth.gwidth(cluster, .unicode);
                cols +|= w;
            }

            grapheme_start = grapheme_end;
        }
        prev_break = result.is_break;
    }

    // Flush the last grapheme if we ended mid-cluster
    if (!prev_break and grapheme_start < content.bytes.len) {
        const grapheme_len = content.bytes.len - grapheme_start;

        try self.grapheme.append(allocator, .{
            .len = @intCast(grapheme_len),
            .offset = @intCast(self.content.items.len + grapheme_start),
        });

        const cluster = content.bytes[grapheme_start..];
        if (!std.mem.eql(u8, cluster, "\n")) {
            const w = vaxis.gwidth.gwidth(cluster, .unicode);
            cols +|= w;
        }
    }

    try self.content.appendSlice(allocator, content.bytes);
    self.last_cols = cols;
    self.cols = @max(self.cols, cols);
···
pub fn writer(
    self: *@This(),
    allocator: std.mem.Allocator,
) BufferWriter.Writer {
    return .{
        .context = .{
            .allocator = allocator,
            .buffer = self,
        },
    };
}
+1 -1
src/widgets/View.zig
···

/// Measures the display width of `str` by delegating to `gw.gwidth` with the
/// width method stored on this view's screen.
pub fn gwidth(self: View, str: []const u8) u16 {
    const method = self.screen.width_method;
    return gw.gwidth(str, method);
}

/// Fills the View with the provided cell
+4 -6
src/widgets/terminal/Terminal.zig
··· 10 10 const vaxis = @import("../../main.zig"); 11 11 const Winsize = vaxis.Winsize; 12 12 const Screen = @import("Screen.zig"); 13 - const DisplayWidth = @import("DisplayWidth"); 14 13 const Key = vaxis.Key; 15 14 const Queue = vaxis.Queue(Event, 16); 16 - const code_point = @import("code_point"); 17 15 const key = @import("key.zig"); 18 16 19 17 pub const Event = union(enum) { ··· 279 277 switch (event) { 280 278 .print => |str| { 281 279 var iter = self.unicode.graphemeIterator(str); 282 - while (iter.next()) |g| { 283 - const gr = g.bytes(str); 280 + while (iter.next()) |grapheme| { 281 + const gr = grapheme.bytes(str); 284 282 // TODO: use actual instead of .unicode 285 - const w = vaxis.gwidth.gwidth(gr, .unicode, &self.unicode.width_data); 283 + const w = vaxis.gwidth.gwidth(gr, .unicode); 286 284 try self.back_screen.print(gr, @truncate(w), self.mode.autowrap); 287 285 } 288 286 }, ··· 498 496 var iter = seq.iterator(u16); 499 497 const n = iter.next() orelse 1; 500 498 // TODO: maybe not .unicode 501 - const w = vaxis.gwidth.gwidth(self.last_printed, .unicode, &self.unicode.width_data); 499 + const w = vaxis.gwidth.gwidth(self.last_printed, .unicode); 502 500 var i: usize = 0; 503 501 while (i < n) : (i += 1) { 504 502 try self.back_screen.print(self.last_printed, @truncate(w), self.mode.autowrap);