parser: handle non-ascii input · rockorager.dev/libvaxis@0f12881

+61 -3

2 changed files

expand all

build.zig

src

parser.zig

build.zig

···
 38  38       .target = target,
 39  39       .optimize = optimize,
 40  40   });
     41 + lib_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph"));
 41  42
 42  43   const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
 43  44

+60 -3

src/parser.zig

···
  2   2   const testing = std.testing;
  3   3   const Event = @import("event.zig").Event;
  4   4   const Key = @import("Key.zig");
      5 + const CodePointIterator = @import("ziglyph").CodePointIterator;
      6 + const graphemeBreak = @import("ziglyph").graphemeBreak;
  5   7
  6   8   const log = std.log.scoped(.parser);
  7   9
···
 77  79           state = .escape;
 78  80           continue;
 79  81       },
 80     -     0x20...0x7E => .{ .codepoint = b },
     82 +     // 0x20...0x7E => .{ .codepoint = b },
 81  83       0x7F => .{ .codepoint = Key.backspace },
 82     -     // TODO: graphemes
 83     -     else => .{ .codepoint = b },
     84 +     else => blk: {
     85 +         // TODO: iterate codepoints to find a complete grapheme.
     86 +         // For now we are just taking the first codepoint and
     87 +         // throwing a warning. I think we'll end up mapping a
     88 +         // u21 to a look-aside table of graphemes, I just need
     89 +         // to implement that table somewhere and give access to
     90 +         // it here.
     91 +         var iter: CodePointIterator = .{ .bytes = input[i..] };
     92 +         // return null if we don't have a valid codepoint
     93 +         const cp = iter.next() orelse return .{ .event = null, .n = 0 };
     94 +         if (iter.next()) |next_cp| {
     95 +             var break_state: u3 = 0;
     96 +             if (!graphemeBreak(cp.code, next_cp.code, &break_state)) {
     97 +                 log.warn("grapheme support not implemented yet", .{});
     98 +             }
     99 +         }
    100 +         i += cp.len - 1;
    101 +         break :blk .{ .codepoint = cp.code };
    102 +     },
 84 103   };
 85 104   return .{
 86 105       .event = .{ .key_press = key },
···
512 531       try testing.expectEqual(5, result.n);
513 532       try testing.expectEqual(expected_event, result.event);
514 533   }
    534 +
    535 + test "parse: single codepoint" {
    536 +     const input = "🙂";
    537 +     const result = try parse(input);
    538 +     const expected_key: Key = .{
    539 +         .codepoint = 0x1F642,
    540 +     };
    541 +     const expected_event: Event = .{ .key_press = expected_key };
    542 +
    543 +     try testing.expectEqual(4, result.n);
    544 +     try testing.expectEqual(expected_event, result.event);
    545 + }
    546 +
    547 + test "parse: single codepoint with more in buffer" {
    548 +     const input = "🙂a";
    549 +     const result = try parse(input);
    550 +     const expected_key: Key = .{
    551 +         .codepoint = 0x1F642,
    552 +     };
    553 +     const expected_event: Event = .{ .key_press = expected_key };
    554 +
    555 +     try testing.expectEqual(4, result.n);
    556 +     try testing.expectEqual(expected_event, result.event);
    557 + }
    558 +
    559 + test "parse: multiple codepoint grapheme" {
    560 +     // TODO: this test is passing but throws a warning. Not sure how we'll
    561 +     // handle graphemes yet
    562 +     const input = "👩‍🚀";
    563 +     const result = try parse(input);
    564 +     const expected_key: Key = .{
    565 +         .codepoint = 0x1F469,
    566 +     };
    567 +     const expected_event: Event = .{ .key_press = expected_key };
    568 +
    569 +     try testing.expectEqual(4, result.n);
    570 +     try testing.expectEqual(expected_event, result.event);
    571 + }

Configure Feed

Configure Feed