Template repo for tiny cross-platform apps that can be modified on phone, tablet or computer.
at main 47 lines 1.5 kB view raw
-- Thin helpers over the standard utf8 library that address strings by
-- code point position rather than by byte offset.
local utf8 = require 'utf8'

local my_utf8 = {}

--- Convert a code point position into a byte offset within s.
-- @param s    string to index
-- @param pos  code point position, as accepted by utf8.offset
-- @return byte offset at which code point `pos` starts
-- Raises a descriptive error when utf8.offset finds no such position.
function my_utf8.offset(s, pos)
  -- Position 1 always maps to byte 1; skip the library call.
  if pos == 1 then return 1 end
  local result = utf8.offset(s, pos)
  if result == nil then
    -- utf8.len returns nil (plus the offending byte position) when s is not
    -- valid UTF-8. Formatting that nil with %d would raise a confusing
    -- "number expected" error and hide the diagnostic below, so format the
    -- length as a string with a fallback instead.
    local len = utf8.len(s)
    assert(false, ('my_utf8.offset(%d) called on a string of length %s (byte size %d); this is likely a failure to handle utf8\n\n^%s$\n'):format(pos, tostring(len or 'unknown (invalid UTF-8)'), #s, s))
  end
  return result
end

--- Return the bytes of the single code point at position pos.
-- Note: despite the name, the result is a string, not a number.
-- @param s    string to index
-- @param pos  code point position
function my_utf8.codepoint(s, pos)
  return my_utf8.sub(s, pos, pos+1)
end

--- Substring of s addressed by code point positions.
-- end_pos is exclusive
-- @param s          string to slice
-- @param start_pos  first code point position to include
-- @param end_pos    code point position just past the last one to include
function my_utf8.sub(s, start_pos, end_pos)
  local start_offset = my_utf8.offset(s, start_pos)
  local end_offset = my_utf8.offset(s, end_pos)
  return s:sub(start_offset, end_offset-1)
end

--- Match the Lua pattern pat against the single code point at position pos.
-- @return whatever string.match returns for that one-character string
function my_utf8.match_at(s, pos, pat)
  return my_utf8.codepoint(s, pos):match(pat)
end

--- Create a new iterator for s which provides, for each code point, its
-- position, its numeric code point and its UTF-8 bytes.
-- @param s         string to iterate over
-- @param startpos  optional code point position to start from (default 1)
-- @return iterator function yielding: position, code point number, bytes
function my_utf8.chars(s, startpos)
  local next_pos = startpos or 1  -- in code points
  local next_offset = utf8.offset(s, next_pos)  -- in bytes
  return function()
    assert(next_offset)  -- never call the iterator after it returns nil
    local curr_pos = next_pos
    next_pos = next_pos+1
    local curr_offset = next_offset
    -- An offset past the last byte means every code point has been consumed.
    if curr_offset > #s then return end
    local codepoint = utf8.codepoint(s, curr_offset)
    -- Start of the following code point (or #s+1 after the last one).
    next_offset = utf8.offset(s, 2, next_offset)
    assert(next_offset)
    local curr_char = s:sub(curr_offset, next_offset-1)
    return curr_pos, codepoint, curr_char
  end
end

return my_utf8