Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

speakup: Turn i18n files utf-8

The i18n files currently assume latin1 encoding, which is not enough for
most languages.

This separates out the utf-8 processing of /dev/synthu into a helper,
synth_utf8_get, and uses it for a new synth_writeu, which synth_printf
now uses. As a result, all the i18n messages are processed as utf-8.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20240327115051.ng7xqnhozyii4ik2@begin
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Samuel Thibault and committed by
Greg Kroah-Hartman
4bc4634e fec50db7

+101 -50
+12 -45
drivers/accessibility/speakup/devsynth.c
··· 39 39 static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer, 40 40 size_t nbytes, loff_t *ppos) 41 41 { 42 - size_t count = nbytes, want; 42 + size_t count = nbytes, consumed, want; 43 43 const char __user *ptr = buffer; 44 44 size_t bytes; 45 45 unsigned long flags; 46 46 unsigned char buf[256]; 47 47 u16 ubuf[256]; 48 - size_t in, in2, out; 48 + size_t in, out; 49 49 50 50 if (!synth) 51 51 return -ENODEV; ··· 58 58 return -EFAULT; 59 59 60 60 /* Convert to u16 */ 61 - for (in = 0, out = 0; in < bytes; in++) { 62 - unsigned char c = buf[in]; 63 - int nbytes = 8 - fls(c ^ 0xff); 64 - u32 value; 61 + for (in = 0, out = 0; in < bytes; in += consumed) { 62 + s32 value; 65 63 66 - switch (nbytes) { 67 - case 8: /* 0xff */ 68 - case 7: /* 0xfe */ 69 - case 1: /* 0x80 */ 70 - /* Invalid, drop */ 71 - goto drop; 64 + value = synth_utf8_get(buf + in, bytes - in, &consumed, &want); 65 + if (value == -1) { 66 + /* Invalid or incomplete */ 72 67 73 - case 0: 74 - /* ASCII, copy */ 75 - ubuf[out++] = c; 76 - continue; 77 - 78 - default: 79 - /* 2..6-byte UTF-8 */ 80 - 81 - if (bytes - in < nbytes) { 68 + if (want > bytes - in) 82 69 /* We don't have it all yet, stop here 83 70 * and wait for the rest 84 71 */ 85 72 bytes = in; 86 - want = nbytes; 87 - continue; 88 - } 89 73 90 - /* First byte */ 91 - value = c & ((1u << (7 - nbytes)) - 1); 92 - 93 - /* Other bytes */ 94 - for (in2 = 2; in2 <= nbytes; in2++) { 95 - c = buf[in + 1]; 96 - if ((c & 0xc0) != 0x80) { 97 - /* Invalid, drop the head */ 98 - want = 1; 99 - goto drop; 100 - } 101 - value = (value << 6) | (c & 0x3f); 102 - in++; 103 - } 104 - 105 - if (value < 0x10000) 106 - ubuf[out++] = value; 107 - want = 1; 108 - break; 74 + continue; 109 75 } 110 - drop: 111 - /* empty statement */; 76 + 77 + if (value < 0x10000) 78 + ubuf[out++] = value; 112 79 } 113 80 114 81 count -= bytes;
+2
drivers/accessibility/speakup/speakup.h
··· 76 76 void speakup_cancel_paste(void); 77 77 void speakup_register_devsynth(void); 78 78 void speakup_unregister_devsynth(void); 79 + s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want); 79 80 void synth_write(const char *buf, size_t count); 81 + void synth_writeu(const char *buf, size_t count); 80 82 int synth_supports_indexing(void); 81 83 82 84 extern struct vc_data *spk_sel_cons;
+87 -5
drivers/accessibility/speakup/synth.c
··· 217 217 synth_start(); 218 218 } 219 219 220 + /* Consume one utf-8 character from buf (that contains up to count bytes), 221 + * returns the unicode codepoint if valid, -1 otherwise. 222 + * In all cases, returns the number of consumed bytes in *consumed, 223 + * and the minimum number of bytes that would be needed for the next character 224 + * in *want. 225 + */ 226 + s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want) 227 + { 228 + unsigned char c = buf[0]; 229 + int nbytes = 8 - fls(c ^ 0xff); 230 + u32 value; 231 + size_t i; 232 + 233 + switch (nbytes) { 234 + case 8: /* 0xff */ 235 + case 7: /* 0xfe */ 236 + case 1: /* 0x80 */ 237 + /* Invalid, drop */ 238 + *consumed = 1; 239 + *want = 1; 240 + return -1; 241 + 242 + case 0: 243 + /* ASCII, take as such */ 244 + *consumed = 1; 245 + *want = 1; 246 + return c; 247 + 248 + default: 249 + /* 2..6-byte UTF-8 */ 250 + 251 + if (count < nbytes) { 252 + /* We don't have it all */ 253 + *consumed = 0; 254 + *want = nbytes; 255 + return -1; 256 + } 257 + 258 + /* First byte */ 259 + value = c & ((1u << (7 - nbytes)) - 1); 260 + 261 + /* Other bytes */ 262 + for (i = 1; i < nbytes; i++) { 263 + c = buf[i]; 264 + if ((c & 0xc0) != 0x80) { 265 + /* Invalid, drop the head */ 266 + *consumed = i; 267 + *want = 1; 268 + return -1; 269 + } 270 + value = (value << 6) | (c & 0x3f); 271 + } 272 + 273 + *consumed = nbytes; 274 + *want = 1; 275 + return value; 276 + } 277 + } 278 + 279 + void synth_writeu(const char *buf, size_t count) 280 + { 281 + size_t i, consumed, want; 282 + 283 + /* Convert to u16 */ 284 + for (i = 0; i < count; i++) { 285 + s32 value; 286 + 287 + value = synth_utf8_get(buf + i, count - i, &consumed, &want); 288 + if (value == -1) { 289 + /* Invalid or incomplete */ 290 + 291 + if (want > count - i) 292 + /* We don't have it all, stop */ 293 + count = i; 294 + 295 + continue; 296 + } 297 + 298 + if (value < 0x10000) 299 + synth_buffer_add(value); 300 + } 301 + 302 + 
synth_start(); 303 + } 304 + 220 305 void synth_printf(const char *fmt, ...) 221 306 { 222 307 va_list args; 223 - unsigned char buf[160], *p; 308 + unsigned char buf[160]; 224 309 int r; 225 310 226 311 va_start(args, fmt); ··· 314 229 if (r > sizeof(buf) - 1) 315 230 r = sizeof(buf) - 1; 316 231 317 - p = buf; 318 - while (r--) 319 - synth_buffer_add(*p++); 320 - synth_start(); 232 + synth_writeu(buf, r); 321 233 } 322 234 EXPORT_SYMBOL_GPL(synth_printf); 323 235