Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/hfsplus/unicode.c
4 *
5 * Copyright (C) 2001
6 * Brad Boyer (flar@allandria.com)
7 * (C) 2003 Ardis Technologies <roman@ardistech.com>
8 *
9 * Handler routines for unicode strings
10 */
11
12#include <linux/types.h>
13#include <linux/nls.h>
14
15#include <kunit/visibility.h>
16
17#include "hfsplus_fs.h"
18#include "hfsplus_raw.h"
19
20/* Fold the case of a unicode char, given the 16 bit value */
21/* Returns folded char, or 0 if ignorable */
22static inline u16 case_fold(u16 c)
23{
24 u16 tmp;
25
26 tmp = hfsplus_case_fold_table[c >> 8];
27 if (tmp)
28 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
29 else
30 tmp = c;
31 return tmp;
32}
33
34/* Compare unicode strings, return values like normal strcmp */
35int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
36 const struct hfsplus_unistr *s2)
37{
38 u16 len1, len2, c1, c2;
39 const hfsplus_unichr *p1, *p2;
40
41 len1 = be16_to_cpu(s1->length);
42 len2 = be16_to_cpu(s2->length);
43 p1 = s1->unicode;
44 p2 = s2->unicode;
45
46 if (len1 > HFSPLUS_MAX_STRLEN) {
47 len1 = HFSPLUS_MAX_STRLEN;
48 pr_err("invalid length %u has been corrected to %d\n",
49 be16_to_cpu(s1->length), len1);
50 }
51
52 if (len2 > HFSPLUS_MAX_STRLEN) {
53 len2 = HFSPLUS_MAX_STRLEN;
54 pr_err("invalid length %u has been corrected to %d\n",
55 be16_to_cpu(s2->length), len2);
56 }
57
58 while (1) {
59 c1 = c2 = 0;
60
61 while (len1 && !c1) {
62 c1 = case_fold(be16_to_cpu(*p1));
63 p1++;
64 len1--;
65 }
66 while (len2 && !c2) {
67 c2 = case_fold(be16_to_cpu(*p2));
68 p2++;
69 len2--;
70 }
71
72 if (c1 != c2)
73 return (c1 < c2) ? -1 : 1;
74 if (!c1 && !c2)
75 return 0;
76 }
77}
78EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcasecmp);
79
80/* Compare names as a sequence of 16-bit unsigned integers */
81int hfsplus_strcmp(const struct hfsplus_unistr *s1,
82 const struct hfsplus_unistr *s2)
83{
84 u16 len1, len2, c1, c2;
85 const hfsplus_unichr *p1, *p2;
86 int len;
87
88 len1 = be16_to_cpu(s1->length);
89 len2 = be16_to_cpu(s2->length);
90 p1 = s1->unicode;
91 p2 = s2->unicode;
92
93 if (len1 > HFSPLUS_MAX_STRLEN) {
94 len1 = HFSPLUS_MAX_STRLEN;
95 pr_err("invalid length %u has been corrected to %d\n",
96 be16_to_cpu(s1->length), len1);
97 }
98
99 if (len2 > HFSPLUS_MAX_STRLEN) {
100 len2 = HFSPLUS_MAX_STRLEN;
101 pr_err("invalid length %u has been corrected to %d\n",
102 be16_to_cpu(s2->length), len2);
103 }
104
105 for (len = min(len1, len2); len > 0; len--) {
106 c1 = be16_to_cpu(*p1);
107 c2 = be16_to_cpu(*p2);
108 if (c1 != c2)
109 return c1 < c2 ? -1 : 1;
110 p1++;
111 p2++;
112 }
113
114 return len1 < len2 ? -1 :
115 len1 > len2 ? 1 : 0;
116}
117EXPORT_SYMBOL_IF_KUNIT(hfsplus_strcmp);
118
/*
 * Constants for the arithmetic Hangul composition/decomposition done in
 * this file.
 *
 * L, V and T are the three components a syllable is split into (see
 * hfsplus_try_decompose_hangul()): *Base is the first code unit of each
 * range and *Count the number of values in it. TBase itself stands for
 * "no T component". SBase is the first precomposed syllable.
 */
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_SBase	0xac00

#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
/* Syllables sharing one L component: 21 * 28 = 588 */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
/* All precomposed syllables: 19 * 588 = 11172 */
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)
128
129
130static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
131{
132 int i, s, e;
133
134 s = 1;
135 e = p[1];
136 if (!e || cc < p[s * 2] || cc > p[e * 2])
137 return NULL;
138 do {
139 i = (s + e) / 2;
140 if (cc > p[i * 2])
141 s = i + 1;
142 else if (cc < p[i * 2])
143 e = i - 1;
144 else
145 return hfsplus_compose_table + p[i * 2 + 1];
146 } while (s <= e);
147 return NULL;
148}
149
150static int hfsplus_uni2asc(struct super_block *sb,
151 const struct hfsplus_unistr *ustr,
152 int max_len, char *astr, int *len_p)
153{
154 const hfsplus_unichr *ip;
155 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
156 u8 *op;
157 u16 cc, c0, c1;
158 u16 *ce1, *ce2;
159 int i, len, ustrlen, res, compose;
160
161 op = astr;
162 ip = ustr->unicode;
163
164 ustrlen = be16_to_cpu(ustr->length);
165 if (ustrlen > max_len) {
166 ustrlen = max_len;
167 pr_err("invalid length %u has been corrected to %d\n",
168 be16_to_cpu(ustr->length), ustrlen);
169 }
170
171 len = *len_p;
172 ce1 = NULL;
173 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
174
175 while (ustrlen > 0) {
176 c0 = be16_to_cpu(*ip++);
177 ustrlen--;
178 /* search for single decomposed char */
179 if (likely(compose))
180 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
181 if (ce1)
182 cc = ce1[0];
183 else
184 cc = 0;
185 if (cc) {
186 /* start of a possibly decomposed Hangul char */
187 if (cc != 0xffff)
188 goto done;
189 if (!ustrlen)
190 goto same;
191 c1 = be16_to_cpu(*ip) - Hangul_VBase;
192 if (c1 < Hangul_VCount) {
193 /* compose the Hangul char */
194 cc = (c0 - Hangul_LBase) * Hangul_VCount;
195 cc = (cc + c1) * Hangul_TCount;
196 cc += Hangul_SBase;
197 ip++;
198 ustrlen--;
199 if (!ustrlen)
200 goto done;
201 c1 = be16_to_cpu(*ip) - Hangul_TBase;
202 if (c1 > 0 && c1 < Hangul_TCount) {
203 cc += c1;
204 ip++;
205 ustrlen--;
206 }
207 goto done;
208 }
209 }
210 while (1) {
211 /* main loop for common case of not composed chars */
212 if (!ustrlen)
213 goto same;
214 c1 = be16_to_cpu(*ip);
215 if (likely(compose))
216 ce1 = hfsplus_compose_lookup(
217 hfsplus_compose_table, c1);
218 if (ce1)
219 break;
220 switch (c0) {
221 case 0:
222 c0 = 0x2400;
223 break;
224 case '/':
225 c0 = ':';
226 break;
227 }
228 res = nls->uni2char(c0, op, len);
229 if (res < 0) {
230 if (res == -ENAMETOOLONG)
231 goto out;
232 *op = '?';
233 res = 1;
234 }
235 op += res;
236 len -= res;
237 c0 = c1;
238 ip++;
239 ustrlen--;
240 }
241 ce2 = hfsplus_compose_lookup(ce1, c0);
242 if (ce2) {
243 i = 1;
244 while (i < ustrlen) {
245 ce1 = hfsplus_compose_lookup(ce2,
246 be16_to_cpu(ip[i]));
247 if (!ce1)
248 break;
249 i++;
250 ce2 = ce1;
251 }
252 cc = ce2[0];
253 if (cc) {
254 ip += i;
255 ustrlen -= i;
256 goto done;
257 }
258 }
259same:
260 switch (c0) {
261 case 0:
262 cc = 0x2400;
263 break;
264 case '/':
265 cc = ':';
266 break;
267 default:
268 cc = c0;
269 }
270done:
271 res = nls->uni2char(cc, op, len);
272 if (res < 0) {
273 if (res == -ENAMETOOLONG)
274 goto out;
275 *op = '?';
276 res = 1;
277 }
278 op += res;
279 len -= res;
280 }
281 res = 0;
282out:
283 *len_p = (char *)op - astr;
284 return res;
285}
286
287inline int hfsplus_uni2asc_str(struct super_block *sb,
288 const struct hfsplus_unistr *ustr, char *astr,
289 int *len_p)
290{
291 return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
292}
293EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_str);
294
295inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
296 const struct hfsplus_attr_unistr *ustr,
297 char *astr, int *len_p)
298{
299 return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
300 HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
301}
302EXPORT_SYMBOL_IF_KUNIT(hfsplus_uni2asc_xattr_str);
303
304/*
305 * Convert one or more ASCII characters into a single unicode character.
306 * Returns the number of ASCII characters corresponding to the unicode char.
307 */
308static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
309 wchar_t *uc)
310{
311 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
312 if (size <= 0) {
313 *uc = '?';
314 size = 1;
315 }
316 switch (*uc) {
317 case 0x2400:
318 *uc = 0;
319 break;
320 case ':':
321 *uc = '/';
322 break;
323 }
324 return size;
325}
326
327/* Decomposes a non-Hangul unicode character. */
328static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
329{
330 int off;
331
332 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
333 if (off == 0 || off == 0xffff)
334 return NULL;
335
336 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
337 if (!off)
338 return NULL;
339
340 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
341 if (!off)
342 return NULL;
343
344 off = hfsplus_decompose_table[off + (uc & 0xf)];
345 *size = off & 3;
346 if (*size == 0)
347 return NULL;
348 return hfsplus_decompose_table + (off / 4);
349}
350
351/*
352 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
353 * precomposed Hangul, otherwise return the length of the decomposition.
354 *
355 * This function was adapted from sample code from the Unicode Standard
356 * Annex #15: Unicode Normalization Forms, version 3.2.0.
357 *
358 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
359 * under the Terms of Use in http://www.unicode.org/copyright.html.
360 */
361static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
362{
363 int index;
364 int l, v, t;
365
366 index = uc - Hangul_SBase;
367 if (index < 0 || index >= Hangul_SCount)
368 return 0;
369
370 l = Hangul_LBase + index / Hangul_NCount;
371 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
372 t = Hangul_TBase + index % Hangul_TCount;
373
374 result[0] = l;
375 result[1] = v;
376 if (t != Hangul_TBase) {
377 result[2] = t;
378 return 3;
379 }
380 return 2;
381}
382
383/* Decomposes a single unicode character. */
384static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
385{
386 u16 *result;
387
388 /* Hangul is handled separately */
389 result = hangul_buffer;
390 *size = hfsplus_try_decompose_hangul(uc, result);
391 if (*size == 0)
392 result = hfsplus_decompose_nonhangul(uc, size);
393 return result;
394}
395
396int hfsplus_asc2uni(struct super_block *sb,
397 struct hfsplus_unistr *ustr, int max_unistr_len,
398 const char *astr, int len)
399{
400 int size, dsize, decompose;
401 u16 *dstr, outlen = 0;
402 wchar_t c;
403 u16 dhangul[3];
404
405 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
406 while (outlen < max_unistr_len && len > 0) {
407 size = asc2unichar(sb, astr, len, &c);
408
409 if (decompose)
410 dstr = decompose_unichar(c, &dsize, dhangul);
411 else
412 dstr = NULL;
413 if (dstr) {
414 if (outlen + dsize > max_unistr_len)
415 break;
416 do {
417 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
418 } while (--dsize > 0);
419 } else
420 ustr->unicode[outlen++] = cpu_to_be16(c);
421
422 astr += size;
423 len -= size;
424 }
425 ustr->length = cpu_to_be16(outlen);
426 if (len > 0)
427 return -ENAMETOOLONG;
428 return 0;
429}
430EXPORT_SYMBOL_IF_KUNIT(hfsplus_asc2uni);
431
432/*
433 * Hash a string to an integer as appropriate for the HFS+ filesystem.
434 * Composed unicode characters are decomposed and case-folding is performed
435 * if the appropriate bits are (un)set on the superblock.
436 */
437int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
438{
439 struct super_block *sb = dentry->d_sb;
440 const char *astr;
441 const u16 *dstr;
442 int casefold, decompose, size, len;
443 unsigned long hash;
444 wchar_t c;
445 u16 c2;
446 u16 dhangul[3];
447
448 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
449 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
450 hash = init_name_hash(dentry);
451 astr = str->name;
452 len = str->len;
453 while (len > 0) {
454 int dsize;
455 size = asc2unichar(sb, astr, len, &c);
456 astr += size;
457 len -= size;
458
459 if (decompose)
460 dstr = decompose_unichar(c, &dsize, dhangul);
461 else
462 dstr = NULL;
463 if (dstr) {
464 do {
465 c2 = *dstr++;
466 if (casefold)
467 c2 = case_fold(c2);
468 if (!casefold || c2)
469 hash = partial_name_hash(c2, hash);
470 } while (--dsize > 0);
471 } else {
472 c2 = c;
473 if (casefold)
474 c2 = case_fold(c2);
475 if (!casefold || c2)
476 hash = partial_name_hash(c2, hash);
477 }
478 }
479 str->hash = end_name_hash(hash);
480
481 return 0;
482}
483EXPORT_SYMBOL_IF_KUNIT(hfsplus_hash_dentry);
484
485/*
486 * Compare strings with HFS+ filename ordering.
487 * Composed unicode characters are decomposed and case-folding is performed
488 * if the appropriate bits are (un)set on the superblock.
489 */
490int hfsplus_compare_dentry(const struct dentry *dentry,
491 unsigned int len, const char *str, const struct qstr *name)
492{
493 struct super_block *sb = dentry->d_sb;
494 int casefold, decompose, size;
495 int dsize1, dsize2, len1, len2;
496 const u16 *dstr1, *dstr2;
497 const char *astr1, *astr2;
498 u16 c1, c2;
499 wchar_t c;
500 u16 dhangul_1[3], dhangul_2[3];
501
502 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
503 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
504 astr1 = str;
505 len1 = len;
506 astr2 = name->name;
507 len2 = name->len;
508 dsize1 = dsize2 = 0;
509 dstr1 = dstr2 = NULL;
510
511 while (len1 > 0 && len2 > 0) {
512 if (!dsize1) {
513 size = asc2unichar(sb, astr1, len1, &c);
514 astr1 += size;
515 len1 -= size;
516
517 if (decompose)
518 dstr1 = decompose_unichar(c, &dsize1,
519 dhangul_1);
520 if (!decompose || !dstr1) {
521 c1 = c;
522 dstr1 = &c1;
523 dsize1 = 1;
524 }
525 }
526
527 if (!dsize2) {
528 size = asc2unichar(sb, astr2, len2, &c);
529 astr2 += size;
530 len2 -= size;
531
532 if (decompose)
533 dstr2 = decompose_unichar(c, &dsize2,
534 dhangul_2);
535 if (!decompose || !dstr2) {
536 c2 = c;
537 dstr2 = &c2;
538 dsize2 = 1;
539 }
540 }
541
542 c1 = *dstr1;
543 c2 = *dstr2;
544 if (casefold) {
545 c1 = case_fold(c1);
546 if (!c1) {
547 dstr1++;
548 dsize1--;
549 continue;
550 }
551 c2 = case_fold(c2);
552 if (!c2) {
553 dstr2++;
554 dsize2--;
555 continue;
556 }
557 }
558 if (c1 < c2)
559 return -1;
560 else if (c1 > c2)
561 return 1;
562
563 dstr1++;
564 dsize1--;
565 dstr2++;
566 dsize2--;
567 }
568
569 if (len1 < len2)
570 return -1;
571 if (len1 > len2)
572 return 1;
573 return 0;
574}
575EXPORT_SYMBOL_IF_KUNIT(hfsplus_compare_dentry);