Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'udf_for_v4.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull udf updates from Jan Kara:
"UDF support for UTF-16 characters in file names"

* tag 'udf_for_v4.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
udf: Add support for decoding UTF-16 characters
udf: Add support for encoding UTF-16 characters
udf: Push sb argument to udf_name_[to|from]_CS0()
udf: Convert ident strings to proper charset
udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
udf: Always require NLS support

+132 -151
+1 -5
fs/udf/Kconfig
··· 1 1 config UDF_FS 2 2 tristate "UDF file system support" 3 3 select CRC_ITU_T 4 + select NLS 4 5 help 5 6 This is a file system used on some CD-ROMs and DVDs. Since the 6 7 file system is supported by multiple operating systems and is more ··· 14 13 module will be called udf. 15 14 16 15 If unsure, say N. 17 - 18 - config UDF_NLS 19 - bool 20 - default y 21 - depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
+2 -10
fs/udf/super.c
··· 572 572 case Opt_utf8: 573 573 uopt->flags |= (1 << UDF_FLAG_UTF8); 574 574 break; 575 - #ifdef CONFIG_UDF_NLS 576 575 case Opt_iocharset: 577 576 if (!remount) { 578 577 if (uopt->nls_map) ··· 580 581 uopt->flags |= (1 << UDF_FLAG_NLS_MAP); 581 582 } 582 583 break; 583 - #endif 584 584 case Opt_uforget: 585 585 uopt->flags |= (1 << UDF_FLAG_UID_FORGET); 586 586 break; ··· 890 892 #endif 891 893 } 892 894 893 - ret = udf_dstrCS0toUTF8(outstr, 31, pvoldesc->volIdent, 32); 895 + ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32); 894 896 if (ret < 0) 895 897 goto out_bh; 896 898 897 899 strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); 898 900 udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); 899 901 900 - ret = udf_dstrCS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128); 902 + ret = udf_dstrCS0toChar(sb, outstr, 127, pvoldesc->volSetIdent, 128); 901 903 if (ret < 0) 902 904 goto out_bh; 903 905 ··· 2115 2117 udf_err(sb, "utf8 cannot be combined with iocharset\n"); 2116 2118 goto parse_options_failure; 2117 2119 } 2118 - #ifdef CONFIG_UDF_NLS 2119 2120 if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { 2120 2121 uopt.nls_map = load_nls_default(); 2121 2122 if (!uopt.nls_map) ··· 2122 2125 else 2123 2126 udf_debug("Using default NLS map\n"); 2124 2127 } 2125 - #endif 2126 2128 if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP))) 2127 2129 uopt.flags |= (1 << UDF_FLAG_UTF8); 2128 2130 ··· 2275 2279 error_out: 2276 2280 iput(sbi->s_vat_inode); 2277 2281 parse_options_failure: 2278 - #ifdef CONFIG_UDF_NLS 2279 2282 if (uopt.nls_map) 2280 2283 unload_nls(uopt.nls_map); 2281 - #endif 2282 2284 if (lvid_open) 2283 2285 udf_close_lvid(sb); 2284 2286 brelse(sbi->s_lvid_bh); ··· 2326 2332 sbi = UDF_SB(sb); 2327 2333 2328 2334 iput(sbi->s_vat_inode); 2329 - #ifdef CONFIG_UDF_NLS 2330 2335 if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) 2331 2336 unload_nls(sbi->s_nls_map); 2332 - #endif 2333 2337 if (!sb_rdonly(sb)) 2334 2338 udf_close_lvid(sb); 2335 2339 brelse(sbi->s_lvid_bh);
+2 -1
fs/udf/udfdecl.h
··· 220 220 uint8_t *, int); 221 221 extern int udf_put_filename(struct super_block *, const uint8_t *, int, 222 222 uint8_t *, int); 223 - extern int udf_dstrCS0toUTF8(uint8_t *, int, const uint8_t *, int); 223 + extern int udf_dstrCS0toChar(struct super_block *, uint8_t *, int, 224 + const uint8_t *, int); 224 225 225 226 /* ialloc.c */ 226 227 extern void udf_free_inode(struct inode *);
+127 -135
fs/udf/unicode.c
··· 28 28 29 29 #include "udf_sb.h" 30 30 31 + #define PLANE_SIZE 0x10000 32 + #define UNICODE_MAX 0x10ffff 31 33 #define SURROGATE_MASK 0xfffff800 32 34 #define SURROGATE_PAIR 0x0000d800 33 - 34 - static int udf_uni2char_utf8(wchar_t uni, 35 - unsigned char *out, 36 - int boundlen) 37 - { 38 - int u_len = 0; 39 - 40 - if (boundlen <= 0) 41 - return -ENAMETOOLONG; 42 - 43 - if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) 44 - return -EINVAL; 45 - 46 - if (uni < 0x80) { 47 - out[u_len++] = (unsigned char)uni; 48 - } else if (uni < 0x800) { 49 - if (boundlen < 2) 50 - return -ENAMETOOLONG; 51 - out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); 52 - out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); 53 - } else { 54 - if (boundlen < 3) 55 - return -ENAMETOOLONG; 56 - out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); 57 - out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); 58 - out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); 59 - } 60 - return u_len; 61 - } 62 - 63 - static int udf_char2uni_utf8(const unsigned char *in, 64 - int boundlen, 65 - wchar_t *uni) 66 - { 67 - unsigned int utf_char; 68 - unsigned char c; 69 - int utf_cnt, u_len; 70 - 71 - utf_char = 0; 72 - utf_cnt = 0; 73 - for (u_len = 0; u_len < boundlen;) { 74 - c = in[u_len++]; 75 - 76 - /* Complete a multi-byte UTF-8 character */ 77 - if (utf_cnt) { 78 - utf_char = (utf_char << 6) | (c & 0x3f); 79 - if (--utf_cnt) 80 - continue; 81 - } else { 82 - /* Check for a multi-byte UTF-8 character */ 83 - if (c & 0x80) { 84 - /* Start a multi-byte UTF-8 character */ 85 - if ((c & 0xe0) == 0xc0) { 86 - utf_char = c & 0x1f; 87 - utf_cnt = 1; 88 - } else if ((c & 0xf0) == 0xe0) { 89 - utf_char = c & 0x0f; 90 - utf_cnt = 2; 91 - } else if ((c & 0xf8) == 0xf0) { 92 - utf_char = c & 0x07; 93 - utf_cnt = 3; 94 - } else if ((c & 0xfc) == 0xf8) { 95 - utf_char = c & 0x03; 96 - utf_cnt = 4; 97 - } else if ((c & 0xfe) == 0xfc) { 98 - utf_char = c & 0x01; 99 - utf_cnt = 5; 100 - } else { 101 - utf_cnt = -1; 102 - break; 103 - } 104 - continue; 105 - } else { 106 - /* Single byte UTF-8 character (most common) */ 107 - utf_char = c; 108 - } 109 - } 110 - *uni = utf_char; 111 - break; 112 - } 113 - if (utf_cnt) { 114 - *uni = '?'; 115 - return -EINVAL; 116 - } 117 - return u_len; 118 - } 35 + #define SURROGATE_LOW 0x00000400 36 + #define SURROGATE_CHAR_BITS 10 37 + #define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1) 119 38 120 39 #define ILLEGAL_CHAR_MARK '_' 121 40 #define EXT_MARK '.' ··· 42 123 #define EXT_SIZE 5 43 124 /* Number of chars we need to store generated CRC to make filename unique */ 44 125 #define CRC_LEN 5 126 + 127 + static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len, 128 + int str_i_idx, int u_ch, unicode_t *ret) 129 + { 130 + unicode_t c; 131 + int start_idx = str_i_idx; 132 + 133 + /* Expand OSTA compressed Unicode to Unicode */ 134 + c = str_i[str_i_idx++]; 135 + if (u_ch > 1) 136 + c = (c << 8) | str_i[str_i_idx++]; 137 + if ((c & SURROGATE_MASK) == SURROGATE_PAIR) { 138 + unicode_t next; 139 + 140 + /* Trailing surrogate char */ 141 + if (str_i_idx >= str_i_max_len) { 142 + c = UNICODE_MAX + 1; 143 + goto out; 144 + } 145 + 146 + /* Low surrogate must follow the high one... */ 147 + if (c & SURROGATE_LOW) { 148 + c = UNICODE_MAX + 1; 149 + goto out; 150 + } 151 + 152 + WARN_ON_ONCE(u_ch != 2); 153 + next = str_i[str_i_idx++] << 8; 154 + next |= str_i[str_i_idx++]; 155 + if ((next & SURROGATE_MASK) != SURROGATE_PAIR || 156 + !(next & SURROGATE_LOW)) { 157 + c = UNICODE_MAX + 1; 158 + goto out; 159 + } 160 + 161 + c = PLANE_SIZE + 162 + ((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) + 163 + (next & SURROGATE_CHAR_MASK); 164 + } 165 + out: 166 + *ret = c; 167 + return str_i_idx - start_idx; 168 + } 169 + 45 170 46 171 static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, 47 172 int *str_o_idx, ··· 95 132 int (*conv_f)(wchar_t, unsigned char *, int), 96 133 int translate) 97 134 { 98 - uint32_t c; 135 + unicode_t c; 99 136 int illChar = 0; 100 137 int len, gotch = 0; 101 138 102 - for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) { 139 + while (!gotch && *str_i_idx < str_i_max_len) { 103 140 if (*str_o_idx >= str_o_max_len) { 104 141 *needsCRC = 1; 105 142 return gotch; 106 143 } 107 144 108 - /* Expand OSTA compressed Unicode to Unicode */ 109 - c = str_i[*str_i_idx]; 110 - if (u_ch > 1) 111 - c = (c << 8) | str_i[*str_i_idx + 1]; 112 - 113 - if (translate && (c == '/' || c == 0)) 145 + len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch, 146 + &c); 147 + /* These chars cannot be converted. Replace them. */ 148 + if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) || 149 + (translate && c == '/')) { 114 150 illChar = 1; 115 - else if (illChar) 151 + if (!translate) 152 + gotch = 1; 153 + } else if (illChar) 116 154 break; 117 155 else 118 156 gotch = 1; 157 + *str_i_idx += len; 119 158 } 120 159 if (illChar) { 121 160 *needsCRC = 1; ··· 125 160 gotch = 1; 126 161 } 127 162 if (gotch) { 128 - len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx); 163 + if (conv_f) { 164 + len = conv_f(c, &str_o[*str_o_idx], 165 + str_o_max_len - *str_o_idx); 166 + } else { 167 + len = utf32_to_utf8(c, &str_o[*str_o_idx], 168 + str_o_max_len - *str_o_idx); 169 + if (len < 0) 170 + len = -ENAMETOOLONG; 171 + } 129 172 /* Valid character? */ 130 173 if (len >= 0) 131 174 *str_o_idx += len; ··· 141 168 *needsCRC = 1; 142 169 gotch = 0; 143 170 } else { 144 - str_o[(*str_o_idx)++] = '?'; 171 + str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK; 145 172 *needsCRC = 1; 146 173 } 147 174 } 148 175 return gotch; 149 176 } 150 177 151 - static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, 178 + static int udf_name_from_CS0(struct super_block *sb, 179 + uint8_t *str_o, int str_max_len, 152 180 const uint8_t *ocu, int ocu_len, 153 - int (*conv_f)(wchar_t, unsigned char *, int), 154 181 int translate) 155 182 { 156 183 uint32_t c; ··· 167 194 unsigned short valueCRC; 168 195 uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; 169 196 uint8_t crc[CRC_LEN]; 197 + int (*conv_f)(wchar_t, unsigned char *, int); 170 198 171 199 if (str_max_len <= 0) 172 200 return 0; ··· 176 202 memset(str_o, 0, str_max_len); 177 203 return 0; 178 204 } 205 + 206 + if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) 207 + conv_f = UDF_SB(sb)->s_nls_map->uni2char; 208 + else 209 + conv_f = NULL; 179 210 180 211 cmp_id = ocu[0]; 181 212 if (cmp_id != 8 && cmp_id != 16) { ··· 272 293 return str_o_len; 273 294 } 274 295 275 - static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len, 276 - const uint8_t *str_i, int str_len, 277 - int (*conv_f)(const unsigned char *, int, wchar_t *)) 296 + static int udf_name_to_CS0(struct super_block *sb, 297 + uint8_t *ocu, int ocu_max_len, 298 + const uint8_t *str_i, int str_len) 278 299 { 279 300 int i, len; 280 301 unsigned int max_val; 281 - wchar_t uni_char; 282 302 int u_len, u_ch; 303 + unicode_t uni_char; 304 + int (*conv_f)(const unsigned char *, int, wchar_t *); 283 305 284 306 if (ocu_max_len <= 0) 285 307 return 0; 308 + 309 + if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) 310 + conv_f = UDF_SB(sb)->s_nls_map->char2uni; 311 + else 312 + conv_f = NULL; 286 313 287 314 memset(ocu, 0, ocu_max_len); 288 315 ocu[0] = 8; ··· 297 312 298 313 try_again: 299 314 u_len = 1; 300 - for (i = 0; i < str_len; i++) { 315 + for (i = 0; i < str_len; i += len) { 301 316 /* Name didn't fit? */ 302 317 if (u_len + u_ch > ocu_max_len) 303 318 return 0; 304 - len = conv_f(&str_i[i], str_len - i, &uni_char); 305 - if (!len) 306 - continue; 319 + if (conv_f) { 320 + wchar_t wchar; 321 + 322 + len = conv_f(&str_i[i], str_len - i, &wchar); 323 + if (len > 0) 324 + uni_char = wchar; 325 + } else { 326 + len = utf8_to_utf32(&str_i[i], str_len - i, 327 + &uni_char); 328 + } 307 329 /* Invalid character, deal with it */ 308 - if (len < 0) { 330 + if (len <= 0 || uni_char > UNICODE_MAX) { 309 331 len = 1; 310 332 uni_char = '?'; 311 333 } 312 334 313 335 if (uni_char > max_val) { 314 - max_val = 0xffff; 315 - ocu[0] = 0x10; 316 - u_ch = 2; 317 - goto try_again; 336 + unicode_t c; 337 + 338 + if (max_val == 0xff) { 339 + max_val = 0xffff; 340 + ocu[0] = 0x10; 341 + u_ch = 2; 342 + goto try_again; 343 + } 344 + /* 345 + * Use UTF-16 encoding for chars outside we 346 + * cannot encode directly. 347 + */ 348 + if (u_len + 2 * u_ch > ocu_max_len) 349 + return 0; 350 + 351 + uni_char -= PLANE_SIZE; 352 + c = SURROGATE_PAIR | 353 + ((uni_char >> SURROGATE_CHAR_BITS) & 354 + SURROGATE_CHAR_MASK); 355 + ocu[u_len++] = (uint8_t)(c >> 8); 356 + ocu[u_len++] = (uint8_t)(c & 0xff); 357 + uni_char = SURROGATE_PAIR | SURROGATE_LOW | 358 + (uni_char & SURROGATE_CHAR_MASK); 318 359 } 319 360 320 361 if (max_val == 0xffff) 321 362 ocu[u_len++] = (uint8_t)(uni_char >> 8); 322 363 ocu[u_len++] = (uint8_t)(uni_char & 0xff); 323 - i += len - 1; 324 364 } 325 365 326 366 return u_len; 327 367 } 328 368 329 - int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, 369 + int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len, 330 370 const uint8_t *ocu_i, int i_len) 331 371 { 332 372 int s_len = 0; ··· 365 355 } 366 356 } 367 357 368 - return udf_name_from_CS0(utf_o, o_len, ocu_i, s_len, 369 - udf_uni2char_utf8, 0); 358 + return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0); 370 359 } 371 360 372 361 int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, 373 362 uint8_t *dname, int dlen) 374 363 { 375 - int (*conv_f)(wchar_t, unsigned char *, int); 376 364 int ret; 377 365 378 366 if (!slen) ··· 379 371 if (dlen <= 0) 380 372 return 0; 381 373 382 - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 383 - conv_f = udf_uni2char_utf8; 384 - } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 385 - conv_f = UDF_SB(sb)->s_nls_map->uni2char; 386 - } else 387 - BUG(); 388 - 389 - ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); 374 + ret = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1); 390 375 /* Zero length filename isn't valid... */ 391 376 if (ret == 0) 392 377 ret = -EINVAL; ··· 389 388 int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, 390 389 uint8_t *dname, int dlen) 391 390 { 392 - int (*conv_f)(const unsigned char *, int, wchar_t *); 393 - 394 - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 395 - conv_f = udf_char2uni_utf8; 396 - } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 397 - conv_f = UDF_SB(sb)->s_nls_map->char2uni; 398 - } else 399 - BUG(); 400 - 401 - return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); 391 + return udf_name_to_CS0(sb, dname, dlen, sname, slen); 402 392 } 403 393