Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC correctly

Twenty characters in cp936 are not correctly handled. They're all in the
U00 plane. nls_cp936 converts all U00XY to XY but this is not correct for
some characters.(e.g. U00B7 -> A1A4, U00A8 -> A1A7).

This problem is fixed by generating u2c_00 based on all c2u_xx and changing
uni2char() to give U00 plane a special handling. The "€"(U20AC,80 in
cp936) is also be handled properly.

Acked-by: Gang Chen <cgdlut@gmail.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Jun Chen and committed by
Linus Torvalds
f46ba223 15ad7cdc

+103 -10
+103 -10
fs/nls/nls_cp936.c
··· 4421 4421 c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, 4422 4422 }; 4423 4423 4424 + static unsigned char u2c_00[512] = { 4425 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 4426 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 4427 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 4428 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 4429 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 4430 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 4431 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 4432 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 4433 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 4434 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 4435 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 4436 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 4437 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 4438 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 4439 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 4440 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 4441 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 4442 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 4443 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 4444 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 4445 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 4446 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 4447 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 4448 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 4449 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 4450 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 4451 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 4452 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 4453 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 4454 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 4455 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 4456 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 4457 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 4458 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 4459 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 4460 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 4461 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ 4462 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 4463 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 4464 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 4465 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 4466 + 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */ 4467 + 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 4468 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 4469 + 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 4470 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */ 4471 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 4472 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 4473 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 4474 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ 4475 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ 4476 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 4477 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 4478 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */ 4479 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 4480 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ 4481 + 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ 4482 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ 4483 + 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */ 4484 + 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ 4485 + 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */ 4486 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */ 4487 + 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */ 4488 + 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ 4489 + }; 4490 + 4424 4491 static unsigned char u2c_01[512] = { 4425 4492 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 4426 4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ ··· 10892 10825 }; 10893 10826 10894 10827 static unsigned char *page_uni2charset[256] = { 10895 - NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, 10828 + u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, 10896 10829 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 10897 10830 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 10898 10831 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ··· 11003 10936 unsigned char *uni2charset; 11004 10937 unsigned char cl = uni&0xFF; 11005 10938 unsigned char ch = (uni>>8)&0xFF; 11006 - int n; 10939 + unsigned char out0,out1; 11007 10940 11008 10941 if (boundlen <= 0) 11009 10942 return -ENAMETOOLONG; 11010 10943 10944 + if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */ 10945 + out[0] = 0x80; 10946 + return 1; 10947 + } 10948 + 10949 + if (ch == 0) { /* handle the U00 plane*/ 10950 + /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/ 10951 + out0 = u2c_00[cl*2]; 10952 + out1 = u2c_00[cl*2+1]; 10953 + if (out0 == 0x00 && out1 == 0x00) { 10954 + if (cl<0x80) { 10955 + out[0] = cl; 10956 + return 1; 10957 + } 10958 + return -EINVAL; 10959 + } else { 10960 + if (boundlen <= 1) 10961 + return -ENAMETOOLONG; 10962 + out[0] = out0; 10963 + out[1] = out1; 10964 + return 2; 10965 + } 10966 + } 11011 10967 11012 10968 uni2charset = page_uni2charset[ch]; 11013 10969 if (uni2charset) { ··· 11040 10950 out[1] = uni2charset[cl*2+1]; 11041 10951 if (out[0] == 0x00 && out[1] == 0x00) 11042 10952 return -EINVAL; 11043 - n = 2; 11044 - } else if (ch==0 && cl) { 11045 - out[0] = cl; 11046 - n = 1; 10953 + return 2; 11047 10954 } 11048 10955 else 11049 10956 return -EINVAL; 11050 - 11051 - return n; 11052 10957 } 11053 10958 11054 10959 static int char2uni(const unsigned char *rawstring, int boundlen, ··· 11057 10972 return -ENAMETOOLONG; 11058 10973 11059 10974 if (boundlen == 1) { 11060 - *uni = rawstring[0]; 10975 + if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */ 10976 + *uni = 0x20ac; 10977 + } else { 10978 + *uni = rawstring[0]; 10979 + } 11061 10980 return 1; 11062 10981 } 11063 10982 ··· 11075 10986 return -EINVAL; 11076 10987 n = 2; 11077 10988 } else{ 11078 - *uni = ch; 10989 + if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */ 10990 + *uni = 0x20ac; 10991 + } else { 10992 + *uni = ch; 10993 + } 11079 10994 n = 1; 11080 10995 } 11081 10996 return n;