mutt stable branch with some hacks
at jcs 291 lines 6.6 kB view raw
1/* 2 * Copyright (C) 2000,2003 Edmund Grimley Evans <edmundo@rano.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 */ 18 19#if HAVE_CONFIG_H 20# include "config.h" 21#endif 22 23#include "mutt.h" 24#include "charset.h" 25#include "imap_private.h" 26 27static const int Index_64[128] = { 28 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 29 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 30 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, 63,-1,-1,-1, 31 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 32 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 33 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 34 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 35 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 36}; 37 38static const char B64Chars[64] = { 39 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 40 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 41 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 42 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', 43 '8', '9', '+', ',' 44}; 45 46/* 47 * Convert the data (u7,u7len) from RFC 2060's UTF-7 to UTF-8. 48 * The result is null-terminated and returned, and also stored 49 * in (*u8,*u8len) if u8 or u8len is non-zero. 50 * If input data is invalid, return 0 and don't store anything. 51 * RFC 2060 obviously intends the encoding to be unique (see 52 * point 5 in section 5.1.3), so we reject any non-canonical 53 * form, such as &ACY- (instead of &-) or &AMA-&AMA- (instead 54 * of &AMAAwA-). 55 */ 56static char *utf7_to_utf8 (const char *u7, size_t u7len, char **u8, 57 size_t *u8len) 58{ 59 char *buf, *p; 60 int b, ch, k; 61 62 p = buf = safe_malloc (u7len + u7len / 8 + 1); 63 64 for (; u7len; u7++, u7len--) 65 { 66 if (*u7 == '&') 67 { 68 u7++, u7len--; 69 70 if (u7len && *u7 == '-') 71 { 72 *p++ = '&'; 73 continue; 74 } 75 76 ch = 0; 77 k = 10; 78 for (; u7len; u7++, u7len--) 79 { 80 if ((*u7 & 0x80) || (b = Index_64[(int)*u7]) == -1) 81 break; 82 if (k > 0) 83 { 84 ch |= b << k; 85 k -= 6; 86 } 87 else 88 { 89 ch |= b >> (-k); 90 if (ch < 0x80) 91 { 92 if (0x20 <= ch && ch < 0x7f) 93 /* Printable US-ASCII */ 94 goto bail; 95 *p++ = ch; 96 } 97 else if (ch < 0x800) 98 { 99 *p++ = 0xc0 | (ch >> 6); 100 *p++ = 0x80 | (ch & 0x3f); 101 } 102 else 103 { 104 *p++ = 0xe0 | (ch >> 12); 105 *p++ = 0x80 | ((ch >> 6) & 0x3f); 106 *p++ = 0x80 | (ch & 0x3f); 107 } 108 ch = (b << (16 + k)) & 0xffff; 109 k += 10; 110 } 111 } 112 if (ch || k < 6) 113 /* Non-zero or too many extra bits */ 114 goto bail; 115 if (!u7len || *u7 != '-') 116 /* BASE64 not properly terminated */ 117 goto bail; 118 if (u7len > 2 && u7[1] == '&' && u7[2] != '-') 119 /* Adjacent BASE64 sections */ 120 goto bail; 121 } 122 else if (*u7 < 0x20 || *u7 >= 0x7f) 123 /* Not printable US-ASCII */ 124 goto bail; 125 else 126 *p++ = *u7; 127 } 128 *p++ = '\0'; 129 if (u8len) 130 *u8len = p - buf; 131 132 safe_realloc (&buf, p - buf); 133 if (u8) 134 *u8 = buf; 135 return buf; 136 137bail: 138 FREE (&buf); 139 return 0; 140} 141 142/* 143 * Convert the data (u8,u8len) from UTF-8 to RFC 2060's UTF-7. 144 * The result is null-terminated and returned, and also stored 145 * in (*u7,*u7len) if u7 or u7len is non-zero. 146 * Unicode characters above U+FFFF are replaced by U+FFFE. 147 * If input data is invalid, return 0 and don't store anything. 148 */ 149static char *utf8_to_utf7 (const char *u8, size_t u8len, char **u7, 150 size_t *u7len) 151{ 152 char *buf, *p; 153 int ch; 154 int n, i, b = 0, k = 0; 155 int base64 = 0; 156 157 /* 158 * In the worst case we convert 2 chars to 7 chars. For example: 159 * "\x10&\x10&..." -> "&ABA-&-&ABA-&-...". 160 */ 161 p = buf = safe_malloc ((u8len / 2) * 7 + 6); 162 163 while (u8len) 164 { 165 unsigned char c = *u8; 166 167 if (c < 0x80) 168 ch = c, n = 0; 169 else if (c < 0xc2) 170 goto bail; 171 else if (c < 0xe0) 172 ch = c & 0x1f, n = 1; 173 else if (c < 0xf0) 174 ch = c & 0x0f, n = 2; 175 else if (c < 0xf8) 176 ch = c & 0x07, n = 3; 177 else if (c < 0xfc) 178 ch = c & 0x03, n = 4; 179 else if (c < 0xfe) 180 ch = c & 0x01, n = 5; 181 else 182 goto bail; 183 184 u8++, u8len--; 185 if (n > u8len) 186 goto bail; 187 for (i = 0; i < n; i++) 188 { 189 if ((u8[i] & 0xc0) != 0x80) 190 goto bail; 191 ch = (ch << 6) | (u8[i] & 0x3f); 192 } 193 if (n > 1 && !(ch >> (n * 5 + 1))) 194 goto bail; 195 u8 += n, u8len -= n; 196 197 if (ch < 0x20 || ch >= 0x7f) 198 { 199 if (!base64) 200 { 201 *p++ = '&'; 202 base64 = 1; 203 b = 0; 204 k = 10; 205 } 206 if (ch & ~0xffff) 207 ch = 0xfffe; 208 *p++ = B64Chars[b | ch >> k]; 209 k -= 6; 210 for (; k >= 0; k -= 6) 211 *p++ = B64Chars[(ch >> k) & 0x3f]; 212 b = (ch << (-k)) & 0x3f; 213 k += 16; 214 } 215 else 216 { 217 if (base64) 218 { 219 if (k > 10) 220 *p++ = B64Chars[b]; 221 *p++ = '-'; 222 base64 = 0; 223 } 224 *p++ = ch; 225 if (ch == '&') 226 *p++ = '-'; 227 } 228 } 229 230 if (u8len) 231 { 232 FREE (&buf); 233 return 0; 234 } 235 236 if (base64) 237 { 238 if (k > 10) 239 *p++ = B64Chars[b]; 240 *p++ = '-'; 241 } 242 243 *p++ = '\0'; 244 if (u7len) 245 *u7len = p - buf; 246 safe_realloc (&buf, p - buf); 247 if (u7) *u7 = buf; 248 return buf; 249 250bail: 251 FREE (&buf); 252 return 0; 253} 254 255void imap_utf_encode (IMAP_DATA *idata, char **s) 256{ 257 if (Charset) 258 { 259 char *t = safe_strdup (*s); 260 if (t && !mutt_convert_string (&t, Charset, "utf-8", 0)) 261 { 262 FREE (s); /* __FREE_CHECKED__ */ 263 if (idata->unicode) 264 *s = safe_strdup (t); 265 else 266 *s = utf8_to_utf7 (t, strlen (t), NULL, 0); 267 } 268 FREE (&t); 269 } 270} 271 272void imap_utf_decode (IMAP_DATA *idata, char **s) 273{ 274 char *t; 275 276 if (Charset) 277 { 278 if (idata->unicode) 279 t = safe_strdup (*s); 280 else 281 t = utf7_to_utf8 (*s, strlen (*s), 0, 0); 282 283 if (t && !mutt_convert_string (&t, "utf-8", Charset, 0)) 284 { 285 FREE (s); /* __FREE_CHECKED__ */ 286 *s = t; 287 } 288 else 289 FREE (&t); 290 } 291}