mutt stable branch with some hacks
at master 922 lines 21 kB view raw
1/* 2 * Copyright (C) 1996-2000,2010 Michael R. Elkins <me@mutt.org> 3 * Copyright (C) 2000-2002 Edmund Grimley Evans <edmundo@rano.org> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 */ 19 20#if HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include "mutt.h" 25#include "mime.h" 26#include "charset.h" 27#include "rfc2047.h" 28 29#include <ctype.h> 30#include <errno.h> 31#include <stdio.h> 32#include <stdlib.h> 33#include <string.h> 34 35/* If you are debugging this file, comment out the following line. */ 36/*#define NDEBUG*/ 37 38#ifdef NDEBUG 39#define assert(x) 40#else 41#include <assert.h> 42#endif 43 44#define ENCWORD_LEN_MAX 75 45#define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */ 46 47#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t') 48 49#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80) 50 51extern char RFC822Specials[]; 52 53typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t, 54 const char *); 55 56static size_t convert_string (ICONV_CONST char *f, size_t flen, 57 const char *from, const char *to, 58 char **t, size_t *tlen) 59{ 60 iconv_t cd; 61 char *buf, *ob; 62 size_t obl, n; 63 int e; 64 65 cd = mutt_iconv_open (to, from, 0); 66 if (cd == (iconv_t)(-1)) 67 return (size_t)(-1); 68 obl = 4 * flen + 1; 69 ob = buf = safe_malloc (obl); 70 n = iconv (cd, &f, &flen, &ob, &obl); 71 if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) 72 { 73 e = errno; 74 FREE (&buf); 75 iconv_close (cd); 76 errno = e; 77 return (size_t)(-1); 78 } 79 *ob = '\0'; 80 81 *tlen = ob - buf; 82 83 safe_realloc (&buf, ob - buf + 1); 84 *t = buf; 85 iconv_close (cd); 86 87 return n; 88} 89 90int convert_nonmime_string (char **ps) 91{ 92 const char *c, *c1; 93 94 for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) 95 { 96 char *u = *ps; 97 char *s; 98 char *fromcode; 99 size_t m, n; 100 size_t ulen = mutt_strlen (*ps); 101 size_t slen; 102 103 if (!u || !*u) 104 return 0; 105 106 c1 = strchr (c, ':'); 107 n = c1 ? c1 - c : mutt_strlen (c); 108 if (!n) 109 return 0; 110 fromcode = safe_malloc (n + 1); 111 strfcpy (fromcode, c, n + 1); 112 m = convert_string (u, ulen, fromcode, Charset, &s, &slen); 113 FREE (&fromcode); 114 if (m != (size_t)(-1)) 115 { 116 FREE (ps); /* __FREE_CHECKED__ */ 117 *ps = s; 118 return 0; 119 } 120 } 121 mutt_convert_string (ps, 122 (const char *)mutt_get_default_charset (), 123 Charset, MUTT_ICONV_HOOK_FROM); 124 return -1; 125} 126 127char *mutt_choose_charset (const char *fromcode, const char *charsets, 128 char *u, size_t ulen, char **d, size_t *dlen) 129{ 130 char canonical_buff[LONG_STRING]; 131 char *e = 0, *tocode = 0; 132 size_t elen = 0, bestn = 0; 133 const char *p, *q; 134 135 for (p = charsets; p; p = q ? q + 1 : 0) 136 { 137 char *s, *t; 138 size_t slen, n; 139 140 q = strchr (p, ':'); 141 142 n = q ? q - p : strlen (p); 143 if (!n) 144 continue; 145 146 t = safe_malloc (n + 1); 147 memcpy (t, p, n); 148 t[n] = '\0'; 149 150 n = convert_string (u, ulen, fromcode, t, &s, &slen); 151 if (n == (size_t)(-1)) 152 { 153 FREE (&t); 154 continue; 155 } 156 157 if (!tocode || n < bestn) 158 { 159 bestn = n; 160 FREE (&tocode); 161 tocode = t; 162 if (d) 163 { 164 FREE (&e); 165 e = s; 166 } 167 else 168 FREE (&s); 169 elen = slen; 170 if (!bestn) 171 break; 172 } 173 else 174 { 175 FREE (&t); 176 FREE (&s); 177 } 178 } 179 if (tocode) 180 { 181 if (d) 182 *d = e; 183 if (dlen) 184 *dlen = elen; 185 186 mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode); 187 mutt_str_replace (&tocode, canonical_buff); 188 } 189 return tocode; 190} 191 192static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen, 193 const char *tocode) 194{ 195 char *s0 = s; 196 197 memcpy (s, "=?", 2), s += 2; 198 memcpy (s, tocode, strlen (tocode)), s += strlen (tocode); 199 memcpy (s, "?B?", 3), s += 3; 200 for (;;) 201 { 202 if (!dlen) 203 break; 204 else if (dlen == 1) 205 { 206 *s++ = B64Chars[(*d >> 2) & 0x3f]; 207 *s++ = B64Chars[(*d & 0x03) << 4]; 208 *s++ = '='; 209 *s++ = '='; 210 break; 211 } 212 else if (dlen == 2) 213 { 214 *s++ = B64Chars[(*d >> 2) & 0x3f]; 215 *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)]; 216 *s++ = B64Chars[(d[1] & 0x0f) << 2]; 217 *s++ = '='; 218 break; 219 } 220 else 221 { 222 *s++ = B64Chars[(*d >> 2) & 0x3f]; 223 *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)]; 224 *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)]; 225 *s++ = B64Chars[d[2] & 0x3f]; 226 d += 3, dlen -= 3; 227 } 228 } 229 memcpy (s, "?=", 2), s += 2; 230 return s - s0; 231} 232 233static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen, 234 const char *tocode) 235{ 236 static const char hex[] = "0123456789ABCDEF"; 237 char *s0 = s; 238 239 memcpy (s, "=?", 2), s += 2; 240 memcpy (s, tocode, strlen (tocode)), s += strlen (tocode); 241 memcpy (s, "?Q?", 3), s += 3; 242 while (dlen--) 243 { 244 unsigned char c = *d++; 245 if (c == ' ') 246 *s++ = '_'; 247 else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) 248 { 249 *s++ = '='; 250 *s++ = hex[(c & 0xf0) >> 4]; 251 *s++ = hex[c & 0x0f]; 252 } 253 else 254 *s++ = c; 255 } 256 memcpy (s, "?=", 2), s += 2; 257 return s - s0; 258} 259 260/* 261 * Return 0 if and set *encoder and *wlen if the data (d, dlen) could 262 * be converted to an encoded word of length *wlen using *encoder. 263 * Otherwise return an upper bound on the maximum length of the data 264 * which could be converted. 265 * The data is converted from fromcode (which must be stateless) to 266 * tocode, unless fromcode is 0, in which case the data is assumed to 267 * be already in tocode, which should be 8-bit and stateless. 268 */ 269static size_t try_block (ICONV_CONST char *d, size_t dlen, 270 const char *fromcode, const char *tocode, 271 encoder_t *encoder, size_t *wlen) 272{ 273 char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; 274 iconv_t cd; 275 ICONV_CONST char *ib; 276 char *ob, *p; 277 size_t ibl, obl; 278 int count, len, len_b, len_q; 279 280 if (fromcode) 281 { 282 cd = mutt_iconv_open (tocode, fromcode, 0); 283 assert (cd != (iconv_t)(-1)); 284 ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); 285 if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) || 286 iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) 287 { 288 assert (errno == E2BIG); 289 iconv_close (cd); 290 assert (ib > d); 291 return (ib - d == dlen) ? dlen : ib - d + 1; 292 } 293 iconv_close (cd); 294 } 295 else 296 { 297 if (dlen > sizeof (buf1) - strlen (tocode)) 298 return sizeof (buf1) - strlen (tocode) + 1; 299 memcpy (buf1, d, dlen); 300 ob = buf1 + dlen; 301 } 302 303 count = 0; 304 for (p = buf1; p < ob; p++) 305 { 306 unsigned char c = *p; 307 assert (strchr (MimeSpecials, '?')); 308 if (c >= 0x7f || c < 0x20 || *p == '_' || 309 (c != ' ' && strchr (MimeSpecials, *p))) 310 ++count; 311 } 312 313 len = ENCWORD_LEN_MIN - 2 + strlen (tocode); 314 len_b = len + (((ob - buf1) + 2) / 3) * 4; 315 len_q = len + (ob - buf1) + 2 * count; 316 317 /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */ 318 if (!ascii_strcasecmp (tocode, "ISO-2022-JP")) 319 len_q = ENCWORD_LEN_MAX + 1; 320 321 if (len_b < len_q && len_b <= ENCWORD_LEN_MAX) 322 { 323 *encoder = b_encoder; 324 *wlen = len_b; 325 return 0; 326 } 327 else if (len_q <= ENCWORD_LEN_MAX) 328 { 329 *encoder = q_encoder; 330 *wlen = len_q; 331 return 0; 332 } 333 else 334 return dlen; 335} 336 337/* 338 * Encode the data (d, dlen) into s using the encoder. 339 * Return the length of the encoded word. 340 */ 341static size_t encode_block (char *s, char *d, size_t dlen, 342 const char *fromcode, const char *tocode, 343 encoder_t encoder) 344{ 345 char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; 346 iconv_t cd; 347 ICONV_CONST char *ib; 348 char *ob; 349 size_t ibl, obl, n1, n2; 350 351 if (fromcode) 352 { 353 cd = mutt_iconv_open (tocode, fromcode, 0); 354 assert (cd != (iconv_t)(-1)); 355 ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); 356 n1 = iconv (cd, &ib, &ibl, &ob, &obl); 357 n2 = iconv (cd, 0, 0, &ob, &obl); 358 assert (n1 != (size_t)(-1) && n2 != (size_t)(-1)); 359 iconv_close (cd); 360 return (*encoder) (s, buf1, ob - buf1, tocode); 361 } 362 else 363 return (*encoder) (s, d, dlen, tocode); 364} 365 366/* 367 * Discover how much of the data (d, dlen) can be converted into 368 * a single encoded word. Return how much data can be converted, 369 * and set the length *wlen of the encoded word and *encoder. 370 * We start in column col, which limits the length of the word. 371 */ 372static size_t choose_block (char *d, size_t dlen, int col, 373 const char *fromcode, const char *tocode, 374 encoder_t *encoder, size_t *wlen) 375{ 376 size_t n, nn; 377 int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8"); 378 379 n = dlen; 380 for (;;) 381 { 382 assert (d + n > d); 383 nn = try_block (d, n, fromcode, tocode, encoder, wlen); 384 if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1)) 385 break; 386 n = (nn ? nn : n) - 1; 387 assert (n > 0); 388 if (utf8) 389 while (n > 1 && CONTINUATION_BYTE(d[n])) 390 --n; 391 } 392 return n; 393} 394 395/* 396 * Place the result of RFC-2047-encoding (d, dlen) into the dynamically 397 * allocated buffer (e, elen). The input data is in charset fromcode 398 * and is converted into a charset chosen from charsets. 399 * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8 400 * failed, otherwise 0. If conversion failed, fromcode is assumed to be 401 * compatible with us-ascii and the original data is used. 402 * The input data is assumed to be a single line starting at column col; 403 * if col is non-zero, the preceding character was a space. 404 */ 405static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col, 406 const char *fromcode, const char *charsets, 407 char **e, size_t *elen, char *specials) 408{ 409 int ret = 0; 410 char *buf; 411 size_t bufpos, buflen; 412 char *u = NULL, *t0, *t1, *t; 413 char *s0, *s1; 414 size_t ulen, r, n, wlen; 415 encoder_t encoder; 416 char *tocode1 = 0; 417 const char *tocode; 418 char *icode = "utf-8"; 419 420 /* Try to convert to UTF-8. */ 421 if (convert_string (d, dlen, fromcode, icode, &u, &ulen)) 422 { 423 ret = 1; 424 icode = 0; 425 safe_realloc (&u, (ulen = dlen) + 1); 426 memcpy (u, d, dlen); 427 u[ulen] = 0; 428 } 429 430 /* Find earliest and latest things we must encode. */ 431 s0 = s1 = t0 = t1 = 0; 432 for (t = u; t < u + ulen; t++) 433 { 434 if ((*t & 0x80) || 435 (*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1))))) 436 { 437 if (!t0) t0 = t; 438 t1 = t; 439 } 440 else if (specials && *t && strchr (specials, *t)) 441 { 442 if (!s0) s0 = t; 443 s1 = t; 444 } 445 } 446 447 /* If we have something to encode, include RFC822 specials */ 448 if (t0 && s0 && s0 < t0) 449 t0 = s0; 450 if (t1 && s1 && s1 > t1) 451 t1 = s1; 452 453 if (!t0) 454 { 455 /* No encoding is required. */ 456 *e = u; 457 *elen = ulen; 458 return ret; 459 } 460 461 /* Choose target charset. */ 462 tocode = fromcode; 463 if (icode) 464 { 465 if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0))) 466 tocode = tocode1; 467 else 468 ret = 2, icode = 0; 469 } 470 471 /* Hack to avoid labelling 8-bit data as us-ascii. */ 472 if (!icode && mutt_is_us_ascii (tocode)) 473 tocode = "unknown-8bit"; 474 475 /* Adjust t0 for maximum length of line. */ 476 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN; 477 if (t < u) t = u; 478 if (t < t0) t0 = t; 479 480 481 /* Adjust t0 until we can encode a character after a space. */ 482 for (; t0 > u; t0--) 483 { 484 if (!HSPACE(*(t0-1))) 485 continue; 486 t = t0 + 1; 487 if (icode) 488 while (t < u + ulen && CONTINUATION_BYTE(*t)) 489 ++t; 490 if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) && 491 col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1) 492 break; 493 } 494 495 /* Adjust t1 until we can encode a character before a space. */ 496 for (; t1 < u + ulen; t1++) 497 { 498 if (!HSPACE(*t1)) 499 continue; 500 t = t1 - 1; 501 if (icode) 502 while (CONTINUATION_BYTE(*t)) 503 --t; 504 if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) && 505 1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1) 506 break; 507 } 508 509 /* We shall encode the region [t0,t1). */ 510 511 /* Initialise the output buffer with the us-ascii prefix. */ 512 buflen = 2 * ulen; 513 buf = safe_malloc (buflen); 514 bufpos = t0 - u; 515 memcpy (buf, u, t0 - u); 516 517 col += t0 - u; 518 519 t = t0; 520 for (;;) 521 { 522 /* Find how much we can encode. */ 523 n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen); 524 if (n == t1 - t) 525 { 526 /* See if we can fit the us-ascii suffix, too. */ 527 if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1) 528 break; 529 n = t1 - t - 1; 530 if (icode) 531 while (CONTINUATION_BYTE(t[n])) 532 --n; 533 assert (t + n >= t); 534 if (!n) 535 { 536 /* This should only happen in the really stupid case where the 537 only word that needs encoding is one character long, but 538 there is too much us-ascii stuff after it to use a single 539 encoded word. We add the next word to the encoded region 540 and try again. */ 541 assert (t1 < u + ulen); 542 for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++) 543 ; 544 continue; 545 } 546 n = choose_block (t, n, col, icode, tocode, &encoder, &wlen); 547 } 548 549 /* Add to output buffer. */ 550#define LINEBREAK "\n\t" 551 if (bufpos + wlen + strlen (LINEBREAK) > buflen) 552 { 553 buflen = bufpos + wlen + strlen (LINEBREAK); 554 safe_realloc (&buf, buflen); 555 } 556 r = encode_block (buf + bufpos, t, n, icode, tocode, encoder); 557 assert (r == wlen); 558 bufpos += wlen; 559 memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK)); 560 bufpos += strlen (LINEBREAK); 561#undef LINEBREAK 562 563 col = 1; 564 565 t += n; 566 } 567 568 /* Add last encoded word and us-ascii suffix to buffer. */ 569 buflen = bufpos + wlen + (u + ulen - t1); 570 safe_realloc (&buf, buflen + 1); 571 r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder); 572 assert (r == wlen); 573 bufpos += wlen; 574 memcpy (buf + bufpos, t1, u + ulen - t1); 575 576 FREE (&tocode1); 577 FREE (&u); 578 579 buf[buflen] = '\0'; 580 581 *e = buf; 582 *elen = buflen + 1; 583 return ret; 584} 585 586void _rfc2047_encode_string (char **pd, int encode_specials, int col) 587{ 588 char *e; 589 size_t elen; 590 char *charsets; 591 592 if (!Charset || !*pd) 593 return; 594 595 charsets = SendCharset; 596 if (!charsets || !*charsets) 597 charsets = "utf-8"; 598 599 rfc2047_encode (*pd, strlen (*pd), col, 600 Charset, charsets, &e, &elen, 601 encode_specials ? RFC822Specials : NULL); 602 603 FREE (pd); /* __FREE_CHECKED__ */ 604 *pd = e; 605} 606 607void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag) 608{ 609 ADDRESS *ptr = addr; 610 int col = tag ? strlen (tag) + 2 : 32; 611 612 while (ptr) 613 { 614 if (ptr->personal) 615 _rfc2047_encode_string (&ptr->personal, 1, col); 616 else if (ptr->group && ptr->mailbox) 617 _rfc2047_encode_string (&ptr->mailbox, 1, col); 618#ifdef EXACT_ADDRESS 619 if (ptr->val) 620 _rfc2047_encode_string (&ptr->val, 1, col); 621#endif 622 ptr = ptr->next; 623 } 624} 625 626static int rfc2047_decode_word (char *d, const char *s, size_t len) 627{ 628 const char *pp, *pp1; 629 char *pd, *d0; 630 const char *t, *t1; 631 int enc = 0, count = 0; 632 char *charset = NULL; 633 int rv = -1; 634 635 pd = d0 = safe_malloc (strlen (s)); 636 637 for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1) 638 { 639 count++; 640 641 /* hack for non-compliant MUAs that allow unquoted question marks in encoded-text */ 642 if (count == 4) 643 { 644 while (pp1 && *(pp1 + 1) != '=') 645 pp1 = strchr(pp1 + 1, '?'); 646 if (!pp1) 647 goto error_out_0; 648 } 649 650 switch (count) 651 { 652 case 2: 653 /* ignore language specification a la RFC 2231 */ 654 t = pp1; 655 if ((t1 = memchr (pp, '*', t - pp))) 656 t = t1; 657 charset = mutt_substrdup (pp, t); 658 break; 659 case 3: 660 if (toupper ((unsigned char) *pp) == 'Q') 661 enc = ENCQUOTEDPRINTABLE; 662 else if (toupper ((unsigned char) *pp) == 'B') 663 enc = ENCBASE64; 664 else 665 goto error_out_0; 666 break; 667 case 4: 668 if (enc == ENCQUOTEDPRINTABLE) 669 { 670 for (; pp < pp1; pp++) 671 { 672 if (*pp == '_') 673 *pd++ = ' '; 674 else if (*pp == '=' && 675 (!(pp[1] & ~127) && hexval(pp[1]) != -1) && 676 (!(pp[2] & ~127) && hexval(pp[2]) != -1)) 677 { 678 *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]); 679 pp += 2; 680 } 681 else 682 *pd++ = *pp; 683 } 684 *pd = 0; 685 } 686 else if (enc == ENCBASE64) 687 { 688 int c, b = 0, k = 0; 689 690 for (; pp < pp1; pp++) 691 { 692 if (*pp == '=') 693 break; 694 if ((*pp & ~127) || (c = base64val(*pp)) == -1) 695 continue; 696 if (k + 6 >= 8) 697 { 698 k -= 2; 699 *pd++ = b | (c >> k); 700 b = c << (8 - k); 701 } 702 else 703 { 704 b |= c << (k + 2); 705 k += 6; 706 } 707 } 708 *pd = 0; 709 } 710 break; 711 } 712 } 713 714 if (charset) 715 mutt_convert_string (&d0, charset, Charset, MUTT_ICONV_HOOK_FROM); 716 mutt_filter_unprintable (&d0); 717 strfcpy (d, d0, len); 718 rv = 0; 719error_out_0: 720 FREE (&charset); 721 FREE (&d0); 722 return rv; 723} 724 725/* 726 * Find the start and end of the first encoded word in the string. 727 * We use the grammar in section 2 of RFC 2047, but the "encoding" 728 * must be B or Q. Also, we don't require the encoded word to be 729 * separated by linear-white-space (section 5(1)). 730 */ 731static const char *find_encoded_word (const char *s, const char **x) 732{ 733 const char *p, *q; 734 735 q = s; 736 while ((p = strstr (q, "=?"))) 737 { 738 for (q = p + 2; 739 0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q); 740 q++) 741 ; 742 if (q[0] != '?' || q[1] == '\0' || !strchr ("BbQq", q[1]) || q[2] != '?') 743 continue; 744 /* non-strict check since many MUAs will not encode spaces and question marks */ 745 for (q = q + 3; 0x20 <= *q && *q < 0x7f && (*q != '?' || q[1] != '='); q++) 746 ; 747 if (q[0] != '?' || q[1] != '=') 748 { 749 --q; 750 continue; 751 } 752 753 *x = q + 2; 754 return p; 755 } 756 757 return 0; 758} 759 760/* return length of linear-white-space */ 761static size_t lwslen (const char *s, size_t n) 762{ 763 const char *p = s; 764 size_t len = n; 765 766 if (n <= 0) 767 return 0; 768 769 for (; p < s + n; p++) 770 if (!strchr (" \t\r\n", *p)) 771 { 772 len = (size_t)(p - s); 773 break; 774 } 775 if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */ 776 len = (size_t)0; 777 return len; 778} 779 780/* return length of linear-white-space : reverse */ 781static size_t lwsrlen (const char *s, size_t n) 782{ 783 const char *p = s + n - 1; 784 size_t len = n; 785 786 if (n <= 0) 787 return 0; 788 789 if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */ 790 return (size_t)0; 791 792 for (; p >= s; p--) 793 if (!strchr (" \t\r\n", *p)) 794 { 795 len = (size_t)(s + n - 1 - p); 796 break; 797 } 798 return len; 799} 800 801/* try to decode anything that looks like a valid RFC2047 encoded 802 * header field, ignoring RFC822 parsing rules 803 */ 804void rfc2047_decode (char **pd) 805{ 806 const char *p, *q; 807 size_t m, n; 808 int found_encoded = 0; 809 char *d0, *d; 810 const char *s = *pd; 811 size_t dlen; 812 813 if (!s || !*s) 814 return; 815 816 dlen = 4 * strlen (s); /* should be enough */ 817 d = d0 = safe_malloc (dlen + 1); 818 819 while (*s && dlen > 0) 820 { 821 if (!(p = find_encoded_word (s, &q))) 822 { 823 /* no encoded words */ 824 if (option (OPTIGNORELWS)) 825 { 826 n = mutt_strlen (s); 827 if (found_encoded && (m = lwslen (s, n)) != 0) 828 { 829 if (m != n) 830 *d = ' ', d++, dlen--; 831 s += m; 832 } 833 } 834 if (AssumedCharset && *AssumedCharset) 835 { 836 char *t; 837 size_t tlen; 838 839 n = mutt_strlen (s); 840 t = safe_malloc (n + 1); 841 strfcpy (t, s, n + 1); 842 convert_nonmime_string (&t); 843 tlen = mutt_strlen (t); 844 strncpy (d, t, tlen); 845 d += tlen; 846 FREE (&t); 847 break; 848 } 849 strncpy (d, s, dlen); 850 d += dlen; 851 break; 852 } 853 854 if (p != s) 855 { 856 n = (size_t) (p - s); 857 /* ignore spaces between encoded word 858 * and linear-white-space between encoded word and *text */ 859 if (option (OPTIGNORELWS)) 860 { 861 if (found_encoded && (m = lwslen (s, n)) != 0) 862 { 863 if (m != n) 864 *d = ' ', d++, dlen--; 865 n -= m, s += m; 866 } 867 868 if ((m = n - lwsrlen (s, n)) != 0) 869 { 870 if (m > dlen) 871 m = dlen; 872 memcpy (d, s, m); 873 d += m; 874 dlen -= m; 875 if (m != n) 876 *d = ' ', d++, dlen--; 877 } 878 } 879 else if (!found_encoded || strspn (s, " \t\r\n") != n) 880 { 881 if (n > dlen) 882 n = dlen; 883 memcpy (d, s, n); 884 d += n; 885 dlen -= n; 886 } 887 } 888 889 if (rfc2047_decode_word (d, p, dlen) == -1) 890 { 891 /* could not decode word, fall back to displaying the raw string */ 892 strfcpy(d, p, dlen); 893 } 894 found_encoded = 1; 895 s = q; 896 n = mutt_strlen (d); 897 dlen -= n; 898 d += n; 899 } 900 *d = 0; 901 902 FREE (pd); /* __FREE_CHECKED__ */ 903 *pd = d0; 904 mutt_str_adjust (pd); 905} 906 907void rfc2047_decode_adrlist (ADDRESS *a) 908{ 909 while (a) 910 { 911 if (a->personal && ((strstr (a->personal, "=?") != NULL) || 912 (AssumedCharset && *AssumedCharset))) 913 rfc2047_decode (&a->personal); 914 else if (a->group && a->mailbox && (strstr (a->mailbox, "=?") != NULL)) 915 rfc2047_decode (&a->mailbox); 916#ifdef EXACT_ADDRESS 917 if (a->val && strstr (a->val, "=?") != NULL) 918 rfc2047_decode (&a->val); 919#endif 920 a = a->next; 921 } 922}