mutt stable branch with some hacks
1/*
2 * Copyright (C) 1996-2000,2010 Michael R. Elkins <me@mutt.org>
3 * Copyright (C) 2000-2002 Edmund Grimley Evans <edmundo@rano.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */
19
20#if HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include "mutt.h"
25#include "mime.h"
26#include "charset.h"
27#include "rfc2047.h"
28
29#include <ctype.h>
30#include <errno.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
35/* If you are debugging this file, comment out the following line. */
36/*#define NDEBUG*/
37
38#ifdef NDEBUG
39#define assert(x)
40#else
41#include <assert.h>
42#endif
43
/*
 * Length limits for a generated encoded word: the longest one this file
 * will emit, and the shortest one that is syntactically possible,
 * i.e. strlen ("=?.?.?.?=").
 */
enum
{
  ENCWORD_LEN_MAX = 75,
  ENCWORD_LEN_MIN = 9
};

/* True for a blank, a tab, or the string terminator. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')

/* True if the two top bits of c are "10", as in a UTF-8 continuation byte. */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
50
51extern char RFC822Specials[];
52
53typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
54 const char *);
55
56static size_t convert_string (ICONV_CONST char *f, size_t flen,
57 const char *from, const char *to,
58 char **t, size_t *tlen)
59{
60 iconv_t cd;
61 char *buf, *ob;
62 size_t obl, n;
63 int e;
64
65 cd = mutt_iconv_open (to, from, 0);
66 if (cd == (iconv_t)(-1))
67 return (size_t)(-1);
68 obl = 4 * flen + 1;
69 ob = buf = safe_malloc (obl);
70 n = iconv (cd, &f, &flen, &ob, &obl);
71 if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
72 {
73 e = errno;
74 FREE (&buf);
75 iconv_close (cd);
76 errno = e;
77 return (size_t)(-1);
78 }
79 *ob = '\0';
80
81 *tlen = ob - buf;
82
83 safe_realloc (&buf, ob - buf + 1);
84 *t = buf;
85 iconv_close (cd);
86
87 return n;
88}
89
90int convert_nonmime_string (char **ps)
91{
92 const char *c, *c1;
93
94 for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
95 {
96 char *u = *ps;
97 char *s;
98 char *fromcode;
99 size_t m, n;
100 size_t ulen = mutt_strlen (*ps);
101 size_t slen;
102
103 if (!u || !*u)
104 return 0;
105
106 c1 = strchr (c, ':');
107 n = c1 ? c1 - c : mutt_strlen (c);
108 if (!n)
109 return 0;
110 fromcode = safe_malloc (n + 1);
111 strfcpy (fromcode, c, n + 1);
112 m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
113 FREE (&fromcode);
114 if (m != (size_t)(-1))
115 {
116 FREE (ps); /* __FREE_CHECKED__ */
117 *ps = s;
118 return 0;
119 }
120 }
121 mutt_convert_string (ps,
122 (const char *)mutt_get_default_charset (),
123 Charset, MUTT_ICONV_HOOK_FROM);
124 return -1;
125}
126
127char *mutt_choose_charset (const char *fromcode, const char *charsets,
128 char *u, size_t ulen, char **d, size_t *dlen)
129{
130 char canonical_buff[LONG_STRING];
131 char *e = 0, *tocode = 0;
132 size_t elen = 0, bestn = 0;
133 const char *p, *q;
134
135 for (p = charsets; p; p = q ? q + 1 : 0)
136 {
137 char *s, *t;
138 size_t slen, n;
139
140 q = strchr (p, ':');
141
142 n = q ? q - p : strlen (p);
143 if (!n)
144 continue;
145
146 t = safe_malloc (n + 1);
147 memcpy (t, p, n);
148 t[n] = '\0';
149
150 n = convert_string (u, ulen, fromcode, t, &s, &slen);
151 if (n == (size_t)(-1))
152 {
153 FREE (&t);
154 continue;
155 }
156
157 if (!tocode || n < bestn)
158 {
159 bestn = n;
160 FREE (&tocode);
161 tocode = t;
162 if (d)
163 {
164 FREE (&e);
165 e = s;
166 }
167 else
168 FREE (&s);
169 elen = slen;
170 if (!bestn)
171 break;
172 }
173 else
174 {
175 FREE (&t);
176 FREE (&s);
177 }
178 }
179 if (tocode)
180 {
181 if (d)
182 *d = e;
183 if (dlen)
184 *dlen = elen;
185
186 mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
187 mutt_str_replace (&tocode, canonical_buff);
188 }
189 return tocode;
190}
191
192static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
193 const char *tocode)
194{
195 char *s0 = s;
196
197 memcpy (s, "=?", 2), s += 2;
198 memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
199 memcpy (s, "?B?", 3), s += 3;
200 for (;;)
201 {
202 if (!dlen)
203 break;
204 else if (dlen == 1)
205 {
206 *s++ = B64Chars[(*d >> 2) & 0x3f];
207 *s++ = B64Chars[(*d & 0x03) << 4];
208 *s++ = '=';
209 *s++ = '=';
210 break;
211 }
212 else if (dlen == 2)
213 {
214 *s++ = B64Chars[(*d >> 2) & 0x3f];
215 *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
216 *s++ = B64Chars[(d[1] & 0x0f) << 2];
217 *s++ = '=';
218 break;
219 }
220 else
221 {
222 *s++ = B64Chars[(*d >> 2) & 0x3f];
223 *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
224 *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
225 *s++ = B64Chars[d[2] & 0x3f];
226 d += 3, dlen -= 3;
227 }
228 }
229 memcpy (s, "?=", 2), s += 2;
230 return s - s0;
231}
232
233static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
234 const char *tocode)
235{
236 static const char hex[] = "0123456789ABCDEF";
237 char *s0 = s;
238
239 memcpy (s, "=?", 2), s += 2;
240 memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
241 memcpy (s, "?Q?", 3), s += 3;
242 while (dlen--)
243 {
244 unsigned char c = *d++;
245 if (c == ' ')
246 *s++ = '_';
247 else if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c))
248 {
249 *s++ = '=';
250 *s++ = hex[(c & 0xf0) >> 4];
251 *s++ = hex[c & 0x0f];
252 }
253 else
254 *s++ = c;
255 }
256 memcpy (s, "?=", 2), s += 2;
257 return s - s0;
258}
259
260/*
261 * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
262 * be converted to an encoded word of length *wlen using *encoder.
263 * Otherwise return an upper bound on the maximum length of the data
264 * which could be converted.
265 * The data is converted from fromcode (which must be stateless) to
266 * tocode, unless fromcode is 0, in which case the data is assumed to
267 * be already in tocode, which should be 8-bit and stateless.
268 */
269static size_t try_block (ICONV_CONST char *d, size_t dlen,
270 const char *fromcode, const char *tocode,
271 encoder_t *encoder, size_t *wlen)
272{
273 char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
274 iconv_t cd;
275 ICONV_CONST char *ib;
276 char *ob, *p;
277 size_t ibl, obl;
278 int count, len, len_b, len_q;
279
280 if (fromcode)
281 {
282 cd = mutt_iconv_open (tocode, fromcode, 0);
283 assert (cd != (iconv_t)(-1));
284 ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
285 if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
286 iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
287 {
288 assert (errno == E2BIG);
289 iconv_close (cd);
290 assert (ib > d);
291 return (ib - d == dlen) ? dlen : ib - d + 1;
292 }
293 iconv_close (cd);
294 }
295 else
296 {
297 if (dlen > sizeof (buf1) - strlen (tocode))
298 return sizeof (buf1) - strlen (tocode) + 1;
299 memcpy (buf1, d, dlen);
300 ob = buf1 + dlen;
301 }
302
303 count = 0;
304 for (p = buf1; p < ob; p++)
305 {
306 unsigned char c = *p;
307 assert (strchr (MimeSpecials, '?'));
308 if (c >= 0x7f || c < 0x20 || *p == '_' ||
309 (c != ' ' && strchr (MimeSpecials, *p)))
310 ++count;
311 }
312
313 len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
314 len_b = len + (((ob - buf1) + 2) / 3) * 4;
315 len_q = len + (ob - buf1) + 2 * count;
316
317 /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
318 if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
319 len_q = ENCWORD_LEN_MAX + 1;
320
321 if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
322 {
323 *encoder = b_encoder;
324 *wlen = len_b;
325 return 0;
326 }
327 else if (len_q <= ENCWORD_LEN_MAX)
328 {
329 *encoder = q_encoder;
330 *wlen = len_q;
331 return 0;
332 }
333 else
334 return dlen;
335}
336
337/*
338 * Encode the data (d, dlen) into s using the encoder.
339 * Return the length of the encoded word.
340 */
341static size_t encode_block (char *s, char *d, size_t dlen,
342 const char *fromcode, const char *tocode,
343 encoder_t encoder)
344{
345 char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
346 iconv_t cd;
347 ICONV_CONST char *ib;
348 char *ob;
349 size_t ibl, obl, n1, n2;
350
351 if (fromcode)
352 {
353 cd = mutt_iconv_open (tocode, fromcode, 0);
354 assert (cd != (iconv_t)(-1));
355 ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
356 n1 = iconv (cd, &ib, &ibl, &ob, &obl);
357 n2 = iconv (cd, 0, 0, &ob, &obl);
358 assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
359 iconv_close (cd);
360 return (*encoder) (s, buf1, ob - buf1, tocode);
361 }
362 else
363 return (*encoder) (s, d, dlen, tocode);
364}
365
366/*
367 * Discover how much of the data (d, dlen) can be converted into
368 * a single encoded word. Return how much data can be converted,
369 * and set the length *wlen of the encoded word and *encoder.
370 * We start in column col, which limits the length of the word.
371 */
372static size_t choose_block (char *d, size_t dlen, int col,
373 const char *fromcode, const char *tocode,
374 encoder_t *encoder, size_t *wlen)
375{
376 size_t n, nn;
377 int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
378
379 n = dlen;
380 for (;;)
381 {
382 assert (d + n > d);
383 nn = try_block (d, n, fromcode, tocode, encoder, wlen);
384 if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
385 break;
386 n = (nn ? nn : n) - 1;
387 assert (n > 0);
388 if (utf8)
389 while (n > 1 && CONTINUATION_BYTE(d[n]))
390 --n;
391 }
392 return n;
393}
394
395/*
396 * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
397 * allocated buffer (e, elen). The input data is in charset fromcode
398 * and is converted into a charset chosen from charsets.
399 * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
400 * failed, otherwise 0. If conversion failed, fromcode is assumed to be
401 * compatible with us-ascii and the original data is used.
402 * The input data is assumed to be a single line starting at column col;
403 * if col is non-zero, the preceding character was a space.
404 */
405static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
406 const char *fromcode, const char *charsets,
407 char **e, size_t *elen, char *specials)
408{
409 int ret = 0;
410 char *buf;
411 size_t bufpos, buflen;
412 char *u = NULL, *t0, *t1, *t;
413 char *s0, *s1;
414 size_t ulen, r, n, wlen;
415 encoder_t encoder;
416 char *tocode1 = 0;
417 const char *tocode;
418 char *icode = "utf-8";
419
420 /* Try to convert to UTF-8. */
421 if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
422 {
423 ret = 1;
424 icode = 0;
425 safe_realloc (&u, (ulen = dlen) + 1);
426 memcpy (u, d, dlen);
427 u[ulen] = 0;
428 }
429
430 /* Find earliest and latest things we must encode. */
431 s0 = s1 = t0 = t1 = 0;
432 for (t = u; t < u + ulen; t++)
433 {
434 if ((*t & 0x80) ||
435 (*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1)))))
436 {
437 if (!t0) t0 = t;
438 t1 = t;
439 }
440 else if (specials && *t && strchr (specials, *t))
441 {
442 if (!s0) s0 = t;
443 s1 = t;
444 }
445 }
446
447 /* If we have something to encode, include RFC822 specials */
448 if (t0 && s0 && s0 < t0)
449 t0 = s0;
450 if (t1 && s1 && s1 > t1)
451 t1 = s1;
452
453 if (!t0)
454 {
455 /* No encoding is required. */
456 *e = u;
457 *elen = ulen;
458 return ret;
459 }
460
461 /* Choose target charset. */
462 tocode = fromcode;
463 if (icode)
464 {
465 if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
466 tocode = tocode1;
467 else
468 ret = 2, icode = 0;
469 }
470
471 /* Hack to avoid labelling 8-bit data as us-ascii. */
472 if (!icode && mutt_is_us_ascii (tocode))
473 tocode = "unknown-8bit";
474
475 /* Adjust t0 for maximum length of line. */
476 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
477 if (t < u) t = u;
478 if (t < t0) t0 = t;
479
480
481 /* Adjust t0 until we can encode a character after a space. */
482 for (; t0 > u; t0--)
483 {
484 if (!HSPACE(*(t0-1)))
485 continue;
486 t = t0 + 1;
487 if (icode)
488 while (t < u + ulen && CONTINUATION_BYTE(*t))
489 ++t;
490 if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
491 col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
492 break;
493 }
494
495 /* Adjust t1 until we can encode a character before a space. */
496 for (; t1 < u + ulen; t1++)
497 {
498 if (!HSPACE(*t1))
499 continue;
500 t = t1 - 1;
501 if (icode)
502 while (CONTINUATION_BYTE(*t))
503 --t;
504 if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
505 1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
506 break;
507 }
508
509 /* We shall encode the region [t0,t1). */
510
511 /* Initialise the output buffer with the us-ascii prefix. */
512 buflen = 2 * ulen;
513 buf = safe_malloc (buflen);
514 bufpos = t0 - u;
515 memcpy (buf, u, t0 - u);
516
517 col += t0 - u;
518
519 t = t0;
520 for (;;)
521 {
522 /* Find how much we can encode. */
523 n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
524 if (n == t1 - t)
525 {
526 /* See if we can fit the us-ascii suffix, too. */
527 if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
528 break;
529 n = t1 - t - 1;
530 if (icode)
531 while (CONTINUATION_BYTE(t[n]))
532 --n;
533 assert (t + n >= t);
534 if (!n)
535 {
536 /* This should only happen in the really stupid case where the
537 only word that needs encoding is one character long, but
538 there is too much us-ascii stuff after it to use a single
539 encoded word. We add the next word to the encoded region
540 and try again. */
541 assert (t1 < u + ulen);
542 for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++)
543 ;
544 continue;
545 }
546 n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
547 }
548
549 /* Add to output buffer. */
550#define LINEBREAK "\n\t"
551 if (bufpos + wlen + strlen (LINEBREAK) > buflen)
552 {
553 buflen = bufpos + wlen + strlen (LINEBREAK);
554 safe_realloc (&buf, buflen);
555 }
556 r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
557 assert (r == wlen);
558 bufpos += wlen;
559 memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
560 bufpos += strlen (LINEBREAK);
561#undef LINEBREAK
562
563 col = 1;
564
565 t += n;
566 }
567
568 /* Add last encoded word and us-ascii suffix to buffer. */
569 buflen = bufpos + wlen + (u + ulen - t1);
570 safe_realloc (&buf, buflen + 1);
571 r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
572 assert (r == wlen);
573 bufpos += wlen;
574 memcpy (buf + bufpos, t1, u + ulen - t1);
575
576 FREE (&tocode1);
577 FREE (&u);
578
579 buf[buflen] = '\0';
580
581 *e = buf;
582 *elen = buflen + 1;
583 return ret;
584}
585
586void _rfc2047_encode_string (char **pd, int encode_specials, int col)
587{
588 char *e;
589 size_t elen;
590 char *charsets;
591
592 if (!Charset || !*pd)
593 return;
594
595 charsets = SendCharset;
596 if (!charsets || !*charsets)
597 charsets = "utf-8";
598
599 rfc2047_encode (*pd, strlen (*pd), col,
600 Charset, charsets, &e, &elen,
601 encode_specials ? RFC822Specials : NULL);
602
603 FREE (pd); /* __FREE_CHECKED__ */
604 *pd = e;
605}
606
607void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag)
608{
609 ADDRESS *ptr = addr;
610 int col = tag ? strlen (tag) + 2 : 32;
611
612 while (ptr)
613 {
614 if (ptr->personal)
615 _rfc2047_encode_string (&ptr->personal, 1, col);
616 else if (ptr->group && ptr->mailbox)
617 _rfc2047_encode_string (&ptr->mailbox, 1, col);
618#ifdef EXACT_ADDRESS
619 if (ptr->val)
620 _rfc2047_encode_string (&ptr->val, 1, col);
621#endif
622 ptr = ptr->next;
623 }
624}
625
626static int rfc2047_decode_word (char *d, const char *s, size_t len)
627{
628 const char *pp, *pp1;
629 char *pd, *d0;
630 const char *t, *t1;
631 int enc = 0, count = 0;
632 char *charset = NULL;
633 int rv = -1;
634
635 pd = d0 = safe_malloc (strlen (s));
636
637 for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
638 {
639 count++;
640
641 /* hack for non-compliant MUAs that allow unquoted question marks in encoded-text */
642 if (count == 4)
643 {
644 while (pp1 && *(pp1 + 1) != '=')
645 pp1 = strchr(pp1 + 1, '?');
646 if (!pp1)
647 goto error_out_0;
648 }
649
650 switch (count)
651 {
652 case 2:
653 /* ignore language specification a la RFC 2231 */
654 t = pp1;
655 if ((t1 = memchr (pp, '*', t - pp)))
656 t = t1;
657 charset = mutt_substrdup (pp, t);
658 break;
659 case 3:
660 if (toupper ((unsigned char) *pp) == 'Q')
661 enc = ENCQUOTEDPRINTABLE;
662 else if (toupper ((unsigned char) *pp) == 'B')
663 enc = ENCBASE64;
664 else
665 goto error_out_0;
666 break;
667 case 4:
668 if (enc == ENCQUOTEDPRINTABLE)
669 {
670 for (; pp < pp1; pp++)
671 {
672 if (*pp == '_')
673 *pd++ = ' ';
674 else if (*pp == '=' &&
675 (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
676 (!(pp[2] & ~127) && hexval(pp[2]) != -1))
677 {
678 *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
679 pp += 2;
680 }
681 else
682 *pd++ = *pp;
683 }
684 *pd = 0;
685 }
686 else if (enc == ENCBASE64)
687 {
688 int c, b = 0, k = 0;
689
690 for (; pp < pp1; pp++)
691 {
692 if (*pp == '=')
693 break;
694 if ((*pp & ~127) || (c = base64val(*pp)) == -1)
695 continue;
696 if (k + 6 >= 8)
697 {
698 k -= 2;
699 *pd++ = b | (c >> k);
700 b = c << (8 - k);
701 }
702 else
703 {
704 b |= c << (k + 2);
705 k += 6;
706 }
707 }
708 *pd = 0;
709 }
710 break;
711 }
712 }
713
714 if (charset)
715 mutt_convert_string (&d0, charset, Charset, MUTT_ICONV_HOOK_FROM);
716 mutt_filter_unprintable (&d0);
717 strfcpy (d, d0, len);
718 rv = 0;
719error_out_0:
720 FREE (&charset);
721 FREE (&d0);
722 return rv;
723}
724
/*
 * Find the start and end of the first encoded word in the string.
 * We use the grammar in section 2 of RFC 2047, but the "encoding"
 * must be B or Q.  Also, we don't require the encoded word to be
 * separated by linear-white-space (section 5(1)).
 * Returns a pointer to the leading "=?" and stores a pointer just past
 * the closing "?=" in *x; returns 0 (leaving *x alone) if there is no
 * encoded word.
 */
static const char *find_encoded_word (const char *s, const char **x)
{
  const char *start;
  const char *cur = s;

  for (;;)
  {
    start = strstr (cur, "=?");
    if (!start)
      return 0;

    /* charset: printable ASCII without blanks or the listed specials */
    cur = start + 2;
    while (0x20 < *cur && *cur < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *cur))
      cur++;

    /* must be followed by "?B?" or "?Q?" (either case) */
    if (!(cur[0] == '?' && cur[1] != '\0' && strchr ("BbQq", cur[1]) &&
          cur[2] == '?'))
      continue;

    /* non-strict check since many MUAs will not encode spaces and question marks */
    cur += 3;
    while (0x20 <= *cur && *cur < 0x7f && !(cur[0] == '?' && cur[1] == '='))
      cur++;

    if (cur[0] == '?' && cur[1] == '=')
    {
      *x = cur + 2;
      return start;
    }

    /* no terminator here: step back one byte and search on */
    --cur;
  }
}
759
/*
 * Return the length of the linear-white-space run (blank, tab, CR, LF)
 * at the start of the n bytes at s.
 * The result is 0 if the run is empty or if its last byte is CR or LF
 * (LWS doesn't end with CRLF).
 */
static size_t lwslen (const char *s, size_t n)
{
  size_t i = 0;

  if (n == 0)
    return 0;

  while (i < n && strchr (" \t\r\n", s[i]))
    i++;

  /* An empty run has no last byte; looking at s[-1] would read outside
   * the caller's data. */
  if (i == 0)
    return 0;

  if (strchr ("\r\n", s[i - 1])) /* LWS doesn't end with CRLF */
    return 0;

  return i;
}
779
/*
 * Return the length of the linear-white-space run (blank, tab, CR, LF)
 * at the end of the n bytes at s, scanning backwards.
 * The result is 0 if the last byte is CR or LF (LWS doesn't end with
 * CRLF) and n if all n bytes are white space.
 */
static size_t lwsrlen (const char *s, size_t n)
{
  size_t i;

  if (n == 0)
    return 0;

  if (strchr ("\r\n", s[n - 1])) /* LWS doesn't end with CRLF */
    return 0;

  /* Walk back by index so that no pointer in front of s is ever formed. */
  for (i = n; i > 0; i--)
    if (!strchr (" \t\r\n", s[i - 1]))
      return n - i;

  return n;
}
800
801/* try to decode anything that looks like a valid RFC2047 encoded
802 * header field, ignoring RFC822 parsing rules
803 */
804void rfc2047_decode (char **pd)
805{
806 const char *p, *q;
807 size_t m, n;
808 int found_encoded = 0;
809 char *d0, *d;
810 const char *s = *pd;
811 size_t dlen;
812
813 if (!s || !*s)
814 return;
815
816 dlen = 4 * strlen (s); /* should be enough */
817 d = d0 = safe_malloc (dlen + 1);
818
819 while (*s && dlen > 0)
820 {
821 if (!(p = find_encoded_word (s, &q)))
822 {
823 /* no encoded words */
824 if (option (OPTIGNORELWS))
825 {
826 n = mutt_strlen (s);
827 if (found_encoded && (m = lwslen (s, n)) != 0)
828 {
829 if (m != n)
830 *d = ' ', d++, dlen--;
831 s += m;
832 }
833 }
834 if (AssumedCharset && *AssumedCharset)
835 {
836 char *t;
837 size_t tlen;
838
839 n = mutt_strlen (s);
840 t = safe_malloc (n + 1);
841 strfcpy (t, s, n + 1);
842 convert_nonmime_string (&t);
843 tlen = mutt_strlen (t);
844 strncpy (d, t, tlen);
845 d += tlen;
846 FREE (&t);
847 break;
848 }
849 strncpy (d, s, dlen);
850 d += dlen;
851 break;
852 }
853
854 if (p != s)
855 {
856 n = (size_t) (p - s);
857 /* ignore spaces between encoded word
858 * and linear-white-space between encoded word and *text */
859 if (option (OPTIGNORELWS))
860 {
861 if (found_encoded && (m = lwslen (s, n)) != 0)
862 {
863 if (m != n)
864 *d = ' ', d++, dlen--;
865 n -= m, s += m;
866 }
867
868 if ((m = n - lwsrlen (s, n)) != 0)
869 {
870 if (m > dlen)
871 m = dlen;
872 memcpy (d, s, m);
873 d += m;
874 dlen -= m;
875 if (m != n)
876 *d = ' ', d++, dlen--;
877 }
878 }
879 else if (!found_encoded || strspn (s, " \t\r\n") != n)
880 {
881 if (n > dlen)
882 n = dlen;
883 memcpy (d, s, n);
884 d += n;
885 dlen -= n;
886 }
887 }
888
889 if (rfc2047_decode_word (d, p, dlen) == -1)
890 {
891 /* could not decode word, fall back to displaying the raw string */
892 strfcpy(d, p, dlen);
893 }
894 found_encoded = 1;
895 s = q;
896 n = mutt_strlen (d);
897 dlen -= n;
898 d += n;
899 }
900 *d = 0;
901
902 FREE (pd); /* __FREE_CHECKED__ */
903 *pd = d0;
904 mutt_str_adjust (pd);
905}
906
907void rfc2047_decode_adrlist (ADDRESS *a)
908{
909 while (a)
910 {
911 if (a->personal && ((strstr (a->personal, "=?") != NULL) ||
912 (AssumedCharset && *AssumedCharset)))
913 rfc2047_decode (&a->personal);
914 else if (a->group && a->mailbox && (strstr (a->mailbox, "=?") != NULL))
915 rfc2047_decode (&a->mailbox);
916#ifdef EXACT_ADDRESS
917 if (a->val && strstr (a->val, "=?") != NULL)
918 rfc2047_decode (&a->val);
919#endif
920 a = a->next;
921 }
922}