@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at recaptime-dev/main 370 lines 17 kB view raw
<?php namespace PhpMimeMailParser;

use PhpMimeMailParser\Contracts\CharsetManager;

/**
 * Normalises charset names found in mail headers/bodies and converts text
 * in those charsets to UTF-8.
 *
 * Charset names are resolved through a fixed alias table to a canonical
 * lowercase name; conversion is done with mbstring when the extension is
 * loaded and knows the charset, and with iconv otherwise.
 */
class Charset implements CharsetManager
{
    /**
     * Charset Aliases
     *
     * Maps a lowercase charset name (or alias) to its canonical lowercase
     * name. Canonical names map to themselves. Purely numeric keys such as
     * '646' are stored by PHP as integer keys; lookups with the equivalent
     * string still match.
     */
    private $charsetAlias = [
        'ascii'                    => 'us-ascii',
        'us-ascii'                 => 'us-ascii',
        'ansi_x3.4-1968'           => 'us-ascii',
        '646'                      => 'us-ascii',
        'iso-8859-1'               => 'iso-8859-1',
        'iso-8859-2'               => 'iso-8859-2',
        'iso-8859-3'               => 'iso-8859-3',
        'iso-8859-4'               => 'iso-8859-4',
        'iso-8859-5'               => 'iso-8859-5',
        'iso-8859-6'               => 'iso-8859-6',
        'iso-8859-6-i'             => 'iso-8859-6-i',
        'iso-8859-6-e'             => 'iso-8859-6-e',
        'iso-8859-7'               => 'iso-8859-7',
        'iso-8859-8'               => 'iso-8859-8',
        'iso-8859-8-i'             => 'iso-8859-8',
        'iso-8859-8-e'             => 'iso-8859-8-e',
        'iso-8859-9'               => 'iso-8859-9',
        'iso-8859-10'              => 'iso-8859-10',
        'iso-8859-11'              => 'iso-8859-11',
        'iso-8859-13'              => 'iso-8859-13',
        'iso-8859-14'              => 'iso-8859-14',
        'iso-8859-15'              => 'iso-8859-15',
        'iso-8859-16'              => 'iso-8859-16',
        'iso-ir-111'               => 'iso-ir-111',
        'iso-2022-cn'              => 'iso-2022-cn',
        'iso-2022-cn-ext'          => 'iso-2022-cn',
        'iso-2022-kr'              => 'iso-2022-kr',
        'iso-2022-jp'              => 'iso-2022-jp',
        'utf-16be'                 => 'utf-16be',
        'utf-16le'                 => 'utf-16le',
        'utf-16'                   => 'utf-16',
        'windows-1250'             => 'windows-1250',
        'windows-1251'             => 'windows-1251',
        'windows-1252'             => 'windows-1252',
        'windows-1253'             => 'windows-1253',
        'windows-1254'             => 'windows-1254',
        'windows-1255'             => 'windows-1255',
        'windows-1256'             => 'windows-1256',
        'windows-1257'             => 'windows-1257',
        'windows-1258'             => 'windows-1258',
        'ibm866'                   => 'ibm866',
        'ibm850'                   => 'ibm850',
        'ibm852'                   => 'ibm852',
        'ibm855'                   => 'ibm855',
        'ibm857'                   => 'ibm857',
        'ibm862'                   => 'ibm862',
        'ibm864'                   => 'ibm864',
        'utf-8'                    => 'utf-8',
        'utf-7'                    => 'utf-7',
        'shift_jis'                => 'shift_jis',
        'big5'                     => 'big5',
        'euc-jp'                   => 'euc-jp',
        'euc-kr'                   => 'euc-kr',
        'gb2312'                   => 'gb2312',
        'gb18030'                  => 'gb18030',
        'viscii'                   => 'viscii',
        'koi8-r'                   => 'koi8-r',
        'koi8_r'                   => 'koi8-r',
        'cskoi8r'                  => 'koi8-r',
        'koi'                      => 'koi8-r',
        'koi8'                     => 'koi8-r',
        'koi8-u'                   => 'koi8-u',
        'tis-620'                  => 'tis-620',
        't.61-8bit'                => 't.61-8bit',
        'hz-gb-2312'               => 'hz-gb-2312',
        'big5-hkscs'               => 'big5-hkscs',
        'gbk'                      => 'gbk',
        'cns11643'                 => 'x-euc-tw',
        'x-imap4-modified-utf7'    => 'x-imap4-modified-utf7',
        'x-euc-tw'                 => 'x-euc-tw',
        'x-mac-ce'                 => 'macce',
        'x-mac-turkish'            => 'macturkish',
        'x-mac-greek'              => 'macgreek',
        'x-mac-icelandic'          => 'macicelandic',
        'x-mac-croatian'           => 'maccroatian',
        'x-mac-romanian'           => 'macromanian',
        'x-mac-cyrillic'           => 'maccyrillic',
        'x-mac-ukrainian'          => 'macukrainian',
        'x-mac-hebrew'             => 'machebrew',
        'x-mac-arabic'             => 'macarabic',
        'x-mac-farsi'              => 'macfarsi',
        'x-mac-devanagari'         => 'macdevanagari',
        'x-mac-gujarati'           => 'macgujarati',
        'x-mac-gurmukhi'           => 'macgurmukhi',
        'armscii-8'                => 'armscii-8',
        'x-viet-tcvn5712'          => 'x-viet-tcvn5712',
        'x-viet-vps'               => 'x-viet-vps',
        'iso-10646-ucs-2'          => 'utf-16be',
        'x-iso-10646-ucs-2-be'     => 'utf-16be',
        'x-iso-10646-ucs-2-le'     => 'utf-16le',
        'x-user-defined'           => 'x-user-defined',
        'x-johab'                  => 'x-johab',
        'latin1'                   => 'iso-8859-1',
        'iso_8859-1'               => 'iso-8859-1',
        'iso8859-1'                => 'iso-8859-1',
        'iso8859-2'                => 'iso-8859-2',
        'iso8859-3'                => 'iso-8859-3',
        'iso8859-4'                => 'iso-8859-4',
        'iso8859-5'                => 'iso-8859-5',
        'iso8859-6'                => 'iso-8859-6',
        'iso8859-7'                => 'iso-8859-7',
        'iso8859-8'                => 'iso-8859-8',
        'iso8859-9'                => 'iso-8859-9',
        'iso8859-10'               => 'iso-8859-10',
        'iso8859-11'               => 'iso-8859-11',
        'iso8859-13'               => 'iso-8859-13',
        'iso8859-14'               => 'iso-8859-14',
        'iso8859-15'               => 'iso-8859-15',
        'iso_8859-1:1987'          => 'iso-8859-1',
        'iso-ir-100'               => 'iso-8859-1',
        'l1'                       => 'iso-8859-1',
        'ibm819'                   => 'iso-8859-1',
        'cp819'                    => 'iso-8859-1',
        'csisolatin1'              => 'iso-8859-1',
        'latin2'                   => 'iso-8859-2',
        'iso_8859-2'               => 'iso-8859-2',
        'iso_8859-2:1987'          => 'iso-8859-2',
        'iso-ir-101'               => 'iso-8859-2',
        'l2'                       => 'iso-8859-2',
        'csisolatin2'              => 'iso-8859-2',
        'latin3'                   => 'iso-8859-3',
        'iso_8859-3'               => 'iso-8859-3',
        'iso_8859-3:1988'          => 'iso-8859-3',
        'iso-ir-109'               => 'iso-8859-3',
        'l3'                       => 'iso-8859-3',
        'csisolatin3'              => 'iso-8859-3',
        'latin4'                   => 'iso-8859-4',
        'iso_8859-4'               => 'iso-8859-4',
        'iso_8859-4:1988'          => 'iso-8859-4',
        'iso-ir-110'               => 'iso-8859-4',
        'l4'                       => 'iso-8859-4',
        'csisolatin4'              => 'iso-8859-4',
        'cyrillic'                 => 'iso-8859-5',
        'iso_8859-5'               => 'iso-8859-5',
        'iso_8859-5:1988'          => 'iso-8859-5',
        'iso-ir-144'               => 'iso-8859-5',
        'csisolatincyrillic'       => 'iso-8859-5',
        'arabic'                   => 'iso-8859-6',
        'iso_8859-6'               => 'iso-8859-6',
        'iso_8859-6:1987'          => 'iso-8859-6',
        'iso-ir-127'               => 'iso-8859-6',
        'ecma-114'                 => 'iso-8859-6',
        'asmo-708'                 => 'iso-8859-6',
        'csisolatinarabic'         => 'iso-8859-6',
        'csiso88596i'              => 'iso-8859-6-i',
        'csiso88596e'              => 'iso-8859-6-e',
        'greek'                    => 'iso-8859-7',
        'greek8'                   => 'iso-8859-7',
        'sun_eu_greek'             => 'iso-8859-7',
        'iso_8859-7'               => 'iso-8859-7',
        'iso_8859-7:1987'          => 'iso-8859-7',
        'iso-ir-126'               => 'iso-8859-7',
        'elot_928'                 => 'iso-8859-7',
        'ecma-118'                 => 'iso-8859-7',
        'csisolatingreek'          => 'iso-8859-7',
        'hebrew'                   => 'iso-8859-8',
        'iso_8859-8'               => 'iso-8859-8',
        'visual'                   => 'iso-8859-8',
        'iso_8859-8:1988'          => 'iso-8859-8',
        'iso-ir-138'               => 'iso-8859-8',
        'csisolatinhebrew'         => 'iso-8859-8',
        'csiso88598i'              => 'iso-8859-8',
        'iso-8859-8i'              => 'iso-8859-8',
        'logical'                  => 'iso-8859-8',
        'csiso88598e'              => 'iso-8859-8-e',
        'latin5'                   => 'iso-8859-9',
        'iso_8859-9'               => 'iso-8859-9',
        'iso_8859-9:1989'          => 'iso-8859-9',
        'iso-ir-148'               => 'iso-8859-9',
        'l5'                       => 'iso-8859-9',
        'csisolatin5'              => 'iso-8859-9',
        'unicode-1-1-utf-8'        => 'utf-8',
        'utf8'                     => 'utf-8',
        'x-sjis'                   => 'shift_jis',
        'shift-jis'                => 'shift_jis',
        'ms_kanji'                 => 'shift_jis',
        'csshiftjis'               => 'shift_jis',
        'windows-31j'              => 'shift_jis',
        'cp932'                    => 'shift_jis',
        'sjis'                     => 'shift_jis',
        'cseucpkdfmtjapanese'      => 'euc-jp',
        'x-euc-jp'                 => 'euc-jp',
        'csiso2022jp'              => 'iso-2022-jp',
        'iso-2022-jp-2'            => 'iso-2022-jp',
        'csiso2022jp2'             => 'iso-2022-jp',
        'csbig5'                   => 'big5',
        'cn-big5'                  => 'big5',
        'x-x-big5'                 => 'big5',
        'zh_tw-big5'               => 'big5',
        'cseuckr'                  => 'euc-kr',
        'ks_c_5601-1987'           => 'euc-kr',
        'iso-ir-149'               => 'euc-kr',
        'ks_c_5601-1989'           => 'euc-kr',
        'ksc_5601'                 => 'euc-kr',
        'ksc5601'                  => 'euc-kr',
        'korean'                   => 'euc-kr',
        'csksc56011987'            => 'euc-kr',
        '5601'                     => 'euc-kr',
        'windows-949'              => 'euc-kr',
        'gb_2312-80'               => 'gb2312',
        'iso-ir-58'                => 'gb2312',
        'chinese'                  => 'gb2312',
        'csiso58gb231280'          => 'gb2312',
        'csgb2312'                 => 'gb2312',
        'zh_cn.euc'                => 'gb2312',
        'gb_2312'                  => 'gb2312',
        'x-cp1250'                 => 'windows-1250',
        'x-cp1251'                 => 'windows-1251',
        'x-cp1252'                 => 'windows-1252',
        'x-cp1253'                 => 'windows-1253',
        'x-cp1254'                 => 'windows-1254',
        'x-cp1255'                 => 'windows-1255',
        'x-cp1256'                 => 'windows-1256',
        'x-cp1257'                 => 'windows-1257',
        'x-cp1258'                 => 'windows-1258',
        'windows-874'              => 'windows-874',
        'ibm874'                   => 'windows-874',
        'dos-874'                  => 'windows-874',
        'macintosh'                => 'macintosh',
        'x-mac-roman'              => 'macintosh',
        'mac'                      => 'macintosh',
        'csmacintosh'              => 'macintosh',
        'cp866'                    => 'ibm866',
        'cp-866'                   => 'ibm866',
        '866'                      => 'ibm866',
        'csibm866'                 => 'ibm866',
        'cp850'                    => 'ibm850',
        '850'                      => 'ibm850',
        'csibm850'                 => 'ibm850',
        'cp852'                    => 'ibm852',
        '852'                      => 'ibm852',
        'csibm852'                 => 'ibm852',
        'cp855'                    => 'ibm855',
        '855'                      => 'ibm855',
        'csibm855'                 => 'ibm855',
        'cp857'                    => 'ibm857',
        '857'                      => 'ibm857',
        'csibm857'                 => 'ibm857',
        'cp862'                    => 'ibm862',
        '862'                      => 'ibm862',
        'csibm862'                 => 'ibm862',
        'cp864'                    => 'ibm864',
        '864'                      => 'ibm864',
        'csibm864'                 => 'ibm864',
        'ibm-864'                  => 'ibm864',
        't.61'                     => 't.61-8bit',
        'iso-ir-103'               => 't.61-8bit',
        'csiso103t618bit'          => 't.61-8bit',
        'x-unicode-2-0-utf-7'      => 'utf-7',
        'unicode-2-0-utf-7'        => 'utf-7',
        'unicode-1-1-utf-7'        => 'utf-7',
        'csunicode11utf7'          => 'utf-7',
        'csunicode'                => 'utf-16be',
        'csunicode11'              => 'utf-16be',
        'iso-10646-ucs-basic'      => 'utf-16be',
        'csunicodeascii'           => 'utf-16be',
        'iso-10646-unicode-latin1' => 'utf-16be',
        'csunicodelatin1'          => 'utf-16be',
        'iso-10646'                => 'utf-16be',
        'iso-10646-j-1'            => 'utf-16be',
        'latin6'                   => 'iso-8859-10',
        'iso-ir-157'               => 'iso-8859-10',
        'l6'                       => 'iso-8859-10',
        'csisolatin6'              => 'iso-8859-10',
        'iso_8859-15'              => 'iso-8859-15',
        'csisolatin9'              => 'iso-8859-15',
        'l9'                       => 'iso-8859-15',
        'ecma-cyrillic'            => 'iso-ir-111',
        'csiso111ecmacyrillic'     => 'iso-ir-111',
        'csiso2022kr'              => 'iso-2022-kr',
        'csviscii'                 => 'viscii',
        'zh_tw-euc'                => 'x-euc-tw',
        'iso88591'                 => 'iso-8859-1',
        'iso88592'                 => 'iso-8859-2',
        'iso88593'                 => 'iso-8859-3',
        'iso88594'                 => 'iso-8859-4',
        'iso88595'                 => 'iso-8859-5',
        'iso88596'                 => 'iso-8859-6',
        'iso88597'                 => 'iso-8859-7',
        'iso88598'                 => 'iso-8859-8',
        'iso88599'                 => 'iso-8859-9',
        'iso885910'                => 'iso-8859-10',
        'iso885911'                => 'iso-8859-11',
        // NOTE(review): 'iso-8859-12' is the only target in this table that
        // has no canonical entry of its own — confirm this mapping is intended.
        'iso885912'                => 'iso-8859-12',
        'iso885913'                => 'iso-8859-13',
        'iso885914'                => 'iso-8859-14',
        'iso885915'                => 'iso-8859-15',
        'tis620'                   => 'tis-620',
        'cp1250'                   => 'windows-1250',
        'cp1251'                   => 'windows-1251',
        'cp1252'                   => 'windows-1252',
        'cp1253'                   => 'windows-1253',
        'cp1254'                   => 'windows-1254',
        'cp1255'                   => 'windows-1255',
        'cp1256'                   => 'windows-1256',
        'cp1257'                   => 'windows-1257',
        'cp1258'                   => 'windows-1258',
        'x-gbk'                    => 'gbk',
        'windows-936'              => 'gbk',
        'ansi-1251'                => 'windows-1251',
    ];

    /**
     * Lazily built list of lowercase encoding names and aliases known to
     * mbstring; null until getSupportedEncodings() is first called.
     */
    private $supportedEncodings = null;

    /**
     * Convert a string from the given charset to UTF-8.
     *
     * The charset name is first resolved through getCharsetAlias(). Text that
     * resolves to 'utf-8' or 'us-ascii' is returned untouched. Otherwise
     * mbstring is used when it is loaded and lists the charset; everything
     * else falls through to iconv.
     *
     * @param string $encodedString Raw text in $charset.
     * @param string $charset       Charset name or alias (case-insensitive).
     *
     * @return string|false The UTF-8 text. The iconv fallback returns false
     *                      when it cannot perform the conversion.
     */
    public function decodeCharset($encodedString, $charset)
    {
        $charset = $this->getCharsetAlias($charset);

        if ($charset === 'utf-8' || $charset === 'us-ascii') {
            return $encodedString;
        }

        if (function_exists('mb_convert_encoding')) {
            if ($charset === 'iso-2022-jp') {
                // Decoded via mbstring's 'iso-2022-jp-ms' rather than plain
                // 'iso-2022-jp' (presumably to cover vendor extensions — confirm).
                return mb_convert_encoding($encodedString, 'utf-8', 'iso-2022-jp-ms');
            }

            // Strict membership test: array_search() used as a boolean would
            // treat a match at index 0 as "not supported".
            if (in_array($charset, $this->getSupportedEncodings(), true)) {
                return mb_convert_encoding($encodedString, 'utf-8', $charset);
            }
        }

        return iconv($charset, 'utf-8//translit//ignore', $encodedString);
    }

    /**
     * Resolve a charset name to its canonical lowercase name.
     *
     * @param string|null $charset Charset name or alias, any letter case.
     *
     * @return string The canonical name from the alias table, or 'us-ascii'
     *                when the name is not listed there.
     */
    public function getCharsetAlias($charset)
    {
        // Cast first so a null charset does not trigger the PHP 8.1+
        // deprecation in strtolower(); it still resolves to 'us-ascii'.
        $charset = strtolower((string) $charset);

        if (array_key_exists($charset, $this->charsetAlias)) {
            return $this->charsetAlias[$charset];
        }

        return 'us-ascii';
    }

    /**
     * List every encoding name and alias mbstring reports, lowercased.
     *
     * The transfer encodings 'BASE64', 'UUENCODE', 'HTML-ENTITIES' and
     * 'Quoted-Printable' are excluded. The list is computed once per
     * instance and then reused.
     *
     * @return string[] Lowercase encoding names and aliases.
     */
    private function getSupportedEncodings()
    {
        if ($this->supportedEncodings !== null) {
            return $this->supportedEncodings;
        }

        $encodings = array_diff(
            mb_list_encodings(),
            ['BASE64', 'UUENCODE', 'HTML-ENTITIES', 'Quoted-Printable']
        );

        $names = [];
        foreach ($encodings as $encoding) {
            $names[] = strtolower($encoding);

            $aliases = mb_encoding_aliases($encoding);
            if (is_array($aliases)) {
                foreach ($aliases as $alias) {
                    $names[] = strtolower($alias);
                }
            }
        }

        $this->supportedEncodings = array_values(array_unique($names));

        return $this->supportedEncodings;
    }
}