vendor/php-mime-mail-parser/php-mime-mail-parser/src/Charset.php line 316

Open in your IDE?
  1. <?php namespace PhpMimeMailParser;
  2. use PhpMimeMailParser\Contracts\CharsetManager;
  3. class Charset implements CharsetManager
  4. {
  5.     /**
  6.      * Charset Aliases
  7.      */
  8.     private $charsetAlias = [
  9.         'ascii'                    => 'us-ascii',
  10.         'us-ascii'                 => 'us-ascii',
  11.         'ansi_x3.4-1968'           => 'us-ascii',
  12.         '646'                      => 'us-ascii',
  13.         'iso-8859-1'               => 'iso-8859-1',
  14.         'iso-8859-2'               => 'iso-8859-2',
  15.         'iso-8859-3'               => 'iso-8859-3',
  16.         'iso-8859-4'               => 'iso-8859-4',
  17.         'iso-8859-5'               => 'iso-8859-5',
  18.         'iso-8859-6'               => 'iso-8859-6',
  19.         'iso-8859-6-i'             => 'iso-8859-6-i',
  20.         'iso-8859-6-e'             => 'iso-8859-6-e',
  21.         'iso-8859-7'               => 'iso-8859-7',
  22.         'iso-8859-8'               => 'iso-8859-8',
  23.         'iso-8859-8-i'             => 'iso-8859-8',
  24.         'iso-8859-8-e'             => 'iso-8859-8-e',
  25.         'iso-8859-9'               => 'iso-8859-9',
  26.         'iso-8859-10'              => 'iso-8859-10',
  27.         'iso-8859-11'              => 'iso-8859-11',
  28.         'iso-8859-13'              => 'iso-8859-13',
  29.         'iso-8859-14'              => 'iso-8859-14',
  30.         'iso-8859-15'              => 'iso-8859-15',
  31.         'iso-8859-16'              => 'iso-8859-16',
  32.         'iso-ir-111'               => 'iso-ir-111',
  33.         'iso-2022-cn'              => 'iso-2022-cn',
  34.         'iso-2022-cn-ext'          => 'iso-2022-cn',
  35.         'iso-2022-kr'              => 'iso-2022-kr',
  36.         'iso-2022-jp'              => 'iso-2022-jp',
  37.         'utf-16be'                 => 'utf-16be',
  38.         'utf-16le'                 => 'utf-16le',
  39.         'utf-16'                   => 'utf-16',
  40.         'windows-1250'             => 'windows-1250',
  41.         'windows-1251'             => 'windows-1251',
  42.         'windows-1252'             => 'windows-1252',
  43.         'windows-1253'             => 'windows-1253',
  44.         'windows-1254'             => 'windows-1254',
  45.         'windows-1255'             => 'windows-1255',
  46.         'windows-1256'             => 'windows-1256',
  47.         'windows-1257'             => 'windows-1257',
  48.         'windows-1258'             => 'windows-1258',
  49.         'ibm866'                   => 'ibm866',
  50.         'ibm850'                   => 'ibm850',
  51.         'ibm852'                   => 'ibm852',
  52.         'ibm855'                   => 'ibm855',
  53.         'ibm857'                   => 'ibm857',
  54.         'ibm862'                   => 'ibm862',
  55.         'ibm864'                   => 'ibm864',
  56.         'utf-8'                    => 'utf-8',
  57.         'utf-7'                    => 'utf-7',
  58.         'shift_jis'                => 'shift_jis',
  59.         'big5'                     => 'big5',
  60.         'euc-jp'                   => 'euc-jp',
  61.         'euc-kr'                   => 'euc-kr',
  62.         'gb2312'                   => 'gb2312',
  63.         'gb18030'                  => 'gb18030',
  64.         'viscii'                   => 'viscii',
  65.         'koi8-r'                   => 'koi8-r',
  66.         'koi8_r'                   => 'koi8-r',
  67.         'cskoi8r'                  => 'koi8-r',
  68.         'koi'                      => 'koi8-r',
  69.         'koi8'                     => 'koi8-r',
  70.         'koi8-u'                   => 'koi8-u',
  71.         'tis-620'                  => 'tis-620',
  72.         't.61-8bit'                => 't.61-8bit',
  73.         'hz-gb-2312'               => 'hz-gb-2312',
  74.         'big5-hkscs'               => 'big5-hkscs',
  75.         'gbk'                      => 'gbk',
  76.         'cns11643'                 => 'x-euc-tw',
  77.         'x-imap4-modified-utf7'    => 'x-imap4-modified-utf7',
  78.         'x-euc-tw'                 => 'x-euc-tw',
  79.         'x-mac-ce'                 => 'macce',
  80.         'x-mac-turkish'            => 'macturkish',
  81.         'x-mac-greek'              => 'macgreek',
  82.         'x-mac-icelandic'          => 'macicelandic',
  83.         'x-mac-croatian'           => 'maccroatian',
  84.         'x-mac-romanian'           => 'macromanian',
  85.         'x-mac-cyrillic'           => 'maccyrillic',
  86.         'x-mac-ukrainian'          => 'macukrainian',
  87.         'x-mac-hebrew'             => 'machebrew',
  88.         'x-mac-arabic'             => 'macarabic',
  89.         'x-mac-farsi'              => 'macfarsi',
  90.         'x-mac-devanagari'         => 'macdevanagari',
  91.         'x-mac-gujarati'           => 'macgujarati',
  92.         'x-mac-gurmukhi'           => 'macgurmukhi',
  93.         'armscii-8'                => 'armscii-8',
  94.         'x-viet-tcvn5712'          => 'x-viet-tcvn5712',
  95.         'x-viet-vps'               => 'x-viet-vps',
  96.         'iso-10646-ucs-2'          => 'utf-16be',
  97.         'x-iso-10646-ucs-2-be'     => 'utf-16be',
  98.         'x-iso-10646-ucs-2-le'     => 'utf-16le',
  99.         'x-user-defined'           => 'x-user-defined',
  100.         'x-johab'                  => 'x-johab',
  101.         'latin1'                   => 'iso-8859-1',
  102.         'iso_8859-1'               => 'iso-8859-1',
  103.         'iso8859-1'                => 'iso-8859-1',
  104.         'iso8859-2'                => 'iso-8859-2',
  105.         'iso8859-3'                => 'iso-8859-3',
  106.         'iso8859-4'                => 'iso-8859-4',
  107.         'iso8859-5'                => 'iso-8859-5',
  108.         'iso8859-6'                => 'iso-8859-6',
  109.         'iso8859-7'                => 'iso-8859-7',
  110.         'iso8859-8'                => 'iso-8859-8',
  111.         'iso8859-9'                => 'iso-8859-9',
  112.         'iso8859-10'               => 'iso-8859-10',
  113.         'iso8859-11'               => 'iso-8859-11',
  114.         'iso8859-13'               => 'iso-8859-13',
  115.         'iso8859-14'               => 'iso-8859-14',
  116.         'iso8859-15'               => 'iso-8859-15',
  117.         'iso_8859-1:1987'          => 'iso-8859-1',
  118.         'iso-ir-100'               => 'iso-8859-1',
  119.         'l1'                       => 'iso-8859-1',
  120.         'ibm819'                   => 'iso-8859-1',
  121.         'cp819'                    => 'iso-8859-1',
  122.         'csisolatin1'              => 'iso-8859-1',
  123.         'latin2'                   => 'iso-8859-2',
  124.         'iso_8859-2'               => 'iso-8859-2',
  125.         'iso_8859-2:1987'          => 'iso-8859-2',
  126.         'iso-ir-101'               => 'iso-8859-2',
  127.         'l2'                       => 'iso-8859-2',
  128.         'csisolatin2'              => 'iso-8859-2',
  129.         'latin3'                   => 'iso-8859-3',
  130.         'iso_8859-3'               => 'iso-8859-3',
  131.         'iso_8859-3:1988'          => 'iso-8859-3',
  132.         'iso-ir-109'               => 'iso-8859-3',
  133.         'l3'                       => 'iso-8859-3',
  134.         'csisolatin3'              => 'iso-8859-3',
  135.         'latin4'                   => 'iso-8859-4',
  136.         'iso_8859-4'               => 'iso-8859-4',
  137.         'iso_8859-4:1988'          => 'iso-8859-4',
  138.         'iso-ir-110'               => 'iso-8859-4',
  139.         'l4'                       => 'iso-8859-4',
  140.         'csisolatin4'              => 'iso-8859-4',
  141.         'cyrillic'                 => 'iso-8859-5',
  142.         'iso_8859-5'               => 'iso-8859-5',
  143.         'iso_8859-5:1988'          => 'iso-8859-5',
  144.         'iso-ir-144'               => 'iso-8859-5',
  145.         'csisolatincyrillic'       => 'iso-8859-5',
  146.         'arabic'                   => 'iso-8859-6',
  147.         'iso_8859-6'               => 'iso-8859-6',
  148.         'iso_8859-6:1987'          => 'iso-8859-6',
  149.         'iso-ir-127'               => 'iso-8859-6',
  150.         'ecma-114'                 => 'iso-8859-6',
  151.         'asmo-708'                 => 'iso-8859-6',
  152.         'csisolatinarabic'         => 'iso-8859-6',
  153.         'csiso88596i'              => 'iso-8859-6-i',
  154.         'csiso88596e'              => 'iso-8859-6-e',
  155.         'greek'                    => 'iso-8859-7',
  156.         'greek8'                   => 'iso-8859-7',
  157.         'sun_eu_greek'             => 'iso-8859-7',
  158.         'iso_8859-7'               => 'iso-8859-7',
  159.         'iso_8859-7:1987'          => 'iso-8859-7',
  160.         'iso-ir-126'               => 'iso-8859-7',
  161.         'elot_928'                 => 'iso-8859-7',
  162.         'ecma-118'                 => 'iso-8859-7',
  163.         'csisolatingreek'          => 'iso-8859-7',
  164.         'hebrew'                   => 'iso-8859-8',
  165.         'iso_8859-8'               => 'iso-8859-8',
  166.         'visual'                   => 'iso-8859-8',
  167.         'iso_8859-8:1988'          => 'iso-8859-8',
  168.         'iso-ir-138'               => 'iso-8859-8',
  169.         'csisolatinhebrew'         => 'iso-8859-8',
  170.         'csiso88598i'              => 'iso-8859-8',
  171.         'iso-8859-8i'              => 'iso-8859-8',
  172.         'logical'                  => 'iso-8859-8',
  173.         'csiso88598e'              => 'iso-8859-8-e',
  174.         'latin5'                   => 'iso-8859-9',
  175.         'iso_8859-9'               => 'iso-8859-9',
  176.         'iso_8859-9:1989'          => 'iso-8859-9',
  177.         'iso-ir-148'               => 'iso-8859-9',
  178.         'l5'                       => 'iso-8859-9',
  179.         'csisolatin5'              => 'iso-8859-9',
  180.         'unicode-1-1-utf-8'        => 'utf-8',
  181.         'utf8'                     => 'utf-8',
  182.         'x-sjis'                   => 'shift_jis',
  183.         'shift-jis'                => 'shift_jis',
  184.         'ms_kanji'                 => 'shift_jis',
  185.         'csshiftjis'               => 'shift_jis',
  186.         'windows-31j'              => 'shift_jis',
  187.         'cp932'                    => 'shift_jis',
  188.         'sjis'                     => 'shift_jis',
  189.         'cseucpkdfmtjapanese'      => 'euc-jp',
  190.         'x-euc-jp'                 => 'euc-jp',
  191.         'csiso2022jp'              => 'iso-2022-jp',
  192.         'iso-2022-jp-2'            => 'iso-2022-jp',
  193.         'csiso2022jp2'             => 'iso-2022-jp',
  194.         'csbig5'                   => 'big5',
  195.         'cn-big5'                  => 'big5',
  196.         'x-x-big5'                 => 'big5',
  197.         'zh_tw-big5'               => 'big5',
  198.         'cseuckr'                  => 'euc-kr',
  199.         'ks_c_5601-1987'           => 'euc-kr',
  200.         'iso-ir-149'               => 'euc-kr',
  201.         'ks_c_5601-1989'           => 'euc-kr',
  202.         'ksc_5601'                 => 'euc-kr',
  203.         'ksc5601'                  => 'euc-kr',
  204.         'korean'                   => 'euc-kr',
  205.         'csksc56011987'            => 'euc-kr',
  206.         '5601'                     => 'euc-kr',
  207.         'windows-949'              => 'euc-kr',
  208.         'gb_2312-80'               => 'gb2312',
  209.         'iso-ir-58'                => 'gb2312',
  210.         'chinese'                  => 'gb2312',
  211.         'csiso58gb231280'          => 'gb2312',
  212.         'csgb2312'                 => 'gb2312',
  213.         'zh_cn.euc'                => 'gb2312',
  214.         'gb_2312'                  => 'gb2312',
  215.         'x-cp1250'                 => 'windows-1250',
  216.         'x-cp1251'                 => 'windows-1251',
  217.         'x-cp1252'                 => 'windows-1252',
  218.         'x-cp1253'                 => 'windows-1253',
  219.         'x-cp1254'                 => 'windows-1254',
  220.         'x-cp1255'                 => 'windows-1255',
  221.         'x-cp1256'                 => 'windows-1256',
  222.         'x-cp1257'                 => 'windows-1257',
  223.         'x-cp1258'                 => 'windows-1258',
  224.         'windows-874'              => 'windows-874',
  225.         'ibm874'                   => 'windows-874',
  226.         'dos-874'                  => 'windows-874',
  227.         'macintosh'                => 'macintosh',
  228.         'x-mac-roman'              => 'macintosh',
  229.         'mac'                      => 'macintosh',
  230.         'csmacintosh'              => 'macintosh',
  231.         'cp866'                    => 'ibm866',
  232.         'cp-866'                   => 'ibm866',
  233.         '866'                      => 'ibm866',
  234.         'csibm866'                 => 'ibm866',
  235.         'cp850'                    => 'ibm850',
  236.         '850'                      => 'ibm850',
  237.         'csibm850'                 => 'ibm850',
  238.         'cp852'                    => 'ibm852',
  239.         '852'                      => 'ibm852',
  240.         'csibm852'                 => 'ibm852',
  241.         'cp855'                    => 'ibm855',
  242.         '855'                      => 'ibm855',
  243.         'csibm855'                 => 'ibm855',
  244.         'cp857'                    => 'ibm857',
  245.         '857'                      => 'ibm857',
  246.         'csibm857'                 => 'ibm857',
  247.         'cp862'                    => 'ibm862',
  248.         '862'                      => 'ibm862',
  249.         'csibm862'                 => 'ibm862',
  250.         'cp864'                    => 'ibm864',
  251.         '864'                      => 'ibm864',
  252.         'csibm864'                 => 'ibm864',
  253.         'ibm-864'                  => 'ibm864',
  254.         't.61'                     => 't.61-8bit',
  255.         'iso-ir-103'               => 't.61-8bit',
  256.         'csiso103t618bit'          => 't.61-8bit',
  257.         'x-unicode-2-0-utf-7'      => 'utf-7',
  258.         'unicode-2-0-utf-7'        => 'utf-7',
  259.         'unicode-1-1-utf-7'        => 'utf-7',
  260.         'csunicode11utf7'          => 'utf-7',
  261.         'csunicode'                => 'utf-16be',
  262.         'csunicode11'              => 'utf-16be',
  263.         'iso-10646-ucs-basic'      => 'utf-16be',
  264.         'csunicodeascii'           => 'utf-16be',
  265.         'iso-10646-unicode-latin1' => 'utf-16be',
  266.         'csunicodelatin1'          => 'utf-16be',
  267.         'iso-10646'                => 'utf-16be',
  268.         'iso-10646-j-1'            => 'utf-16be',
  269.         'latin6'                   => 'iso-8859-10',
  270.         'iso-ir-157'               => 'iso-8859-10',
  271.         'l6'                       => 'iso-8859-10',
  272.         'csisolatin6'              => 'iso-8859-10',
  273.         'iso_8859-15'              => 'iso-8859-15',
  274.         'csisolatin9'              => 'iso-8859-15',
  275.         'l9'                       => 'iso-8859-15',
  276.         'ecma-cyrillic'            => 'iso-ir-111',
  277.         'csiso111ecmacyrillic'     => 'iso-ir-111',
  278.         'csiso2022kr'              => 'iso-2022-kr',
  279.         'csviscii'                 => 'viscii',
  280.         'zh_tw-euc'                => 'x-euc-tw',
  281.         'iso88591'                 => 'iso-8859-1',
  282.         'iso88592'                 => 'iso-8859-2',
  283.         'iso88593'                 => 'iso-8859-3',
  284.         'iso88594'                 => 'iso-8859-4',
  285.         'iso88595'                 => 'iso-8859-5',
  286.         'iso88596'                 => 'iso-8859-6',
  287.         'iso88597'                 => 'iso-8859-7',
  288.         'iso88598'                 => 'iso-8859-8',
  289.         'iso88599'                 => 'iso-8859-9',
  290.         'iso885910'                => 'iso-8859-10',
  291.         'iso885911'                => 'iso-8859-11',
  292.         'iso885912'                => 'iso-8859-12',
  293.         'iso885913'                => 'iso-8859-13',
  294.         'iso885914'                => 'iso-8859-14',
  295.         'iso885915'                => 'iso-8859-15',
  296.         'tis620'                   => 'tis-620',
  297.         'cp1250'                   => 'windows-1250',
  298.         'cp1251'                   => 'windows-1251',
  299.         'cp1252'                   => 'windows-1252',
  300.         'cp1253'                   => 'windows-1253',
  301.         'cp1254'                   => 'windows-1254',
  302.         'cp1255'                   => 'windows-1255',
  303.         'cp1256'                   => 'windows-1256',
  304.         'cp1257'                   => 'windows-1257',
  305.         'cp1258'                   => 'windows-1258',
  306.         'x-gbk'                    => 'gbk',
  307.         'windows-936'              => 'gbk',
  308.         'ansi-1251'                => 'windows-1251',
  309.     ];
  310.     /**
  311.      * {@inheritdoc}
  312.      */
  313.     public function decodeCharset($encodedString$charset)
  314.     {
  315.         $charset $this->getCharsetAlias($charset);
  316.         if ($charset == 'utf-8' || $charset == 'us-ascii') {
  317.             return $encodedString;
  318.         }
  319.         if (function_exists('mb_convert_encoding')) {
  320.             if ($charset == 'iso-2022-jp') {
  321.                 return mb_convert_encoding($encodedString'utf-8''iso-2022-jp-ms');
  322.             }
  323.             if (array_search($charset$this->getSupportedEncodings())) {
  324.                 return mb_convert_encoding($encodedString'utf-8'$charset);
  325.             }
  326.         }
  327.         return iconv($charset'utf-8//translit//ignore'$encodedString);
  328.     }
  329.     /**
  330.      * {@inheritdoc}
  331.      */
  332.     public function getCharsetAlias($charset)
  333.     {
  334.         $charset strtolower($charset);
  335.         if (array_key_exists($charset$this->charsetAlias)) {
  336.             return $this->charsetAlias[$charset];
  337.         }
  338.         
  339.         return 'us-ascii';
  340.     }
  341.     private function getSupportedEncodings()
  342.     {
  343.         return
  344.         array_map(
  345.             'strtolower',
  346.             array_unique(
  347.                 array_merge(
  348.                     $enc mb_list_encodings(),
  349.                     call_user_func_array(
  350.                         'array_merge',
  351.                         array_map(
  352.                             "mb_encoding_aliases",
  353.                             $enc
  354.                         )
  355.                     )
  356.                 )
  357.             )
  358.         );
  359.     }
  360. }