HTMLPurifier_Encoder::testEncodingSupportsASCII PHP Method

testEncodingSupportsASCII() public static method

This expensive function tests whether or not a given character encoding supports ASCII. 7/8-bit encodings like Shift_JIS will fail this test, and require special processing. Variable width encodings shouldn't ever fail.
public static testEncodingSupportsASCII ( string $encoding, boolean $bypass = false ) : Array
$encoding string Encoding name to test, as per iconv format
$bypass boolean Whether or not to bypass the precompiled arrays.
return Array mapping UTF-8 characters to their corresponding ASCII characters, which can be used to "undo" any overzealous iconv action; false if iconv cannot convert to the given encoding at all.
    /**
     * Tests whether a character encoding represents printable ASCII
     * (0x20-0x7E) with the same bytes as UTF-8.
     *
     * This is expensive (up to two iconv calls per printable ASCII
     * character), so computed results are memoized per encoding name in a
     * static cache.
     *
     * @param string $encoding Encoding name to test, as per iconv format
     * @param bool $bypass Whether to skip the cache and the precompiled
     *        tables and always probe iconv. The freshly computed result is
     *        still written to the cache.
     * @return array|false Map of UTF-8 character => ASCII character for
     *         every printable ASCII byte the encoding does not preserve
     *         (empty if it preserves all of them), or false when iconv
     *         cannot convert to $encoding at all.
     */
    public static function testEncodingSupportsASCII($encoding, $bypass = false)
    {
        // All calls to iconv here are unsafe, proof by case analysis:
        // If ICONV_OK, no difference.
        // If ICONV_TRUNCATE, all calls involve one character inputs,
        // so bug is not triggered.
        // If ICONV_UNUSABLE, this call is irrelevant
        static $encodings = array();
        if (!$bypass) {
            if (isset($encodings[$encoding])) {
                return $encodings[$encoding];
            }
            $lenc = strtolower($encoding);
            // Precompiled answers. Keys are written as explicit UTF-8 byte
            // sequences so they do not depend on this file's own encoding.
            switch ($lenc) {
                case 'shift_jis':
                    // U+00A5 YEN SIGN => backslash, U+203E OVERLINE => tilde
                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
                case 'johab':
                    // U+20A9 WON SIGN => backslash
                    return array("\xE2\x82\xA9" => '\\');
            }
            if (strpos($lenc, 'iso-8859-') === 0) {
                return array();
            }
        }
        $ret = array();
        // Probe with a single character first: a false result means the
        // conversion to this encoding is not usable at all.
        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
            return false;
        }
        for ($i = 0x20; $i <= 0x7E; $i++) {
            // all printable ASCII chars
            $c = chr($i);
            // initial conversion of the UTF-8 char into the target encoding
            $r = self::unsafeIconv('UTF-8', "{$encoding}//IGNORE", $c);
            if ($r !== '' && $r !== $c) {
                // Converted to something else (or failed outright): the
                // original never recorded a fix for this case either.
                continue;
            }
            // Reverse engineer: what's the UTF-8 equiv of this byte
            // sequence? This assumes that there's no variable width
            // encoding that doesn't support ASCII.
            $utf8 = self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c);
            if ($r === $c && $utf8 === $c) {
                // The byte survives the round trip; nothing to undo.
                continue;
            }
            // Only record usable mappings. A false key would be cast to the
            // integer key 0 and an empty string would become an empty key,
            // and either would corrupt the strtr()/array_flip() tables that
            // callers build from this result.
            if (is_string($utf8) && $utf8 !== '') {
                $ret[$utf8] = $c;
            }
        }
        $encodings[$encoding] = $ret;
        return $ret;
    }

Usage Example

Example #1
0
 /**
  * Converts a string from UTF-8 to the encoding named by the
  * Core.Encoding configuration directive.
  *
  * @note Currently, this is a lossy conversion, with inexpressible
  *       characters being omitted.
  *
  * @param string $str UTF-8 string to convert
  * @param object $config Configuration object; read through get() for
  *        Core.Encoding, Core.EscapeNonASCIICharacters and
  *        Test.ForceNoIconv
  * @param mixed $context Not used by this method
  * @return string|false|null The converted string (returned unchanged when
  *         the target is 'utf-8'); whatever iconv() returns on the iconv
  *         path, which may be false on failure; nothing is returned after
  *         the 'Encoding not supported' error has been triggered.
  */
 public static function convertFromUTF8($str, $config, $context)
 {
     $encoding = $config->get('Core.Encoding');
     if ($encoding === 'utf-8') {
         return $str;
     }
     // Probe for the iconv extension once per request.
     static $iconv = null;
     if ($iconv === null) {
         $iconv = function_exists('iconv');
     }
     if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
         $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
     }
     // NOTE(review): muteErrorHandler presumably swallows the notices that
     // iconv emits; its body is not visible here. Every exit path below
     // must restore the previous handler.
     set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
     if ($iconv && !$config->get('Test.ForceNoIconv')) {
         // Undo our previous fix in convertToUTF8, otherwise iconv will barf
         $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
         if (!is_array($ascii_fix)) {
             // testEncodingSupportsASCII() returns false when iconv cannot
             // handle the encoding. Treat that as "no fix-ups" so false
             // never reaches array_flip(); iconv() below reports the
             // failure through its own return value.
             $ascii_fix = array();
         }
         if (!$escape && !empty($ascii_fix)) {
             // Drop the UTF-8 look-alike characters that are already in
             // the input, before real ASCII is mapped onto them.
             $clear_fix = array();
             foreach ($ascii_fix as $utf8 => $native) {
                 $clear_fix[$utf8] = '';
             }
             $str = strtr($str, $clear_fix);
         }
         $str = strtr($str, array_flip($ascii_fix));
         // Normal stuff
         $str = iconv('utf-8', $encoding . '//IGNORE', $str);
         restore_error_handler();
         return $str;
     } elseif ($encoding === 'iso-8859-1') {
         // Fallback that does not need iconv. utf8_decode() is deprecated
         // as of PHP 8.2.
         $str = utf8_decode($str);
         restore_error_handler();
         return $str;
     }
     // Restore the caller's handler before raising the error. Otherwise
     // the mute handler stays installed after this method returns, and the
     // error below is delivered to it instead of the caller's handler.
     restore_error_handler();
     trigger_error('Encoding not supported', E_USER_ERROR);
 }
All Usage Examples Of HTMLPurifier_Encoder::testEncodingSupportsASCII