HTMLPurifier_Encoder::convertToASCIIDumbLossless PHP Method

convertToASCIIDumbLossless() public static method

Lossless (character-wise) conversion of HTML to ASCII
public static convertToASCIIDumbLossless ( string $str ) : string
$str string UTF-8 string to be converted to ASCII
return string ASCII-encoded string with non-ASCII characters entity-ized
    /**
     * Lossless (character-wise) conversion of a UTF-8 string to ASCII.
     *
     * ASCII bytes are copied through unchanged. Every multi-byte UTF-8
     * sequence is replaced by a decimal numeric character reference, so
     * "\xC3\xA9" becomes "&#233;".
     *
     * Bytes that cannot belong to a sequence are dropped rather than turned
     * into a bogus entity such as "&#0;": a continuation byte that arrives
     * while no sequence is in progress, and the bytes 0xF8-0xFF, which are
     * not lead bytes. A sequence that is cut short (by an ASCII byte, by
     * another lead byte, or by the end of the string) produces no output.
     *
     * @note This is a "dumb" decoder: overlong forms, surrogates and code
     *       points above U+10FFFF are not rejected. Callers are expected to
     *       pass text that is already well-formed UTF-8.
     *
     * @param string $str UTF-8 string to be converted to ASCII
     * @return string ASCII encoded string with non-ASCII characters entity-ized
     */
    public static function convertToASCIIDumbLossless($str)
    {
        // Number of continuation bytes still expected for the current character.
        $bytesleft = 0;
        $result = '';
        // Code point being assembled from the current multi-byte sequence.
        $working = 0;
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            $bytevalue = ord($str[$i]);
            if ($bytevalue <= 0x7f) {
                //0xxx xxxx
                $result .= chr($bytevalue);
                // An ASCII byte aborts any sequence that was still pending.
                $bytesleft = 0;
            } elseif ($bytevalue <= 0xbf) {
                //10xx xxxx
                if ($bytesleft <= 0) {
                    // Stray continuation byte: there is no lead byte to
                    // attach it to, so skip it instead of emitting garbage.
                    continue;
                }
                $working = ($working << 6) | ($bytevalue & 0x3f);
                $bytesleft--;
                if ($bytesleft === 0) {
                    $result .= "&#" . $working . ";";
                }
            } elseif ($bytevalue <= 0xdf) {
                //110x xxxx
                $working = $bytevalue & 0x1f;
                $bytesleft = 1;
            } elseif ($bytevalue <= 0xef) {
                //1110 xxxx
                $working = $bytevalue & 0xf;
                $bytesleft = 2;
            } elseif ($bytevalue <= 0xf7) {
                //1111 0xxx
                $working = $bytevalue & 0x7;
                $bytesleft = 3;
            } else {
                //1111 1xxx -- never a valid lead byte; drop it and make sure
                // the continuation bytes that follow are ignored as well.
                $working = 0;
                $bytesleft = 0;
            }
        }
        return $result;
    }

Usage Example

Example #1
0
 /**
  * Re-encodes a UTF-8 string into the encoding named by the
  * 'Core'/'Encoding' configuration value.
  *
  * When the target encoding is 'utf-8' the string is returned untouched.
  * Otherwise, if 'Core'/'EscapeNonASCIICharacters' is set, non-ASCII
  * characters are first rewritten as numeric entities. The string is then
  * converted with iconv (when the extension exists and 'Test'/'ForceNoIconv'
  * is not set) or, for 'iso-8859-1' only, with utf8_decode().
  *
  * @note The iconv call uses '//IGNORE', so this is a lossy conversion:
  *       characters that cannot be expressed in the target encoding are
  *       omitted.
  * @note If neither conversion route applies, an E_USER_ERROR is raised
  *       and no explicit value is returned.
  *
  * @param string $str     UTF-8 string to convert
  * @param object $config  Configuration object queried through get()
  * @param mixed  $context Not used by this method
  * @return string|false Converted string, or whatever iconv() yields on failure
  */
 public static function convertFromUTF8($str, $config, $context)
 {
     // Probe for the iconv extension once and remember the answer.
     static $hasIconv = null;
     if (null === $hasIconv) {
         $hasIconv = function_exists('iconv');
     }

     $target = $config->get('Core', 'Encoding');
     if ('utf-8' === $target) {
         // Already in the requested encoding.
         return $str;
     }

     if ($config->get('Core', 'EscapeNonASCIICharacters')) {
         $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
     }

     // The ForceNoIconv setting is only consulted when iconv is available.
     $viaIconv = $hasIconv && !$config->get('Test', 'ForceNoIconv');
     if ($viaIconv) {
         return @iconv('utf-8', $target . '//IGNORE', $str);
     }

     if ('iso-8859-1' === $target) {
         return @utf8_decode($str);
     }

     trigger_error('Encoding not supported', E_USER_ERROR);
 }
All Usage Examples Of HTMLPurifier_Encoder::convertToASCIIDumbLossless