HTMLPurifier_Encoder::iconv PHP Method

iconv() public static method

iconv wrapper which mutes errors and works around bugs.
public static iconv ( string $in, string $out, string $text, integer $max_chunk_size = 8000 ) : string
$in string Input encoding
$out string Output encoding
$text string The text to convert
$max_chunk_size integer
return string
    public static function iconv($in, $out, $text, $max_chunk_size = 8000)
    {
        $code = self::testIconvTruncateBug();
        if ($code == self::ICONV_OK) {
            return self::unsafeIconv($in, $out, $text);
        } elseif ($code == self::ICONV_TRUNCATES) {
            // we can only work around this if the input character set
            // is utf-8
            if ($in == 'utf-8') {
                if ($max_chunk_size < 4) {
                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
                    return false;
                }
                // split into 8000 byte chunks, but be careful to handle
                // multibyte boundaries properly
                if (($c = strlen($text)) <= $max_chunk_size) {
                    return self::unsafeIconv($in, $out, $text);
                }
                $r = '';
                $i = 0;
                while (true) {
                    if ($i + $max_chunk_size >= $c) {
                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
                        break;
                    }
                    // wibble the boundary
                    if (0x80 != (0xc0 & ord($text[$i + $max_chunk_size]))) {
                        $chunk_size = $max_chunk_size;
                    } elseif (0x80 != (0xc0 & ord($text[$i + $max_chunk_size - 1]))) {
                        $chunk_size = $max_chunk_size - 1;
                    } elseif (0x80 != (0xc0 & ord($text[$i + $max_chunk_size - 2]))) {
                        $chunk_size = $max_chunk_size - 2;
                    } elseif (0x80 != (0xc0 & ord($text[$i + $max_chunk_size - 3]))) {
                        $chunk_size = $max_chunk_size - 3;
                    } else {
                        return false;
                        // rather confusing UTF-8...
                    }
                    $chunk = substr($text, $i, $chunk_size);
                    // substr doesn't mind overlong lengths
                    $r .= self::unsafeIconv($in, $out, $chunk);
                    $i += $chunk_size;
                }
                return $r;
            } else {
                return false;
            }
        } else {
            return false;
        }
    }

Usage Example

Example #1
0
 public function testIconvChunking()
 {
     if (!HTMLPurifier_Encoder::iconvAvailable()) {
         return;
     }
     if (HTMLPurifier_Encoder::testIconvTruncateBug() !== HTMLPurifier_Encoder::ICONV_TRUNCATES) {
         return;
     }
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aó € b", 4), 'ab');
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aa中b", 4), 'aab');
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaαb", 4), 'aaab');
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaaó € b", 4), 'aaaab');
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaa中b", 4), 'aaaab');
     $this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaaαb", 4), 'aaaab');
 }