public static toUTF8Character ( integer $charCode ) : string

| Name      | Type    |
|-----------|---------|
| $charCode | integer |
| return    | string  |
/**
 * Encode a single integer character code as a UTF-8 byte sequence.
 *
 * Codes below 0x80 are returned as one byte. Larger codes are split into
 * 6-bit groups: a lead byte whose high bits announce the sequence length,
 * followed by continuation bytes of the form 10xxxxxx.
 *
 * Besides the 1 to 4 byte forms, the historical 5 and 6 byte forms (codes
 * up to 0x7FFFFFFF) are still produced for backward compatibility; RFC 3629
 * no longer treats those as valid UTF-8. Surrogate codes (0xD800-0xDFFF)
 * are encoded as-is and not rejected.
 *
 * @param integer $charCode Character code in the range 0..0x7FFFFFFF.
 *
 * @return string The encoded byte sequence (1 to 6 bytes).
 *
 * @throws \InvalidArgumentException If $charCode is negative or larger than
 *         0x7FFFFFFF and therefore has no encoding.
 */
public static function toUTF8Character($charCode)
{
    // Compare by magnitude rather than with 32-bit masks: a mask cannot see
    // bits above bit 31, so on 64-bit PHP a code such as 0x100000041 would be
    // mistaken for a 7 bit character and silently truncated by chr().
    if ($charCode < 0 || $charCode > 0x7fffffff) {
        throw new \InvalidArgumentException(
            'Character code out of range (0..0x7FFFFFFF): ' . var_export($charCode, true)
        );
    }

    $code = (int) $charCode;

    if ($code < 0x80) {
        // 7 bit: plain single byte, including NUL.
        return chr($code);
    }

    // Choose the number of continuation bytes and the matching lead-byte
    // marker (110xxxxx, 1110xxxx, 11110xxx, 111110xx, 1111110x).
    if ($code < 0x800) {
        // 11 bit
        $continuationBytes = 1;
        $leadMarker = 0xc0;
    } elseif ($code < 0x10000) {
        // 16 bit
        $continuationBytes = 2;
        $leadMarker = 0xe0;
    } elseif ($code < 0x200000) {
        // 21 bit
        $continuationBytes = 3;
        $leadMarker = 0xf0;
    } elseif ($code < 0x4000000) {
        // 26 bit
        $continuationBytes = 4;
        $leadMarker = 0xf8;
    } else {
        // 31 bit
        $continuationBytes = 5;
        $leadMarker = 0xfc;
    }

    // The range checks above guarantee that the bits left after shifting
    // fit into the payload part of the lead byte, so no extra mask is needed.
    $char = chr($leadMarker | ($code >> (6 * $continuationBytes)));

    // Emit the remaining payload, most significant 6-bit group first.
    for ($shift = 6 * ($continuationBytes - 1); $shift >= 0; $shift -= 6) {
        $char .= chr(0x80 | (($code >> $shift) & 0x3f));
    }

    return $char;
}
/**
 * Turn one character definition from the source into its literal character.
 *
 * Two notations are recognised:
 *  - U+xxxx: four hexadecimal digits, passed as an integer to the
 *    converter's toUTF8Character()
 *  - xx: two hexadecimal digits, turned into a single byte with chr()
 *
 * @param string $char
 *
 * @return string
 *
 * @throws RuntimeException If $char matches neither notation.
 */
protected function compileCharacter($char)
{
    // Unicode notation: strip the leading "U+" and convert the hex digits.
    if (preg_match('(^U\\+[0-9a-fA-F]{4}$)', $char)) {
        $codePoint = hexdec(substr($char, 2));

        return $this->converter->toUTF8Character($codePoint);
    }

    // Two hex digits: a single byte value.
    if (preg_match('(^[0-9a-fA-F]{2}$)', $char)) {
        $byteValue = hexdec($char);

        return chr($byteValue);
    }

    throw new RuntimeException("Invalid character definition: {$char}");
}