/**
 * Decode the first UTF-8 encoded character of a byte string into its code point.
 *
 * Only the leading sequence is examined; any bytes after it are ignored.
 * Besides the 1- to 4-byte forms, the 5- and 6-byte forms (up to 31 bits)
 * are accepted as well.
 *
 * @param string $char Byte string that starts with the character to decode.
 *
 * @return int|false The decoded code point, or false when the input is not a
 *                   non-empty string, the first byte cannot start a sequence,
 *                   the sequence is truncated, or one of the following bytes
 *                   is not a continuation byte (10xxxxxx).
 *
 * @throws RuntimeException When the sequence is an overlong encoding, i.e. the
 *                          decoded value would have fit in a shorter sequence.
 */
public static function toUnicodeCodepoint($char)
{
    // Reading offset 0 of an empty string emits a warning and used to yield
    // code point 0, which is indistinguishable from a real NUL character.
    if (!is_string($char) || $char === '') {
        return false;
    }

    $lead = ord($char[0]);

    // 7 bits, 1 byte: plain ASCII.
    if (($lead & 0x80) === 0x00) {
        return $lead;
    }

    // Derive the sequence length, the payload bits carried by the lead byte
    // and the smallest code point that legitimately needs that many bytes.
    if (($lead & 0xe0) === 0xc0) {
        // 11 bits, 2 bytes
        $length = 2;
        $payloadMask = 0x1f;
        $minimum = 0x80;
    } elseif (($lead & 0xf0) === 0xe0) {
        // 16 bits, 3 bytes
        $length = 3;
        $payloadMask = 0x0f;
        $minimum = 0x800;
    } elseif (($lead & 0xf8) === 0xf0) {
        // 21 bits, 4 bytes
        $length = 4;
        $payloadMask = 0x07;
        $minimum = 0x10000;
    } elseif (($lead & 0xfc) === 0xf8) {
        // 26 bits, 5 bytes
        $length = 5;
        $payloadMask = 0x03;
        $minimum = 0x200000;
    } elseif (($lead & 0xfe) === 0xfc) {
        // 31 bits, 6 bytes
        $length = 6;
        $payloadMask = 0x01;
        $minimum = 0x4000000;
    } else {
        // Stray continuation byte (0x80-0xBF) or 0xFE/0xFF: not a sequence start.
        return false;
    }

    // A truncated sequence is reported as a failure instead of reading past
    // the end of the string (which raises "Uninitialized string offset").
    if (strlen($char) < $length) {
        return false;
    }

    $charCode = $lead & $payloadMask;
    for ($i = 1; $i < $length; $i++) {
        $byte = ord($char[$i]);
        if (($byte & 0xc0) !== 0x80) {
            return false;
        }
        // Each continuation byte contributes its low six bits.
        $charCode = ($charCode << 6) | ($byte & 0x3f);
    }

    if ($charCode < $minimum) {
        throw new RuntimeException('Illegal UTF-8 input character: ' . $char);
    }

    return $charCode;
}