eZ\Publish\Core\Persistence\Utf8Converter::toUnicodeCodepoint PHP Method

toUnicodeCodepoint() public static method

Convert a single UTF-8 character into its decimal code point.
public static toUnicodeCodepoint ( string $char ) : integer
$char string
return integer|false (the code point, or false when the input is not a valid UTF-8 sequence)
    /**
     * Convert a single UTF-8 character into its decimal code point.
     *
     * Only the first encoded character of $char is decoded; any bytes after it
     * are ignored. In addition to 1- to 4-byte sequences, the legacy 5- and
     * 6-byte forms (lead bytes 0xF8-0xFD) are decoded as well.
     *
     * @param string $char UTF-8 encoded character
     *
     * @return int|false The code point, or false when $char is empty, starts
     *                   with a byte that cannot open a sequence, is truncated,
     *                   or contains an invalid continuation byte
     *
     * @throws RuntimeException When the sequence is overlong, i.e. the decoded
     *                          value would have fit into a shorter sequence
     */
    public static function toUnicodeCodepoint($char)
    {
        // isset() on a string offset never raises "Uninitialized string offset",
        // so empty input is rejected here instead of being decoded as U+0000.
        if (!isset($char[0])) {
            return false;
        }

        $lead = ord($char[0]);

        if (($lead & 0x80) === 0x00) {
            // 7 bits, 1 byte
            return $lead;
        }

        // Classify the lead byte: total sequence length, payload bits carried by
        // the lead byte, and the smallest code point that needs this length.
        if (($lead & 0xe0) === 0xc0) {
            // 11 bits, 2 bytes
            $length = 2;
            $charCode = $lead & 0x1f;
            $minimum = 0x80;
        } elseif (($lead & 0xf0) === 0xe0) {
            // 16 bits, 3 bytes
            $length = 3;
            $charCode = $lead & 0x0f;
            $minimum = 0x800;
        } elseif (($lead & 0xf8) === 0xf0) {
            // 21 bits, 4 bytes
            $length = 4;
            $charCode = $lead & 0x07;
            $minimum = 0x10000;
        } elseif (($lead & 0xfc) === 0xf8) {
            // 26 bits, 5 bytes
            $length = 5;
            $charCode = $lead & 0x03;
            $minimum = 0x200000;
        } elseif (($lead & 0xfe) === 0xfc) {
            // 31 bits, 6 bytes
            $length = 6;
            $charCode = $lead & 0x01;
            $minimum = 0x4000000;
        } else {
            // Stray continuation byte (0x80-0xBF) or 0xFE / 0xFF
            return false;
        }

        for ($offset = 1; $offset < $length; ++$offset) {
            // Truncated sequence: the announced continuation byte is missing
            if (!isset($char[$offset])) {
                return false;
            }

            $byte = ord($char[$offset]);
            // Continuation bytes must look like 10xxxxxx
            if (($byte & 0xc0) !== 0x80) {
                return false;
            }

            // Append the 6 payload bits of this continuation byte
            $charCode = ($charCode << 6) | ($byte & 0x3f);
        }

        if ($charCode < $minimum) {
            throw new RuntimeException('Illegal UTF-8 input character: ' . $char);
        }

        return $charCode;
    }

Usage Example

 /**
  * Get string with all characters defined by parameters
  *
  * Returns a string containing all UTF-8 characters starting with the
  * specified $start character up to the $end character with the step size
  * defined in $modulo.
  *
  * @param string $start First character of the range
  * @param string $end Last character of the range (included when the step lands on it)
  * @param string $modulo Step between code points, given as a hexadecimal string
  *
  * @return string
  *
  * @throws \InvalidArgumentException If $modulo does not evaluate to a positive step
  */
 protected function getModuloCharRange($start, $end, $modulo)
 {
     // NOTE(review): the converter results are used unchecked; presumably callers
     // only pass well-formed characters here. Verify against callers.
     $codepoint = $this->converter->toUnicodeCodepoint($start);
     $last = $this->converter->toUnicodeCodepoint($end);

     $step = hexdec($modulo);
     if ($step <= 0) {
         // A zero step never advances past $last, so the loop below would not terminate.
         throw new \InvalidArgumentException('Modulo must be a positive hexadecimal number, got: ' . $modulo);
     }

     $chars = '';
     while ($codepoint <= $last) {
         $chars .= $this->converter->toUTF8Character($codepoint);
         $codepoint += $step;
     }

     return $chars;
 }