/**
 * Tries to read a numeric literal starting at the current position.
 *
 * Recognised forms are signed/unsigned integers, decimals (`1.5`, `.5`,
 * `1.`), numbers with an exponent (`1e5`, `1.5E-3`), hexadecimal literals
 * (`0x1F`) and bit-value literals (`b'0101'`).
 *
 * On success `$this->last` is left on the last character of the literal;
 * on failure it is restored to the value it had on entry.
 *
 * @return Token|null a `Token::TYPE_NUMBER` token carrying the matching
 *                    `Token::FLAG_NUMBER_*` flags, or null when the input
 *                    at the current position is not a number
 */
public function parseNumber()
{
    // A rudimentary state machine is being used to parse numbers due to
    // the various forms of their notation.
    //
    // Below are the states of the machine and the conditions to change
    // the state.
    //
    //      1 --------------------[ + or - ]-------------------> 1
    //      1 -------------------[ 0x or 0X ]------------------> 2
    //      1 --------------------[ 0 to 9 ]-------------------> 3
    //      1 -----------------------[ . ]---------------------> 4
    //      1 -----------------------[ b ]---------------------> 7
    //
    //      2 --------------------[ 0 to F ]-------------------> 2
    //
    //      3 --------------------[ 0 to 9 ]-------------------> 3
    //      3 -----------------------[ . ]---------------------> 4
    //      3 --------------------[ e or E ]-------------------> 5
    //
    //      4 --------------------[ 0 to 9 ]-------------------> 4
    //      4 --------------------[ e or E ]-------------------> 5
    //
    //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
    //
    //      6 --------------------[ 0 to 9 ]-------------------> 6
    //
    //      7 -----------------------[ ' ]---------------------> 8
    //
    //      8 --------------------[ 0 or 1 ]-------------------> 8
    //      8 -----------------------[ ' ]---------------------> 9
    //
    //      9 ------------------[ anything ]-------------------> stop
    //
    // State 1 is the initial state; it loops on leading signs.
    // State 2 is reached only by hex numbers.
    // State 4 is reached only by float numbers.
    // State 5 is reached only by numbers in approximate form.
    // State 7 is reached only by numbers in bit representation.
    //
    // Valid final states are: 2, 3, 4, 6 and 9. Any parsing that finished
    // in a state other than these is invalid.
    //
    // The type flags are set at the moment a state is entered, so that a
    // literal which ends exactly at the end of the input (e.g. `1.` or
    // `0x`) is flagged the same way as one followed by more text.
    $iBak = $this->last;
    $token = '';
    $flags = 0;
    $state = 1;

    for (; $this->last < $this->len; ++$this->last) {
        $char = $this->str[$this->last];

        if ($state === 1) {
            if ($char === '-') {
                $flags |= Token::FLAG_NUMBER_NEGATIVE;
            } elseif (
                $char === '0'
                && $this->last + 1 < $this->len
                && ($this->str[$this->last + 1] === 'x' || $this->str[$this->last + 1] === 'X')
            ) {
                // The prefix is two characters long: store the `0` now and
                // step onto the `x`, which is appended at the bottom of the
                // loop like any other accepted character.
                $token .= $char;
                $char = $this->str[++$this->last];
                $flags |= Token::FLAG_NUMBER_HEX;
                $state = 2;
            } elseif ($char >= '0' && $char <= '9') {
                $state = 3;
            } elseif ($char === '.') {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                $state = 4;
            } elseif ($char === 'b') {
                $flags |= Token::FLAG_NUMBER_BINARY;
                $state = 7;
            } elseif ($char !== '+') {
                // `+` is a valid character in a number.
                break;
            }
        } elseif ($state === 2) {
            $isHexDigit = ($char >= '0' && $char <= '9')
                || ($char >= 'A' && $char <= 'F')
                || ($char >= 'a' && $char <= 'f');
            if (!$isHexDigit) {
                break;
            }
        } elseif ($state === 3) {
            if ($char === '.') {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                $state = 4;
            } elseif ($char === 'e' || $char === 'E') {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                $state = 5;
            } elseif ($char < '0' || $char > '9') {
                // Just digits and `.`, `e` and `E` are valid characters.
                break;
            }
        } elseif ($state === 4) {
            if ($char === 'e' || $char === 'E') {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                $state = 5;
            } elseif ($char < '0' || $char > '9') {
                // Just digits, `e` and `E` are valid characters.
                break;
            }
        } elseif ($state === 5) {
            // NOTE: a bare sign is enough to reach the accepting state 6,
            // so `1e+` is currently accepted as a number.
            if ($char === '+' || $char === '-' || ($char >= '0' && $char <= '9')) {
                $state = 6;
            } else {
                break;
            }
        } elseif ($state === 6) {
            if ($char < '0' || $char > '9') {
                // Just digits are valid characters.
                break;
            }
        } elseif ($state === 7) {
            if ($char === '\'') {
                $state = 8;
            } else {
                break;
            }
        } elseif ($state === 8) {
            if ($char === '\'') {
                $state = 9;
            } elseif ($char !== '0' && $char !== '1') {
                break;
            }
        } elseif ($state === 9) {
            // The closing quote has been consumed; nothing else belongs to
            // the literal.
            break;
        }

        $token .= $char;
    }

    // In state 4 the token is made of optional signs, optional digits, a
    // dot and optional digits. A dot without any digit (`.`, `-.`, `+.`)
    // is not a number, hence the signs are stripped before comparing.
    $isValid = $state === 2
        || $state === 3
        || ($state === 4 && ltrim($token, '+-') !== '.')
        || $state === 6
        || $state === 9;

    if ($isValid) {
        // The loop stopped one position past the literal (either on the
        // offending character or at the end of the input).
        --$this->last;

        return new Token($token, Token::TYPE_NUMBER, $flags);
    }

    $this->last = $iBak;

    return null;
}