/**
 * Tokenize a rule string into token objects.
 *
 * Scans $str from byte offset $o and appends each recognised token to
 * $tokens. Prefix modifiers (a "name:" tag, "&", "!", ".") are recorded in a
 * pending state and handed to the next token created via apply_if_present().
 * Postfix quantifiers ("?", "*", "+", "{n}", "{n,}", "{n,m}") are stored on
 * the most recently appended token.
 *
 * The method recurses for "(" groups and for the right-hand side of "|".
 * A ")" ends the current invocation, as does "|" once its right-hand side has
 * been tokenized; in the "|" case $tokens is REPLACED by a single
 * Token\Option wrapping everything seen so far and the right-hand side.
 *
 * @param string $str    Rule text to tokenize.
 * @param array  $tokens Token list, passed by reference and appended to (or
 *                       replaced, see above). Null is treated as empty.
 * @param int    $o      Byte offset in $str at which scanning starts.
 *
 * @return int Byte offset just past the last character consumed.
 *
 * @throws \Exception If a "{...}" quantifier is malformed, or if any
 *                    quantifier appears with no preceding token to apply to.
 */
function tokenize($str, &$tokens, $o = 0)
{
    // count() on null is a TypeError on PHP 8; treat "no list yet" as empty.
    if ($tokens === null) {
        $tokens = array();
    }

    $length = strlen($str);
    $pending = new Rule\PendingState();

    while ($o < $length) {
        /* Absorb white-space */
        if (preg_match('/\\G\\s+/', $str, $match, 0, $o)) {
            $o += strlen($match[0]);
        } elseif (preg_match('/\\G(\\w*):/', $str, $match, 0, $o)) {
            /* "name:" (or bare ":") - tag for the next token */
            $pending->set('tag', isset($match[1]) ? $match[1] : '');
            $o += strlen($match[0]);
        } elseif (preg_match('/\\G[\\w-]+/', $str, $match, 0, $o)) {
            /* Bare identifier - reference to another rule */
            $tokens[] = $t = new Token\Recurse($match[0]);
            $pending->apply_if_present($t);
            $o += strlen($match[0]);
        } elseif (preg_match('/\\G"[^"]*"/', $str, $match, 0, $o)) {
            /* Double-quoted literal (quotes are passed through to the token) */
            $tokens[] = $t = new Token\Literal($match[0]);
            $pending->apply_if_present($t);
            $o += strlen($match[0]);
        } elseif (preg_match("/\\G'[^']*'/", $str, $match, 0, $o)) {
            /* Single-quoted literal (quotes are passed through to the token) */
            $tokens[] = $t = new Token\Literal($match[0]);
            $pending->apply_if_present($t);
            $o += strlen($match[0]);
        } elseif (preg_match(self::$rx_rx, $str, $match, 0, $o)) {
            /* Regular expression, as recognised by self::$rx_rx */
            $tokens[] = $t = new Token\Regex($match[0]);
            $pending->apply_if_present($t);
            $o += strlen($match[0]);
        } elseif (preg_match('/\\G\\$(\\w+)/', $str, $match, 0, $o)) {
            /* "$name" - only the name (without the "$") is given to the token */
            $tokens[] = $t = new Token\ExpressionedRecurse($match[1]);
            $pending->apply_if_present($t);
            $o += strlen($match[0]);
        } elseif (preg_match('/\\G\\@(\\w+)/', $str, $match, 0, $o)) {
            /* "@flag" - consumed but otherwise ignored for now */
            $o += strlen($match[0]);
            user_error("TODO: Flags not currently supported", E_USER_WARNING);
        } else {
            /* Single-character operators */
            $c = substr($str, $o, 1);
            $l = count($tokens) - 1;
            $o += 1;

            // A quantifier modifies the previous token, so one must exist.
            // Without this check $tokens[-1] would be written to, which
            // either fabricates a stray object or raises an Error.
            if ($l < 0 && in_array($c, array('?', '*', '+', '{'), true)) {
                throw new \Exception(sprintf("Quantifier without a preceding token: %s", substr($str, $o - 1, 10)));
            }

            switch ($c) {
                case '?':
                    $tokens[$l]->quantifier = array('min' => 0, 'max' => 1);
                    break;
                case '*':
                    $tokens[$l]->quantifier = array('min' => 0, 'max' => null);
                    break;
                case '+':
                    $tokens[$l]->quantifier = array('min' => 1, 'max' => null);
                    break;
                case '{':
                    // $o already points past the "{", so match from $o - 1.
                    if (!preg_match('/\\G\\{([0-9]+)(,([0-9]*))?\\}/', $str, $matches, 0, $o - 1)) {
                        throw new \Exception(sprintf("Unknown quantifier: %s", substr($str, $o - 1, 10)));
                    }
                    // "{n}" means exactly n; "{n,}" means n or more;
                    // "{n,m}" means between n and m.
                    $min = $max = (int) $matches[1];
                    if (isset($matches[2])) {
                        // Compare against '' rather than truthiness so that an
                        // explicit upper bound of "0" is not taken as unbounded.
                        $max = $matches[3] !== '' ? (int) $matches[3] : null;
                    }
                    $tokens[$l]->quantifier = array('min' => $min, 'max' => $max);
                    // The leading "{" was already counted by the += 1 above.
                    $o += strlen($matches[0]) - 1;
                    break;
                case '&':
                    $pending->set('positive_lookahead');
                    break;
                case '!':
                    $pending->set('negative_lookahead');
                    break;
                case '.':
                    $pending->set('silent');
                    break;
                case '[':
                case ']':
                    $tokens[] = new Token\Whitespace(FALSE);
                    break;
                case '<':
                case '>':
                    $tokens[] = new Token\Whitespace(TRUE);
                    break;
                case '(':
                    // Tokenize the group body; the recursive call returns
                    // the offset just past the matching ")".
                    $subtokens = array();
                    $o = $this->tokenize($str, $subtokens, $o);
                    $tokens[] = $t = new Token\Sequence($subtokens);
                    $pending->apply_if_present($t);
                    break;
                case ')':
                    return $o;
                case '|':
                    // Everything collected so far is the left alternative;
                    // the rest of this group (or string) is the right one.
                    $option1 = $tokens;
                    $option2 = array();
                    $o = $this->tokenize($str, $option2, $o);
                    // A lone token stands for itself; anything else is
                    // wrapped in a Sequence.
                    $option1 = count($option1) === 1 ? $option1[0] : new Token\Sequence($option1);
                    $option2 = count($option2) === 1 ? $option2[0] : new Token\Sequence($option2);
                    $pending->apply_if_present($option2);
                    $tokens = array(new Token\Option($option1, $option2));
                    return $o;
                default:
                    user_error("Can't parser {$c} - attempting to skip", E_USER_WARNING);
            }
        }
    }

    return $o;
}