/**
 * Splits a sequence of characters into a list of tokens.
 *
 * A three-state machine ('character', 'tag', 'whitespace') walks the input:
 *  - a character accepted by IsStartOfTag() opens a tag token, which always
 *    begins with "<" and collects every following character until
 *    IsEndOfTag() accepts one; the token is then closed with ">";
 *  - consecutive whitespace characters are collected into one token;
 *  - consecutive characters accepted by IsAlphaNum() are collected into one
 *    token;
 *  - any other character flushes the pending token and starts a new one.
 *
 * @param array $characterString Characters to tokenize, one per element.
 *
 * @return array The tokens in input order; empty strings are never emitted.
 */
private function ConvertHtmlToListOfWords($characterString)
{
    // True when the character starts with whitespace. Written with "/"
    // delimiters on purpose: the former "[^\s]" spelling used "[" and "]" as
    // the delimiters and therefore meant exactly this, not a negated class.
    $whitespacePattern = '/^\s/';

    $mode = 'character';
    $current_word = '';
    $words = array();

    foreach ($characterString as $character) {
        switch ($mode) {
            case 'character':
                if ($this->IsStartOfTag($character)) {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    // The tag token always starts with "<", whatever the
                    // character that triggered it was.
                    $current_word = "<";
                    $mode = 'tag';
                } elseif (preg_match($whitespacePattern, $character) === 1) {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                    $mode = 'whitespace';
                } elseif (
                    $this->IsAlphaNum($character)
                    && ($current_word === '' || $this->IsAlphaNum($current_word))
                ) {
                    $current_word .= $character;
                } else {
                    // Guard against an empty buffer: the machine starts in
                    // this mode with nothing collected, and flushing then
                    // would put an empty string into the result.
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                }
                break;

            case 'tag':
                if ($this->IsEndOfTag($character)) {
                    // The tag token always ends with ">".
                    $current_word .= ">";
                    $words[] = $current_word;
                    $current_word = "";
                    // The buffer is empty at this point, so either mode
                    // handles the next character with the same result; the
                    // existing selection is kept unchanged.
                    if (preg_match($whitespacePattern, $character) !== 1) {
                        $mode = 'whitespace';
                    } else {
                        $mode = 'character';
                    }
                } else {
                    $current_word .= $character;
                }
                break;

            case 'whitespace':
                if ($this->IsStartOfTag($character)) {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = "<";
                    $mode = 'tag';
                } elseif (preg_match($whitespacePattern, $character) === 1) {
                    // Still inside the same run of whitespace.
                    $current_word .= $character;
                } else {
                    if ($current_word !== '') {
                        $words[] = $current_word;
                    }
                    $current_word = $character;
                    $mode = 'character';
                }
                break;

            default:
                break;
        }
    }

    // Flush whatever was still being collected when the input ended.
    if ($current_word !== '') {
        $words[] = $current_word;
    }

    return $words;
}