/**
 * Scans a text for words and registers each one via self::AddKeyword().
 *
 * The text is walked byte by byte. A "word" is a maximal run of bytes that are
 * flagged in the word-byte table below. A word is registered when all of the
 * following hold:
 *  - it is more than one byte long and contains at least one byte that is
 *    neither a special character ($ ' - @ _) nor a UTF-8 continuation byte;
 *  - it is not a numeric string with a value below 100;
 *  - it contains more than one character (ASCII bytes and UTF-8 lead bytes
 *    are counted as one character each).
 * After the whole word is registered, the fragments between its special
 * characters are registered as well when they are longer than one byte.
 *
 * @param array $p_keywordsHash collection handed by reference to self::AddKeyword()
 * @param mixed $p_kwd          text to scan; non-string scalars are converted to string
 * @param bool  $p_isHTML       when true every byte is passed to self::SetHtmlTag()
 *                              and bytes seen while the tag state is truthy are
 *                              not indexed
 *
 * @return bool|null false when $p_kwd is empty or an array; otherwise no value
 *                   is returned (null)
 */
public static function ParseKeywords(array &$p_keywordsHash, $p_kwd, $p_isHTML = true)
{
    // lookup table indexed by byte value: 1 marks bytes that may be part of a word
    static $wordBytes = array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    // byte values of $ ' - @ _ : they may be part of words but can not make
    // words by themselves; they also act as split points inside a word
    static $nonWordForming = array(36, 39, 45, 64, 95);

    if (empty($p_kwd) || is_array($p_kwd)) {
        return false;
    }

    // Work on a real string. Without the cast a non-string scalar (e.g. an
    // integer) yields null on offset access but a character through
    // substr(), so the two scanning loops disagree about the current byte
    // and the cursor never advances.
    $text = (string) $p_kwd;
    $textLen = strlen($text);

    // tag state maintained by self::SetHtmlTag(); its exact semantics live in
    // that helper (presumably truthy while inside an HTML tag - confirm there)
    $inHTMLTag = 0;
    $pos = 0;        // scanning cursor (byte offset)
    $wordStart = 0;  // byte offset where the current word begins

    while ($pos < $textLen) {
        $wordLen = 0;             // bytes consumed for the current word
        $charCount = 0;           // characters (not bytes) in the current word
        $validWord = false;       // set once a word-forming character is seen
        $splitPoints = array(0);  // byte offsets of special characters in the word

        // consume a run of word bytes
        while ($pos < $textLen && $wordBytes[ord($text[$pos])]) {
            $char = $text[$pos];
            if ($p_isHTML) {
                self::SetHtmlTag($inHTMLTag, $char);
            }
            if (!$inHTMLTag) {
                $charOrd = ord($char);
                // count a character for every ASCII byte or UTF-8 lead byte
                // (194-223, 224-239 and 240-244 form one contiguous range)
                if ($charOrd < 128 || ($charOrd >= 194 && $charOrd <= 244)) {
                    $charCount++;
                }
                if (in_array($charOrd, $nonWordForming, true)) {
                    $splitPoints[] = $wordLen;
                } elseif ($charOrd < 128 || $charOrd > 191) {
                    // anything except a special character or a UTF-8
                    // continuation byte (128-191) makes the word valid
                    $validWord = true;
                }
            }
            // bytes seen while the tag state is set are skipped but still
            // count towards the length of the run
            $pos++;
            $wordLen++;
        }

        if ($wordLen > 1 && $validWord) {
            // The cursor now rests on a non-word byte (or the end of the
            // text); every "continue" below therefore lands in the else
            // branch on the next pass, which resynchronises $wordStart.
            if ($inHTMLTag) {
                continue;
            }
            $splitPoints[] = $wordLen;
            $word = substr($text, $wordStart, $wordLen);
            // small numbers (below 100) are not indexed
            if (is_numeric($word) && $word < 100) {
                continue;
            }
            // single-character words are not indexed
            if ($charCount <= 1) {
                continue;
            }
            self::AddKeyword($p_keywordsHash, $word);

            // Register the fragments delimited by the special characters.
            // NOTE: when the word has no special character the only fragment
            // is the word itself, so it is passed to AddKeyword() a second
            // time. NOTE: fragment length is measured in bytes, so a single
            // multi-byte character passes the "> 1" check.
            $lastSplit = count($splitPoints) - 1;
            for ($i = 0; $i < $lastSplit; $i++) {
                // the first fragment starts at offset 0; later ones start
                // right after the special character that ended the previous
                $fragmentStart = $i == 0 ? $splitPoints[0] : $splitPoints[$i] + 1;
                $fragmentLen = $splitPoints[$i + 1] - $fragmentStart;
                if ($fragmentLen > 1) {
                    self::AddKeyword($p_keywordsHash, substr($word, $fragmentStart, $fragmentLen));
                }
            }
        } else {
            // no usable word here: skip the following non-word bytes, keeping
            // the tag state up to date, and start the next word after them
            $wordStart = $pos;
            while ($wordStart < $textLen && !$wordBytes[ord($text[$wordStart])]) {
                if ($p_isHTML) {
                    self::SetHtmlTag($inHTMLTag, $text[$wordStart]);
                }
                $wordStart++;
            }
            $pos = $wordStart;
        }
    }
}