ArticleIndex::ParseKeywords PHP Method

ParseKeywords() public static method

fn BuildKeywordsList
public static ParseKeywords ( array &$p_keywordsHash, $p_kwd, $p_isHTML = true )
$p_keywordsHash array
    /**
     * Scans a text byte by byte, extracts keywords and registers them through
     * self::AddKeyword().
     *
     * A candidate word is a maximal run of bytes flagged in the lookup table.
     * It is registered only when it spans more than one byte, contains at
     * least one byte that is neither a separator nor a UTF-8 continuation
     * byte, counts more than one character, and is not a numeric string
     * below 100. After the whole word has been registered, the pieces between
     * its separator bytes ($ ' - @ _) are registered as well, provided a piece
     * is longer than one byte.
     *
     * @param array  $p_keywordsHash collection handed (by reference) to self::AddKeyword()
     * @param string $p_kwd          text to scan
     * @param bool   $p_isHTML       when true, each scanned byte is passed to
     *                               self::SetHtmlTag() and word bytes seen while
     *                               the tag flag is set are ignored
     *
     * @return false|null false when $p_kwd is empty; otherwise no value is returned
     */
    public static function ParseKeywords(array &$p_keywordsHash, $p_kwd, $p_isHTML = true)
    {
        // lookup table indexed by byte value: 1 marks bytes that may be part of a word
        static $wordByteTable = array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        // bytes that may be part of a word but cannot make a word by themselves
        // ($ ' - @ _); inside a word they also mark where it is split into parts
        static $separatorBytes = array(36, 39, 45, 64, 95);

        if (empty($p_kwd)) {
            return false;
        }

        $text = $p_kwd;
        $textLen = strlen($text);
        $insideTag = 0;
        $pos = 0;       // scan cursor (byte offset)
        $wordStart = 0; // byte offset at which the current candidate word begins

        while ($pos < $textLen) {
            $wordLen = 0;          // length of the candidate word in bytes
            $letters = 0;          // characters counted (ASCII bytes and UTF-8 lead bytes)
            $hasWordChar = false;  // saw a byte able to form a word on its own
            $boundaries = array(); // offsets of separator bytes inside the word

            // Consume the run of word bytes beginning at the cursor. Both
            // counters advance on every pass, including the skipped ones.
            for (; $pos < $textLen && $wordByteTable[ord($text[$pos])]; $pos++, $wordLen++) {
                $byte = $text[$pos];
                if ($p_isHTML) {
                    self::SetHtmlTag($insideTag, $byte);
                }
                if ($insideTag) {
                    // bytes belonging to an HTML tag are not examined
                    continue;
                }

                $code = ord($byte);

                // count ASCII bytes and UTF-8 lead bytes, not continuation bytes
                if ($code < 128 || ($code >= 194 && $code <= 244)) {
                    $letters++;
                }

                if (in_array($code, $separatorBytes, true)) {
                    $boundaries[] = $wordLen;
                } elseif ($code < 128 || $code > 191) {
                    $hasWordChar = true;
                }
            }

            if ($wordLen <= 1 || !$hasWordChar) {
                // Nothing usable here: move past the following non-word bytes
                // (still tracking tag state) and restart at the next word byte.
                $wordStart = $pos;
                while ($wordStart < $textLen && !$wordByteTable[ord($text[$wordStart])]) {
                    if ($p_isHTML) {
                        self::SetHtmlTag($insideTag, $text[$wordStart]);
                    }
                    $wordStart++;
                }
                $pos = $wordStart;
                continue;
            }

            if ($insideTag) {
                continue;
            }

            $word = substr($text, $wordStart, $wordLen);

            // numbers below 100 are not keywords
            if (is_numeric($word) && $word < 100) {
                continue;
            }
            // a keyword needs more than one character
            if ($letters <= 1) {
                continue;
            }

            self::AddKeyword($p_keywordsHash, $word);

            // Register the pieces lying between separator bytes; the end of
            // the word closes the last piece.
            $boundaries[] = $wordLen;
            $partStart = 0;
            foreach ($boundaries as $boundary) {
                $partLen = $boundary - $partStart;
                if ($partLen > 1) {
                    self::AddKeyword($p_keywordsHash, substr($word, $partStart, $partLen));
                }
                $partStart = $boundary + 1;
            }
        }
    }