phpbb\search\fulltext_native::split_keywords PHP Method

split_keywords() public method

If $terms is 'any', the words are extracted from the search query and combined with | inside brackets; they are afterwards treated like a standard search query. The method then analyses the query and fills the internal arrays $must_not_contain_ids, $must_contain_ids and $must_exclude_one_ids, which are later used by keyword_search().
public split_keywords ( string $keywords, string $terms ) : boolean
$keywords string contains the search query string as entered by the user
$terms string is either 'all' (use search query as entered, default words to 'must be contained in post') or 'any' (find all posts containing at least one of the given words)
return boolean false if no valid keywords were found and otherwise true
    /**
     * Splits a search query into keywords and records how each one must match.
     *
     * The query is cleaned up and normalised: words inside a bracketed group
     * are joined with '|', everything else is separated by single spaces. If
     * $terms is 'any', all words are merged into one bracketed group. The words
     * are then looked up in the search word list and the arrays
     * $must_contain_ids, $must_not_contain_ids and $must_exclude_one_ids are
     * filled. Words flagged as common, or outside the configured length limits,
     * are appended to $common_words. The normalised query is stored in
     * $search_query.
     *
     * Calls trigger_error() when the query holds more keywords than
     * $config['max_num_search_keywords'] allows, or when a required group
     * contains only words that occur in no post.
     *
     * @param string $keywords search query string as entered by the user
     * @param string $terms    'any' to find posts containing at least one of the
     *                         words; any other value uses the query as entered
     *
     * @return bool false if no valid keywords were found, true otherwise
     */
    public function split_keywords($keywords, $terms)
    {
        $tokens = '+-|()*';
        $keywords = trim($this->cleanup($keywords, $tokens));

        // allow word|word|word without brackets
        if (strpos($keywords, ' ') === false && strpos($keywords, '|') !== false && strpos($keywords, '(') === false) {
            $keywords = '(' . $keywords . ')';
        }

        // $open_bracket is the offset of the currently open '(' (false outside
        // a group); $space remembers the last '+'/'-' seen outside a group.
        $open_bracket = $space = false;
        for ($i = 0, $n = strlen($keywords); $i < $n; $i++) {
            if ($open_bracket !== false) {
                // inside a group every separator becomes '|'
                switch ($keywords[$i]) {
                    case ')':
                        if ($open_bracket + 1 == $i) {
                            // empty group "()": turn both brackets into separators
                            $keywords[$i - 1] = '|';
                            $keywords[$i] = '|';
                        }
                        $open_bracket = false;
                        break;
                    case '(':
                    case '+':
                    case '-':
                    case ' ':
                        $keywords[$i] = '|';
                        break;
                    case '*':
                        // A wildcard standing alone between two tokens matches
                        // nothing useful. It is overwritten with '|' instead of
                        // being cut out of the string: removing a character
                        // here would shift the remaining characters under the
                        // loop index, so the next character would be skipped
                        // and the last iteration would read past the end. The
                        // surplus '|' is collapsed by the clean-up below.
                        $prev_is_token = $i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0);
                        $next_is_token = $i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0);
                        if ($prev_is_token && $next_is_token) {
                            $keywords[$i] = '|';
                        }
                        break;
                }
            } else {
                switch ($keywords[$i]) {
                    case ')':
                        // closing bracket without an opening one
                        $keywords[$i] = ' ';
                        break;
                    case '(':
                        $open_bracket = $i;
                        $space = false;
                        break;
                    case '|':
                        $keywords[$i] = ' ';
                        break;
                    case '-':
                    case '+':
                        $space = $keywords[$i];
                        break;
                    case ' ':
                        // "+ word" / "- word": pull the operator up to the word
                        if ($space !== false) {
                            $keywords[$i] = $space;
                        }
                        break;
                    default:
                        $space = false;
                }
            }
        }

        // Close a group that was left open. The strict comparison matters:
        // a bracket opened at offset 0 is a valid (but falsy) position.
        if ($open_bracket !== false) {
            $keywords .= ')';
        }

        // collapse repeated spaces, pipes and operators; drop pipes next to brackets
        $match = array('#  +#', '#\\|\\|+#', '#(\\+|\\-)(?:\\+|\\-)+#', '#\\(\\|#', '#\\|\\)#');
        $replace = array(' ', '|', '$1', '(', ')');
        $keywords = preg_replace($match, $replace, $keywords);
        $num_keywords = count(explode(' ', $keywords));

        // We limit the number of allowed keywords to minimize load on the database
        if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords']) {
            trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
        }

        // $keywords input format: each word separated by a space, words in a bracket are not separated
        // the user wants to search for any word, convert the search query
        if ($terms == 'any') {
            $words = array();
            preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
            if (count($words[1])) {
                $keywords = '(' . implode('|', $words[1]) . ')';
            }
        }

        // set the search_query which is shown to the user
        $this->search_query = $keywords;

        $exact_words = array();
        preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
        $exact_words = $exact_words[1];

        $common_ids = $words = array();
        if (count($exact_words)) {
            $sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . '
				ORDER BY word_count ASC';
            $result = $this->db->sql_query($sql);

            // store an array of words and ids, remove common words
            while ($row = $this->db->sql_fetchrow($result)) {
                if ($row['word_common']) {
                    $this->common_words[] = $row['word_text'];
                    $common_ids[$row['word_text']] = (int) $row['word_id'];
                    continue;
                }
                $words[$row['word_text']] = (int) $row['word_id'];
            }
            $this->db->sql_freeresult($result);
        }

        // Handle +, - without preceding whitespace character
        // NOTE(review): both patterns insert ' +', so a '-' attached to a
        // preceding character (e.g. inside a hyphenated word) is treated as an
        // inclusion rather than an exclusion - presumably intentional; confirm.
        $match = array('#(\\S)\\+#', '#(\\S)-#');
        $replace = array('$1 +', '$1 +');
        $keywords = preg_replace($match, $replace, $keywords);

        // now analyse the search query, first split it using the spaces
        $query = explode(' ', $keywords);
        $this->must_contain_ids = array();
        $this->must_not_contain_ids = array();
        $this->must_exclude_one_ids = array();
        $mode = '';
        $ignore_no_id = true;

        foreach ($query as $word) {
            if (empty($word)) {
                continue;
            }

            // words which should not be included
            if ($word[0] == '-') {
                // the cast keeps $word a string when nothing follows the operator
                $word = (string) substr($word, 1);

                // a group of which at least one may not be in the resulting posts
                if (isset($word[0]) && $word[0] == '(') {
                    $word = array_unique(explode('|', (string) substr($word, 1, -1)));
                    $mode = 'must_exclude_one';
                } else {
                    // one word which should not be in the resulting posts
                    $mode = 'must_not_contain';
                }
                $ignore_no_id = true;
            } else {
                // no prefix is the same as a +prefix
                if ($word[0] == '+') {
                    $word = (string) substr($word, 1);
                }

                // a group of words of which at least one word should be in every resulting post
                if (isset($word[0]) && $word[0] == '(') {
                    $word = array_unique(explode('|', (string) substr($word, 1, -1)));
                }
                $ignore_no_id = false;
                $mode = 'must_contain';
            }

            // nothing left after stripping the operator
            if (empty($word)) {
                continue;
            }

            // if this is an array of words then retrieve an id for each
            if (is_array($word)) {
                $non_common_words = array();
                $id_words = array();
                foreach ($word as $word_part) {
                    if (strpos($word_part, '*') !== false) {
                        // apply the same length limits as for a single
                        // wildcard word further down
                        $len = utf8_strlen(str_replace('*', '', $word_part));
                        if ($len >= $this->word_length['min'] && $len <= $this->word_length['max']) {
                            $id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
                            $non_common_words[] = $word_part;
                        } else {
                            $this->common_words[] = $word_part;
                        }
                    } else if (isset($words[$word_part])) {
                        $id_words[] = $words[$word_part];
                        $non_common_words[] = $word_part;
                    } else {
                        $len = utf8_strlen($word_part);
                        if ($len < $this->word_length['min'] || $len > $this->word_length['max']) {
                            $this->common_words[] = $word_part;
                        }
                    }
                }

                if (count($id_words)) {
                    sort($id_words);
                    if (count($id_words) > 1) {
                        $this->{$mode . '_ids'}[] = $id_words;
                    } else {
                        // a group with a single usable word behaves like a plain word
                        $mode = $mode == 'must_exclude_one' ? 'must_not_contain' : $mode;
                        $this->{$mode . '_ids'}[] = $id_words[0];
                    }
                } else if (!$ignore_no_id && count($non_common_words)) {
                    // a required group whose words exist in no post
                    trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
                }
                unset($non_common_words);
            } else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word])) {
                // else we only need one id
                if ($wildcard) {
                    $len = utf8_strlen(str_replace('*', '', $word));
                    if ($len >= $this->word_length['min'] && $len <= $this->word_length['max']) {
                        $this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
                    } else {
                        $this->common_words[] = $word;
                    }
                } else {
                    $this->{$mode . '_ids'}[] = $words[$word];
                }
            } else if (!isset($common_ids[$word])) {
                // unknown word: only report it when its length is out of range
                $len = utf8_strlen($word);
                if ($len < $this->word_length['min'] || $len > $this->word_length['max']) {
                    $this->common_words[] = $word;
                }
            }
        }

        // Return true if all words are not common words
        return count($exact_words) - count($this->common_words) > 0;
    }