/**
 * Normalises a search query, looks its words up in the search word list and
 * fills the id lists used to build the actual search.
 *
 * The query is cleaned with $this->cleanup(), bracket groups are rewritten to
 * the form "(a|b|c)", and the result is stored in $this->search_query. Every
 * space separated term is then sorted into one of $this->must_contain_ids,
 * $this->must_not_contain_ids or $this->must_exclude_one_ids. Words flagged
 * as common in the word list, and unknown or wildcard words whose length is
 * outside $this->word_length, are appended to $this->common_words.
 *
 * A trigger_error() is raised when the number of space separated terms
 * exceeds the 'max_num_search_keywords' config value (if that value is set).
 *
 * @param string $keywords The raw search query
 * @param string $terms    'any' turns the whole query into one "(a|b|c)" group;
 *                         any other value keeps the query structure as entered
 *
 * @return bool True if there are more distinct query words than collected
 *              common words, false otherwise
 */
public function split_keywords($keywords, $terms)
{
	$tokens = '+-|()*';
	$keywords = trim($this->cleanup($keywords, $tokens));

	// allow word|word|word without brackets
	if (strpos($keywords, ' ') === false && strpos($keywords, '|') !== false && strpos($keywords, '(') === false) {
		$keywords = '(' . $keywords . ')';
	}

	// Inside a bracket a blank separates alternatives just like the operator
	// tokens do, so it counts as a boundary when looking for stand-alone stars.
	$separators = $tokens . ' ';

	$open_bracket = $space = false;
	for ($i = 0, $n = strlen($keywords); $i < $n; $i++) {
		if ($open_bracket !== false) {
			// inside a bracket: every separator becomes '|'
			switch ($keywords[$i]) {
				case ')':
					// empty bracket "()": turn both characters into separators
					if ($open_bracket + 1 == $i) {
						$keywords[$i - 1] = '|';
						$keywords[$i] = '|';
					}
					$open_bracket = false;
					break;
				case '(':
				case '+':
				case '-':
				case ' ':
					$keywords[$i] = '|';
					break;
				case '*':
					// $i is always >= 1 here because a bracket must have been opened first.
					$previous = $keywords[$i - 1];
					$next = ($i + 1 < $n) ? $keywords[$i + 1] : '';

					$bounded_left = $previous !== '*' && strpos($separators, $previous) !== false;
					$bounded_right = $next === '' || ($next !== '*' && strpos($separators, $next) !== false);

					// A star that is not attached to any word is dropped. It is
					// overwritten with a separator instead of being cut out of the
					// string so that $i and $n stay valid; duplicate separators
					// are collapsed by the preg_replace() below.
					if ($bounded_left && $bounded_right) {
						$keywords[$i] = '|';
					}
					break;
			}
		} else {
			// outside a bracket
			switch ($keywords[$i]) {
				case ')':
					$keywords[$i] = ' ';
					break;
				case '(':
					$open_bracket = $i;
					$space = false;
					break;
				case '|':
					$keywords[$i] = ' ';
					break;
				case '-':
				case '+':
					// remember the operator so blanks following it are replaced by it
					$space = $keywords[$i];
					break;
				case ' ':
					if ($space !== false) {
						$keywords[$i] = $space;
					}
					break;
				default:
					$space = false;
			}
		}
	}

	// Close a bracket that was left open. The comparison must be strict
	// because a bracket opened at offset 0 is a valid, but falsy, position.
	if ($open_bracket !== false) {
		$keywords .= ')';
	}

	// collapse repeated blanks, separators and operators, trim separators at bracket edges
	$match = array('# +#', '#\\|\\|+#', '#(\\+|\\-)(?:\\+|\\-)+#', '#\\(\\|#', '#\\|\\)#');
	$replace = array(' ', '|', '$1', '(', ')');
	$keywords = preg_replace($match, $replace, $keywords);
	$num_keywords = sizeof(explode(' ', $keywords));

	// We limit the number of allowed keywords to minimize load on the database
	if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords']) {
		trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
	}

	// $keywords input format: each word separated by a space, words in a bracket are not separated
	// the user wants to search for any word, convert the search query
	if ($terms == 'any') {
		$words = array();
		preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
		if (sizeof($words[1])) {
			$keywords = '(' . implode('|', $words[1]) . ')';
		}
	}

	// set the search_query which is shown to the user
	$this->search_query = $keywords;

	$exact_words = array();
	preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
	$exact_words = $exact_words[1];

	$common_ids = $words = array();
	if (sizeof($exact_words)) {
		$sql = "SELECT word_id, word_text, word_common\n"
			. 'FROM ' . SEARCH_WORDLIST_TABLE . "\n"
			. 'WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . "\n"
			. 'ORDER BY word_count ASC';
		$result = $this->db->sql_query($sql);

		// store an array of words and ids, remove common words
		while ($row = $this->db->sql_fetchrow($result)) {
			if ($row['word_common']) {
				$this->common_words[] = $row['word_text'];
				$common_ids[$row['word_text']] = (int) $row['word_id'];
				continue;
			}
			$words[$row['word_text']] = (int) $row['word_id'];
		}
		$this->db->sql_freeresult($result);
	}

	// Handle +, - without preceding whitespace character
	$match = array('#(\\S)\\+#', '#(\\S)-#');
	$replace = array('$1 +', '$1 +');
	$keywords = preg_replace($match, $replace, $keywords);

	// now analyse the search query, first split it using the spaces
	$query = explode(' ', $keywords);
	$this->must_contain_ids = array();
	$this->must_not_contain_ids = array();
	$this->must_exclude_one_ids = array();
	$mode = '';
	$ignore_no_id = true;

	foreach ($query as $word) {
		if (empty($word)) {
			continue;
		}

		// words which should not be included
		if ($word[0] == '-') {
			// Cast because substr() returns false on older PHP versions when
			// nothing is left; the emptiness is checked before reading offset 0.
			$word = (string) substr($word, 1);

			// a group of which at least one may not be in the resulting posts
			if ($word !== '' && $word[0] == '(') {
				$word = array_unique(explode('|', substr($word, 1, -1)));
				$mode = 'must_exclude_one';
			} else {
				// one word which should not be in the resulting posts
				$mode = 'must_not_contain';
			}
			$ignore_no_id = true;
		} else {
			// no prefix is the same as a +prefix
			if ($word[0] == '+') {
				$word = (string) substr($word, 1);
			}

			// a group of words of which at least one word should be in every resulting post
			if ($word !== '' && $word[0] == '(') {
				$word = array_unique(explode('|', substr($word, 1, -1)));
			}
			$ignore_no_id = false;
			$mode = 'must_contain';
		}

		// nothing left after stripping the operator prefix
		if (empty($word)) {
			continue;
		}

		// if this is an array of words then retrieve an id for each
		if (is_array($word)) {
			$non_common_words = array();
			$id_words = array();

			foreach ($word as $word_part) {
				if (strpos($word_part, '*') !== false) {
					// Apply the same length limits as for a single wildcard word
					// so that a bare "*" cannot turn into a match-all pattern.
					$len = utf8_strlen(str_replace('*', '', $word_part));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max']) {
						$id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					} else {
						$this->common_words[] = $word_part;
					}
				} else if (isset($words[$word_part])) {
					$id_words[] = $words[$word_part];
					$non_common_words[] = $word_part;
				} else {
					$len = utf8_strlen($word_part);
					if ($len < $this->word_length['min'] || $len > $this->word_length['max']) {
						$this->common_words[] = $word_part;
					}
				}
			}

			if (sizeof($id_words)) {
				sort($id_words);
				if (sizeof($id_words) > 1) {
					$this->{$mode . '_ids'}[] = $id_words;
				} else {
					// a group with a single usable entry behaves like a plain word
					$mode = $mode == 'must_exclude_one' ? 'must_not_contain' : $mode;
					$this->{$mode . '_ids'}[] = $id_words[0];
				}
			} else if (!$ignore_no_id && sizeof($non_common_words)) {
				// NOTE(review): $non_common_words is only ever filled together with
				// $id_words above, so this error looks unreachable as written.
				trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
			}
			unset($non_common_words);
		} else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word])) {
			// else we only need one id
			if ($wildcard) {
				$len = utf8_strlen(str_replace('*', '', $word));
				if ($len >= $this->word_length['min'] && $len <= $this->word_length['max']) {
					$this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
				} else {
					$this->common_words[] = $word;
				}
			} else {
				$this->{$mode . '_ids'}[] = $words[$word];
			}
		} else if (!isset($common_ids[$word])) {
			// unknown word: only report it as common when its length is out of range
			$len = utf8_strlen($word);
			if ($len < $this->word_length['min'] || $len > $this->word_length['max']) {
				$this->common_words[] = $word;
			}
		}
	}

	// Return true if all words are not common words
	return sizeof($exact_words) - sizeof($this->common_words) > 0;
}