cebe\jssearch\tokenizer\StandardTokenizer::tokenize PHP Method

tokenize() public method

Tokenizes a string and returns an array of the following format: [['t' => 'word', 'w' => 2], ['t' => 'other', 'w' => 1]] where the first part ('t') is the token string and the second ('w') is a weight value. Also removes [[stopWords]] from the list.
public tokenize ( string $string ) : array
$string string the string to tokenize
return array
    /**
     * Splits a string into lower-cased tokens and removes stop words.
     *
     * The input is split on runs of whitespace and of the characters listed in
     * [[delimiters]]. Each remaining word is lower-cased as UTF-8, dropped if it
     * is contained in [[stopWords]], and otherwise returned with a weight of 1.
     *
     * @param string $string the string to tokenize
     * @return array a sequentially indexed list of tokens in the format
     * `[['t' => 'word', 'w' => 1], ['t' => 'other', 'w' => 1]]`, where `t` is
     * the token string and `w` is its weight.
     */
    public function tokenize($string)
    {
        $delimiters = preg_quote($this->delimiters, '/');

        // Split in Unicode mode whenever possible so that multi-byte delimiters
        // and Unicode whitespace are matched as whole characters instead of as
        // individual bytes. The "u" modifier makes PCRE reject invalid UTF-8, so
        // it is only enabled when both the subject and the delimiters are valid
        // UTF-8; otherwise the byte-wise pattern is used.
        $isUtf8 = mb_check_encoding($string, 'UTF-8') && mb_check_encoding($delimiters, 'UTF-8');
        $modifier = $isUtf8 ? 'u' : '';
        $words = preg_split("/[\\s{$delimiters}]+/{$modifier}", $string, -1, PREG_SPLIT_NO_EMPTY);

        $stopWords = $this->stopWords;
        $tokens = [];
        foreach ($words as $word) {
            $word = mb_strtolower($word, 'UTF-8');
            // Strict comparison: a loose in_array() compares numeric-looking
            // strings by value, so e.g. the stop word "1e3" would also remove
            // the token "1000".
            if (in_array($word, $stopWords, true)) {
                continue;
            }
            // Appending keeps the result a gap-free list even when stop words
            // were removed in the middle of the input.
            $tokens[] = ['t' => $word, 'w' => 1];
        }

        return $tokens;
    }
StandardTokenizer