/**
 * Split a text into word and punctuation tokens.
 *
 * The text is cut at leading punctuation, at whitespace runs (together with
 * any punctuation touching them) and at trailing punctuation. The delimiters
 * themselves are kept as tokens; every token is trimmed and tokens that are
 * empty after trimming (i.e. whitespace-only delimiters) are discarded.
 *
 * @param string $text Text to tokenize.
 *
 * @return array List of non-empty tokens, indexed from 0.
 *
 * @throws \RuntimeException When the text cannot be split by the regex engine.
 */
public function get_words($text)
{
    $pattern = '/(?:(^\\p{P}+)|(\\p{P}*\\s+\\p{P}*)|(\\p{P}+$))/';
    $flags = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE;

    // Match in Unicode mode first: byte-wise matching treats some UTF-8
    // continuation bytes (0xA1, 0xAB, 0xBB, 0xBF, ...) as punctuation and
    // would cut multi-byte characters such as "á" in half.
    $unicode = true;
    $words = preg_split($pattern . 'u', $text, -1, $flags);

    if ($words === false) {
        // Most likely the subject is not valid UTF-8: fall back to the
        // byte-wise pattern so legacy-encoded input is still tokenized.
        $unicode = false;
        $words = preg_split($pattern, $text, -1, $flags);
    }

    if ($words === false) {
        throw new \RuntimeException('Cannot split the text into words (PCRE error code ' . preg_last_error() . ')');
    }

    $tokens = [];
    foreach ($words as $word) {
        $word = trim($word);
        if ($unicode) {
            // In Unicode mode \s also matches characters trim() ignores
            // (e.g. no-break space), so strip those from the edges as well.
            $stripped = preg_replace('/^\\s+|\\s+$/u', '', $word);
            if ($stripped !== null) {
                $word = $stripped;
            }
        }

        // Compare against '' explicitly: a plain truthiness filter would
        // also discard the legitimate token "0".
        if ($word !== '') {
            $tokens[] = $word;
        }
    }

    return $tokens;
}
/**
 * Load the French stop-word and common-word tables, then tokenize the text.
 *
 * The language is fixed to "french". The stop words for that language are
 * taken from getStopwords() and stored in $this->stopword; the entries of
 * Stopword/common-french.txt (one per line) are passed through
 * normalize_keywords() and stored as the keys of $this->common_words.
 * Tokenization itself is delegated to the parent implementation.
 *
 * @param string $text Text to tokenize.
 *
 * @return mixed Whatever parent::get_words() returns for $text.
 *
 * @throws \RuntimeException When no stop words are available for French or
 *                           the common-words list cannot be read.
 */
public function get_words($text)
{
    // NOTE(review): the classifier is not used here because the language is
    // hard-coded; the call is kept in case getClassifier() has side effects
    // (e.g. lazy initialisation) that other code relies on. Confirm and drop.
    $this->getClassifier();

    $stopwords = $this->getStopwords();
    $lang = 'french';

    if (empty($stopwords[$lang])) {
        throw new \RuntimeException("We dont have an stop word for {$lang}, please add it in " . __DIR__ . "/Stopword/{$lang}-stopwords.txt and run generate.php");
    }

    $this->stopword = $stopwords[$lang];
    $this->common_words = [];

    $path = __DIR__ . '/Stopword/common-french.txt';
    $lines = file($path, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        // file() signals failure with false; fail loudly instead of handing
        // a non-array to normalize_keywords().
        throw new \RuntimeException("Cannot read the common words list from {$path}");
    }

    // Store the words as keys so membership can be checked with isset().
    foreach ($this->normalize_keywords($lines) as $cw) {
        $this->common_words[$cw] = 1;
    }

    $this->lang = $lang;

    return parent::get_words($text);
}