crodas\TextRank\TextRank::getAllKeywordsSorted PHP Method

getAllKeywordsSorted() public method

public getAllKeywordsSorted ( $text )
    /**
     * Extract the keywords of a text, including short compound phrases, and
     * return them ordered by score (highest first).
     *
     * Steps:
     *  1. The "get_words", "filter_keywords" and "normalize_keywords" events
     *     turn the text into words, candidate keywords and normalized
     *     candidates respectively.
     *  2. The normalized candidates are ranked with PageRank.
     *  3. Runs of 2 or 3 consecutive candidates that all belong to the ten
     *     best-ranked keywords are collapsed into a phrase, unless the
     *     original words spanned by the run contain a punctuation-only token.
     *  4. Single keywords are mapped back from their normalized form to the
     *     word as it appears in the text.
     *
     * @param mixed $text Input handed unchanged to the "get_words" event.
     *
     * @return array Map of keyword (or phrase) => score, sorted by score in
     *               descending order. Empty when the ranking result equals the
     *               normalized candidates, which is treated as a ranking failure.
     *
     * @throws \RuntimeException When "normalize_keywords" returns a different
     *                           number of items than it received.
     */
    public function getAllKeywordsSorted($text)
    {
        // split the text into words
        $words = $this->config->trigger('get_words', $text);
        // get the candidates
        $keywords = $this->config->trigger('filter_keywords', $words);
        // normalize each candidate
        $normalized = $this->config->trigger('normalize_keywords', $keywords);
        if (count($keywords) !== count($normalized)) {
            throw new \RuntimeException("{normalize_keywords} event returned invalid data");
        }

        $graph = new PageRank();
        $sorted = $graph->sort(array_values($normalized), true);
        // NOTE(review): this compares against $normalized, not the re-indexed
        // array_values($normalized) that was passed to sort(); confirm that is
        // the intended failure check when $normalized has sparse keys.
        if ($sorted == $normalized) {
            // PageRank failed, probably because the input was invalid
            return [];
        }

        // Keep the keys: keywords that look like integers ("2020") are stored
        // as integer keys and would otherwise be renumbered by array_slice(),
        // which both loses them and lets short numbers match unrelated slots.
        $top = array_slice($sorted, 0, 10, true);

        // search for compound keywords: collect runs of consecutive candidates
        // that are all top-ranked, remembering each one's position and word
        $phrases = [];
        $run = [];
        foreach ($normalized as $pos => $word) {
            if (empty($top[$word])) {
                // the run is broken; keep it only if it has 2 or 3 members
                if (count($run) > 1 && count($run) < 4) {
                    $phrases[] = $run;
                }
                $run = [];
                continue;
            }
            $run[] = [$pos, $word];
        }
        // flush the run that reaches the end of the candidates
        if (count($run) > 1 && count($run) < 4) {
            $phrases[] = $run;
        }

        foreach ($phrases as $members) {
            // assumes the keys of $normalized are offsets into $words -- TODO confirm
            $start = $members[0][0];
            $end = $members[count($members) - 1][0];
            $zwords = array_slice($words, $start, $end - $start + 1, true);
            if (count(array_filter($zwords, 'ctype_punct')) > 0) {
                // a punctuation-only token inside the span means it is not one phrase
                continue;
            }

            $score = 0;
            foreach ($members as $member) {
                $score += $top[$member[1]];
            }
            // the summed score is divided by the positional span of the phrase
            $sorted[trim(implode(' ', $zwords))] = $score / ($end - $start);
        }

        // denormalize each single word
        foreach ($normalized as $pos => $word) {
            // Compare as strings so numeric-looking spellings such as "007"
            // and "7" are not considered equal by PHP's loose comparison.
            if (!empty($sorted[$word]) && (string) $word !== (string) $words[$pos]) {
                $sorted[$words[$pos]] = $sorted[$word];
                unset($sorted[$word]);
            }
        }

        arsort($sorted);

        return $sorted;
    }