LanguageDetector\Sort\PageRank::sort PHP Method

sort() public method

public sort ( array $ngrams )
$ngrams array
    /**
     * Scores the given n-grams with an iterative, PageRank-style computation
     * and returns the scores ordered from highest to lowest.
     *
     * The graph description comes from getGraph() and must be an array holding
     * the array entries 'outlinks', 'graph' and 'values'. 'graph' maps a node
     * id to the ids linking to it; 'values' and 'outlinks' are indexed by node
     * id and are read for every inbound link.
     *
     * @param array $ngrams
     *
     * @return array Scores keyed by node id, sorted in descending order; when
     *               the graph is empty, $ngrams is returned unchanged.
     *
     * @throws \RuntimeException When an entry of the graph description is
     *                           missing or not an array, or when 'outlinks' or
     *                           'values' is empty for a non-empty graph.
     */
    public function sort(array $ngrams)
    {
        $_graph = $this->getGraph($ngrams);

        // Check presence and type only: an empty 'graph' is a legitimate result
        // and must reach the early return below instead of being rejected here.
        foreach (array('outlinks', 'graph', 'values') as $prop) {
            if (!isset($_graph[$prop]) || !is_array($_graph[$prop])) {
                throw new \RuntimeException("Invalid or missing {$prop}");
            }
        }

        $graph = $_graph['graph'];

        //graph would be empty if all ngrams are the same
        if (count($graph) === 0) {
            return $ngrams;
        }

        // A non-empty graph cannot be scored without outlinks and values.
        foreach (array('outlinks', 'values') as $prop) {
            if (count($_graph[$prop]) === 0) {
                throw new \RuntimeException("Invalid or missing {$prop}");
            }
        }

        $outlinks = $_graph['outlinks'];
        $values = $_graph['values'];
        $damping = $this->damping;
        $newvals = array();

        do {
            foreach ($graph as $id => $inlinks) {
                // Sum each inbound node's current value, split across its outlinks.
                $pr = 0;
                foreach ($inlinks as $zid) {
                    $pr += $values[$zid] / $outlinks[$zid];
                }
                // NOTE(review): the textbook PageRank update is
                // (1 - d) + d * sum; this multiplies the two terms instead.
                // Left unchanged because altering it changes every returned
                // score - confirm the intent before touching it.
                $pr = (1 - $damping) * $damping * $pr;
                $newvals[$id] = $pr;
            }

            if ($this->hasConverge($values, $newvals)) {
                break;
            }

            /* update values array */
            $values = $newvals;
        } while (true);

        // Highest score first, keys preserved.
        arsort($newvals);

        return $newvals;
    }

Usage Example

Example #1
0
 /**
  * Extracts the keywords of a text and returns them with their scores,
  * highest score first.
  *
  * The text is split, filtered and normalized through the 'get_words',
  * 'filter_keywords' and 'normalize_keywords' config events. The normalized
  * keywords are ranked with PageRank; runs of two or three consecutive
  * top-ranked keywords are additionally scored as phrases, and single
  * keywords are mapped back to the form they had in the text.
  *
  * The keys of the normalized list are used as positions into the word
  * list, so the events are relied upon to keep those keys aligned.
  *
  * @param mixed $text Input handed to the 'get_words' event.
  *
  * @return array Scores keyed by keyword or phrase, sorted in descending
  *               order; an empty array when PageRank returns its input
  *               unchanged.
  *
  * @throws \RuntimeException When 'normalize_keywords' returns a different
  *                           number of items than it was given.
  */
 public function getAllKeywordsSorted($text)
 {
     // split the text into words
     $words = $this->config->trigger('get_words', $text);
     // get the candidates
     $keywords = $this->config->trigger('filter_keywords', $words);
     // normalize each candidate
     $normalized = $this->config->trigger('normalize_keywords', $keywords);
     if (count($keywords) !== count($normalized)) {
         throw new \RuntimeException("{normalize_keywords} event returned invalid data");
     }

     // PageRank receives a re-indexed list, so its "input returned unchanged"
     // result must be compared against that list rather than $normalized,
     // whose keys may not be sequential.
     $ngrams = array_values($normalized);
     $graph = new PageRank();
     // NOTE(review): the second argument is kept from the original call; the
     // sort() signature is not visible here - confirm it is still needed.
     $sorted = $graph->sort($ngrams, true);
     if ($sorted === $ngrams) {
         // PageRank failed, probably because the input was invalid
         return [];
     }

     // Keep the ten best entries. Keys are preserved so that numeric keywords
     // (stored as integer keys) are not renumbered by array_slice().
     $top = array_slice($sorted, 0, 10, true);

     // search for compound keywords: runs of 2 or 3 consecutive top keywords
     $prev = [];
     $phrases = [];
     foreach ($normalized as $pos => $word) {
         if (empty($top[$word])) {
             if (count($prev) > 1 && count($prev) < 4) {
                 $phrases[] = $prev;
             }
             $prev = [];
             continue;
         }
         $prev[] = [$pos, $word];
     }
     // flush the run still open when the list ends
     if (count($prev) > 1 && count($prev) < 4) {
         $phrases[] = $prev;
     }

     foreach ($phrases as $prev) {
         $start = current($prev)[0];
         $end = end($prev)[0];
         // original words covering the whole run, including anything in between
         $zwords = array_slice($words, $start, $end - $start + 1, true);
         if (count(array_filter($zwords, 'ctype_punct')) > 0) {
             // punctuation inside the span: not a phrase
             continue;
         }
         $phrase = implode(' ', $zwords);
         $score = 0;
         foreach ($prev as $word) {
             $score += $top[$word[1]];
         }
         // a run has at least two distinct positions, so $end - $start is non-zero
         $sorted[trim($phrase)] = $score / ($end - $start);
     }

     // denormalize each single word: re-key the score under the original form
     foreach ($normalized as $pos => $word) {
         if (!empty($sorted[$word]) && $word != $words[$pos]) {
             $sorted[$words[$pos]] = $sorted[$word];
             unset($sorted[$word]);
         }
     }

     arsort($sorted);

     return $sorted;
 }