Fukuball\Jieba\Posseg::viterbi PHP Method

viterbi() public static method

Static method viterbi
public static viterbi ( string $sentence, array $options = [] ) : array
$sentence string # input sentence
$options array # other options
return array $viterbi
    /**
     * Static method viterbi
     *
     * Runs a Viterbi search over the characters of $sentence and returns the
     * best-scoring state sequence, one state per character. The search reads
     * the class-level tables self::$char_state, self::$prob_start,
     * self::$prob_trans and self::$prob_emit.
     *
     * Scores are combined by addition. A transition or an emission that is
     * missing from its table is scored as MIN_FLOAT.
     *
     * @param string $sentence # input sentence (characters are read as UTF-8)
     * @param array  $options  # other options
     *
     * @return array $viterbi array("prob" => best final score,
     *                              "pos_list" => chosen state per character index)
     */
    public static function viterbi($sentence, $options = array())
    {
        // Options are merged with the defaults, but nothing below reads them.
        $defaults = array('mode' => 'default');
        $options = array_merge($defaults, $options);

        $obs = $sentence;
        // Length is loop-invariant: compute it once.
        $obs_length = mb_strlen($obs, 'UTF-8');
        $states = self::$char_state;
        $all_states = array_keys(self::$prob_trans);

        // $V[$t][$state]        : best score of any path ending in $state at position $t.
        // $mem_path[$t][$state] : predecessor state on that best path.
        $V = array(array());
        $mem_path = array(array());

        // Initialisation with the first character.
        $c = mb_substr($obs, 0, 1, 'UTF-8');
        if (isset($states[$c]) && !empty($states[$c])) {
            $c_states = $states[$c];
        } else {
            $c_states = $all_states;
        }
        foreach ($c_states as $y) {
            $prob_emit = isset(self::$prob_emit[$y][$c]) ? self::$prob_emit[$y][$c] : MIN_FLOAT;
            $V[0][$y] = self::$prob_start[$y] + $prob_emit;
            $mem_path[0][$y] = '';
        }

        for ($t = 1; $t < $obs_length; $t++) {
            $c = mb_substr($obs, $t, 1, 'UTF-8');
            $V[$t] = array();
            $mem_path[$t] = array();

            // Only previous states that have at least one outgoing transition
            // are used as predecessors.
            $prev_mem_path = array_keys($mem_path[$t - 1]);
            $prev_states = array();
            foreach ($prev_mem_path as $mem_path_state) {
                if (!empty(self::$prob_trans[$mem_path_state])) {
                    $prev_states[] = $mem_path_state;
                }
            }
            // If none qualifies, keep every previous state (their transitions
            // are then scored as MIN_FLOAT). Otherwise the maximum below would
            // be taken over an empty set and break the back-pointer chain.
            if (count($prev_states) == 0) {
                $prev_states = $prev_mem_path;
            }

            // Union of all states reachable from the predecessors, built as a
            // keyed set; only membership matters for the intersection below.
            $expect_next_set = array();
            foreach ($prev_states as $prev_state) {
                if (isset(self::$prob_trans[$prev_state])) {
                    foreach (array_keys(self::$prob_trans[$prev_state]) as $next_state) {
                        $expect_next_set[$next_state] = true;
                    }
                }
            }
            $prev_states_expect_next = array_keys($expect_next_set);

            // Candidate states for this character, restricted to reachable ones.
            $obs_states = isset($states[$c]) ? $states[$c] : $all_states;
            $obs_states = array_intersect($obs_states, $prev_states_expect_next);
            if (count($obs_states) == 0) {
                $obs_states = $all_states;
            }

            foreach ($obs_states as $y) {
                // The emission score does not depend on the predecessor.
                $prob_emit = isset(self::$prob_emit[$y][$c]) ? self::$prob_emit[$y][$c] : MIN_FLOAT;

                $temp_prob_array = array();
                foreach ($prev_states as $y0) {
                    $prob_trans = isset(self::$prob_trans[$y0][$y]) ? self::$prob_trans[$y0][$y] : MIN_FLOAT;
                    $temp_prob_array[$y0] = $V[$t - 1][$y0] + $prob_trans + $prob_emit;
                }

                // Highest score first; remember the score and its predecessor.
                arsort($temp_prob_array);
                $V[$t][$y] = reset($temp_prob_array);
                $mem_path[$t][$y] = key($temp_prob_array);
            }
        }

        // $V[$t] and $mem_path[$t] are always filled together, so the last
        // column of $V already holds exactly the candidate final states.
        $last = end($V);
        arsort($last);
        $return_prob = reset($last);
        $return_prob_key = key($last);

        // Follow the back-pointers from the best final state to position 0.
        $route = $obs_length > 0 ? array_fill(0, $obs_length, 'None') : array();
        for ($i = $obs_length - 1; $i >= 0; $i--) {
            $route[$i] = $return_prob_key;
            $return_prob_key = $mem_path[$i][$return_prob_key];
        }

        return array("prob" => $return_prob, "pos_list" => $route);
    }