Fukuball\Jieba\Posseg::__cutDAG PHP Method

__cutDAG() public static method

Static method __cutDAG
public static __cutDAG ( string $sentence, array $options = [] ) : array
$sentence string # input sentence
$options array # other options
return array $words
    /**
     * Static method __cutDAG
     *
     * Splits $sentence into tagged words by walking the entries of
     * Jieba::$route (read after Jieba::calc() has been invoked on the
     * sentence's DAG). For each start offset, the first key of
     * Jieba::$route[offset] is taken as the inclusive end offset of the
     * word that begins there.
     *
     * Consecutive single-character segments are collected in a buffer.
     * The buffer is flushed whenever a multi-character word is met and
     * once more at the end of the sentence:
     *   - a one-character buffer is tagged through self::$word_tag
     *     (falling back to "x" when the character is not listed);
     *   - a longer buffer is handed to self::__cutDetail() and every
     *     element it yields is appended to the result unchanged.
     *
     * @param string $sentence # input sentence (handled as UTF-8)
     * @param array  $options  # other options; accepted for interface
     *                           compatibility, not read by this method
     *
     * @return array $words list of array("word" => ..., "tag" => ...)
     *                      entries, plus whatever elements
     *                      self::__cutDetail() returns (presumably the
     *                      same shape - confirm against __cutDetail)
     */
    public static function __cutDAG($sentence, $options = array())
    {
        $words = array();
        $length = mb_strlen($sentence, 'UTF-8');

        $DAG = Jieba::getDAG($sentence);
        Jieba::calc($sentence, $DAG);

        $start = 0;
        $buf = '';
        while ($start < $length) {
            // The first key of the route entry is the inclusive end
            // offset of the chosen word; +1 makes it exclusive.
            $route_keys = array_keys(Jieba::$route[$start]);
            $end = $route_keys[0] + 1;
            $l_word = mb_substr($sentence, $start, $end - $start, 'UTF-8');

            if ($end - $start === 1) {
                // Single characters are deferred so that a run of them
                // can be analysed together when the buffer is flushed.
                $buf .= $l_word;
            } else {
                // Emit whatever was buffered before the dictionary word
                // so the output keeps the sentence order.
                foreach (self::cutDagFlushBuffer($buf) as $word) {
                    $words[] = $word;
                }
                $buf = '';

                $words[] = array(
                    "word" => $l_word,
                    "tag" => self::cutDagLookupTag($l_word)
                );
            }

            $start = $end;
        }

        // Trailing single characters that were never followed by a
        // multi-character word.
        foreach (self::cutDagFlushBuffer($buf) as $word) {
            $words[] = $word;
        }

        return $words;
    }

    /**
     * Converts the buffered run of single characters into tagged words.
     *
     * @param string $buf # buffered characters, may be empty
     *
     * @return array empty array for an empty buffer, one tagged entry for
     *               a one-character buffer, otherwise the result of
     *               self::__cutDetail($buf)
     */
    private static function cutDagFlushBuffer($buf)
    {
        $buf_length = mb_strlen($buf, 'UTF-8');

        if ($buf_length === 0) {
            return array();
        }

        if ($buf_length === 1) {
            return array(
                array("word" => $buf, "tag" => self::cutDagLookupTag($buf))
            );
        }

        return self::__cutDetail($buf);
    }

    /**
     * Looks up the tag of $word in self::$word_tag.
     *
     * @param string $word # word to look up
     *
     * @return string the stored tag, or "x" when the word has no entry
     */
    private static function cutDagLookupTag($word)
    {
        return isset(self::$word_tag[$word]) ? self::$word_tag[$word] : "x";
    }