Fukuball\Jieba\Jieba::__cutDAG PHP Метод

__cutDAG() публичный статический Метод

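Static method __cutDAG: splits a sentence into words by walking the computed route from the first character to the last; runs of consecutive single characters are buffered and, when longer than one character, passed to Finalseg::cut.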
public static __cutDAG ( string $sentence, array $options = [] ) : array
$sentence string # input sentence
$options array # other options (merged with the default 'mode' => 'default'; not otherwise read by this method)
Результат array $words
    /**
     * Static method __cutDAG
     *
     * Splits a sentence into words by walking self::$route from the first
     * character to the last. Segments longer than one character are emitted
     * as they are; consecutive single-character segments are collected in a
     * buffer which is emitted directly when it holds one character and is
     * handed to Finalseg::cut() when it holds more.
     *
     * @param string $sentence # input sentence
     * @param array  $options  # other options (merged with defaults, not read afterwards)
     *
     * @return array $words
     */
    public static function __cutDAG($sentence, $options = array())
    {
        // The merged options are not consulted anywhere below.
        $options = array_merge(array('mode' => 'default'), $options);

        $words = array();
        $total = mb_strlen($sentence, 'UTF-8');
        $DAG = self::getDAG($sentence);
        // NOTE(review): calc() presumably fills self::$route for this sentence — confirm.
        self::calc($sentence, $DAG);

        // Single characters waiting to be emitted together.
        $pending = '';

        // Emits whatever is buffered (if anything) and clears the buffer.
        $flush = function () use (&$words, &$pending) {
            $pendingLength = mb_strlen($pending, 'UTF-8');
            if ($pendingLength == 1) {
                $words[] = $pending;
            } elseif ($pendingLength > 1) {
                foreach (Finalseg::cut($pending) as $piece) {
                    $words[] = $piece;
                }
            }
            $pending = '';
        };

        for ($start = 0; $start < $total; $start = $end) {
            // The first key of the route entry marks the last character of this segment.
            $routeKeys = array_keys(self::$route[$start]);
            $end = $routeKeys[0] + 1;
            $span = $end - $start;
            $segment = mb_substr($sentence, $start, $span, 'UTF-8');

            if ($span == 1) {
                $pending .= $segment;
                continue;
            }

            // A multi-character segment ends the current run of single characters.
            $flush();
            $words[] = $segment;
        }

        // Emit any single characters left over at the end of the sentence.
        $flush();

        return $words;
    }

Usage Example

Пример #1
0
 /**
  * Static method cut
  *
  * Breaks the sentence into blocks that are either runs of characters in the
  * range U+4E00..U+9FA5 or runs of the characters a-z, A-Z, 0-9, '+', '#' and
  * newline; anything matching neither pattern is dropped. Blocks containing
  * characters from the first range are segmented with Jieba::__cutAll() or
  * Jieba::__cutDAG(); all other blocks are returned unchanged.
  *
  * @param string  $sentence # input sentence
  * @param boolean $cut_all  # use __cutAll when true, __cutDAG otherwise
  * @param array   $options  # other options (merged with defaults, not read afterwards)
  *
  * @return array $seg_list
  */
 public static function cut($sentence, $cut_all = false, $options = array())
 {
     // The merged options are not consulted anywhere below.
     $options = array_merge(array('mode' => 'default'), $options);

     $han = '([\\x{4E00}-\\x{9FA5}]+)';
     $skip = '([a-zA-Z0-9+#\\n]+)';
     $block_regex = '/(' . $han . '|' . $skip . ')/u';
     $han_regex = '/' . $han . '/u';

     preg_match_all($block_regex, $sentence, $matches, PREG_PATTERN_ORDER);

     $seg_list = array();
     foreach ($matches[0] as $block) {
         // Blocks without any Han characters pass through untouched.
         if (!preg_match($han_regex, $block)) {
             $seg_list[] = $block;
             continue;
         }

         $pieces = $cut_all ? Jieba::__cutAll($block) : Jieba::__cutDAG($block);
         foreach ($pieces as $piece) {
             $seg_list[] = $piece;
         }
     }

     return $seg_list;
 }