Fukuball\Jieba\Finalseg::__cut PHP Method

__cut() public static method

Static method __cut
public static __cut ( string $sentence, array $options = [] ) : array
$sentence string # input sentence
$options array # other options
return array $words
    /**
     * Static method __cut
     *
     * Splits a sentence into words using the per-character position tags
     * returned by self::viterbi(): 'B' marks the start of a word, 'E' closes
     * the word opened by the most recent 'B', and 'S' emits the character as
     * a word of its own. Any other tag emits nothing, so the character stays
     * inside the word currently in progress.
     *
     * @param string $sentence # input sentence
     * @param array  $options  # other options (not read by this method)
     *
     * @return array $words
     */
    public static function __cut($sentence, $options = array())
    {
        $words = array();
        $viterbi_array = self::viterbi($sentence);
        $pos_list = $viterbi_array['pos_list'];
        $begin = 0; // index of the first character of the word in progress
        $next = 0;  // index of the first character not yet emitted
        $len = mb_strlen($sentence, 'UTF-8');
        for ($i = 0; $i < $len; $i++) {
            // switch compares loosely, exactly like the former == chain.
            switch ($pos_list[$i]) {
                case 'B':
                    $begin = $i;
                    break;
                case 'E':
                    $words[] = mb_substr($sentence, $begin, $i + 1 - $begin, 'UTF-8');
                    $next = $i + 1;
                    break;
                case 'S':
                    $words[] = mb_substr($sentence, $i, 1, 'UTF-8');
                    $next = $i + 1;
                    break;
            }
        }
        if ($next < $len) {
            // Characters after the last emitted word are returned as one final
            // word. The length is passed explicitly because PHP before 5.4.8
            // treated a null length as 0 and would return an empty string here.
            $words[] = mb_substr($sentence, $next, $len - $next, 'UTF-8');
        }
        return $words;
    }

Usage Example

Example #1
0
 /**
  * Static method __cutDAG
  *
  * Walks the sentence from left to right. At each position the first key of
  * self::$route[position] is taken as the index of the segment's last
  * character. Single-character segments are collected in a buffer; when a
  * longer segment is reached (or the sentence ends) the buffer is emitted
  * first: as-is when it holds one character, otherwise split by
  * Finalseg::__cut().
  *
  * @param string $sentence # input sentence
  * @param array  $options  # other options
  *
  * @return array $words
  */
 public static function __cutDAG($sentence, $options = array())
 {
     $options = array_merge(array('mode' => 'default'), $options);
     $result = array();
     $total = mb_strlen($sentence, 'UTF-8');
     $graph = self::getDAG($sentence);
     self::calc($sentence, $graph);

     $pending = ''; // run of consecutive single-character segments
     $start = 0;
     while ($start < $total) {
         $candidates = array_keys(self::$route[$start]);
         $stop = $candidates[0] + 1;
         $span = $stop - $start;
         $piece = mb_substr($sentence, $start, $span, 'UTF-8');
         $start = $stop;

         if ($span == 1) {
             // Hold single characters back until a longer segment shows up.
             $pending .= $piece;
             continue;
         }

         // Emit whatever was buffered before the multi-character segment.
         $pending_len = mb_strlen($pending, 'UTF-8');
         if ($pending_len == 1) {
             $result[] = $pending;
         } elseif ($pending_len > 1) {
             foreach (Finalseg::__cut($pending) as $token) {
                 $result[] = $token;
             }
         }
         $pending = '';
         $result[] = $piece;
     }

     // Emit the characters still buffered when the sentence ends.
     $pending_len = mb_strlen($pending, 'UTF-8');
     if ($pending_len == 1) {
         $result[] = $pending;
     } elseif ($pending_len > 1) {
         foreach (Finalseg::__cut($pending) as $token) {
             $result[] = $token;
         }
     }

     return $result;
 }