Fukuball\Jieba\Posseg::__cutDetail PHP Method

__cutDetail() public static method

Static method __cutDetail
public static __cutDetail ( string $sentence, array $options = [] ) : array
$sentence string # input sentence
$options array # other options
return array $words
    /**
     * Split a sentence into blocks and attach a part-of-speech tag to each.
     *
     * The sentence is scanned for three kinds of runs; characters that match
     * none of them (spaces, for example) are dropped:
     *
     *  - runs of characters in U+4E00..U+9FA5 are handed to self::__cut() and
     *    whatever entries it returns are appended unchanged;
     *  - runs of ASCII letters, digits, "+", "#", CR and LF are tagged "eng"
     *    when they contain a letter, "m" when they contain digits but no
     *    letter, "eng" when they consist only of "+"/"#" (optionally with line
     *    breaks), and are dropped when they consist only of line breaks;
     *  - runs of the listed full-width punctuation code points are tagged "w".
     *
     * @param string $sentence input sentence
     * @param array  $options  other options; merged with the default
     *                         'mode' => 'default' but not consulted here
     *
     * @return array $words entries appended by this method have the shape
     *                      array("word" => string, "tag" => string)
     */
    public static function __cutDetail($sentence, $options = array())
    {
        $defaults = array('mode' => 'default');
        // Kept for parity with the original argument handling; the merged
        // options are not read anywhere below.
        $options = array_merge($defaults, $options);

        $words = array();

        $re_han_pattern = '([\\x{4E00}-\\x{9FA5}]+)';
        $re_skip_pattern = '([a-zA-Z0-9+#\\r\\n]+)';
        $re_punctuation_pattern = '([\\x{ff5e}\\x{ff01}\\x{ff08}\\x{ff09}\\x{300e}' . '\\x{300c}\\x{300d}\\x{300f}\\x{3001}\\x{ff1a}\\x{ff1b}' . '\\x{ff0c}\\x{ff1f}\\x{3002}]+)';
        $re_eng_pattern = '[a-zA-Z+#]+';
        $re_num_pattern = '[0-9]+';

        // Build the delimited expressions once instead of on every iteration.
        $han_regex = '/' . $re_han_pattern . '/u';
        $skip_regex = '/' . $re_skip_pattern . '/u';
        $punctuation_regex = '/' . $re_punctuation_pattern . '/u';
        $eng_regex = '/' . $re_eng_pattern . '/u';
        $num_regex = '/' . $re_num_pattern . '/u';

        $matches = array();
        preg_match_all('/(' . $re_han_pattern . '|' . $re_skip_pattern . '|' . $re_punctuation_pattern . ')/u', $sentence, $matches, PREG_PATTERN_ORDER);

        // preg_match_all() can fail (e.g. malformed UTF-8 with the /u flag);
        // treat that as "no blocks" rather than reading a missing index.
        $blocks = (isset($matches[0]) && is_array($matches[0])) ? $matches[0] : array();

        foreach ($blocks as $blk) {
            if (preg_match($han_regex, $blk)) {
                foreach (self::__cut($blk) as $blk_word) {
                    $words[] = $blk_word;
                }
            } elseif (preg_match($skip_regex, $blk)) {
                // A digit somewhere in the run is not enough to call it a
                // numeral: "abc123" must be "eng", not "m". Only runs with
                // digits and no letter are numerals.
                $has_letter = preg_match('/[a-zA-Z]/', $blk) === 1;
                if (!$has_letter && preg_match($num_regex, $blk)) {
                    $words[] = array("word" => $blk, "tag" => "m");
                } elseif (preg_match($eng_regex, $blk)) {
                    $words[] = array("word" => $blk, "tag" => "eng");
                }
                // Runs made only of CR/LF match neither test and are skipped.
            } elseif (preg_match($punctuation_regex, $blk)) {
                $words[] = array("word" => $blk, "tag" => "w");
            }
        }

        return $words;
    }