/**
 * Split a sentence into blocks and tag each block.
 *
 * The sentence is scanned for maximal runs of three character classes; text
 * that belongs to none of them is skipped:
 *  - characters in U+4E00..U+9FA5: the run is handed to self::__cut() and
 *    every element it returns is appended to the result unchanged;
 *  - ASCII letters, digits, '+', '#', CR and LF: the run is appended as one
 *    entry tagged "m" when it contains a digit and no letter, otherwise
 *    tagged "eng" when it contains a letter, '+' or '#'. A run made only of
 *    CR/LF produces no entry;
 *  - the full-width / CJK punctuation marks listed in the pattern: the run
 *    is appended as one entry tagged "w".
 *
 * @param string $sentence Text to split; matched with the PCRE "u" modifier.
 * @param array  $options  Merged over array('mode' => 'default'). No option
 *                         is read by this method.
 *
 * @return array Entries in sentence order. Entries built here have the shape
 *               array("word" => string, "tag" => string); entries for the
 *               first class are whatever self::__cut() returned.
 */
public static function __cutDetail($sentence, $options = array())
{
    // $options is normalised against the defaults, but no statement below
    // reads it.
    $defaults = array('mode' => 'default');
    $options = array_merge($defaults, $options);

    $words = array();

    $re_han_pattern = '([\\x{4E00}-\\x{9FA5}]+)';
    $re_skip_pattern = '([a-zA-Z0-9+#\\r\\n]+)';
    $re_punctuation_pattern = '([\\x{ff5e}\\x{ff01}\\x{ff08}\\x{ff09}\\x{300e}' . '\\x{300c}\\x{300d}\\x{300f}\\x{3001}\\x{ff1a}\\x{ff1b}' . '\\x{ff0c}\\x{ff1f}\\x{3002}]+)';
    $re_eng_pattern = '[a-zA-Z+#]+';
    $re_num_pattern = '[0-9]+';
    $re_letter_pattern = '[a-zA-Z]';

    // Build the delimited regexes once rather than on every loop iteration.
    $han_regex = '/' . $re_han_pattern . '/u';
    $skip_regex = '/' . $re_skip_pattern . '/u';
    $punctuation_regex = '/' . $re_punctuation_pattern . '/u';
    $eng_regex = '/' . $re_eng_pattern . '/u';
    $num_regex = '/' . $re_num_pattern . '/u';
    $letter_regex = '/' . $re_letter_pattern . '/u';

    $matches = array();
    $found = preg_match_all(
        '/(' . $re_han_pattern . '|' . $re_skip_pattern . '|' . $re_punctuation_pattern . ')/u',
        $sentence,
        $matches,
        PREG_PATTERN_ORDER
    );

    // preg_match_all() returns false on failure; do not index $matches in
    // that case, and return early when nothing matched.
    if ($found === false || empty($matches[0])) {
        return $words;
    }

    foreach ($matches[0] as $blk) {
        if (preg_match($han_regex, $blk)) {
            foreach (self::__cut($blk) as $blk_word) {
                $words[] = $blk_word;
            }
            continue;
        }

        if (preg_match($skip_regex, $blk)) {
            // preg_match() searches anywhere in the subject, so the digit
            // test alone would also accept mixed tokens such as "iPhone6".
            // A run counts as numeric only when it has no letter at all.
            $has_digit = preg_match($num_regex, $blk) === 1;
            $has_letter = preg_match($letter_regex, $blk) === 1;

            if ($has_digit && !$has_letter) {
                $words[] = array("word" => $blk, "tag" => "m");
            } elseif (preg_match($eng_regex, $blk)) {
                $words[] = array("word" => $blk, "tag" => "eng");
            }
            // A run consisting solely of CR/LF matches neither test and is
            // dropped.
            continue;
        }

        if (preg_match($punctuation_regex, $blk)) {
            $words[] = array("word" => $blk, "tag" => "w");
        }
    }

    return $words;
}