public static function genTrie($f_name, $options = array())
{
    // Loads the pre-built trie from "<f_name>.json" and its cache from
    // "<f_name>.cache.json", then reads the plain-text dictionary at $f_name
    // to fill self::$FREQ (word => frequency as float) and to add every
    // frequency to self::$total.
    //
    // Dictionary lines are split on single spaces: field 0 is the word and
    // field 1 is the frequency. Any further field (e.g. a tag) is ignored.
    //
    // $f_name  : path of the dictionary text file.
    // $options : accepted for interface compatibility; no option is read here.
    // Returns  : the MultiArray stored in self::$trie.
    // Throws   : \RuntimeException when the dictionary file cannot be opened.
    self::$trie = new MultiArray(file_get_contents($f_name . '.json'));
    self::$trie->cache = new MultiArray(file_get_contents($f_name . '.cache.json'));

    $handle = fopen($f_name, "r");
    if ($handle === false) {
        // Without this guard fgets() would be handed `false`, which loops
        // forever on older PHP versions and raises a TypeError on PHP 8.
        throw new \RuntimeException('Unable to open dictionary file: ' . $f_name);
    }

    // The trie is not rebuilt from the dictionary text; it comes from the
    // pre-built JSON loaded above. Only the frequency table is filled here.
    while (($line = fgets($handle)) !== false) {
        $fields = explode(" ", trim($line));

        // Skip blank or truncated lines instead of registering an empty
        // word with a zero frequency.
        if (count($fields) < 2 || $fields[0] === '') {
            continue;
        }

        $word = $fields[0];
        $freq = (float) $fields[1];

        self::$FREQ[$word] = $freq;
        self::$total += $freq;
    }
    fclose($handle);

    return self::$trie;
}
/**
 * Static method init
 *
 * Picks the dictionary file, loads the trie through genTrie(), then replaces
 * every value in self::$FREQ with log(value / self::$total) and stores the
 * smallest resulting value in self::$min_freq.
 *
 * @param array $options # other options: 'mode' ('test' echoes progress and
 *                       timing) and 'dict' ('small', 'big', anything else
 *                       selects the default dictionary)
 *
 * @return void
 */
public static function init($options = array())
{
    $options = array_merge(
        array('mode' => 'default', 'dict' => 'normal'),
        $options
    );
    $verbose = ($options['mode'] == 'test');

    if ($verbose) {
        echo "Building Trie...\n";
    }

    // switch compares loosely, exactly like the == chain it replaces.
    switch ($options['dict']) {
        case 'small':
            $dict_file = "dict.small.txt";
            break;
        case 'big':
            $dict_file = "dict.big.txt";
            break;
        default:
            $dict_file = "dict.txt";
            break;
    }
    self::$dictname = $dict_file;

    $started_at = microtime(true);

    // Dictionaries live in the "dict" directory one level above this file's directory.
    $dict_dir = dirname(dirname(__FILE__)) . "/dict/";
    self::$trie = Jieba::genTrie($dict_dir . $dict_file);

    foreach (self::$FREQ as $word => $count) {
        self::$FREQ[$word] = log($count / self::$total);
    }
    self::$min_freq = min(self::$FREQ);

    if ($verbose) {
        echo "loading model cost " . (microtime(true) - $started_at) . " seconds.\n";
        echo "Trie has been built succesfully.\n";
    }
}