/**
 * Indexes articles that are flagged as not yet indexed (Articles.IsIndexed = 'N').
 *
 * For every selected article the method removes its existing ArticleIndex
 * rows, builds the keyword list through self::BuildKeywordsList(), resolves
 * (or creates) the KeywordIndex id of each keyword, queues the
 * article/keyword pairs through self::BatchAddArticleWord(), flushes them
 * with self::RunArticleWordBatch() and finally marks the article as indexed.
 *
 * Only one run may be active at a time: a non-blocking exclusive lock is
 * taken on cache/newscoop-indexer.lock and released (and the handle closed)
 * on every exit path.
 *
 * @param int|null $p_timeLimit
 *      Time budget in seconds (compared against microtime() differences).
 *      Also caps the number of selected rows at five per second of budget.
 *      Null means no time limit.
 * @param int|null $p_articlesLimit
 *      Maximum number of articles to process; null means no limit.
 * @param bool $p_lastModifiedFirst
 *      True orders the selection by time_updated DESC, false by Number ASC.
 *
 * @return array|PEAR_Error
 *      Run statistics ('articles', 'words', 'new words', 'total articles',
 *      'total time', 'article time', 'word cache hits',
 *      'word insert queries'), or a PEAR_Error when the lock cannot be
 *      obtained or a database operation fails.
 */
public static function RunIndexer($p_timeLimit = null, $p_articlesLimit = null, $p_lastModifiedFirst = true)
{
    global $g_ado_db;

    $startTime = microtime(true);

    // Upper bound for the number of rows fetched by the selection query.
    $rowsLimit = 0;
    if (!is_null($p_timeLimit)) {
        $rowsLimit = (int) $p_timeLimit * 5;
    }
    if (!is_null($p_articlesLimit)) {
        // The value is interpolated into the LIMIT clause below, so it must
        // be a plain integer.
        $articlesLimit = (int) $p_articlesLimit;
        $rowsLimit = $rowsLimit > 0 ? min($rowsLimit, $articlesLimit) : $articlesLimit;
    }

    $lockFile = fopen(__DIR__ . '/../cache/newscoop-indexer.lock', "w+");
    if ($lockFile === false) {
        return new PEAR_Error("Unable to create single process lock control!");
    }
    // Non-blocking exclusive lock: fail immediately if another run holds it.
    if (!flock($lockFile, LOCK_EX | LOCK_NB)) {
        fclose($lockFile);
        return new PEAR_Error("Another indexer process is already running!");
    }

    $nr_art = 0;
    $nr_new = 0;
    $nr_word = 0;
    $word_cache_hits = 0;
    $wordInsertQueries = 0;
    $total_art = 0;
    $errorMessage = null;

    try {
        if ($p_lastModifiedFirst) {
            $order = 'time_updated DESC';
        } else {
            $order = 'Number ASC';
        }
        $limit = $rowsLimit > 0 ? "LIMIT 0, {$rowsLimit}" : null;

        // selects articles not yet indexed
        $sql_query = 'SELECT art.IdPublication, art.NrIssue, art.NrSection, art.Number, ' . "art.IdLanguage, art.Type, art.Keywords, art.Name \n" . "FROM Articles as art \n" . "WHERE art.IsIndexed = 'N' ORDER BY {$order} {$limit}";
        $sql_result = $g_ado_db->GetAll($sql_query);
        if ($sql_result === false) {
            throw new Exception('Error selecting articles not yet indexed');
        }

        // Total backlog size, reported in the statistics.
        $sql = "SELECT COUNT(*) FROM Articles WHERE IsIndexed = 'N'";
        $total_art = $g_ado_db->GetOne($sql);

        $articleWordsBatch = array();
        // Keyword => KeywordIndex id, shared by all articles of this run.
        $existing_words = array();

        foreach ($sql_result as $row) {
            $sql = "SELECT GROUP_CONCAT(CONCAT_WS(' ', first_name, last_name) SEPARATOR ', ')" . "FROM Authors AS au, ArticleAuthors AS aa " . "WHERE au.id = aa.fk_author_id AND aa.fk_article_number = " . (int) $row['Number'] . " AND aa.fk_language_id = " . (int) $row['IdLanguage'];

            // Built from scratch for every row so nothing leaks over from
            // the previous iteration.
            $article = array(
                'AuthorName' => $g_ado_db->GetOne($sql),
                'IdPublication' => $row['IdPublication'] ? (int) $row['IdPublication'] : 0,
                'NrIssue' => $row['NrIssue'] ? (int) $row['NrIssue'] : 0,
                'NrSection' => $row['NrSection'] ? (int) $row['NrSection'] : 0,
                'Number' => $row['Number'] ? (int) $row['Number'] : 0,
                'IdLanguage' => $row['IdLanguage'] ? (int) $row['IdLanguage'] : 0,
                'Type' => $row['Type'] ? $row['Type'] : '',
                'Keywords' => $row['Keywords'] ? $row['Keywords'] : '',
                'Name' => $row['Name'] ? $row['Name'] : '',
            );

            // deletes from index
            $sql_query = 'DELETE FROM ArticleIndex ' . 'WHERE IdPublication = ' . $article['IdPublication'] . ' AND IdLanguage = ' . $article['IdLanguage'] . ' AND NrIssue = ' . $article['NrIssue'] . ' AND NrSection = ' . $article['NrSection'] . ' AND NrArticle = ' . $article['Number'];
            if (!$g_ado_db->Execute($sql_query)) {
                throw new Exception('Error deleting the old article index');
            }

            $nr_art++;

            $keywordsHash = array();
            self::BuildKeywordsList($article, $keywordsHash);

            foreach ($keywordsHash as $keyword => $isSet) {
                if (empty($keyword)) {
                    continue;
                }
                $nr_word++;

                if (isset($existing_words[$keyword])) {
                    $kwd_id = $existing_words[$keyword];
                    $word_cache_hits++;
                } else {
                    $sql_query = 'SELECT Id FROM KeywordIndex ' . 'WHERE Keyword = ' . $g_ado_db->escape($keyword);
                    // An explicit cast maps "no row" to 0 without relying on
                    // arithmetic coercion of the driver's return value.
                    $kwd_id = (int) $g_ado_db->GetOne($sql_query);
                    $existing_words[$keyword] = $kwd_id;
                }

                if ($kwd_id == 0) {
                    // Unknown keyword: allocate the next id after the
                    // current maximum.
                    $sql_query = 'SELECT MAX(Id) AS Id FROM KeywordIndex';
                    $last_kwd_id = (int) $g_ado_db->GetOne($sql_query);
                    $kwd_id = $last_kwd_id + 1;

                    // inserts in keyword list
                    $sql_query = 'INSERT IGNORE INTO KeywordIndex ' . 'SET Keyword = ' . $g_ado_db->escape($keyword) . ', ' . "Id = {$kwd_id}";
                    if (!$g_ado_db->Execute($sql_query)) {
                        throw new Exception('Error adding keyword');
                    }
                    $existing_words[$keyword] = $kwd_id;
                    $nr_new++;
                }

                if (!self::BatchAddArticleWord($articleWordsBatch, $article, $kwd_id, $wordInsertQueries)) {
                    throw new Exception('Error adding article to index');
                }
            }

            // NOTE(review): the result of RunArticleWordBatch() is not
            // checked here; confirm whether it can report a failure.
            self::RunArticleWordBatch($articleWordsBatch, $wordInsertQueries);

            $sql_query = "UPDATE Articles SET IsIndexed = 'Y' " . 'WHERE IdPublication = ' . $article['IdPublication'] . ' AND NrIssue = ' . $article['NrIssue'] . ' AND NrSection = ' . $article['NrSection'] . ' AND Number = ' . $article['Number'] . ' AND IdLanguage = ' . $article['IdLanguage'];
            if (!$g_ado_db->Execute($sql_query)) {
                throw new Exception('Error updating the article');
            }

            if ($p_articlesLimit > 0 && $nr_art >= $p_articlesLimit) {
                break;
            }

            // Stop when the average time per article no longer fits in the
            // remaining time budget.
            $runTime = microtime(true) - $startTime;
            $articleTime = $runTime / $nr_art;
            if ($p_timeLimit > 0 && $runTime >= $p_timeLimit - $articleTime) {
                break;
            }
        }
    } catch (Exception $ex) {
        // Capture the database error text right away; the PEAR_Error itself
        // is created only after the lock has been released.
        $errorMessage = $ex->getMessage() . ': ' . $g_ado_db->ErrorMsg();
    }

    // Single release point for both the success and the failure path:
    // drop the lock and close the handle so it does not leak.
    flock($lockFile, LOCK_UN);
    fclose($lockFile);

    if ($errorMessage !== null) {
        return new PEAR_Error($errorMessage);
    }

    $totalTime = microtime(true) - $startTime;
    $articleTime = $nr_art > 0 ? $totalTime / $nr_art : 0;

    return array('articles' => $nr_art, 'words' => $nr_word, 'new words' => $nr_new, 'total articles' => $total_art, 'total time' => $totalTime, 'article time' => $articleTime, 'word cache hits' => $word_cache_hits, 'word insert queries' => $wordInsertQueries);
}