ArticleIndex::RunIndexer PHP Method

RunIndexer() public static method

fn OnArticleDelete
public static RunIndexer ( $p_timeLimit = null, $p_articlesLimit = null, $p_lastModifiedFirst = true )
    /**
     * Indexes articles whose IsIndexed flag is 'N'.
     *
     * For every selected article the old ArticleIndex rows are deleted, the
     * keyword list is rebuilt, unknown keywords are added to KeywordIndex, the
     * article/keyword pairs are written in batches and the article is finally
     * flagged IsIndexed = 'Y'. A non-blocking exclusive file lock ensures that
     * only one indexer runs at a time.
     *
     * @param int|null $p_timeLimit
     *      Time budget, compared against microtime() differences (seconds).
     *      Also caps the number of selected rows at 5 per unit of time limit.
     *      Null or a non-positive value means no time limit.
     * @param int|null $p_articlesLimit
     *      Maximum number of articles to index; null or a non-positive value
     *      means no articles limit.
     * @param bool $p_lastModifiedFirst
     *      True orders by time_updated DESC, false by Number ASC.
     *
     * @return array|PEAR_Error
     *      On success an array with the keys 'articles', 'words', 'new words',
     *      'total articles', 'total time', 'article time', 'word cache hits'
     *      and 'word insert queries'; a PEAR_Error if the lock could not be
     *      obtained or a database operation failed.
     */
    public static function RunIndexer($p_timeLimit = null, $p_articlesLimit = null, $p_lastModifiedFirst = true)
    {
        global $g_ado_db;

        $startTime = microtime(true);

        // Normalise the limits once: the articles limit is interpolated into
        // the SQL LIMIT clause, so it must be a plain integer.
        $timeLimit = is_null($p_timeLimit) ? 0.0 : (float) $p_timeLimit;
        $articlesLimit = is_null($p_articlesLimit) ? 0 : (int) $p_articlesLimit;

        $rowsLimit = 0;
        if (!is_null($p_timeLimit)) {
            $rowsLimit = (int) $p_timeLimit * 5;
        }
        // Only a positive articles limit may tighten the row limit; a limit of
        // 0 means "unlimited" and must not wipe out the time-derived cap.
        if ($articlesLimit > 0) {
            $rowsLimit = $rowsLimit > 0 ? min($rowsLimit, $articlesLimit) : $articlesLimit;
        }

        $lockFile = fopen(__DIR__ . '/../cache/newscoop-indexer.lock', "w+");
        if ($lockFile === false) {
            return new PEAR_Error("Unable to create single process lock control!");
        }
        // non-blocking exclusive lock: fail right away if another process holds it
        if (!flock($lockFile, LOCK_EX | LOCK_NB)) {
            fclose($lockFile);
            return new PEAR_Error("Another indexer process is already running!");
        }

        $total_art = 0;
        $nr_art = 0;
        $nr_new = 0;
        $nr_word = 0;
        $word_cache_hits = 0;
        $articleWordsBatch = array();
        $wordInsertQueries = 0;
        // keyword => KeywordIndex.Id cache shared by all articles of this run
        $existing_words = array();
        $article = array();
        $error = null;

        try {
            if ($p_lastModifiedFirst) {
                $order = 'time_updated DESC';
            } else {
                $order = 'Number ASC';
            }
            $limit = $rowsLimit > 0 ? "LIMIT 0, {$rowsLimit}" : '';
            // selects articles not yet indexed
            $sql_query = 'SELECT art.IdPublication, art.NrIssue, art.NrSection, art.Number, ' . "art.IdLanguage, art.Type, art.Keywords, art.Name \n" . "FROM Articles as art \n" . "WHERE art.IsIndexed = 'N' ORDER BY {$order} {$limit}";
            $sql_result = $g_ado_db->GetAll($sql_query);
            if ($sql_result === false) {
                throw new Exception('Error selecting articles not yet indexed');
            }
            $sql = "SELECT COUNT(*) FROM Articles WHERE IsIndexed = 'N'";
            $total_art = $g_ado_db->GetOne($sql);

            foreach ($sql_result as $row) {
                $sql = "SELECT GROUP_CONCAT(CONCAT_WS(' ', first_name, last_name) SEPARATOR ', ') " . "FROM Authors AS au, ArticleAuthors AS aa " . "WHERE au.id = aa.fk_author_id AND aa.fk_article_number = " . (int) $row['Number'] . " AND aa.fk_language_id = " . (int) $row['IdLanguage'];
                $article['AuthorName'] = $g_ado_db->GetOne($sql);
                $article['IdPublication'] = $row['IdPublication'] ? (int) $row['IdPublication'] : 0;
                $article['NrIssue'] = $row['NrIssue'] ? (int) $row['NrIssue'] : 0;
                $article['NrSection'] = $row['NrSection'] ? (int) $row['NrSection'] : 0;
                $article['Number'] = $row['Number'] ? (int) $row['Number'] : 0;
                $article['IdLanguage'] = $row['IdLanguage'] ? (int) $row['IdLanguage'] : 0;
                $article['Type'] = $row['Type'] ? $row['Type'] : '';
                $article['Keywords'] = $row['Keywords'] ? $row['Keywords'] : '';
                $article['Name'] = $row['Name'] ? $row['Name'] : '';

                // deletes from index
                $sql_query = 'DELETE FROM ArticleIndex ' . 'WHERE IdPublication = ' . $article['IdPublication'] . ' AND IdLanguage = ' . $article['IdLanguage'] . ' AND NrIssue = ' . $article['NrIssue'] . ' AND NrSection = ' . $article['NrSection'] . ' AND NrArticle = ' . $article['Number'];
                if (!$g_ado_db->Execute($sql_query)) {
                    throw new Exception('Error deleting the old article index');
                }
                $nr_art++;

                // NOTE(review): BuildKeywordsList presumably fills $keywordsHash
                // by reference with keyword => flag entries - confirm in helper.
                $keywordsHash = array();
                self::BuildKeywordsList($article, $keywordsHash);
                foreach ($keywordsHash as $keyword => $isSet) {
                    if (empty($keyword)) {
                        continue;
                    }
                    $nr_word++;
                    if (isset($existing_words[$keyword])) {
                        $kwd_id = $existing_words[$keyword];
                        $word_cache_hits++;
                    } else {
                        $sql_query = 'SELECT Id FROM KeywordIndex ' . 'WHERE Keyword = ' . $g_ado_db->escape($keyword);
                        // explicit cast instead of "0 + ..." so an empty or
                        // missing result becomes 0 without arithmetic coercion
                        $kwd_id = (int) $g_ado_db->GetOne($sql_query);
                        $existing_words[$keyword] = $kwd_id;
                    }
                    if ($kwd_id == 0) {
                        // keyword not known yet: allocate the next id
                        $sql_query = 'SELECT MAX(Id) AS Id FROM KeywordIndex';
                        $last_kwd_id = (int) $g_ado_db->GetOne($sql_query);
                        $kwd_id = $last_kwd_id + 1;
                        // inserts in keyword list
                        $sql_query = 'INSERT IGNORE INTO KeywordIndex ' . 'SET Keyword = ' . $g_ado_db->escape($keyword) . ', ' . "Id = {$kwd_id}";
                        if (!$g_ado_db->Execute($sql_query)) {
                            throw new Exception('Error adding keyword');
                        }
                        $existing_words[$keyword] = $kwd_id;
                        $nr_new++;
                    }
                    if (!self::BatchAddArticleWord($articleWordsBatch, $article, $kwd_id, $wordInsertQueries)) {
                        throw new Exception('Error adding article to index');
                    }
                }
                // flush whatever is still pending in the batch for this article
                self::RunArticleWordBatch($articleWordsBatch, $wordInsertQueries);

                unset($article['Name']);
                unset($article['Keywords']);
                unset($article['Type']);
                $sql_query = "UPDATE Articles SET IsIndexed = 'Y' " . 'WHERE IdPublication = ' . $article['IdPublication'] . ' AND NrIssue = ' . $article['NrIssue'] . ' AND NrSection = ' . $article['NrSection'] . ' AND Number = ' . $article['Number'] . ' AND IdLanguage = ' . $article['IdLanguage'];
                if (!$g_ado_db->Execute($sql_query)) {
                    throw new Exception('Error updating the article');
                }

                if ($articlesLimit > 0 && $nr_art >= $articlesLimit) {
                    break;
                }
                // stop when the remaining time is shorter than the average
                // time spent per article so far
                $runTime = microtime(true) - $startTime;
                $articleTime = $runTime / $nr_art;
                if ($timeLimit > 0 && $runTime >= $timeLimit - $articleTime) {
                    break;
                }
            }
        } catch (Exception $ex) {
            $error = new PEAR_Error($ex->getMessage() . ': ' . $g_ado_db->ErrorMsg());
        }

        // release the lock and the file handle on success and failure alike
        flock($lockFile, LOCK_UN);
        fclose($lockFile);

        if ($error !== null) {
            return $error;
        }

        $totalTime = microtime(true) - $startTime;
        $articleTime = $nr_art > 0 ? $totalTime / $nr_art : 0;
        return array('articles' => $nr_art, 'words' => $nr_word, 'new words' => $nr_new, 'total articles' => $total_art, 'total time' => $totalTime, 'article time' => $articleTime, 'word cache hits' => $word_cache_hits, 'word insert queries' => $wordInsertQueries);
    }

Usage Example

 /**
  * Runs the article indexer and, in verbose mode, prints a summary.
  *
  * The 'time-limit' option is passed to ArticleIndex::RunIndexer() as its
  * time limit. RunIndexer() returns a statistics array on success and a
  * PEAR_Error object on failure, so the result is checked before it is
  * accessed as an array.
  *
  * @see Console\Command\Command
  */
 protected function execute(Console\Input\InputInterface $input, Console\Output\OutputInterface $output)
 {
     $res = \ArticleIndex::RunIndexer($input->getOption('time-limit'));
     if (!$input->getOption('verbose')) {
         return;
     }
     if (!is_array($res)) {
         // failure path: indexing $res as an array would be a fatal error
         $message = is_object($res) && method_exists($res, 'getMessage') ? $res->getMessage() : 'unknown error';
         $output->writeln('Indexing failed: ' . $message);
         return;
     }
     $output->writeln($res['articles'] . ' out of ' . $res['total articles'] . ' articles were indexed with a total of ' . $res['words'] . ' words.');
     // the unit suffix belongs outside sprintf(), otherwise it is never printed
     $output->writeln('Total index time was ' . sprintf("%.3f", $res['total time']) . ' seconds.');
     $output->writeln('Average article index time was ' . sprintf("%.3f", $res['article time']) . ' seconds.');
 }
All Usage Examples Of ArticleIndex::RunIndexer