/**
 * Crawl a URL and build an Article from its HTML.
 *
 * The URL is passed through Helper::getCleanedUrl(); when no raw HTML is
 * supplied, the markup is fetched with $this->getHTML(). The resulting
 * document is attached to the article and the 'cleaners', 'extractors'
 * and 'formatters' module groups are then run against it, in that order.
 *
 * libxml's internal error handling is switched on for the duration of the
 * call and the previous setting is restored afterwards, even when fetching,
 * parsing or one of the modules throws.
 *
 * @param string      $url     URL of the page to crawl (expected to be a
 *                             string; confirm against callers).
 * @param string|null $rawHTML Optional markup to use instead of fetching.
 *                             Any value that is empty() (null, '', '0')
 *                             triggers a fetch.
 *
 * @return Article The populated article.
 */
public function crawl($url, $rawHTML = null)
{
    $article = new Article();
    $parseCandidate = Helper::getCleanedUrl($url);

    // This toggles process-wide libxml state, so remember the previous
    // value and guarantee it is put back in the finally block below.
    $xmlInternalErrors = libxml_use_internal_errors(true);

    try {
        if (empty($rawHTML)) {
            $rawHTML = $this->getHTML($parseCandidate->url);
        }

        // Generate document
        $doc = $this->getDocument($rawHTML);

        // Set core mutators
        $article->setFinalUrl($parseCandidate->url);
        $article->setDomain($parseCandidate->parts->host);
        $article->setLinkhash($parseCandidate->linkhash);
        $article->setRawHtml($rawHTML);
        $article->setDoc($doc);
        // Keep a separate copy of the document alongside the one the modules work on.
        $article->setRawDoc(clone $doc);

        // Pre-extraction document cleaning
        $this->modules('cleaners', $article);

        // Extract content
        $this->modules('extractors', $article);

        // Post-extraction content formatting
        $this->modules('formatters', $article);
    } finally {
        // Restore the caller's libxml error mode on success and on failure.
        libxml_use_internal_errors($xmlInternalErrors);
    }

    return $article;
}