private extractBody ( boolean $detectBody, string $xpathExpression, DOMNode $node, string $type ) : boolean | ||
$detectBody | boolean | Whether the body still has to be detected |
$xpathExpression | string | XPath expression to extract body |
$node | DOMNode | DOMNode to look into |
$type | string | Format type we are looking for, only used for log message |
return | boolean | Whether the body still has to be detected after this call |
/**
 * Run an XPath expression and, when it matches, store the result in $this->body.
 *
 * - When $detectBody is false the method returns false right away.
 * - When the expression matches nothing (per hasElements()), $detectBody is
 *   returned unchanged.
 * - When exactly one node matches, that node becomes the body.
 * - When several nodes match, a new <div> is created as the body and every
 *   match that is not nested inside an already-appended match is appended to it.
 *
 * In the two matching cases the content is passed through
 * readability->prepArticle() if siteConfig->prune() is truthy, and false is
 * returned.
 *
 * @param bool     $detectBody      Whether the body still has to be detected
 * @param string   $xpathExpression XPath expression used to locate the body
 * @param \DOMNode $node            Context node the expression is evaluated against
 * @param string   $type            Format type being looked for; only used in the log message
 *
 * @return bool false once a body has been set (or detection was not requested),
 *              otherwise the incoming $detectBody value
 */
private function extractBody($detectBody, $xpathExpression, \DOMNode $node, $type)
{
    if ($detectBody === false) {
        return false;
    }

    $matches = $this->xpath->query($xpathExpression, $node);
    if ($this->hasElements($matches) === false) {
        // Nothing matched: hand the flag back unchanged.
        return $detectBody;
    }

    $matchCount = $matches->length;
    $this->logger->log('debug', $type . ': found "' . $matchCount . '" with ' . $xpathExpression);

    // Single match: use the node itself as the body.
    if (1 === $matchCount) {
        $this->body = $matches->item(0);

        // prune (clean up elements that may not be content)
        if ($this->siteConfig->prune()) {
            $this->logger->log('debug', 'Pruning content');
            $this->readability->prepArticle($this->body);
        }

        return false;
    }

    // Several matches: gather them under a fresh wrapper element.
    $this->body = $this->readability->dom->createElement('div');
    $this->logger->log('debug', '{nb} body elems found', array('nb' => $matchCount));

    $added = 0;
    foreach ($matches as $candidate) {
        // Nodes without a parent are ignored.
        if (!isset($candidate->parentNode)) {
            continue;
        }

        // Walk up from the candidate; if any ancestor is one of the nodes already
        // appended to the wrapper, the candidate's content is already included.
        $nested = false;
        foreach ($this->body->childNodes as $kept) {
            for ($ancestor = $candidate->parentNode; null !== $ancestor; $ancestor = $ancestor->parentNode) {
                if ($ancestor->isSameNode($kept)) {
                    $nested = true;
                    break 2;
                }
            }
        }

        if ($nested) {
            $this->logger->log('debug', '...element is child of another body element, skipping.');
            continue;
        }

        // prune (clean up elements that may not be content)
        if ($this->siteConfig->prune()) {
            $this->logger->log('debug', '...pruning content');
            $this->readability->prepArticle($candidate);
        }

        if ($candidate) {
            ++$added;
            $this->body->appendChild($candidate);
        }
    }

    $this->logger->log('debug', '...{len} elements added to body', array('len' => $added));

    return false;
}