Graby\Extractor\ContentExtractor::extractBody PHP Method

extractBody() private method

Extract the body by evaluating a given XPath expression against a node.
private extractBody ( boolean $detectBody, string $xpathExpression, DOMNode $node, string $type ) : boolean
$detectBody boolean Do we have to detect the body?
$xpathExpression string XPath expression to extract body
$node DOMNode DOMNode to look into
$type string Format type we are looking for, only used for log message
return boolean Telling if we have to detect body again or not
    /**
     * Look for the body using an XPath expression evaluated against a node.
     *
     * When exactly one node matches, it becomes the body. When several match,
     * they are moved into a newly created <div>; a match whose ancestor chain
     * contains a node already placed in that <div> is skipped. In both cases
     * the kept nodes are passed to Readability's prepArticle() if the site
     * config asks for pruning.
     *
     * @param bool     $detectBody      Do we have to detect body? When false, nothing is done
     * @param string   $xpathExpression XPath expression to extract body
     * @param \DOMNode $node            Context node passed to the XPath query
     * @param string   $type            Format type we are looking for, only used for log message
     *
     * @return bool False when $detectBody was false or a body was set;
     *              $detectBody unchanged when the query yielded no elements
     */
    private function extractBody($detectBody, $xpathExpression, \DOMNode $node, $type)
    {
        // Detection not requested: report that nothing more has to be detected.
        if (false === $detectBody) {
            return false;
        }

        $matches = $this->xpath->query($xpathExpression, $node);

        // hasElements() reported nothing usable: hand the flag back unchanged.
        if (false === $this->hasElements($matches)) {
            return $detectBody;
        }

        $this->logger->log('debug', $type . ': found "' . $matches->length . '" with ' . $xpathExpression);

        // Single match: use it directly as the body.
        if (1 === $matches->length) {
            $this->body = $matches->item(0);

            // prune (clean up elements that may not be content)
            if ($this->siteConfig->prune()) {
                $this->logger->log('debug', 'Pruning content');
                $this->readability->prepArticle($this->body);
            }

            return false;
        }

        // Several matches: gather them under a new wrapper element.
        $this->body = $this->readability->dom->createElement('div');
        $this->logger->log('debug', '{nb} body elems found', array('nb' => $matches->length));

        $added = 0;
        foreach ($matches as $candidate) {
            // Skip nodes that have no parent.
            if (!isset($candidate->parentNode)) {
                continue;
            }

            // Walk up from the candidate and see whether any ancestor is one
            // of the nodes already appended to the wrapper.
            $nested = false;
            foreach ($this->body->childNodes as $kept) {
                for ($ancestor = $candidate->parentNode; null !== $ancestor; $ancestor = $ancestor->parentNode) {
                    if ($ancestor->isSameNode($kept)) {
                        $nested = true;
                        break 2;
                    }
                }
            }

            if ($nested) {
                $this->logger->log('debug', '...element is child of another body element, skipping.');
                continue;
            }

            // prune (clean up elements that may not be content)
            if ($this->siteConfig->prune()) {
                $this->logger->log('debug', '...pruning content');
                $this->readability->prepArticle($candidate);
            }

            // NOTE(review): this guard looks always true for a DOM node; kept as in the
            // original since prepArticle()'s signature is not visible here.
            if ($candidate) {
                ++$added;
                $this->body->appendChild($candidate);
            }
        }

        $this->logger->log('debug', '...{len} elements added to body', array('len' => $added));

        return false;
    }