/**
 * Returns the single page response, or false if not found.
 *
 * The site config built for $url may list one or more "single_page_link"
 * XPath expressions. They are evaluated in order against the DOM built from
 * $html; the first expression that yields a non-empty link wins. That link is
 * resolved against $url and fetched through the HTTP client. The response is
 * returned only if its status is below 300 and the resolved link differs from
 * $url; in every other case false is returned.
 *
 * @param string $html HTML of the page that was already fetched
 * @param string $url  URL of that page (used for config lookup and link resolution)
 *
 * @return false|array From httpClient fetch
 */
private function getSinglePage($html, $url)
{
    $this->logger->log('debug', 'Looking for site config files to see if single page link exists');
    $siteConfig = $this->configBuilder->buildFromUrl($url);

    // no single page found?
    if (empty($siteConfig->single_page_link)) {
        $this->logger->log('debug', 'No "single_page_link" config found');

        return false;
    }

    // Build DOM tree from HTML
    $readability = new Readability($html, $url);
    $xpath = new \DOMXPath($readability->dom);

    // Loop through single_page_link xpath expressions until one of them
    // produces a usable (non-empty) link.
    $singlePageUrl = null;
    foreach ($siteConfig->single_page_link as $pattern) {
        $elems = $xpath->evaluate($pattern, $readability->dom);

        if (is_string($elems)) {
            // Expressions like string(...) return '' when nothing matches:
            // that is "no result", so fall through to the next pattern
            // instead of giving up on the remaining ones.
            $candidate = trim($elems);
            if ('' !== $candidate) {
                $singlePageUrl = $candidate;
                break;
            }

            continue;
        }

        // Anything that is not a node list (false on error, number, boolean)
        // cannot carry a link.
        if (!($elems instanceof \DOMNodeList)) {
            continue;
        }

        foreach ($elems as $item) {
            $candidate = '';
            if ($item instanceof \DOMElement && $item->hasAttribute('href')) {
                $candidate = trim($item->getAttribute('href'));
            } elseif ($item instanceof \DOMAttr) {
                $candidate = trim($item->value);
            }

            // Skip empty href / attribute values and keep looking.
            if ('' !== $candidate) {
                $singlePageUrl = $candidate;
                break 2;
            }
        }
    }

    if (null === $singlePageUrl) {
        $this->logger->log('debug', 'No url found');

        return false;
    }

    // try to resolve against $url
    $singlePageUrl = $this->makeAbsoluteStr($url, $singlePageUrl);

    // check it's not what we have already!
    if (false !== $singlePageUrl && $singlePageUrl != $url) {
        // it's not, so let's try to fetch it...
        $response = $this->httpClient->fetch($singlePageUrl, false, $siteConfig->http_header);

        if ($response['status'] < 300) {
            $this->logger->log('debug', 'Single page content found with url', ['url' => $singlePageUrl]);

            return $response;
        }
    }

    $this->logger->log('debug', 'No content found with url', ['url' => $singlePageUrl]);

    return false;
}