Graby\SiteConfig\ConfigBuilder::buildFromUrl PHP Method

buildFromUrl() public method

Builds the site config for the host extracted from the given URL. Use buildForHost directly if you already have the host.
public buildFromUrl ( string $url, boolean $addToCache = true ) : SiteConfig
$url string
$addToCache boolean
return SiteConfig
    /**
     * Build the site config matching the host of the given URL.
     *
     * The host component is extracted with parse_url() and the work is
     * delegated to buildForHost(); call that method directly when the host
     * is already known.
     *
     * @param string $url        URL whose host component is looked up
     * @param bool   $addToCache Forwarded unchanged to buildForHost()
     *
     * @return SiteConfig
     */
    public function buildFromUrl($url, $addToCache = true)
    {
        return $this->buildForHost(parse_url($url, PHP_URL_HOST), $addToCache);
    }

Usage Example

Code example #1
0
File: Graby.php Project: j0k3r/graby
 /**
  * Returns the single page response, or false if not found.
  *
  * Evaluates the "single_page_link" XPath expressions of the site config
  * built for $url against $html, in order, and fetches the first link they
  * yield. Expressions that produce nothing usable are skipped so later
  * expressions still get a chance.
  *
  * @param string $html
  * @param string $url
  *
  * @return false|array From httpClient fetch
  */
 private function getSinglePage($html, $url)
 {
     $this->logger->log('debug', 'Looking for site config files to see if single page link exists');
     $siteConfig = $this->configBuilder->buildFromUrl($url);

     // no single page found?
     if (empty($siteConfig->single_page_link)) {
         $this->logger->log('debug', 'No "single_page_link" config found');

         return false;
     }

     // Build DOM tree from HTML
     $readability = new Readability($html, $url);
     $xpath = new \DOMXPath($readability->dom);

     // Loop through single_page_link xpath expressions, first usable result wins
     $singlePageUrl = null;
     foreach ($siteConfig->single_page_link as $pattern) {
         $elems = $xpath->evaluate($pattern, $readability->dom);

         if (is_string($elems)) {
             // A string-typed expression evaluates to '' when nothing matches;
             // only stop searching once we actually have something.
             $candidate = trim($elems);
             if ('' !== $candidate) {
                 $singlePageUrl = $candidate;
                 break;
             }

             continue;
         }

         if (!$elems instanceof \DOMNodeList) {
             // false (invalid expression) or a non-string scalar: nothing to extract
             continue;
         }

         foreach ($elems as $item) {
             if ($item instanceof \DOMElement && $item->hasAttribute('href')) {
                 $singlePageUrl = $item->getAttribute('href');
                 break 2;
             }

             if ($item instanceof \DOMAttr && $item->value) {
                 $singlePageUrl = $item->value;
                 break 2;
             }
         }
     }

     if (!$singlePageUrl) {
         $this->logger->log('debug', 'No url found');

         return false;
     }

     // try to resolve against $url
     $singlePageUrl = $this->makeAbsoluteStr($url, $singlePageUrl);

     // check it's not what we have already! (strict comparison avoids numeric-string juggling)
     if (false !== $singlePageUrl && $singlePageUrl !== $url) {
         // it's not, so let's try to fetch it...
         $response = $this->httpClient->fetch($singlePageUrl, false, $siteConfig->http_header);

         if ($response['status'] < 300) {
             $this->logger->log('debug', 'Single page content found with url', ['url' => $singlePageUrl]);

             return $response;
         }
     }

     $this->logger->log('debug', 'No content found with url', ['url' => $singlePageUrl]);

     return false;
 }