PicoFeed\Scraper\Scraper::execute PHP Method

execute() public method

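Execute the scraper: reset its state, download and prepare the page, run the parser, and follow "next page" links recursively (up to the configured maximum, 25 by default) while accumulating the parsed content.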
public execute ( $pageContent = '', $recursionDepth = 0 )
    /**
     * Execute the scraper.
     *
     * Resets the html/encoding/content state, calls download() and prepareHtml(),
     * then asks getParser() for a parser. When a parser is available its output is
     * appended to $pageContent. If the parser reports a next-page link and the
     * recursion limit has not been reached, the link is resolved against the
     * current URL, set as the new URL and this method calls itself with the
     * accumulated content; otherwise the accumulated content is stored.
     *
     * Note: following next-page links changes the scraper URL via setUrl().
     *
     * @param string $pageContent    Content accumulated from previously scraped pages
     * @param int    $recursionDepth Number of next-page links already followed
     */
    public function execute($pageContent = '', $recursionDepth = 0)
    {
        $this->html = '';
        $this->encoding = '';
        $this->content = '';

        $this->download();
        $this->prepareHtml();

        $parser = $this->getParser();

        if ($parser === null) {
            // No parser for this page: keep whatever earlier pages produced
            // instead of silently discarding it (content was reset above).
            $this->content = $pageContent;

            return;
        }

        // Fall back to 25 pages when the configuration does not define a limit
        $maxRecursions = $this->config->getMaxRecursions();
        if ($maxRecursions === null) {
            $maxRecursions = 25;
        }

        $pageContent .= $parser->execute();

        // check if there is a link to next page and recursively get content
        $nextLink = $parser->findNextLink();

        if ($nextLink !== null && $recursionDepth < $maxRecursions) {
            // The link is resolved against the page it was found on
            $this->setUrl(Url::resolve($nextLink, $this->url));
            $this->execute($pageContent, $recursionDepth + 1);
        } else {
            // Last page (or limit reached): publish everything collected so far
            $this->content = $pageContent;
        }

        Logger::setMessage(get_called_class() . ': Content length: ' . strlen($this->content) . ' bytes');
    }

Usage Example

Example #1
0
 /**
  * Runs the same Scraper instance against two live penny-arcade.com pages and
  * checks the relevant content reported for each of them.
  *
  * Requires network access.
  *
  * @group online
  */
 public function testGrabContentRegex()
 {
     $comicUrl = 'http://penny-arcade.com/comic/2015/04/13/101-part-one';
     $expectedComicHtml = '<img src="http://art.penny-arcade.com/photos/i-tBMHkzG/0/1050x10000/i-tBMHkzG-1050x10000.jpg" alt="101, Part One"/>';
     $newsUrl = 'http://penny-arcade.com/news/post/2015/04/15/101-part-two';
     $expectedNewsFragment = '101, Part Two';

     $scraper = new Scraper(new Config());

     // First page: the relevant content must match the expected markup exactly
     $scraper->setUrl($comicUrl);
     $scraper->execute();
     $this->assertTrue($scraper->hasRelevantContent());
     $this->assertEquals($expectedComicHtml, $scraper->getRelevantContent());

     // Second page, reusing the same instance: only a fragment is checked
     $scraper->setUrl($newsUrl);
     $scraper->execute();
     $this->assertTrue($scraper->hasRelevantContent());
     $this->assertContains($expectedNewsFragment, $scraper->getRelevantContent());
 }
All Usage Examples Of PicoFeed\Scraper\Scraper::execute