Symfony\Component\DomCrawler\Crawler::addHtmlContent PHP Method

addHtmlContent() public method

libxml errors are suppressed while the content is parsed. To inspect parsing errors, enable internal error handling with libxml_use_internal_errors(true) before calling this method, then retrieve them with libxml_get_errors(). Be sure to clear the error buffer with libxml_clear_errors() afterward.
public addHtmlContent ( string $content, string $charset = 'UTF-8' )
$content string The HTML content
$charset string The charset
    /**
     * Parses an HTML string and adds the resulting document to the crawler.
     *
     * libxml internal error handling is switched on for the duration of the
     * parse and the previous setting is restored afterwards. If the parsed
     * document contains a <base> element with a non-empty href, the crawler's
     * base href is updated from it (resolved against the current base href
     * when one is already set).
     *
     * @param string $content The HTML content
     * @param string $charset The charset
     */
    public function addHtmlContent($content, $charset = 'UTF-8')
    {
        $internalErrors = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and is only
        // needed with libxml < 2.9.0; newer libxml versions do not load external
        // entities by default.
        $legacyLibxml = \LIBXML_VERSION < 20900;
        if ($legacyLibxml) {
            $disableEntities = libxml_disable_entity_loader(true);
        }

        $dom = new \DOMDocument('1.0', $charset);
        $dom->validateOnParse = true;

        // Turn any warning/notice raised by the conversion into an exception so
        // that a failed conversion leaves $content untouched.
        set_error_handler(function () {
            throw new \Exception();
        });

        try {
            // Convert charset to HTML-entities to work around bugs in DOMDocument::loadHTML()
            $content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
        } catch (\Exception $e) {
            // Best effort: keep the original content when the conversion fails.
        } catch (\Throwable $e) {
            // PHP 7+/8 may throw an \Error subclass (e.g. ValueError for an unknown
            // charset). Swallow it as well so the error handler and the libxml
            // state below are always restored.
        }

        restore_error_handler();

        if ('' !== trim($content)) {
            @$dom->loadHTML($content);
        }

        libxml_use_internal_errors($internalErrors);
        if ($legacyLibxml) {
            libxml_disable_entity_loader($disableEntities);
        }

        $this->addDocument($dom);

        $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
        $baseHref = current($base);

        if (count($base) && !empty($baseHref)) {
            if ($this->baseHref) {
                // Resolve the document's <base href> against the base href already known.
                $linkNode = $dom->createElement('a');
                $linkNode->setAttribute('href', $baseHref);
                $link = new Link($linkNode, $this->baseHref);
                $this->baseHref = $link->getUri();
            } else {
                $this->baseHref = $baseHref;
            }
        }
    }

Usage Example

 /**
  * Process the DOM
  *
  * Loads the stored HTML into the crawler, walks every product list item
  * ('ul.productLister > li'), fetches each product's description page and
  * collects title, size, unit price and description per item together with
  * the running total of the unit prices.
  *
  * @return array Array with the keys 'items' (the collected items) and
  *               'total' (the sum formatted by number_format() with 2 decimals)
  * @throws Exception
  */
 public function process()
 {
     // Check if HTML content is already set
     $this->checkIfContentIsEmpty($this->html);

     $items = [];
     $total = 0;

     // A closure declared inside an instance method is bound to $this
     // automatically, so no explicit bindTo() call is required (the previous
     // call discarded bindTo()'s return value and therefore had no effect).
     $prepareItems = function (Crawler $nodeCrawler, $i) use (&$items, &$total) {
         // The heading anchor carries both the title text and the detail link.
         $anchor = $nodeCrawler->filter('h3 > a');
         $link = $anchor->attr('href');
         $price = $nodeCrawler->filter('p.pricePerUnit')->text();
         $descriptionPage = $this->fetch($link);

         //prepare items array
         $items[$i]['title'] = trim($anchor->text());
         $items[$i]['size'] = $this->sizeOf($descriptionPage);
         $items[$i]['unit_price'] = $this->format($price);
         $items[$i]['description'] = $this->getDescriptionFor($descriptionPage);

         $total += $items[$i]['unit_price'];
     };

     $this->domCrawler->addHtmlContent($this->html);
     $this->domCrawler->filter('ul.productLister > li')->each($prepareItems);

     $this->items = $items;
     $this->total = number_format($total, 2);

     return ['items' => $this->items, 'total' => $this->total];
 }
All Usage Examples Of Symfony\Component\DomCrawler\Crawler::addHtmlContent