Pressbooks\Modules\Export\Epub\Epub201::kneadHtml PHP Method

kneadHtml() protected method

Pummel the HTML into EPUB-compatible dough.
protected kneadHtml ( string $html, string $type, integer $pos ) : string
$html string
$type string front-matter, part, chapter, back-matter, ...
$pos integer (optional) position of content, used when creating filenames like: chapter-001, chapter-002, ...
return string
    /**
     * Pummel the HTML into EPUB compatible dough.
     *
     * Loads the snippet into a DOMDocument, passes it through the image, media
     * and href kneading helpers, pads empty elements so they are not serialized
     * as self-closing tags, strips srcset attributes, serializes the result and
     * finally runs it through the mobi-hacks XSL template.
     *
     * The libxml "internal errors" switch is enabled for the duration of the call
     * and put back to its previous value before returning, so this method does not
     * change global libxml state for the caller.
     *
     * @param string $html HTML snippet to process
     * @param string $type front-matter, part, chapter, back-matter, ...
     * @param int    $pos  (optional) position of content, used when creating filenames like: chapter-001, chapter-002, ...
     *
     * @return string the value returned by transformXML() for the kneaded markup
     */
    protected function kneadHtml($html, $type, $pos = 0)
    {
        // Buffer libxml warnings instead of emitting them; remember the previous
        // setting so it can be restored once we are done.
        $previousErrorMode = libxml_use_internal_errors(true);

        // Load HTML snippet into DOMDocument using UTF-8 hack
        $utf8_hack = '<?xml version="1.0" encoding="UTF-8"?>';
        $doc = new \DOMDocument();
        $doc->loadHTML($utf8_hack . $html);

        // Download images, change to relative paths
        $doc = $this->scrapeAndKneadImages($doc);

        // Download audio files, change to relative paths
        $doc = $this->scrapeAndKneadMedia($doc);

        // Deal with <a href="">, <a href=''>, and other mutations
        $doc = $this->kneadHref($doc, $type, $pos);

        // Make sure empty tags (e.g. <b></b>) don't get turned into self-closing versions
        // by adding an empty text node to them. The query is repeated until no childless
        // element (other than br, hr, img) is left.
        $xpath = new \DOMXPath($doc);
        while (($nodes = $xpath->query('//*[not(text() or node() or self::br or self::hr or self::img)]')) && $nodes->length > 0) {
            foreach ($nodes as $node) {
                $node->appendChild(new \DOMText(''));
            }
        }

        // Remove srcset attributes because responsive images aren't a thing in the EPUB world.
        $srcsets = $xpath->query('//img[@srcset]');
        foreach ($srcsets as $srcset) {
            $srcset->removeAttribute('srcset');
        }

        // Serialize starting from the root element rather than the whole document:
        // per the original author's note, a plain saveXML() spits out multi-byte
        // characters in encoded form.
        $html = $doc->saveXML($doc->documentElement);

        // Remove auto-created <html> <body> and <!DOCTYPE> tags.
        $html = str_replace(array('<html>', '</html>', '<body>', '</body>'), array('', '', '', ''), $html);
        $html = preg_replace('/^<!DOCTYPE.+?>/', '', $html);

        // Mobi7 hacks
        $html = $this->transformXML($utf8_hack . "<html>{$html}</html>", $this->dir . '/templates/epub201/mobi-hacks.xsl');

        // TODO: Handle errors gracefully (inspect libxml_get_errors() before clearing)
        libxml_clear_errors();

        // Hand the libxml error mode back to whatever the caller had configured.
        libxml_use_internal_errors($previousErrorMode);

        return $html;
    }