PHPePub\Helpers\StringHelper::removeComments PHP Method

removeComments() public static method

public static removeComments ( $doc ) : string
$doc
return string
    /**
     * Strip every HTML/XML comment from a document string.
     *
     * Sloppy delimiters are normalised first ("-- >" becomes "-->" and
     * "< ! --" becomes "<!--"). Each comment is then removed from its opening
     * "<!--" up to the first "-->" at which the number of openers and closers
     * inside the candidate span is equal, so nested comments are removed as a
     * single unit. If the counts never balance, the span ends at the last
     * "-->" that was found; if there is no "-->" at all after the opener, the
     * rest of the document is removed.
     *
     * @param string $doc The markup to clean.
     *
     * @return string The markup with all comments removed.
     */
    public static function removeComments($doc)
    {
        $doc = preg_replace('~--\\s+>~', '-->', $doc);
        $doc = preg_replace('~<\\s*!\\s*--~', '<!--', $doc);

        // Every pass removes at least the four bytes of the opener, so the
        // document strictly shrinks and the loop always terminates. No
        // "position changed" guard is used: two adjacent comments leave the
        // next opener at the very same offset, and such a guard would stop
        // the loop with the second comment still in place.
        $cPos = BinStringStatic::_strpos($doc, "<!--");
        while ($cPos !== false) {
            $lastEPos = $cPos;
            $ePos = $cPos;
            $startCount = 0;
            $endCount = 0;
            do {
                $ePos = BinStringStatic::_strpos($doc, "-->", $ePos + 1);
                if ($ePos !== false) {
                    $lastEPos = $ePos;
                    $comment = BinStringStatic::_substr($doc, $cPos, $lastEPos + 3 - $cPos);
                    $startCount = substr_count($comment, "<!--");
                    $endCount = substr_count($comment, "-->");
                } elseif ($lastEPos == $cPos) {
                    // Unterminated comment: pretend the terminator sits at the
                    // very end so the removal below runs to the end of $doc.
                    $lastEPos = BinStringStatic::_strlen($doc) - 3;
                }
            } while ($startCount != $endCount && $ePos !== false);

            $doc = substr_replace($doc, "", $cPos, $lastEPos + 3 - $cPos);

            // Search from the start again: joining the text on both sides of
            // the removed span can form a new opener that begins before $cPos.
            $cPos = BinStringStatic::_strpos($doc, "<!--");
        }

        return $doc;
    }

Usage Example

Example #1
0
 /**
  * Process external references from a HTML to the book. The chapter itself is not stored.
  * The HTML is scanned for &lt;link..., &lt;style..., and &lt;img tags.
  * Embedded CSS styles and links will also be processed.
  * Script tags are not processed, as scripting should be avoided in e-books.
  *
  * EPub keeps track of added files, and duplicate files referenced across multiple
  *  chapters are only added once.
  *
  * If $doc is a string, it is assumed to be the content of an HTML file,
  *  otherwise it is assumed to be a DOMDocument. A string $doc is replaced, through
  *  the reference, by the re-serialized XHTML document; the DOCTYPE is stripped
  *  from that output unless the book is EPub version 2.
  *
  * Basedir is the root dir the HTML is supposed to "live" in, used to resolve
  *  relative references such as <code>&lt;img src="../images/image.png"/&gt;</code>
  *
  * $externalReferences determines how the function will handle external references.
  *
  * @param mixed  &$doc               (referenced)
  * @param int    $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? Default is EPub::EXTERNAL_REF_ADD.
  * @param string $baseDir            Default is "", meaning it is pointing to the document root.
  * @param string $htmlDir            The path to the parent HTML file's directory from the root of the archive.
  *
  * @return bool  false if unsuccessful (book is finalized or $externalReferences == EXTERNAL_REF_IGNORE), true otherwise.
  */
 protected function processChapterExternalReferences(&$doc, $externalReferences = EPub::EXTERNAL_REF_ADD, $baseDir = "", $htmlDir = "")
 {
     if ($this->isFinalized || $externalReferences === EPub::EXTERNAL_REF_IGNORE) {
         return false;
     }
     // Replace every "segment/" of $htmlDir with "../" to get the relative
     // path leading from the HTML file's directory back up to the root.
     $backPath = preg_replace('#[^/]+/#i', "../", $htmlDir);
     $isDocAString = is_string($doc);
     $xmlDoc = null;
     if ($isDocAString) {
         $doc = StringHelper::removeComments($doc);
         $xmlDoc = new DOMDocument();
         @$xmlDoc->loadHTML($doc);
     } else {
         $xmlDoc = $doc;
     }
     $this->processChapterStyles($xmlDoc, $externalReferences, $baseDir, $htmlDir);
     $this->processChapterLinks($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
     $this->processChapterImages($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
     $this->processChapterSources($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
     if ($isDocAString) {
         // Any of these may be null: the parsed input is not guaranteed to
         // contain an explicit <html>, <head> or <body> element.
         $htmlElement = $xmlDoc->getElementsByTagName("html")->item(0);
         $headElement = $xmlDoc->getElementsByTagName("head")->item(0);
         $bodyElement = $xmlDoc->getElementsByTagName("body")->item(0);
         // Carry the original <html> attributes over to the new root element,
         // except xmlns, which the skeleton below sets itself.
         $htmlNS = "";
         if ($htmlElement !== null) {
             for ($index = 0; $index < $htmlElement->attributes->length; $index++) {
                 $attribute = $htmlElement->attributes->item($index);
                 if ($attribute->nodeName != "xmlns") {
                     // nodeValue is the decoded text; re-escape it so that a
                     // quote, "&" or "<" cannot break the XML assembled below.
                     $htmlNS .= " {$attribute->nodeName}=\"" . htmlspecialchars($attribute->nodeValue, ENT_QUOTES, 'UTF-8') . "\"";
                 }
             }
         }
         $xml = new DOMDocument('1.0', "utf-8");
         $xml->preserveWhiteSpace = false;
         $xml->formatOutput = true;
         $xml2Doc = new DOMDocument('1.0', "utf-8");
         $xml2Doc->loadXML("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n" . "   \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n" . "<html xmlns=\"http://www.w3.org/1999/xhtml\"" . $htmlNS . ">\n</html>\n");
         $html = $xml2Doc->getElementsByTagName("html")->item(0);
         if ($headElement !== null) {
             $html->appendChild($xml2Doc->importNode($headElement, true));
         }
         if ($bodyElement !== null) {
             $html->appendChild($xml2Doc->importNode($bodyElement, true));
         }
         // force pretty printing and correct formatting, should not be needed, but it is.
         $xml->loadXML($xml2Doc->saveXML());
         $doc = $xml->saveXML();
         if (!$this->isEPubVersion2()) {
             $doc = preg_replace('#^\\s*<!DOCTYPE\\ .+?>\\s*#im', '', $doc);
         }
     }
     return true;
 }