/**
* Process external references from an HTML document to the book. The chapter itself is not stored.
* The HTML is scanned for <link..., <style..., and <img tags.
* Embedded CSS styles and links will also be processed.
* Script tags are not processed, as scripting should be avoided in e-books.
*
* EPub keeps track of added files, and duplicate files referenced across multiple
* chapters, are only added once.
*
* If $doc is a string, it is assumed to be the content of an HTML file;
* otherwise it is assumed to be a DOMDocument.
*
* Basedir is the root dir the HTML is supposed to "live" in, used to resolve
* relative references such as <code><img src="../images/image.png"/></code>
*
* $externalReferences determines how the function will handle external references.
*
* @param mixed &$doc (referenced)
* @param int $externalReferences How to handle external references, EPub::EXTERNAL_REF_IGNORE, EPub::EXTERNAL_REF_ADD or EPub::EXTERNAL_REF_REMOVE_IMAGES? Default is EPub::EXTERNAL_REF_ADD.
* @param string $baseDir Default is "", meaning it is pointing to the document root.
* @param string $htmlDir The path to the parent HTML file's directory from the root of the archive.
*
* @return bool false if unsuccessful (book is finalized or $externalReferences == EXTERNAL_REF_IGNORE).
*/
protected function processChapterExternalReferences(&$doc, $externalReferences = EPub::EXTERNAL_REF_ADD, $baseDir = "", $htmlDir = "")
{
    // Bail out when the book is finalized or the caller asked for external references to be ignored.
    if ($this->isFinalized || $externalReferences === EPub::EXTERNAL_REF_IGNORE) {
        return false;
    }

    // Every "segment/" of $htmlDir becomes "../", i.e. one step up per directory level of $htmlDir.
    $backPath = preg_replace('#[^/]+/#i', "../", $htmlDir);

    $isDocAString = is_string($doc);
    if ($isDocAString) {
        $doc = StringHelper::removeComments($doc);

        // DOMDocument::loadHTML() refuses an empty source (a ValueError on PHP 8), so there is
        // nothing to process. NOTE(review): removeComments() is presumed to return a string - confirm.
        if (!is_string($doc) || $doc === '') {
            return false;
        }

        $xmlDoc = new DOMDocument();
        // libxml emits warnings for markup it has to repair; they are deliberately silenced here.
        // NOTE(review): without a charset declaration in the markup, libxml may not treat the
        // input as UTF-8 - confirm callers always supply one.
        @$xmlDoc->loadHTML($doc);

        // The XHTML rebuild further down needs an <html> element to copy attributes from.
        if ($xmlDoc->getElementsByTagName("html")->item(0) === null) {
            return false;
        }
    } else {
        $xmlDoc = $doc;
    }

    $this->processChapterStyles($xmlDoc, $externalReferences, $baseDir, $htmlDir);
    $this->processChapterLinks($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
    $this->processChapterImages($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);
    $this->processChapterSources($xmlDoc, $externalReferences, $baseDir, $htmlDir, $backPath);

    // A DOMDocument argument was processed in place by the calls above; only a string
    // argument has to be serialized back into $doc.
    if ($isDocAString) {
        $htmlElement = $xmlDoc->getElementsByTagName("html")->item(0);
        $headElement = $xmlDoc->getElementsByTagName("head")->item(0);
        $bodyElement = $xmlDoc->getElementsByTagName("body")->item(0);

        // Carry over every attribute of the original <html> element except the default
        // namespace, which the XHTML skeleton below declares itself.
        $htmlNS = "";
        if ($htmlElement !== null) {
            foreach ($htmlElement->attributes as $attribute) {
                if ($attribute->nodeName !== "xmlns") {
                    // Escape the value: it is spliced into XML source text, where a raw
                    // '&', '<' or '"' would make the skeleton unparseable.
                    $escapedValue = htmlspecialchars((string) $attribute->nodeValue, ENT_COMPAT | ENT_XML1, 'UTF-8');
                    $htmlNS .= " {$attribute->nodeName}=\"{$escapedValue}\"";
                }
            }
        }

        // Build an XHTML 1.1 skeleton and move the processed head and body into it.
        $xml2Doc = new DOMDocument('1.0', "utf-8");
        $xml2Doc->loadXML("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n" . " \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n" . "<html xmlns=\"http://www.w3.org/1999/xhtml\"" . $htmlNS . ">\n</html>\n");
        $html = $xml2Doc->getElementsByTagName("html")->item(0);
        if ($html === null) {
            // The skeleton could not be parsed (e.g. an attribute name that is not valid XML).
            return false;
        }

        // An HTML fragment parses without a <head> (or, rarely, a <body>); import only what exists
        // instead of handing null to importNode().
        if ($headElement !== null) {
            $html->appendChild($xml2Doc->importNode($headElement, true));
        }
        if ($bodyElement !== null) {
            $html->appendChild($xml2Doc->importNode($bodyElement, true));
        }

        // force pretty printing and correct formatting, should not be needed, but it is.
        $xml = new DOMDocument('1.0', "utf-8");
        $xml->preserveWhiteSpace = false;
        $xml->formatOutput = true;
        $xml->loadXML($xml2Doc->saveXML());
        $doc = $xml->saveXML();

        // Books that are not EPub version 2 get the XHTML 1.1 DOCTYPE line stripped again.
        if (!$this->isEPubVersion2()) {
            $doc = preg_replace('#^\\s*<!DOCTYPE\\ .+?>\\s*#im', '', $doc);
        }
    }

    return true;
}