/**
 * Parse a string of markup into a DOMDocument.
 *
 * The parser is chosen as follows:
 *  - if the 'use_parser' option is 'html', the HTML parser is used;
 *  - otherwise, if the raw input starts with "<?xml" (case-insensitive) or
 *    'use_parser' is 'xml', the XML parser is used;
 *  - otherwise the HTML parser is used.
 *
 * Options read from $this->options: 'convert_to_encoding',
 * 'convert_from_encoding', 'strip_low_ascii', 'use_parser' and
 * 'replace_entities'.
 *
 * While parsing, PHP errors matching $this->errTypes are routed to
 * \QueryPath\ParseException::initializeFromError(). The previous error
 * handler is always restored before this method returns or throws.
 *
 * @param string   $string The markup to parse.
 * @param int|null $flags  Option bitmask handed to DOMDocument::loadXML().
 *                         Only used on the XML path; null is treated as 0.
 *
 * @return \DOMDocument The document the markup was loaded into.
 *
 * @throws \QueryPath\ParseException Thrown by the final guard below;
 *         presumably also raised by the installed error handler when the
 *         parser reports an error (confirm against initializeFromError()).
 */
private function parseXMLString($string, $flags = null)
{
    $document = new \DOMDocument('1.0');

    // Sniff for an XML declaration on the raw input, i.e. before any
    // encoding conversion or low-ASCII stripping is applied.
    $lead = strtolower(substr($string, 0, 5));

    // Install the handler before entering the try block so that the finally
    // clause only ever undoes a handler that was actually installed.
    set_error_handler(array('\\QueryPath\\ParseException', 'initializeFromError'), $this->errTypes);
    try {
        if (isset($this->options['convert_to_encoding'])) {
            $from_enc = isset($this->options['convert_from_encoding'])
                ? $this->options['convert_from_encoding']
                : 'auto';
            $to_enc = $this->options['convert_to_encoding'];
            // Conversion is silently skipped when mbstring is unavailable.
            if (function_exists('mb_convert_encoding')) {
                $string = mb_convert_encoding($string, $to_enc, $from_enc);
            }
        }

        // Guards against low ASCII characters that have slipped into HTML.
        // This should not adversely affect UTF-8 documents.
        if (!empty($this->options['strip_low_ascii'])) {
            $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
        }

        // Allow users to override the parser selection.
        $useParser = empty($this->options['use_parser'])
            ? ''
            : strtolower($this->options['use_parser']);

        // An explicit request for the HTML parser wins over XML sniffing.
        $parseAsXml = $useParser !== 'html' && ($lead === '<?xml' || $useParser === 'xml');

        if ($parseAsXml) {
            // !empty() avoids an undefined-index error (which the handler
            // installed above could escalate) when the option is not set.
            if (!empty($this->options['replace_entities'])) {
                $string = \QueryPath\Entities::replaceAllEntities($string);
            }
            // loadXML() expects an int; passing null is deprecated on
            // PHP 8.1+, so normalise the default to "no options".
            $document->loadXML($string, $flags === null ? 0 : $flags);
        } else {
            $document->loadHTML($string);
        }
    } finally {
        // Runs on success and on every throwable, so the custom handler
        // never leaks into the caller's code.
        restore_error_handler();
    }

    // NOTE(review): $document is always an object here and objects are never
    // empty(), so this guard looks unreachable; kept as a defensive check.
    if (empty($document)) {
        throw new \QueryPath\ParseException('Unknown parser exception.');
    }

    return $document;
}