HTMLPurifier_Lexer::normalize PHP Method

normalize() public method

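Takes a piece of HTML and normalizes it: optionally unifies newlines, escapes CDATA sections, removes IE conditionals, optionally extracts the document body, expands non-special entities, cleans the result into well-formed UTF-8, and optionally strips processing instructions.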
public normalize ( string $html, HTMLPurifier_Config $config, HTMLPurifier_Context $context ) : string
$html string HTML.
$config HTMLPurifier_Config
$context HTMLPurifier_Context
return string
    /**
     * Normalizes a piece of HTML before further lexing.
     *
     * Steps, in order: optional newline normalization, CDATA escaping
     * (preceded by commented-CDATA escaping when HTML.Trusted is on),
     * removal of IE conditionals, optional body extraction, expansion of
     * non-special entities, UTF-8 cleanup, and optional removal of
     * processing instructions.
     *
     * @param string $html HTML.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string Normalized HTML.
     */
    public function normalize($html, $config, $context)
    {
        // normalize newlines to \n; the search array is applied in order,
        // so "\r\n" pairs are collapsed before lone "\r" characters
        if ($config->get('Core.NormalizeNewlines')) {
            $html = str_replace(array("\r\n", "\r"), "\n", $html);
        }

        if ($config->get('HTML.Trusted')) {
            // escape convoluted CDATA
            $html = $this->escapeCommentedCDATA($html);
        }

        // escape CDATA
        $html = $this->escapeCDATA($html);

        $html = $this->removeIEConditional($html);

        // extract body from document if applicable
        if ($config->get('Core.ConvertDocumentToFragment')) {
            $e = false;
            if ($config->get('Core.CollectErrors')) {
                // NOTE(review): reference assignment kept from the original;
                // presumably get() returns by reference -- confirm
                $e =& $context->get('ErrorCollector');
            }
            $new_html = $this->extractBody($html);
            // strict comparison: loose != may treat distinct numeric-looking
            // strings as equal and suppress the warning
            if ($e && $new_html !== $html) {
                $e->send(E_WARNING, 'Lexer: Extracted body');
            }
            $html = $new_html;
        }

        // expand entities that aren't the big five
        $html = $this->_entity_parser->substituteNonSpecialEntities($html);

        // clean into wellformed UTF-8 string for an SGML context: this has
        // to be done after entity expansion because the entities sometimes
        // represent non-SGML characters (horror, horror!)
        $html = HTMLPurifier_Encoder::cleanUTF8($html);

        // if processing instructions are to be removed, remove them now
        if ($config->get('Core.RemoveProcessingInstructions')) {
            $stripped = preg_replace('#<\\?.+?\\?>#s', '', $html);
            if ($stripped === null) {
                // preg_replace() returns null on a PCRE runtime error (for
                // example an exhausted backtrack limit); keep the unstripped
                // HTML rather than returning null and losing the document
                trigger_error(
                    'Lexer: could not remove processing instructions (PCRE error '
                    . preg_last_error() . ')',
                    E_USER_WARNING
                );
            } else {
                $html = $stripped;
            }
        }

        return $html;
    }