PHPHtmlParser\Dom::clean PHP Method

clean() protected method

Cleans the HTML of any non-HTML information.
protected clean ( string $str ) : string
$str string
return string
    /**
     * Cleans the html of any non-html information.
     *
     * Returns the input untouched unless the 'cleanupInput' option is truthy.
     * Otherwise the following steps are applied, in this order:
     *  1. whitespace between a quote character and '>' is removed;
     *  2. line breaks (\r\n, \r, \n) become a single space, or a line feed
     *     when the 'preserveLineBreaks' option is truthy;
     *  3. the doctype, comments and CDATA sections are stripped;
     *  4. script / style elements are stripped when the 'removeScripts' /
     *     'removeStyles' options are truthy;
     *  5. server side blocks (opened by '<' + '?') and smarty-style blocks
     *     ('{' immediately followed by a word character, up to the next '}')
     *     are stripped.
     *
     * A regex step that fails is skipped (the string is left as it was before
     * that step) so that a string is always handed back to the caller.
     *
     * @param string $str raw markup
     * @return string cleaned markup
     */
    protected function clean($str)
    {
        if (!$this->options->get('cleanupInput')) {
            // skip entire cleanup step
            return $str;
        }

        // mb_eregi_replace() returns false/null instead of a string when it
        // fails (e.g. the subject is not valid in the current regex encoding).
        // Keep the previous value in that case rather than letting a
        // non-string leak into the remaining steps and the return value.
        $strip = static function ($pattern, $replacement, $subject) {
            $result = mb_eregi_replace($pattern, $replacement, $subject);

            return is_string($result) ? $result : $subject;
        };

        // remove white space between a quote and the end of a tag
        $str = $strip("'\\s+>", "'>", $str);
        $str = $strip('"\\s+>', '">', $str);

        // clean out the \n\r
        // NOTE(review): the replacement used when preserving line breaks is a
        // plain line feed, i.e. line endings are only normalised to LF.
        // Confirm this was not meant to be an entity such as &#10;.
        $lineBreak = $this->options->get('preserveLineBreaks') ? "\n" : ' ';
        $str = str_replace(["\r\n", "\r", "\n"], $lineBreak, $str);

        // strip the doctype
        $str = $strip("<!doctype(.*?)>", '', $str);

        // strip out comments
        $str = $strip("<!--(.*?)-->", '', $str);

        // strip out cdata
        $str = $strip("<!\\[CDATA\\[(.*?)\\]\\]>", '', $str);

        // strip out <script> tags (first with attributes, then bare)
        if ($this->options->get('removeScripts')) {
            $str = $strip("<\\s*script[^>]*[^/]>(.*?)<\\s*/\\s*script\\s*>", '', $str);
            $str = $strip("<\\s*script\\s*>(.*?)<\\s*/\\s*script\\s*>", '', $str);
        }

        // strip out <style> tags (first with attributes, then bare)
        if ($this->options->get('removeStyles')) {
            $str = $strip("<\\s*style[^>]*[^/]>(.*?)<\\s*/\\s*style\\s*>", '', $str);
            $str = $strip("<\\s*style\\s*>(.*?)<\\s*/\\s*style\\s*>", '', $str);
        }

        // strip out server side scripts
        $str = $strip("(<\\?)(.*?)(\\?>)", '', $str);

        // strip smarty scripts
        $str = $strip("(\\{\\w)(.*?)(\\})", '', $str);

        return $str;
    }