Jyxo\Html::repair PHP Method

repair() public static method

Requires the Tidy PHP extension.
public static repair ( string $html ) : string
$html string Input HTML source
return string
    /**
     * Repairs an HTML source with Tidy and normalizes the result for later processing.
     *
     * Requires the Tidy PHP extension (uses tidy_repair_string()).
     *
     * After Tidy has run, the method:
     *  - removes MS Word "xml:namespace" processing instructions,
     *  - converts line breaks inside <pre> elements to <br /> and strips every other line break,
     *  - rewrites single-quoted attribute values to double-quoted ones,
     *  - trims leading/trailing whitespace inside attribute values.
     *
     * Each regex step keeps the previous value when PCRE reports an error (preg_* returns null),
     * so a failing cleanup step is skipped instead of discarding the whole document.
     *
     * @param string $html Input HTML source
     * @return string Repaired HTML source without CR/LF characters
     */
    public static function repair(string $html) : string
    {
        // HTML fixing
        static $config = [
            'newline' => 'LF',
            'indent' => false,
            'output-xhtml' => true,
            'output-bom' => false,
            'doctype' => 'auto',
            'bare' => true,
            'wrap' => 0,
            'wrap-sections' => false,
            'enclose-text' => true,
            'merge-divs' => false,
            'merge-spans' => false,
            'force-output' => true,
            'show-errors' => 0,
            'show-warnings' => false,
            'escape-cdata' => true,
            'preserve-entities' => true,
        ];
        $html = tidy_repair_string($html, $config, 'utf8');

        // Removes namespace <?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /? > generated by MS Word
        $html = preg_replace('~<\\?xml:namespace[^>]*>~i', '', $html) ?? $html;

        // Removes unnecessary line breaks and keeps them inside <pre> elements
        // Tidy adds one more line break right after <pre> and right before </pre>
        $html = preg_replace("~(<pre[^>]*>)\n~", '\\1', $html) ?? $html;
        $html = preg_replace("~\n</pre>~", '</pre>', $html) ?? $html;

        // Line breaks inside <pre> are turned into <br /> so they survive the stripping below.
        // The matched content is passed through verbatim: preg_replace_callback() does not
        // escape quotes, so nothing must be un-escaped here (doing so would corrupt a literal \").
        $html = preg_replace_callback(
            '~(<pre[^>]*>)(.+?)(</pre>)~s',
            static function (array $matches) : string {
                return $matches[1] . nl2br($matches[2]) . $matches[3];
            },
            $html
        ) ?? $html;

        // Strip line breaks
        $html = strtr($html, ["\r" => '', "\n" => '']);

        // Replace single quotes with double quotes (for easier processing later)
        // NOTE(review): the pattern is anchored on the opening "<", so it appears to rewrite
        // at most one attribute per tag in a single call - confirm whether that is sufficient.
        $html = preg_replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+=)\'([^\']*)\'~i', '\\1"\\2"', $html) ?? $html;

        // Remove unnecessary spaces inside elements (for easier processing later):
        // first the leading whitespace of an attribute value, then the trailing whitespace
        $html = preg_replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+=")\\s+([^"]*")~i', '\\1\\2', $html) ?? $html;
        $html = preg_replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+="[^"]*)\\s+(")~i', '\\1\\2', $html) ?? $html;

        return $html;
    }