/**
 * Repairs HTML with Tidy and normalizes the result for easier later processing.
 *
 * Processing steps, in order:
 *  1. Tidy repairs the markup and emits XHTML.
 *  2. `<?xml:namespace ...>` leftovers generated by MS Word are removed.
 *  3. Line breaks inside <pre> elements are converted to <br /> elements.
 *  4. Every remaining CR and LF character is stripped.
 *  5. Attribute values are normalized (double quotes, no leading/trailing whitespace).
 *
 * The regular-expression steps are best effort: when PCRE reports an error
 * for one of them, that step is skipped and the markup is left as it was.
 *
 * @param string $html HTML source to repair
 * @return string Repaired HTML containing no CR/LF characters
 * @throws \RuntimeException If Tidy fails to process the input
 */
public static function repair(string $html) : string
{
    // Tidy configuration
    static $config = [
        'newline' => 'LF',              // Use LF line endings
        'indent' => false,              // Do not indent the output
        'output-xhtml' => true,         // Emit XHTML
        'output-bom' => false,          // Never write a BOM
        'doctype' => 'auto',            // Let Tidy choose the doctype
        'bare' => true,                 // Strip MS Word specific markup
        'wrap' => 0,                    // Disable line wrapping
        'wrap-sections' => false,       // Do not wrap inside <![ ... ]> sections
        'enclose-text' => true,         // Wrap loose text in the body into <p>
        'merge-divs' => false,          // Keep nested <div> elements
        'merge-spans' => false,         // Keep nested <span> elements
        'force-output' => true,         // Produce output even when errors were found
        'show-errors' => 0,             // Do not report errors
        'show-warnings' => false,       // Do not report warnings
        'escape-cdata' => true,         // Convert CDATA sections to plain text
        'preserve-entities' => true,    // Keep well-formed entities untouched
    ];

    // HTML fixing; tidy_repair_string() returns false on failure
    $repaired = tidy_repair_string($html, $config, 'utf8');
    if ($repaired === false) {
        throw new \RuntimeException('Tidy failed to repair the given HTML.');
    }
    $html = $repaired;

    // preg_replace() returns null on a PCRE error (e.g. backtrack limit reached);
    // in that case the step is skipped instead of losing the whole document
    $replace = static function (string $pattern, string $replacement, string $subject) : string {
        return preg_replace($pattern, $replacement, $subject) ?? $subject;
    };

    // Removes namespace <?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /? > generated by MS Word
    $html = $replace('~<\\?xml:namespace[^>]*>~i', '', $html);

    // Tidy adds one extra line break right after <pre> and right before </pre>
    $html = $replace("~(<pre[^>]*>)\n~", '\\1', $html);
    $html = $replace("~\n</pre>~", '</pre>', $html);

    // Keeps line breaks inside <pre> elements by turning them into <br />.
    // The matched content is used as is: the callback receives raw, unescaped
    // text, so a literal \" inside <pre> must not be altered.
    $html = preg_replace_callback(
        '~(<pre[^>]*>)(.+?)(</pre>)~s',
        static function (array $matches) : string {
            return $matches[1] . nl2br($matches[2]) . $matches[3];
        },
        $html
    ) ?? $html;

    // Strip line breaks
    $html = strtr($html, ["\r" => '', "\n" => '']);

    // Replace single quotes with double quotes (for easier processing later)
    $html = $replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+=)\'([^\']*)\'~i', '\\1"\\2"', $html);

    // Remove unnecessary spaces at the start and at the end of attribute values
    // (for easier processing later)
    $html = $replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+=")\\s+([^"]*")~i', '\\1\\2', $html);
    $html = $replace('~(<[a-z][a-z0-9]*[^>]+[a-z]+="[^"]*)\\s+(")~i', '\\1\\2', $html);

    return $html;
}