/**
 * Converts an HTML string into plain text.
 *
 * The input is pre-processed (non-breaking spaces, Office paragraph tags,
 * newlines), parsed with DOMDocument, rendered by iterateOverNode(), and the
 * resulting text is whitespace-normalised.
 *
 * @param string $html the HTML to convert
 * @return string the text output, with per-line padding stripped, runs of
 *                blank lines collapsed to one, and outer whitespace trimmed;
 *                an empty string when the input contains nothing but whitespace
 * @throws Html2TextException if DOMDocument::loadHTML() reports failure
 */
static function convert($html)
{
    // Replace non-breaking spaces with regular spaces: both the `&nbsp;`
    // entity and the raw UTF-8 byte sequence for U+00A0. Written as explicit
    // escapes so the intent does not depend on an invisible character.
    $html = str_replace(array("&nbsp;", "\xc2\xa0"), " ", $html);

    if (static::isOfficeDocument($html)) {
        // remove office namespace paragraph tags
        $html = str_replace(array("<o:p>", "</o:p>"), "", $html);
    }

    $html = static::fixNewlines($html);

    // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8, a
    // warning plus `false` before that); there is nothing to convert anyway.
    if (trim($html) === "") {
        return "";
    }

    if (mb_detect_encoding($html, "UTF-8", true)) {
        // Encode every non-ASCII character as a numeric entity so the parser
        // does not have to guess the byte encoding. This replaces
        // mb_convert_encoding(..., "HTML-ENTITIES", ...), deprecated in PHP 8.2.
        $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), "UTF-8");
    }

    $doc = new \DOMDocument();
    if (!$doc->loadHTML($html)) {
        throw new Html2TextException("Could not load HTML - badly formed?", $html);
    }

    if (static::isOfficeDocument($html)) {
        // Office-specific clean-up of the parsed document; see fixMSEncoding()
        $doc = static::fixMSEncoding($doc);
    }

    $output = static::iterateOverNode($doc);

    // remove leading and trailing spaces on each line
    $output = preg_replace("/[ \t]*\n[ \t]*/im", "\n", $output);
    // remove spaces surrounding tab characters
    $output = preg_replace("/ *\t */im", "\t", $output);

    // remove unnecessary empty lines
    $output = preg_replace("/\n\n\n*/im", "\n\n", $output);

    // remove leading and trailing whitespace
    return trim($output);
}