League\HTMLToMarkdown\HtmlConverter::sanitize PHP Method

sanitize() protected method

protected sanitize ( string $markdown ) : string
$markdown string
return string
    /**
     * Post-process a converted string before it is handed back.
     *
     * Steps, in order: decode HTML entities, drop a DOCTYPE declaration,
     * trim surrounding whitespace, strip the wrapper tags listed in
     * $unwanted when they sit at the very start (opening tags) or very end
     * (closing tags) of the string, and finally trim leading/trailing line
     * breaks and vertical tabs.
     *
     * @param string $markdown
     *
     * @return string
     */
    protected function sanitize($markdown)
    {
        $markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8');

        // Strip doctype declaration. preg_replace() returns null on a PCRE
        // error (e.g. backtrack limit on very large input); in that case keep
        // the string as-is instead of letting null wipe out the whole document.
        $withoutDoctype = preg_replace('/<!DOCTYPE [^>]+>/', '', $markdown);
        if ($withoutDoctype !== null) {
            $markdown = $withoutDoctype;
        }

        // Remove whitespace surrounding the html
        $markdown = trim($markdown);

        /*
         * Removing unwanted tags. Tags should be added to the array in the order they are expected.
         * XML, html and body opening tags should be in that order. Same case with closing tags
         */
        $unwanted = array('<?xml encoding="UTF-8">', '<html>', '</html>', '<body>', '</body>', '<head>', '</head>', '&#xD;');
        foreach ($unwanted as $tag) {
            $tagLength = strlen($tag);

            if (strpos($tag, '/') === false) {
                // Opening tags: only strip when the string starts with the tag
                if (strncmp($markdown, $tag, $tagLength) === 0) {
                    $markdown = substr($markdown, $tagLength);
                }
            } else {
                // Closing tags: only strip when the string ends with the tag.
                // Compare the actual suffix rather than the first strpos() hit,
                // otherwise an earlier occurrence of the same tag text (for
                // instance one produced by entity decoding) hides the trailing one.
                if (substr($markdown, -$tagLength) === $tag) {
                    $markdown = substr($markdown, 0, -$tagLength);
                }
            }
        }

        // Drop leftover line breaks / vertical tabs at both ends
        $markdown = trim($markdown, "\n\r\v");

        return $markdown;
    }