Habari\Format::autop PHP Method

autop() public static method

New lines within the text of block elements are converted to linebreaks. New lines before and after tags are stripped. If you make changes to this, PLEASE add test cases here: http://svn.habariproject.org/habari/trunk/tests/data/autop/
public static autop ( string $value )
$value string The string to apply the formatting to
    /**
     * Wrap loose text in paragraphs and turn single newlines into line breaks.
     *
     * New lines within the text of block elements are converted to linebreaks.
     * New lines before and after tags are stripped. Elements listed in
     * $no_auto_p are copied to the output verbatim, together with everything
     * nested inside them (including nested elements of the same name).
     *
     * If you make changes to this, PLEASE add test cases here:
     * http://svn.habariproject.org/habari/trunk/tests/data/autop/
     *
     * @param string $value The string to apply the formatting to
     * @return string The formatted string
     */
    public static function autop($value)
    {
        $value = str_replace("\r\n", "\n", $value);
        $value = trim($value);
        $ht = new HtmlTokenizer($value, false);
        $set = $ht->parse();
        $value = '';

        $type_open = HtmlTokenizer::NODE_TYPE_ELEMENT_OPEN;
        $type_close = HtmlTokenizer::NODE_TYPE_ELEMENT_CLOSE;
        $type_empty = HtmlTokenizer::NODE_TYPE_ELEMENT_EMPTY;
        $type_text = HtmlTokenizer::NODE_TYPE_TEXT;

        // should never autop ANY content in these items
        $no_auto_p = array('pre', 'code', 'ul', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'object', 'applet', 'embed', 'table', 'ol', 'li', 'i', 'b', 'em', 'strong', 'script', 'dl', 'dt', 'dd');
        // elements that may not live inside an automatically opened <p>
        $block_elements = array('address', 'blockquote', 'center', 'dir', 'div', 'dl', 'fieldset', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'isindex', 'menu', 'noframes', 'object', 'applet', 'embed', 'noscript', 'ol', 'p', 'pre', 'table', 'ul', 'figure', 'figcaption');

        $token = $set->current();
        // There are no tokens in the text being formatted
        if ($token === false) {
            return $value;
        }

        $open_p = false;
        do {
            $type = $token['type'];
            $is_open = $type == $type_open;
            $is_close = $type == $type_close;
            $is_empty = $type == $type_empty;
            $is_tag = $is_open || $is_close || $is_empty;
            // the name is only consulted for element tokens
            $name = $is_tag ? strtolower($token['name']) : null;
            $is_block = $is_tag && in_array($name, $block_elements, true);

            // a block-level tag ends the paragraph that is currently open
            if ($open_p && $is_block) {
                // an explicit </p> is emitted further down, so do not write a second one
                if ($name !== 'p' || !$is_close) {
                    $value .= '</p>';
                }
                $open_p = false;
            }

            if (($is_open || $is_empty) && !$is_block && !$open_p) {
                // first element, is not a block element
                $value .= '<p>';
                $open_p = true;
            }

            // no-autop, pass them through verbatim
            if ($is_open && in_array($name, $no_auto_p, true)) {
                // Count nested elements of the same name so that e.g. a list
                // inside a list does not end the verbatim section at the
                // inner closing tag.
                $depth = 0;
                $nested_token = $token;
                do {
                    $value .= HtmlTokenSet::token_to_string($nested_token, false);
                    $nested_open = $nested_token['type'] == $type_open;
                    $nested_close = $nested_token['type'] == $type_close;
                    if (($nested_open || $nested_close) && strtolower($nested_token['name']) === $name) {
                        $depth += $nested_open ? 1 : -1;
                        if ($depth <= 0) {
                            break;
                        }
                    }
                } while ($nested_token = $set->next());
                continue;
            }

            // anything that's not a text node should get passed through
            if ($type != $type_text) {
                $value .= HtmlTokenSet::token_to_string($token, true);
                // If the token itself is p, we need to set $open_p
                if ($is_open && $name === 'p') {
                    $open_p = true;
                }
                continue;
            }

            // if we get this far, token type is text
            $local_value = $token['value'];
            if (MultiByte::strlen($local_value)) {
                if (!$open_p) {
                    $local_value = '<p>' . ltrim($local_value);
                    $open_p = true;
                }
                // at least two \n in a row (allow whitespace in between)
                $local_value = preg_replace('/\\s*(\\n\\s*){2,}/u', "</p><p>", $local_value);
                // nl2br
                $local_value = str_replace("\n", "<br>", $local_value);
            }
            $value .= $local_value;
        } while ($token = $set->next());

        // NOTE(review): both clean-ups run before the final </p> is appended,
        // so a trailing "<p><!--...-->" is not unwrapped - confirm this is intended.
        // replace <p></p>
        $value = preg_replace('#\\s*<p></p>\\s*#u', '', $value);
        // replace <p></p> around comments
        $value = preg_replace('/<p><!--(.*?)--><\\/p>/', "<!--\\1-->", $value);

        if ($open_p) {
            $value .= '</p>';
        }
        return $value;
    }