/**
 * Auto Typography
 *
 * Runs a block of text through the typography pipeline implemented in this
 * method:
 *
 *   1. normalise newlines (and optionally collapse runs of blank lines),
 *   2. pull HTML comments out and replace them with {@HCn} placeholders,
 *   3. protect characters inside <pre> blocks, tags and (optionally) braces
 *      via _protect_characters(),
 *   4. split the text at every tag and pass the non-tag chunks through
 *      _format_newlines(),
 *   5. run format_characters() over the result,
 *   6. put the HTML comments back and apply a final table of clean-up
 *      replacements that also restores the temporary {@...} markers.
 *
 * Side effect: $this->last_block_element is updated with the name of each
 * opening block-level tag encountered while walking the chunks.
 *
 * @param	string	$str			Text to format
 * @param	bool	$reduce_linebreaks	Only a strict TRUE enables it: collapses
 *						three or more consecutive newlines to two
 *						and removes empty paragraphs instead of
 *						filling them
 * @return	string	The formatted text ('' when $str is the empty string)
 */
public function auto_typography($str, $reduce_linebreaks = FALSE)
{
	if ($str === '') {
		return '';
	}

	// Standardize Newlines to make matching easier
	if (strpos($str, "\r") !== FALSE) {
		$str = str_replace(array("\r\n", "\r"), "\n", $str);
	}

	// Reduce line breaks. If there are more than two consecutive linebreaks
	// we'll compress them down to a maximum of two since there's no benefit to more.
	if ($reduce_linebreaks === TRUE) {
		$str = preg_replace("/\n\n+/", "\n\n", $str);
	}

	// HTML comment tags don't conform to patterns of normal tags, so pull them out separately, only if needed
	$html_comments = array();
	if (strpos($str, '<!--') !== FALSE && preg_match_all('#(<!\\-\\-.*?\\-\\->)#s', $str, $matches)) {
		for ($i = 0, $total = count($matches[0]); $i < $total; $i++) {
			// The placeholder index is the position in $html_comments, which is
			// what the restore loop further down relies on.
			$html_comments[] = $matches[0][$i];
			$str = str_replace($matches[0][$i], '{@HC' . $i . '}', $str);
		}
	}

	// match and yank <pre> tags if they exist. It's cheaper to do this separately since most content will
	// not contain <pre> tags, and it keeps the PCRE patterns below simpler and faster
	if (strpos($str, '<pre') !== FALSE) {
		$str = preg_replace_callback('#<pre.*?>.*?</pre>#si', array($this, '_protect_characters'), $str);
	}

	// Convert quotes within tags to temporary markers.
	$str = preg_replace_callback('#<.+?>#si', array($this, '_protect_characters'), $str);

	// Do the same with braces if necessary
	if ($this->protect_braced_quotes === TRUE) {
		$str = preg_replace_callback('#\\{.+?\\}#si', array($this, '_protect_characters'), $str);
	}

	// Convert "ignore" tags to temporary marker. The parser splits out the string at every tag
	// it encounters. Certain inline tags, like image tags, links, span tags, etc. will be
	// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
	$str = preg_replace('#<(/*)(' . $this->inline_elements . ')([ >])#i', '{@TAG}\\1\\2\\3', $str);

	/* Split the string at every tag. This expression creates an array with this prototype:
	 *
	 *	[array]
	 *	{
	 *		[0] = <opening tag>
	 *		[1] = Content...
	 *		[2] = <closing tag>
	 *		Etc...
	 *	}
	 */
	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

	// Build our finalized string. We cycle through the array, skipping tags, and processing the contained text
	$str = '';
	$process = TRUE;
	for ($i = 0, $c = count($chunks) - 1; $i <= $c; $i++) {
		// Are we dealing with a tag? If so, we'll skip the processing for this cycle.
		// We'll also set the "process" flag which allows us to skip <pre> tags and a few other things.
		if (preg_match('#<(/*)(' . $this->block_elements . ').*?>#', $chunks[$i], $match)) {
			// For a "skip" element: an opening tag turns processing off,
			// its closing tag ($match[1] === '/') turns it back on.
			if (preg_match('#' . $this->skip_elements . '#', $match[2])) {
				$process = $match[1] === '/';
			}

			// Remember the most recent *opening* block element.
			if ($match[1] === '') {
				$this->last_block_element = $match[2];
			}

			$str .= $chunks[$i];
			continue;
		}

		// Inside a skipped element: copy the text through untouched.
		if ($process === FALSE) {
			$str .= $chunks[$i];
			continue;
		}

		// Force a newline to make sure end tags get processed by _format_newlines()
		if ($i === $c) {
			$chunks[$i] .= "\n";
		}

		// Convert Newlines into <p> and <br /> tags
		$str .= $this->_format_newlines($chunks[$i]);
	}

	// No opening block level tag? Add it if needed.
	if (!preg_match('/^\\s*<(?:' . $this->block_elements . ')/i', $str)) {
		$str = preg_replace('/^(.*?)<(' . $this->block_elements . ')/i', '<p>$1</p><$2', $str);
	}

	// Convert quotes, ellipsis, em-dashes, non-breaking spaces, and ampersands
	$str = $this->format_characters($str);

	// restore HTML comments
	for ($i = 0, $total = count($html_comments); $i < $total; $i++) {
		// The comment is used as a preg_replace() *replacement* string, in which
		// "$n", "${n}" and "\n" are backreferences. Escape backslashes and dollar
		// signs so the comment is restored verbatim (otherwise e.g. "$100" inside
		// a comment would be silently mangled).
		$comment = addcslashes($html_comments[$i], '\\$');

		// remove surrounding paragraph tags, but only if there's an opening paragraph tag
		// otherwise HTML comments at the ends of paragraphs will have the closing tag removed
		// if '<p>{@HC1}' then replace <p>{@HC1}</p> with the comment, else replace only {@HC1} with the comment
		$str = preg_replace('#(?(?=<p>\\{@HC' . $i . '\\})<p>\\{@HC' . $i . '\\}(\\s*</p>)|\\{@HC' . $i . '\\})#s', $comment, $str);
	}

	// Final clean up. Patterns are applied in the order listed.
	// NOTE(review): '[^>*?]' in the first pattern is a single-character class; it
	// looks like '[^>]*?' may have been intended - confirm before changing, since
	// a broader pattern would also match tags such as <pre>.
	// NOTE(review): the {@NBS} replacement (and the empty-paragraph filler below)
	// are described as non-breaking spaces - confirm the literal characters in
	// this file are what is intended.
	$table = array(
		'/(<p[^>*?]>)<p>/' => '$1',
		'#(</p>)+#' => '</p>',
		'/(<p>\\W*<p>)+/' => '<p>',
		'#<p></p><(' . $this->block_elements . ')#' => '<$1',
		'#( \\s*)+<(' . $this->block_elements . ')#' => ' <$2',
		'/\\{@TAG\\}/' => '<',
		'/\\{@DQ\\}/' => '"',
		'/\\{@SQ\\}/' => "'",
		'/\\{@DD\\}/' => '--',
		'/\\{@NBS\\}/' => ' ',
		"/><p>\n/" => ">\n<p>",
		'#</p></#' => "</p>\n</"
	);

	// Do we need to reduce empty lines?
	if ($reduce_linebreaks === TRUE) {
		$table['#<p>\\n*</p>#'] = '';
	} else {
		// If we have empty paragraph tags we add a non-breaking space
		// otherwise most browsers won't treat them as true paragraphs
		$table['#<p></p>#'] = '<p> </p>';
	}

	return preg_replace(array_keys($table), $table, $str);
}