/**
 * Re-indents an HTML string, emitting one token (tag, element or text node) per line.
 *
 * Processing steps:
 *  1. Every <script> element is swapped for a numbered placeholder so its body is left untouched.
 *  2. Tabs are removed and runs of whitespace are collapsed to a single space.
 *  3. Inline elements (as listed in $this->inline_elements) that contain only text are swapped
 *     for numbered placeholders so that they stay on the line of the surrounding text.
 *  4. The remaining markup is consumed token by token; each token is written on its own line,
 *     prefixed with the current indentation.
 *  5. The placeholders are swapped back for the original markup.
 *
 * Every consumed token is recorded in $this->log.
 *
 * @param string $input HTML markup to indent.
 * @return string The indented markup with leading and trailing whitespace trimmed.
 * @throws Exception\RuntimeException When the consumed tokens do not add up to the normalised input.
 */
public function indent($input)
{
    $this->log = array();
    // Start from a clean slate: without this, placeholders captured by a previous call would
    // still be substituted into the output of a call whose input has no scripts/inline elements.
    $this->temporary_replacements_script = array();
    $this->temporary_replacements_inline = array();

    // Dindent does not indent <script> body. Instead, it temporarily removes it from the code,
    // indents the input, and restores the script body.
    // Note: str_replace() swaps every occurrence, so identical scripts share one placeholder.
    if (preg_match_all('/<script\\b[^>]*>([\\s\\S]*?)<\\/script>/mi', $input, $matches)) {
        $this->temporary_replacements_script = $matches[0];
        foreach ($matches[0] as $i => $match) {
            $input = str_replace($match, '<script>' . ($i + 1) . '</script>', $input);
        }
    }

    // Removing double whitespaces to make the source code easier to read.
    // With exception of <pre>/ CSS white-space changing the default behaviour, double whitespace
    // is meaningless in HTML output.
    // This reason alone is sufficient not to use Dindent in production.
    $input = str_replace("\t", '', $input);
    $input = preg_replace('/\\s{2,}/', ' ', $input);

    // Remove inline elements and replace them with text entities.
    if (preg_match_all('/<(' . implode('|', $this->inline_elements) . ')[^>]*>(?:[^<]*)<\\/\\1>/', $input, $matches)) {
        $this->temporary_replacements_inline = $matches[0];
        foreach ($matches[0] as $i => $match) {
            $input = str_replace($match, 'ᐃ' . ($i + 1) . 'ᐃ', $input);
        }
    }

    // Tokenizer rules, tried in order; the first pattern that matches wins.
    // Both tables are constant, so they are built once rather than on every iteration.
    $patterns = array(
        // Element holding only text, with a matching closing tag.
        '/^(<([a-z]+)(?:[^>]*)>(?:[^<]*)<\\/(?:\\2)>)/' => static::MATCH_INDENT_NO,
        // Markup starting with "<!".
        '/^<!([^>]*)>/' => static::MATCH_INDENT_NO,
        // Listed tags are emitted without changing the indentation level.
        '/^<(input|link|meta|base|br|img|source|hr)([^>]*)>/' => static::MATCH_INDENT_NO,
        // Listed tags written in self-closing form.
        '/^<(animate|stop|path|circle|line|polyline|rect|use)([^>]*)\\/>/' => static::MATCH_INDENT_NO,
        // Any other tag not starting with "</": following lines are indented one level deeper.
        '/^<[^\\/]([^>]*)>/' => static::MATCH_INDENT_INCREASE,
        // Closing tag: this line and the following ones move one level back.
        '/^<\\/([^>]*)>/' => static::MATCH_INDENT_DECREASE,
        // Self-closing form not caught by any rule above.
        '/^<(.+)\\/>/' => static::MATCH_INDENT_DECREASE,
        // Leading whitespace is consumed but not written to the output.
        '/^(\\s+)/' => static::MATCH_DISCARD,
        // Text node (deliberately last; note that it is not anchored to the start).
        '/([^<]+)/' => static::MATCH_INDENT_NO,
    );
    // Human-readable rule names for the log, indexed by the MATCH_* constant value.
    $rules = array('NO', 'DECREASE', 'INCREASE', 'DISCARD');

    $subject = $input;
    $output = '';
    $next_line_indentation_level = 0;

    do {
        $matched = false;
        $indentation_level = $next_line_indentation_level;

        foreach ($patterns as $pattern => $rule) {
            if (preg_match($pattern, $subject, $matches) !== 1) {
                continue;
            }
            $matched = true;

            $this->log[] = array(
                'rule' => $rules[$rule],
                'pattern' => $pattern,
                'subject' => $subject,
                'match' => $matches[0],
            );

            // The patterns carry no /u modifier, so preg_match() works on bytes; advance by the
            // byte length of the match. The cast covers PHP versions where substr() returns
            // false once the whole subject has been consumed.
            $subject = (string) substr($subject, strlen($matches[0]));

            if ($rule === static::MATCH_DISCARD) {
                break;
            }

            if ($rule === static::MATCH_INDENT_DECREASE) {
                $next_line_indentation_level--;
                $indentation_level--;
            } elseif ($rule !== static::MATCH_INDENT_NO) {
                $next_line_indentation_level++;
            }

            // Unbalanced closing tags must not push the level below zero; clamping the carried
            // level as well keeps the markup that follows a stray closing tag properly nested.
            $indentation_level = max(0, $indentation_level);
            $next_line_indentation_level = max(0, $next_line_indentation_level);

            $output .= str_repeat($this->options['indentation_character'], $indentation_level) . $matches[0] . "\n";
            break;
        }
    } while ($matched);

    // Sanity check: the logged tokens, concatenated, must reproduce the normalised input exactly.
    $interpreted_input = '';
    foreach ($this->log as $e) {
        $interpreted_input .= $e['match'];
    }
    if ($interpreted_input !== $input) {
        throw new Exception\RuntimeException('Did not reproduce the exact input.');
    }

    // Pull the closing tag of an empty element back onto the line of its opening tag.
    $output = preg_replace('/(<(\\w+)[^>]*>)\\s*(<\\/\\2>)/', '\\1\\3', $output);

    // Swap the placeholders back for the original markup.
    foreach ($this->temporary_replacements_script as $i => $original) {
        $output = str_replace('<script>' . ($i + 1) . '</script>', $original, $output);
    }
    foreach ($this->temporary_replacements_inline as $i => $original) {
        $output = str_replace('ᐃ' . ($i + 1) . 'ᐃ', $original, $output);
    }

    return trim($output);
}