protected function _sanitize_naughty_html($matches)
{
static $naughty_tags = array('alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound', 'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer', 'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object', 'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss');
static $evil_attributes = array('on\\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime');
// First, escape unclosed tags
if (empty($matches['closeTag'])) {
return '<' . $matches[1];
} elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE)) {
return '<' . $matches[1] . '>';
} elseif (isset($matches['attributes'])) {
// We'll store the already fitlered attributes here
$attributes = array();
// Attribute-catching pattern
$attributes_pattern = '#' . '(?<name>[^\\s\\042\\047>/=]+)' . '(?:\\s*=(?<value>[^\\s\\042\\047=><`]+|\\s*\\042[^\\042]*\\042|\\s*\\047[^\\047]*\\047|\\s*(?U:[^\\s\\042\\047=><`]*)))' . '#i';
// Blacklist pattern for evil attribute names
$is_evil_pattern = '#^(' . implode('|', $evil_attributes) . ')$#i';
// Each iteration filters a single attribute
do {
// Strip any non-alpha characters that may preceed an attribute.
// Browsers often parse these incorrectly and that has been a
// of numerous XSS issues we've had.
$matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);
if (!preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE)) {
// No (valid) attribute found? Discard everything else inside the tag
break;
}
if (preg_match($is_evil_pattern, $attribute['name'][0]) or trim($attribute['value'][0]) === '') {
$attributes[] = 'xss=removed';
} else {
$attributes[] = $attribute[0][0];
}
$matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
} while ($matches['attributes'] !== '');
$attributes = empty($attributes) ? '' : ' ' . implode(' ', $attributes);
return '<' . $matches['slash'] . $matches['tagName'] . $attributes . '>';
}
return $matches[0];
}