/**
 * Filter an HTML fragment against the element and attribute whitelists.
 *
 * The input is tokenized and every token is handled according to its type:
 *  - text: entities in the value are decoded (ENT_QUOTES, MultiByte::hab_encoding());
 *  - opening / empty element: if the lowercased name is not in
 *    self::$whitelist_elements the tag is replaced by a text node holding its
 *    string form (HTMLTokenSet::token_to_string()); otherwise every attribute
 *    that is not accepted by the global ('*') or the per-element entry of
 *    self::$whitelist_attributes (via self::check_attr_value()) is removed;
 *  - closing element: replaced by a text node when the element is not whitelisted;
 *  - processing instructions, comments, CDATA sections, statements and any
 *    unknown token type: dropped.
 *
 * The surviving tokens are serialised by casting the HTMLTokenSet to a string,
 * after which one regex pass strips elements that are opened and immediately
 * closed again (e.g. <p class="x"></p>).
 *
 * NOTE(review): text is stored entity-decoded and rejected tags are stored as
 * raw text; presumably HTMLTokenSet escapes text nodes when cast to string --
 * confirm against HTMLTokenSet.
 *
 * @param string $str The HTML to filter.
 * @return string|null The filtered markup. preg_replace() returns null on a
 *     PCRE error (for instance malformed UTF-8 under the /u modifier), and that
 *     value is passed through unchanged.
 */
public static function filter_html_elements($str)
{
    $tokenizer = new HTMLTokenizer($str);
    $tokens = $tokenizer->parse();

    // Collect the tokens that survive filtering.
    $filtered = new HTMLTokenSet();
    foreach ($tokens as $node) {
        switch ($node['type']) {
            case HTMLTokenizer::NODE_TYPE_TEXT:
                $node['value'] = html_entity_decode($node['value'], ENT_QUOTES, MultiByte::hab_encoding());
                break;

            case HTMLTokenizer::NODE_TYPE_ELEMENT_OPEN:
            case HTMLTokenizer::NODE_TYPE_ELEMENT_EMPTY:
                $element = strtolower($node['name']);

                // Strict comparison: assumes the whitelist holds lowercase strings.
                if (!in_array($element, self::$whitelist_elements, true)) {
                    // Element not allowed: keep the tag, but only as text.
                    $node = array('type' => HTMLTokenizer::NODE_TYPE_TEXT, 'name' => '#text', 'value' => HTMLTokenSet::token_to_string($node), 'attrs' => array());
                    break;
                }

                // Element allowed: keep only attributes that pass a whitelist check.
                $has_element_rules = array_key_exists($element, self::$whitelist_attributes);
                foreach ($node['attrs'] as $k => $v) {
                    $attr = strtolower($k);
                    $attr_ok = false;

                    // Listed in the global whitelist and the value validates.
                    if (array_key_exists($attr, self::$whitelist_attributes['*']) && self::check_attr_value($attr, $v, self::$whitelist_attributes['*'][$attr])) {
                        $attr_ok = true;
                    }

                    // Listed in this element's own whitelist and the value validates.
                    if ($has_element_rules && array_key_exists($attr, self::$whitelist_attributes[$element]) && self::check_attr_value($attr, $v, self::$whitelist_attributes[$element][$attr])) {
                        $attr_ok = true;
                    }

                    // Neither whitelist accepted it: remove the attribute.
                    if (!$attr_ok) {
                        unset($node['attrs'][$k]);
                    }
                }
                break;

            case HTMLTokenizer::NODE_TYPE_ELEMENT_CLOSE:
                if (!in_array(strtolower($node['name']), self::$whitelist_elements, true)) {
                    // Mirror the opening-tag handling: the closing tag becomes text too.
                    $node = array('type' => HTMLTokenizer::NODE_TYPE_TEXT, 'name' => '#text', 'value' => HTMLTokenSet::token_to_string($node), 'attrs' => array());
                }
                break;

            case HTMLTokenizer::NODE_TYPE_PI:
            case HTMLTokenizer::NODE_TYPE_COMMENT:
            case HTMLTokenizer::NODE_TYPE_CDATA_SECTION:
            case HTMLTokenizer::NODE_TYPE_STATEMENT:
            default:
                // Never let these token types through.
                $node = null;
                break;
        }

        if ($node !== null) {
            $filtered[] = $node;
        }
    }

    // Rebuild the output string and strip elements that are opened and
    // immediately closed. This is a single pass, so a pair that only becomes
    // empty after an inner pair is removed is left in place.
    return preg_replace('#<([^>\\s]+)(?:\\s+[^>]+)?></\\1>#u', '', (string) $filtered);
}