// public load($str, $lowercase = true, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT)
/**
 * Loads a markup string into this DOM object and parses it.
 *
 * Sequence: prepare() receives the raw arguments, a fixed series of
 * remove_noise() passes runs over the document, parse() is called
 * repeatedly until it returns a falsy value, the root node's
 * HDOM_INFO_END entry is set to the current cursor, and finally
 * parse_charset() is invoked.
 *
 * @param string $str           Markup to load.
 * @param bool   $lowercase     Forwarded unchanged to prepare().
 * @param bool   $stripRN       Forwarded unchanged to prepare().
 * @param string $defaultBRText Forwarded unchanged to prepare().
 */
function load($str, $lowercase = true, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT)
{
    // Kept from the original implementation; not referenced in this method.
    global $debugObject;

    $this->prepare($str, $lowercase, $stripRN, $defaultBRText);

    // Noise patterns, applied strictly in this order. The second entry says
    // whether remove_noise() is called with an explicit `true` second argument.
    // Script removal deliberately precedes style removal, per
    // http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
    $noiseRules = array(
        // comments
        array("'<!--(.*?)-->'is", false),
        // cdata
        array("'<!\\[CDATA\\[(.*?)\\]\\]>'is", true),
        // <script> tags
        array("'<\\s*script[^>]*[^/]>(.*?)<\\s*/\\s*script\\s*>'is", false),
        array("'<\\s*script\\s*>(.*?)<\\s*/\\s*script\\s*>'is", false),
        // <style> tags
        array("'<\\s*style[^>]*[^/]>(.*?)<\\s*/\\s*style\\s*>'is", false),
        array("'<\\s*style\\s*>(.*?)<\\s*/\\s*style\\s*>'is", false),
        // preformatted (<code>) tags
        array("'<\\s*(?:code)[^>]*>(.*?)<\\s*/\\s*(?:code)\\s*>'is", false),
        // server side scripts
        array("'(<\\?)(.*?)(\\?>)'s", true),
        // smarty scripts
        array("'(\\{\\w)(.*?)(\\})'s", true),
    );

    foreach ($noiseRules as $rule) {
        if ($rule[1]) {
            $this->remove_noise($rule[0], true);
        } else {
            // Single-argument form, so remove_noise() applies its own default.
            $this->remove_noise($rule[0]);
        }
    }

    // Keep parsing until parse() reports there is nothing left.
    do {
        $hasMore = $this->parse();
    } while ($hasMore);

    // Mark where the root node ends, then run charset detection.
    $this->root->_[HDOM_INFO_END] = $this->cursor;
    $this->parse_charset();
}
/**
 * Runs the configured plaintext steps over a piece of text, in the
 * configured order, and returns the result.
 *
 * Configuration is read from
 * $REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings']:
 *  - 'order'     comma-separated list of step names; unknown names are ignored
 *  - 'regex'     newline-separated list, alternating pattern / replacement
 *  - 'textile'   non-empty to enable the textile step
 *  - 'striptags' non-empty to enable the strip_tags step
 *
 * @param string $_text   Text to process.
 * @param mixed  $_remove Passed unchanged to simple_html_dom::remove() by the
 *                        'selectors' step.
 *
 * @return string The processed text.
 */
function a587_getPlaintext($_text, $_remove)
{
    global $REX;

    $settings = $REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings'];

    foreach (explode(',', $settings['order']) as $elem) {
        switch ($elem) {
            case 'selectors':
                // Remove the elements selected by $_remove, re-parse the
                // remaining markup and take its plaintext.
                $html = new simple_html_dom();
                $html->load($_text);
                $html->remove($_remove);
                $html->load($html->outertext);
                $_text = $html->plaintext;
                break;

            case 'regex':
                if (!empty($settings['regex'])) {
                    $regex = array();
                    $replacement = array();
                    $odd = true;

                    foreach (explode("\n", $settings['regex']) as $line) {
                        // Tolerate CRLF line endings: without this, a blank
                        // line arrives as "\r" and is mistaken for a pattern,
                        // and replacements keep a stray trailing "\r".
                        $line = rtrim($line, "\r");

                        if ($line !== '') {
                            // Non-empty lines alternate: pattern, replacement, ...
                            if ($odd) {
                                $regex[] = trim($line);
                            } else {
                                $replacement[] = $line;
                            }
                            $odd = !$odd;
                        }
                    }

                    // preg_replace() returns null on error (e.g. an invalid
                    // pattern); keep the text unchanged in that case instead
                    // of discarding it.
                    $replaced = preg_replace($regex, $replacement, $_text);
                    if ($replaced !== null) {
                        $_text = $replaced;
                    }
                }
                break;

            case 'textile':
                // Run the text through textile, if enabled and available.
                if (!empty($settings['textile']) and function_exists('rex_a79_textile')) {
                    $_text = rex_a79_textile($_text);
                }
                break;

            case 'striptags':
                // Strip HTML tags.
                if (!empty($settings['striptags'])) {
                    $_text = strip_tags($_text);
                }
                break;
        }
    }

    return $_text;
}