simple_html_dom::load PHP Method

load() public method

Loads an HTML document from a string.
public load ( $str, $lowercase = true, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT )
    /**
     * Load an HTML document from a string and parse it into this DOM object.
     *
     * The input and options are handed to prepare(), a fixed sequence of
     * "noise" patterns (comments, CDATA, scripts, styles, code blocks,
     * server-side and Smarty tags) is passed to remove_noise(), and then
     * parse() is called repeatedly until it reports there is nothing left.
     *
     * @param string $str           HTML source to load.
     * @param bool   $lowercase     Forwarded unchanged to prepare().
     * @param bool   $stripRN       Forwarded unchanged to prepare().
     * @param string $defaultBRText Forwarded unchanged to prepare().
     */
    function load($str, $lowercase = true, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT)
    {
        global $debugObject;

        // Set up the parser with the raw input and the caller's options.
        $this->prepare($str, $lowercase, $stripRN, $defaultBRText);

        // Each entry is the exact argument list of one remove_noise() call.
        // The order is significant: script removal precedes style removal, per
        // http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
        $noiseCalls = array(
            // HTML comments
            array("'<!--(.*?)-->'is"),
            // CDATA sections
            array("'<!\\[CDATA\\[(.*?)\\]\\]>'is", true),
            // <script> elements, with and without attributes
            array("'<\\s*script[^>]*[^/]>(.*?)<\\s*/\\s*script\\s*>'is"),
            array("'<\\s*script\\s*>(.*?)<\\s*/\\s*script\\s*>'is"),
            // <style> elements, with and without attributes
            array("'<\\s*style[^>]*[^/]>(.*?)<\\s*/\\s*style\\s*>'is"),
            array("'<\\s*style\\s*>(.*?)<\\s*/\\s*style\\s*>'is"),
            // preformatted <code> elements
            array("'<\\s*(?:code)[^>]*>(.*?)<\\s*/\\s*(?:code)\\s*>'is"),
            // server-side script blocks (<? ... ?>)
            array("'(<\\?)(.*?)(\\?>)'s", true),
            // Smarty template tags ({word ... })
            array("'(\\{\\w)(.*?)(\\})'s", true),
        );
        foreach ($noiseCalls as $noiseArgs) {
            call_user_func_array(array($this, 'remove_noise'), $noiseArgs);
        }

        // Keep parsing until parse() returns a falsy value.
        do {
            $hasMore = $this->parse();
        } while ($hasMore);

        // Record where the root node ends, then run charset detection.
        $this->root->_[HDOM_INFO_END] = $this->cursor;
        $this->parse_charset();
    }

Usage Example

Example #1
0
/**
 * Convert a text/HTML fragment to plaintext by running the configured steps.
 *
 * The steps and their order come from the comma-separated 'order' setting of
 * the rexsearch plaintext plugin (read from the global $REX). Supported steps:
 *  - 'selectors': load the text into simple_html_dom, call remove($_remove),
 *                 and take the resulting plaintext;
 *  - 'regex':     apply pattern/replacement pairs from the 'regex' setting
 *                 (alternating lines: pattern, replacement, pattern, ...);
 *  - 'textile':   pass the text through rex_a79_textile() if enabled and available;
 *  - 'striptags': apply strip_tags() if enabled.
 * Unknown step names are ignored.
 *
 * @param string $_text   Text to convert.
 * @param mixed  $_remove Passed as-is to simple_html_dom::remove(); presumably
 *                        the CSS selectors of elements to drop (confirm against caller).
 *
 * @return string The processed text.
 */
function a587_getPlaintext($_text, $_remove)
{
    global $REX;

    $settings = $REX['ADDON']['rexsearch_plugins']['rexsearch']['plaintext']['settings'];

    foreach (explode(',', $settings['order']) as $elem) {
        switch ($elem) {
            case 'selectors':
                // remove elements selected by css-selectors
                $html = new simple_html_dom();
                $html->load($_text);
                $html->remove($_remove);
                // re-parse the modified markup before reading its plaintext
                $html->load($html->outertext);
                $_text = $html->plaintext;
                break;

            case 'regex':
                // apply the configured pattern/replacement pairs
                if (!empty($settings['regex'])) {
                    $regex = array();
                    $replacement = array();
                    $odd = true;
                    foreach (explode("\n", $settings['regex']) as $line) {
                        // Settings saved with CRLF line endings leave a "\r" on
                        // every line. Strip it so it neither ends up in the
                        // replacement text nor makes a blank line count as a
                        // (then empty) pattern.
                        $line = rtrim($line, "\r");
                        if ($line === '') {
                            continue;
                        }
                        if ($odd) {
                            $regex[] = trim($line);
                        } else {
                            // replacements are deliberately not trimmed so that
                            // whitespace-only replacements remain possible
                            $replacement[] = $line;
                        }
                        $odd = !$odd;
                    }
                    // preg_replace() returns null on a pattern error; keep the
                    // current text in that case instead of discarding it.
                    $replaced = preg_replace($regex, $replacement, $_text);
                    if ($replaced !== null) {
                        $_text = $replaced;
                    }
                }
                break;

            case 'textile':
                // run the text through the textile add-on when enabled and installed
                if (!empty($settings['textile']) && function_exists('rex_a79_textile')) {
                    $_text = rex_a79_textile($_text);
                }
                break;

            case 'striptags':
                // strip HTML-tags
                if (!empty($settings['striptags'])) {
                    $_text = strip_tags($_text);
                }
                break;
        }
    }

    return $_text;
}
All Usage Examples Of simple_html_dom::load