Readability\Readability::init PHP Method

init() public method

Workflow: (1) Prep the document by removing script tags, CSS, etc. (2) Build Readability's DOM tree. (3) Grab the article content from the current DOM tree. (4) Replace the current DOM tree with the new one. (5) Read peacefully.
public init ( ) : boolean
Returns: boolean — true if content was found, false otherwise.
    /**
     * Run the extraction and swap the document body for the readable version.
     *
     * Steps: prepare the document, build the overlay wrapper, grab the article
     * (falling back to an apology paragraph when nothing is found), replace the
     * body's contents with the wrapper, then post-process the article node.
     *
     * @return bool true if we found content, false otherwise
     */
    public function init()
    {
        // Nothing to work with when the document has no root element.
        if (!isset($this->dom->documentElement)) {
            return false;
        }

        // Start optimistic; flipped to false below when no article is found.
        $this->success = true;

        $bodies = $this->dom->getElementsByTagName('body');

        // Cache the body markup only once. A document may contain several
        // <body> nodes, so their trimmed markup is concatenated.
        if ($this->bodyCache === null) {
            $markup = '';
            foreach ($bodies as $node) {
                $markup .= trim($node->innerHTML);
            }
            $this->bodyCache = $markup;
        }

        // Use the first <body> unless one has already been assigned.
        if ($this->body === null && $bodies->length > 0) {
            $this->body = $bodies->item(0);
        }

        $this->prepDocument();

        // Wrapper elements: div.readOverlay > div.readInner.
        $wrapper = $this->dom->createElement('div');
        $inner = $this->dom->createElement('div');
        $wrapper->setAttribute('class', 'readOverlay');
        $inner->setAttribute('class', 'readInner');

        $title = $this->getArticleTitle();
        $content = $this->grabArticle();

        if (!$content) {
            // No article found: report failure and substitute a placeholder node
            // so the rest of the pipeline still has something to attach.
            $this->success = false;
            $content = $this->dom->createElement('div');
            $content->setAttribute('class', 'readability-content');
            $content->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';
        }

        // Assemble: title and content inside the inner div, inner div inside the overlay.
        $inner->appendChild($title);
        $inner->appendChild($content);
        $wrapper->appendChild($inner);

        // Empty the old body and insert the assembled overlay.
        $this->body->innerHTML = '';
        $this->body->appendChild($wrapper);
        $this->body->removeAttribute('style');

        $this->postProcessContent($content);

        // Keep the resulting nodes on the instance.
        $this->articleTitle = $title;
        $this->articleContent = $content;

        return $this->success;
    }

Usage Example

Example #1
0
 /**
  * Extract an article from a page using php-readability.
  *
  * @param string $url address of the page to download and parse
  *
  * @return array map with 'title' and 'content' keys, each holding the
  *               textContent of the corresponding node
  *
  * @throws \RuntimeException if the page cannot be downloaded
  */
 function getArticle($url)
 {
     $html = file_get_contents($url);
     // file_get_contents() returns false on failure; stop here rather than
     // handing a boolean to the parser as if it were HTML.
     if ($html === false) {
         throw new \RuntimeException('Unable to download page: ' . $url);
     }
     $Readability = new Readability($html, $url);
     // Run the extraction; the boolean result (true when content was found)
     // is not needed for building the return value.
     $Readability->init();
     return array(
         'title' => $Readability->getTitle()->textContent,
         'content' => $Readability->getContent()->textContent,
     );
 }
All Usage Examples Of Readability\Readability::init