Jyxo\Html::toText PHP Метод

toText() публичный статический метод

Converts HTML source code to plaintext.
public static toText ( string $html ) : string
$html string HTML source code
Результат string
    /**
     * Converts HTML source code to plaintext.
     *
     * Styles and scripts are dropped, block-level tags are turned into line breaks,
     * the contents of <h1>, <h2>, <strong> and <th> are upper-cased, <hr> becomes
     * a row of dashes and every remaining tag is stripped. Links, lists and
     * blockquotes are delegated to linkToText(), listToText() and blockquoteToText().
     *
     * @param string $html HTML source code
     * @return string
     */
    public static function toText(string $html) : string
    {
        $text = $html;
        // Remove styles and scripts
        $text = self::removeTags($text, ['style', 'script']);
        // Re-format lines
        // <pre>
        $text = preg_replace_callback('~<pre[^>]*>(.+?)</pre>~is', static function (array $matches) : string {
            // Line breaks are converted to <br />, that are removed later
            return nl2br($matches[1]);
        }, $text);
        // \r, redundant line breaks, tabs and <br />
        $text = preg_replace(["~\r~", "~[\n\t]+~", '~<br[^>]*>~i'], ['', ' ', "\n"], $text);
        // Processing of most tags and entities ($search[i] is replaced by $replace[i])
        static $search = [
            '~<h[3-6][^>]*>(.+?)</h[3-6]>~is',
            '~(<div[^>]*>)|(</div>)~i',
            '~(<p(?:\\s+[^>]+)?>)|(</p>)~i',
            '~(<table[^>]*>)|(</table>)~i',
            // Only a complete closing tag may match; the former "</tr>*" also matched
            // a bare "</tr" prefix and therefore damaged tags such as </track>
            '~</tr\\s*>~i',
            '~<td[^>]*>(.+?)</td>~is',
            '~(&hellip;)~i',
            '~(&#8220;)|(&#8221;)~i',
            '~(&apos;)~i',
            '~(&copy;)|(&#169;)~i',
            '~&trade;~i',
            '~&reg;~i',
            '~(&mdash;)|(&ndash;)~i',
        ];
        static $replace = [
            "\n\n\\1\n\n",
            "\n\n",
            "\n\n",
            "\n\n",
            "\n",
            "\\1\t",
            '...',
            '"',
            '\'',
            '(c)',
            '(tm)',
            '(R)',
            '-',
        ];
        $text = preg_replace($search, $replace, $text);
        // <h1> and <h2>
        $text = preg_replace_callback('~<h[12][^>]*>(.+?)</h[12]>~is', static function (array $matches) : string {
            return "\n\n\n" . mb_strtoupper($matches[1], 'utf-8') . "\n\n";
        }, $text);
        // <strong>
        $text = preg_replace_callback('~<strong[^>]*>(.+?)</strong>~is', static function (array $matches) : string {
            return mb_strtoupper($matches[1], 'utf-8');
        }, $text);
        // <hr /> (the replacement is constant and contains no backreference characters,
        // so a plain preg_replace is sufficient)
        $text = preg_replace('~<hr[^>]*>~i', "\n" . str_repeat('-', 50) . "\n", $text);
        // <th>
        $text = preg_replace_callback('~<th[^>]*>(.+?)</th>~is', static function (array $matches) : string {
            return mb_strtoupper($matches[1], 'utf-8') . "\t";
        }, $text);
        // <a>
        $text = self::linkToText($text);
        // <ul> and <ol>
        $text = self::listToText($text);
        // Two empty lines at most
        $text = trim($text, "\n ");
        $text = preg_replace("~\n\\s+\n~", "\n\n", $text);
        // Process <blockquote> (empty lines are removed before <blockquote> processing on purpose)
        $text = self::blockquoteToText($text);
        // Remove all left tags
        $text = strip_tags($text);
        // Replacing [textlink] for <> (must be done after strip_tags)
        $text = preg_replace('~\\[textlink\\]\\s*~s', '<', $text);
        $text = preg_replace('~\\s*\\[/textlink\\]~s', '>', $text);
        // Replaces non-breaking spaces
        $text = preg_replace(['~&nbsp;&nbsp;&nbsp;&nbsp;~i', '~&nbsp;~i'], ["\t", ' '], $text);
        // Remove other entities (must not be performed before)
        // After previous processing some entities are upper case, that is why we have to use strtolower
        $text = preg_replace_callback('~(&#?[a-z0-9]+;)~i', static function (array $matches) : string {
            return html_entity_decode(strtolower($matches[1]), ENT_QUOTES, 'utf-8');
        }, $text);
        // Two empty lines at most (performed a second time on purpose)
        $text = trim($text, "\n ");
        $text = preg_replace("~\n\\s+\n~", "\n\n", $text);
        // Because of <blockquote> converting
        $text = preg_replace("~(\n>\\s*)+\n~", "\n>\n", $text);
        // One space at most
        $text = preg_replace("~(\n|\t)( )+~", '\\1', $text);
        $text = preg_replace('~( ){2,}~', ' ', $text);
        // No space at line ends
        $text = preg_replace("~[ \t]+\n~", "\n", $text);
        return $text;
    }

Usage Example

Пример #1
0
 /**
  * Checks Body in three setups: HTML with a plaintext alternative,
  * HTML alone, and plaintext alone.
  */
 public function test()
 {
     $markup = file_get_contents(DIR_FILES . '/mail/email.html');
     $plain = \Jyxo\Html::toText($markup);

     // Both the HTML and its plaintext alternative are supplied
     $htmlWithAlternative = new Body($markup, $plain);
     $this->assertEquals($markup, $htmlWithAlternative->getMain());
     $this->assertEquals($plain, $htmlWithAlternative->getAlternative());
     $this->assertTrue($htmlWithAlternative->isHtml());

     // The HTML is supplied on its own
     $htmlOnly = new Body($markup);
     $this->assertEquals($markup, $htmlOnly->getMain());
     $this->assertTrue($htmlOnly->isHtml());

     // The plaintext is supplied on its own
     $plainOnly = new Body($plain);
     $this->assertEquals($plain, $plainOnly->getMain());
     $this->assertFalse($plainOnly->isHtml());
 }
All Usage Examples Of Jyxo\Html::toText