Html2Text\Html2Text::iterateOverNode PHP Method

iterateOverNode() static public method

static public iterateOverNode ( $node )
    /**
     * Recursively convert a DOM node and its descendants into plain text.
     *
     * Text nodes have their whitespace collapsed; document-type nodes and the
     * tags head/title/meta/style/script produce no output; <hr> becomes a
     * dashed rule. Every other node emits an optional leading separator, the
     * converted text of its children, and an optional trailing separator.
     * Links are rendered as "[text](href)" and images as "[title-or-alt]".
     *
     * @param \DOMNode $node Node to convert.
     *
     * @return string Plain-text rendering of the node.
     */
    public static function iterateOverNode($node)
    {
        if ($node instanceof \DOMText) {
            // Collapse each run of tab/newline/form-feed/CR/space into a single space.
            // NOTE(review): wholeText also includes logically adjacent text nodes, so
            // adjacent text siblings could be emitted more than once — confirm.
            return preg_replace("/[\\t\\n\\f\\r ]+/im", " ", $node->wholeText);
        }
        if ($node instanceof \DOMDocumentType) {
            // The doctype contributes nothing to the text.
            return "";
        }

        $nextName = static::nextChildName($node);
        $name = strtolower($node->nodeName);

        // Leading whitespace / prefix, chosen by tag name.
        switch ($name) {
            case "hr":
                return "---------------------------------------------------------------\n";
            case "style":
            case "head":
            case "title":
            case "meta":
            case "script":
                // These tags (and everything inside them) are dropped.
                return "";
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "ol":
            case "ul":
                // One newline before the element's content.
                $output = "\n";
                break;
            case "td":
            case "th":
                // Table cells are separated by a tab.
                $output = "\t";
                break;
            case "tr":
            case "p":
            case "div":
                $output = "\n";
                break;
            case "li":
                $output = "- ";
                break;
            default:
                // Unknown tags contribute only their children's text.
                $output = "";
                break;
        }

        // Append the converted text of every child, in document order.
        if (isset($node->childNodes)) {
            foreach ($node->childNodes as $child) {
                $output .= static::iterateOverNode($child);
            }
        }

        // Trailing whitespace / rewriting, chosen by tag name.
        switch ($name) {
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
                $output .= "\n";
                break;
            case "p":
            case "br":
                // A following <div> supplies its own leading newline.
                if ($nextName !== "div") {
                    $output .= "\n";
                }
                break;
            case "div":
                // Add a newline only when something other than a <div> follows.
                // The loose null check is kept deliberately: the helper's exact
                // "no next element" value is not visible from this method.
                if ($nextName !== "div" && $nextName != null) {
                    $output .= "\n";
                }
                break;
            case "a":
                // Links are returned in [text](link) format.
                // getAttribute() yields "" for a missing attribute, so all
                // presence checks compare against "" rather than relying on
                // truthiness (which would wrongly treat "0" as absent).
                $href = $node->getAttribute("href");
                $title = $node->getAttribute("title");
                $output = trim($output);

                // A linked image arrives as "[alt]"; strip the brackets so the
                // result does not become "[[alt]](href)".
                if (substr($output, 0, 1) === "[" && substr($output, -1) === "]") {
                    $output = substr($output, 1, strlen($output) - 2);
                    // For linked images the <a> title overrides the <img> text.
                    if ($title !== "") {
                        $output = $title;
                    }
                }

                // No link text at all: fall back to the title attribute.
                if ($output === "" && $title !== "") {
                    $output = $title;
                }

                if ($href === "") {
                    // Not a hyperlink; a named anchor is shown in brackets.
                    if ($node->getAttribute("name") !== "") {
                        $output = "[{$output}]";
                    }
                } else {
                    // Strict comparison: loose == would compare numeric strings
                    // numerically (e.g. "1e2" == "100") and drop the href.
                    $textIsAddress = $href === $output
                        || $href === "mailto:{$output}"
                        || $href === "http://{$output}"
                        || $href === "https://{$output}";

                    if (!$textIsAddress) {
                        if ($output !== "") {
                            $output = "[{$output}]({$href})";
                        } else {
                            // Nothing to show as text: emit the bare address.
                            $output = $href;
                        }
                    }
                    // Otherwise the text already is the address; leave it as is.
                }

                // A heading directly after the link needs an extra newline.
                switch ($nextName) {
                    case "h1":
                    case "h2":
                    case "h3":
                    case "h4":
                    case "h5":
                    case "h6":
                        $output .= "\n";
                        break;
                }
                break;
            case "img":
                // Images are rendered as "[title]", else "[alt]", else nothing.
                $imgTitle = $node->getAttribute("title");
                $imgAlt = $node->getAttribute("alt");
                if ($imgTitle !== "") {
                    $output = "[" . $imgTitle . "]";
                } elseif ($imgAlt !== "") {
                    $output = "[" . $imgAlt . "]";
                } else {
                    $output = "";
                }
                break;
            case "li":
                $output .= "\n";
                break;
            default:
                // No trailing whitespace for other tags.
        }

        return $output;
    }