Pressbooks\Modules\Import\WordPress\Parser::parse PHP Метод

parse() публичный Метод

public parse ( string $file ) : array
$file string
Результат array
    /**
     * Parse a WXR (WordPress eXtended RSS) export file into plain PHP arrays.
     *
     * The document is loaded through DOMDocument in recovery mode so that slightly malformed exports can still
     * be read. Any document that declares a DOCTYPE is rejected.
     *
     * @param string $file Path to the WXR file
     *
     * @return array Associative array with the keys 'authors', 'posts', 'categories', 'tags', 'terms',
     *               'base_url' and 'version'
     * @throws \Exception If the file is unreadable or empty, cannot be parsed as XML, declares a DOCTYPE,
     *                    or carries no valid WXR version number
     */
    function parse($file)
    {
        // ------------------------------------------------------------------------------------------------------------
        // Setup & sanity check
        // ------------------------------------------------------------------------------------------------------------
        $authors = $posts = $categories = $tags = $terms = array();
        libxml_use_internal_errors(true);
        // Drop stale errors so that an exception message below only reports problems from this document
        libxml_clear_errors();

        // DOMDocument::loadXML() raises a \ValueError (not an \Exception) on empty input in PHP 8, which would
        // bypass callers that catch \Exception, so reject unreadable/empty input explicitly.
        $contents = file_get_contents($file);
        if (false === $contents || '' === $contents) {
            throw new \Exception(sprintf('Unable to read file, or file is empty: %s', $file));
        }

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0; only toggle it on older runtimes
        $toggle_entity_loader = function_exists('libxml_disable_entity_loader') && PHP_VERSION_ID < 80000;
        $oldValue = null;
        if ($toggle_entity_loader) {
            $oldValue = libxml_disable_entity_loader(true);
        }
        $dom = new \DOMDocument();
        // Try to parse non-well formed documents
        $dom->recover = true;
        $success = $dom->loadXML($contents);
        $has_doctype = isset($dom->doctype);
        foreach ($dom->childNodes as $child) {
            if (XML_DOCUMENT_TYPE_NODE === $child->nodeType) {
                $has_doctype = true;
                break;
            }
        }
        if ($toggle_entity_loader) {
            libxml_disable_entity_loader($oldValue);
        }
        if ($has_doctype) {
            // libxml reports no error for a DOCTYPE, so say explicitly why the document is refused
            throw new \Exception('Invalid XML: Detected use of disallowed DOCTYPE');
        }
        if (!$success) {
            throw new \Exception(print_r(libxml_get_errors(), true));
        }
        $xml = simplexml_import_dom($dom);
        unset($dom);
        // halt if loading produces an error
        if (!$xml) {
            throw new \Exception(print_r(libxml_get_errors(), true));
        }
        $version_nodes = $xml->xpath('/rss/channel/wp:wxr_version');
        if (!$version_nodes) {
            throw new \Exception(__('This does not appear to be a WXR file, missing/invalid WXR version number', 'pressbooks'));
        }
        $wxr_version = trim((string) $version_nodes[0]);
        // confirm that we are dealing with the correct file format
        if (!preg_match('/^\\d+\\.\\d+$/', $wxr_version)) {
            throw new \Exception(__('This does not appear to be a WXR file, missing/invalid WXR version number', 'pressbooks'));
        }
        // ------------------------------------------------------------------------------------------------------------
        // Ladies and gentlemen, start your parsing
        // ------------------------------------------------------------------------------------------------------------
        // base_site_url is optional: fall back to an empty string instead of reading a missing offset
        $base_url_nodes = $xml->xpath('/rss/channel/wp:base_site_url');
        $base_url = (is_array($base_url_nodes) && isset($base_url_nodes[0])) ? trim((string) $base_url_nodes[0]) : '';
        $namespaces = $xml->getDocNamespaces();
        if (!isset($namespaces['wp'])) {
            $namespaces['wp'] = 'http://wordpress.org/export/1.1/';
        }
        if (!isset($namespaces['excerpt'])) {
            $namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/';
        }
        // grab authors, keyed by login
        foreach ($xml->xpath('/rss/channel/wp:author') as $author_arr) {
            $a = $author_arr->children($namespaces['wp']);
            $login = (string) $a->author_login;
            $authors[$login] = array(
                'author_id' => (int) $a->author_id,
                'author_login' => $login,
                'author_email' => (string) $a->author_email,
                'author_display_name' => (string) $a->author_display_name,
                'author_first_name' => (string) $a->author_first_name,
                'author_last_name' => (string) $a->author_last_name,
            );
        }
        // grab cats, tags and terms
        foreach ($xml->xpath('/rss/channel/wp:category') as $term_arr) {
            $t = $term_arr->children($namespaces['wp']);
            $categories[] = array(
                'term_id' => (int) $t->term_id,
                'category_nicename' => (string) $t->category_nicename,
                'category_parent' => (string) $t->category_parent,
                'cat_name' => (string) $t->cat_name,
                'category_description' => (string) $t->category_description,
            );
        }
        foreach ($xml->xpath('/rss/channel/wp:tag') as $term_arr) {
            $t = $term_arr->children($namespaces['wp']);
            $tags[] = array(
                'term_id' => (int) $t->term_id,
                'tag_slug' => (string) $t->tag_slug,
                'tag_name' => (string) $t->tag_name,
                'tag_description' => (string) $t->tag_description,
            );
        }
        foreach ($xml->xpath('/rss/channel/wp:term') as $term_arr) {
            $t = $term_arr->children($namespaces['wp']);
            $terms[] = array(
                'term_id' => (int) $t->term_id,
                'term_taxonomy' => (string) $t->term_taxonomy,
                'slug' => (string) $t->term_slug,
                'term_parent' => (string) $t->term_parent,
                'term_name' => (string) $t->term_name,
                'term_description' => (string) $t->term_description,
            );
        }
        // grab posts
        foreach ($xml->channel->item as $item) {
            $post = array('post_title' => (string) $item->title, 'guid' => (string) $item->guid);
            $dc = $item->children('http://purl.org/dc/elements/1.1/');
            $post['post_author'] = (string) $dc->creator;
            $content = $item->children('http://purl.org/rss/1.0/modules/content/');
            $excerpt = $item->children($namespaces['excerpt']);
            $post['post_content'] = (string) $content->encoded;
            $post['post_excerpt'] = (string) $excerpt->encoded;
            // Children of the item that live in the wp: namespace
            $wp = $item->children($namespaces['wp']);
            $post['post_id'] = (int) $wp->post_id;
            $post['post_date'] = (string) $wp->post_date;
            $post['post_date_gmt'] = (string) $wp->post_date_gmt;
            $post['comment_status'] = (string) $wp->comment_status;
            $post['ping_status'] = (string) $wp->ping_status;
            $post['post_name'] = (string) $wp->post_name;
            $post['status'] = (string) $wp->status;
            $post['post_parent'] = (int) $wp->post_parent;
            $post['menu_order'] = (int) $wp->menu_order;
            $post['post_type'] = (string) $wp->post_type;
            $post['post_password'] = (string) $wp->post_password;
            $post['is_sticky'] = (int) $wp->is_sticky;
            if (isset($wp->attachment_url)) {
                $post['attachment_url'] = (string) $wp->attachment_url;
            }
            // 'terms', 'postmeta' and 'comments' are only set when the item actually carries such entries
            foreach ($item->category as $c) {
                $att = $c->attributes();
                if (isset($att['nicename'])) {
                    $post['terms'][] = array(
                        'name' => (string) $c,
                        'slug' => (string) $att['nicename'],
                        'domain' => (string) $att['domain'],
                    );
                }
            }
            foreach ($wp->postmeta as $meta) {
                $post['postmeta'][] = array('key' => (string) $meta->meta_key, 'value' => (string) $meta->meta_value);
            }
            foreach ($wp->comment as $comment) {
                $meta = array();
                if (isset($comment->commentmeta)) {
                    foreach ($comment->commentmeta as $m) {
                        $meta[] = array('key' => (string) $m->meta_key, 'value' => (string) $m->meta_value);
                    }
                }
                $post['comments'][] = array(
                    'comment_id' => (int) $comment->comment_id,
                    'comment_author' => (string) $comment->comment_author,
                    'comment_author_email' => (string) $comment->comment_author_email,
                    'comment_author_IP' => (string) $comment->comment_author_IP,
                    'comment_author_url' => (string) $comment->comment_author_url,
                    'comment_date' => (string) $comment->comment_date,
                    'comment_date_gmt' => (string) $comment->comment_date_gmt,
                    'comment_content' => (string) $comment->comment_content,
                    'comment_approved' => (string) $comment->comment_approved,
                    'comment_type' => (string) $comment->comment_type,
                    'comment_parent' => (string) $comment->comment_parent,
                    'comment_user_id' => (int) $comment->comment_user_id,
                    'commentmeta' => $meta,
                );
            }
            $posts[] = $post;
        }
        return array(
            'authors' => $authors,
            'posts' => $posts,
            'categories' => $categories,
            'tags' => $tags,
            'terms' => $terms,
            'base_url' => $base_url,
            'version' => $wxr_version,
        );
    }

Usage Example

Пример #1
0
 /**
  * Import the selected posts of a WXR file into the current book.
  *
  * @param array $current_import Reads the 'file' entry (handed to the parser) and the keys of the
  *                              'chapters' entry (ids of the posts eligible for import)
  *
  * @return bool False when the file cannot be parsed, otherwise the result of revokeCurrentImport()
  */
 function import(array $current_import)
 {
     try {
         $wxr_parser = new Parser();
         $xml = $wxr_parser->parse($current_import['file']);
     } catch (\Exception $e) {
         // The file could not be parsed, so there is nothing to import
         return false;
     }
     $this->pbCheck($xml);
     if ($this->isPbWxr) {
         $xml['posts'] = $this->customNestedSort($xml['posts']);
     }
     // Flip the chapter ids into keys so membership can be tested with isset()
     $selected_ids = array_flip(array_keys($current_import['chapters']));
     $chapter_parent = $this->getChapterParent();
     $imported = 0;
     libxml_use_internal_errors(true);
     foreach ($xml['posts'] as $wxr_post) {
         $source_id = $wxr_post['post_id'];
         // Skip anything that is not flagged or was not selected
         if (!$this->flaggedForImport($source_id) || !isset($selected_ids[$source_id])) {
             continue;
         }
         // Insert
         $post_type = $this->determinePostType($source_id);
         // Load HTML snippet into DOMDocument using UTF-8 hack
         $utf8_hack = '<?xml version="1.0" encoding="UTF-8"?>';
         $doc = new \DOMDocument();
         $doc->loadHTML($utf8_hack . $this->tidy($wxr_post['post_content']));
         // Download images, change image paths
         $doc = $this->scrapeAndKneadImages($doc);
         $serialized = $doc->saveXML($doc->documentElement);
         // Remove auto-created <html> <body> and <!DOCTYPE> tags.
         $unwrapped = str_replace(array('<html>', '</html>', '<body>', '</body>'), '', $serialized);
         $html = preg_replace('/^<!DOCTYPE.+?>/', '', $unwrapped);
         if ('metadata' != $post_type) {
             $target_id = $this->insertNewPost($post_type, $wxr_post, $html, $chapter_parent);
             if ('part' == $post_type) {
                 // Posts inserted after a part are attached to that part
                 $chapter_parent = $target_id;
             }
         } else {
             // Metadata is attached to the post returned by bookInfoPid() rather than to a newly inserted post
             $target_id = $this->bookInfoPid();
         }
         $has_meta = isset($wxr_post['postmeta']) && is_array($wxr_post['postmeta']);
         if ($has_meta) {
             $this->importPbPostMeta($target_id, $post_type, $wxr_post);
         }
         Book::consolidatePost($target_id, get_post($target_id));
         // Reorder
         $imported++;
     }
     // TODO: Handle errors gracefully (the collected errors are currently discarded)
     $errors = libxml_get_errors();
     libxml_clear_errors();
     // Done
     $notice = sprintf(__('Imported %s chapters.', 'pressbooks'), $imported);
     $_SESSION['pb_notices'][] = $notice;
     return $this->revokeCurrentImport();
 }
All Usage Examples Of Pressbooks\Modules\Import\WordPress\Parser::parse
Parser