Ticket #6269: rss-importer-rewrite.diff
File rss-importer-rewrite.diff, 19.7 KB (added by , 14 years ago) |
---|
-
uninstall.php
<?php
/**
 * Uninstall routine for the RSS Importer plugin.
 *
 * Removes the 'rss_importer' option that the plugin stores in the options table.
 */

// Only run when WordPress itself is performing the uninstall. Both constants
// must be defined; if either one is missing, this file was reached some other
// way and must not delete anything.
if ( ! defined( 'ABSPATH' ) || ! defined( 'WP_UNINSTALL_PLUGIN' ) )
	exit;

delete_option( 'rss_importer' );
parsers.php
<?php
/**
 * WordPress RSS file parser implementations
 *
 * @package WordPress
 * @subpackage Importer
 */

/**
 * WordPress Importer class for managing parsing of RSS files.
 *
 * Chooses a concrete parser depending on which PHP XML extension is loaded
 * and delegates the actual work to it.
 */
class RSS_Parser {
	/**
	 * Parse an RSS 2.0 file.
	 *
	 * @param string $file Path to the RSS file.
	 * @return array|WP_Error Array of post arrays on success, WP_Error when the
	 *                        file cannot be parsed or is not an RSS 2.0 feed.
	 */
	function parse( $file ) {
		// Prefer SimpleXML, otherwise use the XML Parser extension.
		if ( extension_loaded( 'simplexml' ) ) {
			$parser         = new RSS_Parser_SimpleXML;
			$malformed_code = 'SimpleXML_parse_error';
		} else {
			$parser         = new RSS_Parser_XML;
			$malformed_code = 'XML_parse_error';
		}

		$result = $parser->parse( $file );

		// If parsing succeeded, or the file is simply not an RSS 2.0 feed, return the result as-is.
		if ( ! is_wp_error( $result ) || $malformed_code != $result->get_error_code() )
			return $result;

		// We have a malformed XML file: show the low-level details when debugging.
		if ( defined( 'IMPORT_DEBUG' ) && IMPORT_DEBUG ) {
			echo '<pre>';
			if ( 'SimpleXML_parse_error' == $result->get_error_code() ) {
				foreach ( $result->get_error_data() as $error )
					echo $error->line . ':' . $error->column . ' ' . esc_html( $error->message ) . "\n";
			} else if ( 'XML_parse_error' == $result->get_error_code() ) {
				$error = $result->get_error_data();
				echo $error[0] . ':' . $error[1] . ' ' . esc_html( $error[2] );
			}
			echo '</pre>';
			echo '<p><strong>' . __( 'There was an error when reading this RSS file', 'rss-importer' ) . '</strong></p>';
		}

		// There is no further fallback parser, so hand the error to the caller
		// instead of silently returning nothing.
		return $result;
	}
}

/**
 * RSS Parser that makes use of the SimpleXML PHP extension.
 */
class RSS_Parser_SimpleXML {
	/**
	 * Parse an RSS 2.0 file with SimpleXML.
	 *
	 * @param string $file Path to the RSS file.
	 * @return array|WP_Error Array of post arrays, 'SimpleXML_parse_error' when the
	 *                        XML cannot be loaded (error data: the libxml errors),
	 *                        or 'RSS_parse_error' when the root element does not
	 *                        carry version="2.0".
	 */
	function parse( $file ) {
		// Collect libxml errors ourselves rather than letting them surface as PHP warnings.
		$previous_setting = libxml_use_internal_errors( true );
		libxml_clear_errors();

		$posts = array();

		$xml = simplexml_load_file( $file );

		// Take a copy of the errors, then put libxml back the way we found it.
		$errors = libxml_get_errors();
		libxml_clear_errors();
		libxml_use_internal_errors( $previous_setting );

		// halt if loading produces an error (strict check: an empty root element is not a load failure)
		if ( false === $xml )
			return new WP_Error( 'SimpleXML_parse_error', __( 'There was an error when reading this RSS file.', 'rss-importer' ), $errors );

		$attributes = $xml->attributes();
		if ( ! isset( $attributes['version'] ) || strval( $attributes['version'] ) != '2.0' )
			return new WP_Error( 'RSS_parse_error', __( 'This does not appear to be a RSS 2.0 file, missing/invalid RSS version number.', 'rss-importer' ) );

		// A feed without any items yields an empty list.
		if ( ! isset( $xml->channel->item ) )
			return $posts;

		// grab posts
		foreach ( $xml->channel->item as $item ) {
			$post    = array();
			$dc      = $item->children( 'http://purl.org/dc/elements/1.1/' );
			$content = $item->children( 'http://purl.org/rss/1.0/modules/content/' );

			$post['post_title'] = (string) $item->title;
			if ( isset( $item->guid ) )
				$post['guid'] = (string) $item->guid;

			// attempt to find publication date
			$published = false;
			if ( isset( $item->pubDate ) ) {
				$published = strtotime( (string) $item->pubDate );
			} else if ( isset( $dc->date ) ) {
				// if we don't already have something from pubDate:
				// turn "2010-01-01T00:00:00+01:00" into "2010-01-01 00:00:00+0100"
				$published = preg_replace( '|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', (string) $dc->date );
				$published = str_replace( 'T', ' ', $published );
				$published = strtotime( $published );
			}

			// Only set the dates when one could actually be parsed.
			if ( $published ) {
				$post['post_date_gmt'] = gmdate( 'Y-m-d H:i:s', $published );
				$post['post_date']     = get_date_from_gmt( $post['post_date_gmt'] );
			}

			// Both <category> and <dc:subject> elements are treated as categories.
			$categories = array();
			foreach ( $item->category as $c )
				$categories[] = html_entity_decode( (string) $c );
			foreach ( $dc->subject as $c )
				$categories[] = html_entity_decode( (string) $c );

			$post['categories'] = $categories;

			// if no content, use the description; otherwise the description becomes the excerpt
			$post['post_content'] = isset( $content->encoded ) ? (string) $content->encoded : (string) $item->description;
			$post['post_excerpt'] = isset( $content->encoded ) ? (string) $item->description : '';

			if ( isset( $dc->creator ) )
				$post['imported_author_name'] = (string) $dc->creator; // for later

			$posts[] = $post;
		}

		return $posts;
	}
}

/**
 * RSS Parser that makes use of the XML Parser PHP extension.
 */
class RSS_Parser_XML {
	// Character data collected since the last closing tag.
	var $cdata = '';
	// Posts collected so far.
	var $posts = array();
	// The post currently being built from an <item>.
	var $post = array();
	// Whether the parser is currently inside an <item> element.
	var $in_item = false;
	// Value of the version attribute on <rss>, or false if none was seen.
	var $rss_version = false;

	/**
	 * Parse an RSS 2.0 file with the event-based XML parser.
	 *
	 * @param string $file Path to the RSS file.
	 * @return array|WP_Error Array of post arrays, 'XML_parse_error' when the XML is
	 *                        malformed (error data: line, column, message), or
	 *                        'RSS_parse_error' when <rss version="2.0"> is missing.
	 */
	function parse( $file ) {
		$this->cdata              = '';
		$this->posts              = array();
		$this->post               = array();
		$this->post['categories'] = array();
		$this->in_item            = false;
		$this->rss_version        = false;

		$xml = xml_parser_create( 'UTF-8' );
		xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
		xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
		xml_set_object( $xml, $this );
		xml_set_character_data_handler( $xml, 'cdata' );
		xml_set_element_handler( $xml, 'tag_open', 'tag_close' );

		if ( ! xml_parse( $xml, file_get_contents( $file ), true ) ) {
			$current_line   = xml_get_current_line_number( $xml );
			$current_column = xml_get_current_column_number( $xml );
			$error_code     = xml_get_error_code( $xml );
			$error_string   = xml_error_string( $error_code );
			// Release the parser on the failure path as well.
			xml_parser_free( $xml );
			return new WP_Error( 'XML_parse_error', __( 'There was an error when reading this RSS file.', 'rss-importer' ), array( $current_line, $current_column, $error_string ) );
		}
		xml_parser_free( $xml );

		if ( '2.0' != $this->rss_version )
			return new WP_Error( 'RSS_parse_error', __( 'This does not appear to be a RSS 2.0 file, missing/invalid RSS version number.', 'rss-importer' ) );

		return $this->posts;
	}

	/**
	 * Element start handler: records the RSS version and the start of an item.
	 */
	function tag_open( $parse, $tag, $attr ) {
		if ( 'rss' == $tag )
			$this->rss_version = isset( $attr['version'] ) ? $attr['version'] : false;
		if ( 'item' == $tag )
			$this->in_item = true;
	}

	/**
	 * Character data handler.
	 *
	 * The parser may deliver one text node in several pieces (for example around
	 * entities), so the pieces are collected untouched and only trimmed once the
	 * element is closed. Trimming each piece would eat the spaces between them.
	 */
	function cdata( $parser, $cdata ) {
		$this->cdata .= $cdata;
	}

	/**
	 * Element end handler: stores the collected text in the current post and
	 * finishes the post when </item> is reached.
	 */
	function tag_close( $parser, $tag ) {
		$text        = trim( (string) $this->cdata );
		$this->cdata = '';

		// Anything outside of an <item> (channel title, description, ...) is ignored.
		if ( ! $this->in_item )
			return;

		switch ( $tag ) {
			case 'title':
				$this->post['post_title'] = $text;
				break;
			case 'guid':
				$this->post['guid'] = $text;
				break;
			case 'pubDate':
			case 'dc:date':
				// The first usable date wins.
				if ( isset( $this->post['post_date'] ) )
					break;
				if ( 'dc:date' == $tag ) {
					// turn "2010-01-01T00:00:00+01:00" into "2010-01-01 00:00:00+0100"
					$text = preg_replace( '|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', $text );
					$text = str_replace( 'T', ' ', $text );
				}
				$published = strtotime( $text );
				// Skip unparsable dates instead of storing the Unix epoch.
				if ( $published ) {
					$this->post['post_date_gmt'] = gmdate( 'Y-m-d H:i:s', $published );
					$this->post['post_date']     = get_date_from_gmt( $this->post['post_date_gmt'] );
				}
				break;
			case 'category':
				// Decoded the same way as in RSS_Parser_SimpleXML.
				$this->post['categories'][] = html_entity_decode( $text );
				break;
			case 'content:encoded':
				$this->post['post_content'] = $text;
				break;
			case 'description':
				$this->post['post_excerpt'] = $text;
				break;
			case 'dc:creator':
				$this->post['imported_author_name'] = $text;
				break;
			case 'item':
				// tidy up: if no content, use the description
				if ( empty( $this->post['post_content'] ) && ! empty( $this->post['post_excerpt'] ) ) {
					$this->post['post_content'] = $this->post['post_excerpt'];
					unset( $this->post['post_excerpt'] );
				}

				$this->posts[] = $this->post;

				// reset
				$this->post               = array();
				$this->post['categories'] = array();
				$this->in_item            = false;

				break;
		}
	}
}
rss-importer.php
5 5 Description: Import posts from an RSS feed. 6 6 Author: wordpressdotorg 7 7 Author URI: http://wordpress.org/ 8 Version: 0. 28 Version: 0.3b 9 9 Stable tag: 0.2 10 10 License: GPL version 2 or later - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html 11 11 */ 12 12 13 if ( ! defined('WP_LOAD_IMPORTERS') )13 if ( ! defined( 'WP_LOAD_IMPORTERS' ) ) 14 14 return; 15 15 16 // use for debug 17 define( 'IMPORT_DEBUG', false ); 18 16 19 // Load Importer API 17 20 require_once ABSPATH . 'wp-admin/includes/import.php'; 18 21 19 if ( ! class_exists( 'WP_Importer' ) ) {22 if ( ! class_exists( 'WP_Importer' ) ) { 20 23 $class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php'; 21 24 if ( file_exists( $class_wp_importer ) ) 22 require _once$class_wp_importer;25 require $class_wp_importer; 23 26 } 24 27 28 require dirname( __FILE__ ) . '/parsers.php'; 29 25 30 /** 26 31 * RSS Importer 27 32 * … … 38 43 * 39 44 * @since unknown 40 45 */ 41 if ( class_exists( 'WP_Importer' ) ) {46 if ( class_exists( 'WP_Importer' ) ) : 42 47 class RSS_Import extends WP_Importer { 43 48 var $id; // WXR attachment ID 44 49 var $posts = array (); 45 50 var $file; 46 47 function header() { 48 echo '<div class="wrap">'; 49 screen_icon(); 50 echo '<h2>'.__('Import RSS', 'rss-importer').'</h2>'; 51 } 52 53 function footer() { 54 echo '</div>'; 55 } 56 51 57 52 function greet() { 58 53 echo '<div class="narrow">'; 59 echo '<p>' .__('Howdy! This importer allows you to extract posts from an RSS 2.0 file into your WordPress site. This is useful if you want to import your posts from a system that is not handled by a custom import tool. Pick an RSS file to upload and click Import.', 'rss-importer').'</p>';60 wp_import_upload_form( "admin.php?import=rss&step=1");54 echo '<p>' . __( 'Howdy! This importer allows you to extract posts from an RSS 2.0 file into your WordPress site. This is useful if you want to import your posts from a system that is not handled by a custom import tool. 
Pick an RSS file to upload and click Import.', 'rss-importer' ) . '</p>'; 55 wp_import_upload_form( 'admin.php?import=rss&step=1' ); 61 56 echo '</div>'; 62 57 } 63 58 64 function _normalize_tag( $matches ) { 65 return '<' . strtolower( $matches[1] ); 66 } 67 68 function get_posts() { 69 global $wpdb; 70 71 set_magic_quotes_runtime(0); 72 $datalines = file($this->file); // Read the file into an array 73 $importdata = implode('', $datalines); // squish it 74 $importdata = str_replace(array ("\r\n", "\r"), "\n", $importdata); 75 76 preg_match_all('|<item>(.*?)</item>|is', $importdata, $this->posts); 77 $this->posts = $this->posts[1]; 78 $index = 0; 79 foreach ($this->posts as $post) { 80 preg_match('|<title>(.*?)</title>|is', $post, $post_title); 81 $post_title = str_replace(array('<![CDATA[', ']]>'), '', $wpdb->escape( trim($post_title[1]) )); 82 83 preg_match('|<pubdate>(.*?)</pubdate>|is', $post, $post_date_gmt); 84 85 if ($post_date_gmt) { 86 $post_date_gmt = strtotime($post_date_gmt[1]); 87 } else { 88 // if we don't already have something from pubDate 89 preg_match('|<dc:date>(.*?)</dc:date>|is', $post, $post_date_gmt); 90 $post_date_gmt = preg_replace('|([-+])([0-9]+):([0-9]+)$|', '\1\2\3', $post_date_gmt[1]); 91 $post_date_gmt = str_replace('T', ' ', $post_date_gmt); 92 $post_date_gmt = strtotime($post_date_gmt); 93 } 94 95 $post_date_gmt = gmdate('Y-m-d H:i:s', $post_date_gmt); 96 $post_date = get_date_from_gmt( $post_date_gmt ); 97 98 preg_match_all('|<category>(.*?)</category>|is', $post, $categories); 99 $categories = $categories[1]; 100 101 if (!$categories) { 102 preg_match_all('|<dc:subject>(.*?)</dc:subject>|is', $post, $categories); 103 $categories = $categories[1]; 104 } 105 106 $cat_index = 0; 107 foreach ($categories as $category) { 108 $categories[$cat_index] = $wpdb->escape( html_entity_decode( $category ) ); 109 $cat_index++; 110 } 111 112 preg_match('|<guid.*?>(.*?)</guid>|is', $post, $guid); 113 if ($guid) 114 $guid = 
$wpdb->escape(trim($guid[1])); 115 else 116 $guid = ''; 117 118 preg_match('|<content:encoded>(.*?)</content:encoded>|is', $post, $post_content); 119 $post_content = str_replace(array ('<![CDATA[', ']]>'), '', $wpdb->escape(trim($post_content[1]))); 120 121 if (!$post_content) { 122 // This is for feeds that put content in description 123 preg_match('|<description>(.*?)</description>|is', $post, $post_content); 124 $post_content = $wpdb->escape( html_entity_decode( trim( $post_content[1] ) ) ); 125 } 126 127 // Clean up content 128 $post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content); 129 $post_content = str_replace('<br>', '<br />', $post_content); 130 $post_content = str_replace('<hr>', '<hr />', $post_content); 131 132 $post_author = 1; 133 $post_status = 'publish'; 134 $this->posts[$index] = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_title', 'post_status', 'guid', 'categories'); 135 $index++; 136 } 137 } 138 139 function import_posts() { 59 function import_posts( $posts ) { 60 $authors = array(); // author name => array (post_ids) 61 140 62 echo '<ol>'; 63 foreach ( $posts as $post ) { 64 echo '<li>' . __( 'Importing post...', 'rss-importer' ); 141 65 142 foreach ($this->posts as $post) { 143 echo "<li>".__('Importing post...', 'rss-importer'); 144 145 extract($post); 146 147 if ($post_id = post_exists($post_title, $post_content, $post_date)) { 148 _e('Post already imported', 'rss-importer'); 66 $post['post_author'] = get_current_user_id(); 67 $post['post_status'] = 'publish'; 68 69 if ( $post_id = post_exists( $post['post_title'], $post['post_content'], $post['post_date'] ) ) { 70 _e( 'Post already imported.', 'rss-importer' ); 149 71 } else { 150 $post_id = wp_insert_post($post); 72 $post_id = wp_insert_post( $post ); 73 151 74 if ( is_wp_error( $post_id ) ) 152 75 return $post_id; 153 if (!$post_id) { 154 _e('Couldn’t get post ID', 'rss-importer'); 76 77 if ( ! 
$post_id ) { 78 _e( 'Couldn’t get post ID', 'rss-importer' ); 155 79 return; 156 80 } 157 81 158 if (0 != count($categories)) 159 wp_create_categories($categories, $post_id); 82 if ( ! empty( $post['categories'] ) ) 83 wp_create_categories( $post['categories'], $post_id ); 84 160 85 _e('Done!', 'rss-importer'); 161 86 } 87 88 if ( isset( $post['imported_author_name'] ) ) { 89 $n = $post['imported_author_name']; 90 if ( isset( $authors[$n] ) ) 91 $authors[$n][] = (int) $post_id; 92 else 93 $authors[$n] = array( (int) $post_id ); 94 } 95 162 96 echo '</li>'; 163 97 } 164 98 165 99 echo '</ol>'; 166 100 101 if( ! empty( $authors ) ) 102 update_option( 'rss_importer', $authors ); 167 103 } 168 104 169 function import() { 105 /** 106 * Handles the WXR upload and initial parsing of the file to prepare for 107 * displaying author import options 108 * 109 * @return bool False if error uploading or invalid file, true otherwise 110 */ 111 function handle_upload() { 112 check_admin_referer( 'import-upload' ); 170 113 $file = wp_import_handle_upload(); 171 if ( isset($file['error']) ) { 172 echo $file['error']; 173 return; 114 115 if ( isset( $file['error'] ) ) { 116 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'rss-importer' ) . '</strong><br />'; 117 echo esc_html( $file['error'] ) . '</p>'; 118 return false; 174 119 } 175 120 176 $this->file = $file['file']; 177 $this->get_posts(); 178 $result = $this->import_posts(); 121 $parser = new RSS_Parser(); 122 $posts = $parser->parse( $file['file'] ); 123 124 if ( is_wp_error( $posts ) ) { 125 echo '<p><strong>' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '</strong><br />' . esc_html( $posts->get_error_message() ) . 
'</p>'; 126 return false; 127 } 128 129 $result = $this->import_posts( $posts ); 179 130 if ( is_wp_error( $result ) ) 180 131 return $result; 181 wp_import_cleanup($file['id']); 182 do_action('import_done', 'rss'); 132 133 wp_import_cleanup( $file['id'] ); 134 do_action( 'import_done', 'rss' ); 183 135 184 echo '< h3>';185 printf(__('All done. <a href="%s">Have fun!</a>', 'rss-importer'), get_option('home'));186 echo '</h3>';136 echo '<p>' . __('Import complete.', 'rss-importer') . '</p>'; 137 138 $this->get_author_form(); 187 139 } 188 140 141 /** 142 * Registered callback function for the WordPress Importer 143 * 144 * Manages the three separate stages of the import process 145 */ 189 146 function dispatch() { 190 if (empty ($_GET['step'])) 191 $step = 0; 192 else 193 $step = (int) $_GET['step']; 147 echo '<div class="wrap">'; 148 screen_icon(); 149 echo '<h2>'.__('Import RSS', 'rss-importer').'</h2>'; 194 150 195 $this->header(); 196 197 switch ($step) { 198 case 0 : 151 $step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step']; 152 switch ( $step ) { 153 case 0: 199 154 $this->greet(); 200 155 break; 201 case 1 : 202 check_admin_referer('import-upload'); 203 $result = $this->import(); 204 if ( is_wp_error( $result ) ) 205 echo $result->get_error_message(); 156 case 1: 157 158 $this->handle_upload(); 206 159 break; 160 case 2: 161 $this->update_authors(); 162 break; 207 163 } 208 164 209 $this->footer();165 echo '</div>'; 210 166 } 211 167 212 function RSS_Import() { 213 // Nothing. 168 /* 169 * Allows the user to assign authors to posts after import 170 */ 171 function get_author_form() { 172 $authors = get_option( 'rss_importer', array() ); 173 if( empty( $authors ) ) 174 return; 175 176 $directions = __( 'All posts were imported with the current user as author. Wordpress detected the following author names in the imported posts. 
Use this form to assign each imported post to a different WordPress user.', 'rss-importer' ); 177 $heading = __( 'Author mapping', 'rss-importer' ); 178 $mapthis = __( 'Map this name', 'rss-importer' ); 179 $tothis = __( 'To this Wordpress user', 'rss-importer' ); 180 $submit = esc_html( __( 'Save Changes', 'rss-importer' ) ); 181 182 $rows= ''; 183 $options = ''; 184 185 $blog_users = get_users( array( 'blog_id' => get_current_blog_id() ) ) ; 186 foreach ( $blog_users as $user ) { 187 $sel = selected( $user->ID, get_current_user_id(), false ); 188 $options .= "<option value='$user->ID'$sel>$user->display_name</option>"; 189 } 190 191 foreach ( array_keys( $authors ) as $author ) { 192 $a = esc_html( $author ); 193 $rows .= "<tr><td><label for='author-$a'>$a</label></td><td><select name='authors[$a]' id='author-$a'>" . $options . "</select></td></tr>"; 194 } 195 196 echo '<style type="text/css">#rss_import_authors th, #rss_import_authors td {padding: 3px 10px} </style>'; 197 echo "<div class='wrap'><h2>$heading</h2><p>$directions</p><form action='index.php?import=rss&step=2' method='post'>"; 198 wp_nonce_field( 'import-rss' ); 199 echo "<table id='rss_import_authors'><thead><th>$mapthis</th><th>$tothis</th></thead><tbody>$rows</tbody></table><input type='submit' class='button primary' value='$submit' /></form></div>"; 214 200 } 201 202 function update_authors() { 203 check_admin_referer( 'import-rss' ); 204 global $wpdb; 205 206 $authors = get_option( 'rss_importer', array() ); 207 if( empty( $_POST['authors'] ) || empty( $authors ) ) 208 return; 209 210 foreach( $_POST['authors'] as $imported_name => $user_id_to_assign ) { 211 $post_ids = implode( ', ', $authors[$imported_name] ); 212 $result = $wpdb->query( $wpdb->prepare("UPDATE $wpdb->posts SET post_author = %d WHERE ID IN ($post_ids)", $user_id_to_assign ) ); 213 } 214 215 if ( false !== $result ) 216 echo '<p>' . sprintf( __( 'Post authors updated. All done!' ) . ' <a href="' . 
trailingslashit( get_bloginfo( 'url' ) ) . '">%s</a>', __( 'Have fun!' ) ) . '</p>'; 217 else 218 echo '<p>' . __( 'An error occurred while trying to reassing post authors. Please try doing it manually.') . '</p>'; 219 } 215 220 } 216 221 217 222 $rss_import = new RSS_Import(); 218 223 219 register_importer( 'rss', __('RSS', 'rss-importer'), __('Import posts from an RSS feed.', 'rss-importer'), array ($rss_import, 'dispatch'));224 register_importer( 'rss', __( 'RSS', 'rss-importer' ), __( 'Import posts from an RSS feed.', 'rss-importer' ), array ( $rss_import, 'dispatch' ) ); 220 225 221 }// class_exists( 'WP_Importer' )226 endif; // class_exists( 'WP_Importer' ) 222 227 223 228 function rss_importer_init() { 224 229 load_plugin_textdomain( 'rss-importer', false, dirname( plugin_basename( __FILE__ ) ) . '/languages' );