| 1 | <?php |
|---|
| 2 | |
|---|
| 3 | //Xanga archive importer by Jeremy Jay |
|---|
| 4 | //Borrows heavily from the LiveJournal import script. |
|---|
| 5 | // |
|---|
| 6 | //Modified for current WP (2.1+) release and for current Xanga archive style by Daniel Kozlowski |
|---|
| 7 | // |
|---|
| 8 | //ATTN: unhtmlentities() has been disabled due to its turning all ". " into ".? " |
|---|
| 9 | // once the post was imported into wordpress 2.1. Thus, it has been disabled. If |
|---|
| 10 | // you're using a version of PHP earlier than 4.3, you'll need to un-comment |
|---|
| 11 | // calls to unhtmlentities() in this script. They are on lines 82 and 120. |
|---|
| 12 | // |
|---|
| 13 | //ATTN: I've run into some Xanga archives where the timestamp for a post or comment is |
|---|
| 14 | // missing a zero. For example, "12:08" is printed as "12:8". This script |
|---|
| 15 | // cannot read those! Please read through your code if some of the times are |
|---|
| 16 | // coming up weird once you've imported the archive files. |
|---|
| 17 | |
|---|
| 18 | |
|---|
class Xanga_Import {

	// Path of the uploaded archive file. Set by import() before import_posts() runs.
	var $file;

	// PHP 5+ constructor. Declared before the PHP 4 style constructor below so
	// that PHP 5 does not report a redefined constructor and PHP 7 does not
	// report a deprecated one.
	function __construct() {
		// Nothing.
	}

	// PHP 4 style constructor, kept so the class keeps its original interface.
	function Xanga_Import() {
		// Nothing.
	}

	// Prints the opening wrapper and the page title of the importer screen.
	function header() {
		echo '<div class="wrap">';
		echo '<h2>'.__('Import Xanga').'</h2>';
	}

	// Closes the wrapper opened by header().
	function footer() {
		echo '</div>';
	}

	// Converts HTML entities in $string back to their characters.
	// From php.net for < 4.3 compatibility. Not called by the importer itself;
	// see the commented-out call in _normalize_markup().
	function unhtmlentities($string) {
		$trans_tbl = get_html_translation_table(HTML_ENTITIES);
		$trans_tbl = array_flip($trans_tbl);
		return strtr($string, $trans_tbl);
	}

	// Step 0: prints the introduction text and the archive upload form.
	function greet() {
		echo '<p>'.__('Howdy! This importer allows you to extract posts and comments from Xanga Premium Archive files into your blog. If you do not have Premium but have enough posts to be looking at this, just pay $4 for a month to get the archive and you will at least be supporting Xanga for all the hosting they have done for you. Pick an archive file to upload and click Import.').'</p>';
		wp_import_upload_form("admin.php?import=xanga&step=1");
	}

	// preg_replace_callback() handler used by _normalize_markup(): returns the
	// matched "<TAG" / "</TAG" prefix with the tag name in lower case.
	function _lowercase_tag($tag_match) {
		return '<' . strtolower($tag_match[1]);
	}

	// Cleans up a fragment of archive HTML: trims it, strips CDATA markers,
	// lower-cases upper-case tag names and turns <br>/<hr> into their
	// self-closing forms. Returns the cleaned string.
	function _normalize_markup($html) {
		$html = str_replace(array ('<![CDATA[', ']]>'), '', trim($html));
		// Disabled on purpose (see the note at the top of this file); un-comment
		// the next line if you need it on PHP earlier than 4.3.
		//$html = $this->unhtmlentities($html);

		// The original used preg_replace() with the /e modifier, which no longer
		// exists in PHP 7+; a callback does the same job on every PHP version.
		$html = preg_replace_callback('|<(/?[A-Z]+)|', array ($this, '_lowercase_tag'), $html);
		$html = str_replace('<br>', '<br />', $html);
		$html = str_replace('<hr>', '<hr />', $html);
		return $html;
	}

	// Extracts a "Posted M/D/YYYY at H:MM AM|PM" stamp from $text with $pattern
	// and returns it as "YYYY-MM-DD HH:MM:00" (24-hour clock, zero-padded).
	// $pattern must capture month, day, year, time and AM/PM as groups 1-5.
	// Returns '' when $text holds no such stamp.
	function _posted_date($pattern, $text) {
		if ( !preg_match($pattern, $text, $stamp) )
			return '';

		// array_pad() guards against a time without a colon.
		$clock = array_pad(explode(':', trim($stamp[4])), 2, 0);
		$hour = (int) $clock[0];
		$min = (int) $clock[1];

		switch ( strtoupper(trim($stamp[5])) ) {
			case 'AM' : if ( 12 == $hour ) $hour = 0; break;   // 12:xx AM is 00:xx
			case 'PM' : if ( $hour < 12 ) $hour += 12; break;  // 1-11 PM are 13-23
		}

		// Zero-padding also normalises stamps such as "12:8" to "12:08".
		return sprintf('%s-%02d-%02d %02d:%02d:00', trim($stamp[3]), $stamp[1], $stamp[2], $hour, $min);
	}

	// Inserts every comment found in the archive block $block as a comment on
	// post $comment_post_ID, skipping comments that comment_exists() reports as
	// already stored. Returns the number of comments inserted.
	function _import_comments($block, $comment_post_ID) {
		global $wpdb;

		preg_match_all('|<div class="ctextfooterwrap"><div class="ctext">(.*?)</div></div>|is', $block, $comments);
		$comments = $comments[1];

		$num_comments = 0;
		foreach ($comments as $comment) {
			$comment_content = '';
			if ( preg_match('|^(.*?)</div><div class="cfooter">|is', $comment, $body) )
				$comment_content = $body[1];
			$comment_content = $wpdb->escape($this->_normalize_markup($comment_content));

			$comment_date = $this->_posted_date('/<div class="cfooter">Posted (.*?)\/(.*?)\/(.*?) at (.*?) (.*?) by/i', $comment);

			$comment_author = '';
			if ( preg_match('|<a href="http://www\.xanga\.com/home\.aspx\?user=(.*?)">(.*?)</a>|i', $comment, $author) )
				$comment_author = trim($author[1]);
			$comment_author = $wpdb->escape($comment_author);
			$comment_author_url = 'http://www.xanga.com/home.aspx?user='.$comment_author;

			$comment_approved = 1;
			// Check if it's already there
			if (!comment_exists($comment_author, $comment_date)) {
				$commentdata = compact('comment_post_ID', 'comment_author', 'comment_author_url', 'comment_date', 'comment_content', 'comment_approved');
				$commentdata = wp_filter_comment($commentdata);
				wp_insert_comment($commentdata);
				$num_comments++;
			}
		}
		return $num_comments;
	}

	// Reads the archive file in $this->file and imports its contents.
	// The archive is a sequence of "blogheader" blocks: a block whose title is
	// "<n> Comments" carries the comments of the post block before it, any other
	// block is a post. Progress is echoed as an ordered list.
	function import_posts() {
		global $wpdb, $current_user;

		// magic_quotes_runtime only exists before PHP 5.4, and
		// set_magic_quotes_runtime() is gone from PHP 7, so switch the setting
		// off through ini_set() and only where it can still be on.
		if ( version_compare(PHP_VERSION, '5.4.0', '<') )
			ini_set('magic_quotes_runtime', '0');

		$importdata = false;
		if ( !empty($this->file) && is_readable($this->file) )
			$importdata = file_get_contents($this->file);
		if ( false === $importdata ) {
			echo '<p>'.__('Sorry, the archive file could not be read.').'</p>';
			return;
		}
		$importdata = str_replace(array ("\r\n", "\r"), "\n", $importdata);

		preg_match_all('|<div class="blogheader">(.*?)<hr size=1 noshade>(<div class="blogheader">)*?|is', $importdata, $posts);
		$posts = $posts[1];
		unset($importdata);
		echo '<ol>';

		// ID of the most recently handled post; comment blocks attach to it.
		$post_id = 0;

		foreach ($posts as $post) {
			flush();

			$post_title = '';
			if ( preg_match('|^(.*?)</div>|is', $post, $title) )
				$post_title = trim($title[1]);
			$post_title = $wpdb->escape($post_title);

			// Empty when the block has no "smalltext" date stamp.
			$post_date = $this->_posted_date('/<div class="smalltext">Posted (.*?)\/(.*?)\/(.*?) at (.*?) (.*?)<\/div>/is', $post);
			echo "Date: $post_date";

			// explode() replaces split(), which was removed in PHP 7.
			$com = explode(' ', $post_title);

			if ( isset($com[1]) && 'Comments' === $com[1] ) {
				if ( !$post_id ) {
					// A comment block with no post before it has nothing to attach to.
					echo '<li>'.__('Skipping comments that do not follow a post.').'</li>';
				} else {
					$num_comments = $this->_import_comments($post, $post_id);
					if ( $num_comments ) {
						echo ' ';
						printf(__('(%s comments)'), $num_comments);
					}
				}
			} else {
				$post_content = '';
				if ( preg_match('|<td style="padding-left:20; padding-bottom:20">(.*?)<div class="smalltext">Posted (\d{1,2}/\d{1,2}/\d{4}) at (.*?)<\/div>|is', $post, $body) )
					$post_content = $body[1];
				$post_content = $this->_normalize_markup($post_content);

				//Xanga archives have some pretty crappy formatting, so this reduces the string to a single line.
				//THIS WILL NOT REMOVE YOUR OWN FORMATTING. Any formatting changes you created in your posts
				//are tagged, and thus will not be affected by the removal.
				$post_content = str_replace("\n", " ", $post_content);
				$post_content = $wpdb->escape($post_content);

				$post_author = $current_user->ID;
				$post_status = 'publish';

				echo '<li>';
				if ($post_id = post_exists($post_title, $post_content, $post_date)) {
					printf(__('Post <i>%s</i> already exists.'), stripslashes($post_title));
				} else {
					printf(__('Importing post <i>%s</i>...'), stripslashes($post_title));
					$postdata = compact('post_author', 'post_date', 'post_content', 'post_title', 'post_status');
					$post_id = wp_insert_post($postdata);
					if (!$post_id) {
						_e("Couldn't get post ID");
						echo '</li>';
						break;
					}
				}
				// Closed here so that comment blocks do not emit an unmatched </li>.
				echo '</li>';
			}

			// Empty PHP's own buffer (when there is one) before the system
			// buffer; ob_flush() without an active buffer raises a notice.
			if ( ob_get_level() > 0 )
				ob_flush();
			flush();
		}
		echo '</ol>';
	}

	// Step 1: takes the uploaded archive, imports it, removes the upload and
	// prints the closing message. Prints the upload error instead when the
	// upload failed.
	function import() {
		$file = wp_import_handle_upload();
		if ( isset($file['error']) ) {
			echo $file['error'];
			return;
		}

		$this->file = $file['file'];
		$this->import_posts();
		wp_import_cleanup($file['id']);

		echo '<h3>';
		printf(__('All done. <a href="%s">Have fun!</a>'), get_option('home'));
		echo '</h3>';
	}

	// Entry point registered with WordPress: renders the screen for the step
	// given in $_GET['step'] (0 = upload form, 1 = run the import).
	function dispatch() {
		if (empty ($_GET['step']))
			$step = 0;
		else
			$step = (int) $_GET['step'];

		$this->header();

		switch ($step) {
			case 0 :
				$this->greet();
				break;
			case 1 :
				$this->import();
				break;
		}

		$this->footer();
	}
}
|---|
| 198 | |
|---|
// Create the importer and hand its dispatch() method to WordPress as the
// callback for the "xanga" entry of the import screen.
$xanga_import = new Xanga_Import();

register_importer(
	'xanga',
	'Xanga',
	__('Import posts from Xanga Archives'),
	array ($xanga_import, 'dispatch')
);
|---|
| 202 | ?> |
|---|
| 203 | |
|---|