*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; version 2 or later of
* the licence.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*******************************************************************/

// ---------------------------------------------------------------------
// Overview
//
// One-shot import of WordPress content into Drupal. After a full Drupal
// bootstrap the script:
//   1. optionally deletes every node with nid > WP_CLEAN_NID,
//   2. reads each row of wp_posts with post_type "post" and saves it as
//      a node,
//   3. attaches the post's term slugs as free tags,
//   4. copies the post's wp_comments rows into {comments},
//   5. downloads each attachment, records it in {files} and
//      {file_revisions}, and rewrites the attachment URL in the body.
// Progress messages are printed as plain text.
// ---------------------------------------------------------------------

// ---------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------

// Delete any nid > x - When debugging, this lets you run this script
// over and over again.
// Disable: -1
define("WP_CLEAN_NID", 23);

// Debug a particular post?
// Disable: -1
define("WP_DEBUG_POST", -1);

// Node type for posts:
define("WP_POST_TYPE", 'post');

// Encoding
define("WP_ENCODING", 'UTF-8');

// Allow comments? 0: No, 1: Read-only, 2: Yes
define("WP_COMMENTS", 2);

// Teaser Size: -1 = Infinite
define("WP_TEASER_LEN", -1);

// Drupal User ID
define("WP_UID", 1);

// Wordpress User ID
define("WP_WP_UID", 1);

// Username
define("WP_USER", 'tumbleweed');

// Post Input Format
define("WP_POST_IF", 4);

// Comment Input Format
define("WP_COM_IF", 1);

// Pingback Input Format
define("WP_COM_PING_IF", 5);

// Characters to trim off the start of URLs i.e. http://tumbleweed.org.za/ = 25
define("WP_URL_TRIM", 25);

// Taxonomy id for the tags taxonomy (we only use tags)
define("WP_TAGS_TAX_ID", 1);

// Comment Subject length (taken from body)
define("WP_COM_SUM_LEN", 30);

// Files path: Use a trailing slash, but no leading slash
define("WP_FILEPATH", 'files/');

// ---------------------------------------------------------------------
// Bootstrap
// ---------------------------------------------------------------------

require_once './includes/bootstrap.inc';
drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

header("Content-Type: text/plain");

// Clean up on multiple runs
if (WP_CLEAN_NID > -1) {
  $r = db_query('SELECT nid FROM {node} WHERE nid > %d', WP_CLEAN_NID);
  while ($row = db_fetch_object($r)) {
    node_delete($row->nid);
  }
}

// ---------------------------------------------------------------------
// Iterate over posts
// ---------------------------------------------------------------------

$r = db_query('SELECT ID, post_status, post_title, post_content, guid, '
  . 'unix_timestamp(post_date_gmt) AS post_date_gmt, '
  . 'unix_timestamp(post_modified_gmt) as post_modified_gmt '
  . 'FROM wp_posts WHERE post_type="post" '
  . (WP_DEBUG_POST == -1 ? '' : 'AND ID = ' . WP_DEBUG_POST . ' ')
  . 'ORDER BY post_date_gmt');

while ($row = db_fetch_object($r)) {
  $wp_id = $row->ID;

  $p = array(
    'type' => WP_POST_TYPE,
    'created' => $row->post_date_gmt,
    'changed' => $row->post_modified_gmt,
    // Was misspelled 'revision_timestmap' in the original script.
    'revision_timestamp' => $row->post_modified_gmt,
    // Only WordPress drafts are imported unpublished.
    'status' => $row->post_status == 'draft' ? 0 : 1,
    'comment' => WP_COMMENTS,
    'promote' => 1,
    'sticky' => 0,
    'title' => drupal_convert_to_utf8($row->post_title, WP_ENCODING),
    'body' => drupal_convert_to_utf8($row->post_content, WP_ENCODING),
    'format' => WP_POST_IF,
    'uid' => WP_UID,
    'name' => WP_USER,
    // Skip the first WP_URL_TRIM characters of the guid and drop its
    // final character (length -1).
    'path' => drupal_substr(drupal_convert_to_utf8($row->guid, WP_ENCODING), WP_URL_TRIM, -1),
  );
  $post = (object) $p;

  // Hack - my wordpress seems to have broken encoding.
  // Compare against FALSE so a stray character at offset 0 is detected too.
  if (strpos($post->body, 'Â') !== FALSE) {
    print ("Bad encoding detected: {$post->path}\n");
    $post->body = str_replace('Â', '', $post->body);
  }

  $post->teaser = $post->body;
  // Trim teaser?
  if (WP_TEASER_LEN > -1) {
    $post->teaser = drupal_substr($post->body, 0, WP_TEASER_LEN);
  }

  node_save($post);
  $nid = $post->nid;

  // Tags: gather the term slugs linked to this post and hand them to the
  // taxonomy module as one comma-separated free-tagging string.
  $taxr = db_query('SELECT slug FROM wp_terms NATURAL JOIN wp_term_taxonomy NATURAL JOIN wp_term_relationships WHERE object_id = %d', $wp_id);
  $tags = array();
  while ($tax = db_fetch_object($taxr)) {
    $tags[] = drupal_convert_to_utf8($tax->slug, WP_ENCODING);
  }
  if (count($tags) > 0) {
    taxonomy_node_save($nid, array('tags' => array(WP_TAGS_TAX_ID => implode(', ', $tags))));
  }

  // Comments & Pingbacks:
  $comr = db_query('SELECT comment_content, comment_author_IP, comment_author, comment_author_email, comment_author_url, user_id, '
    . 'unix_timestamp(comment_date_gmt) AS comment_date_gmt, comment_type '
    . 'FROM wp_comments WHERE comment_post_ID = %d ORDER BY comment_date_gmt', $wp_id);
  while ($com = db_fetch_object($comr)) {
    // Every imported comment is top-level (pid 0): its thread value is the
    // node's current maximum thread plus one, in vancode form.
    $max = db_result(db_query('SELECT MAX(thread) FROM {comments} WHERE nid = %d', $nid));
    $max = rtrim($max, '/');

    $is_pingback = ($com->comment_type == 'pingback');

    $c = array(
      'nid' => $nid,
      'cid' => db_next_id('{comments}_cid'),
      'pid' => 0,
      // Comments by the WordPress blog owner map to the Drupal owner;
      // everything else is stored with uid 0.
      'uid' => $com->user_id == WP_WP_UID ? WP_UID : 0,
      'subject' => $is_pingback
        ? 'Pingback'
        : drupal_substr(drupal_convert_to_utf8(strip_tags($com->comment_content), WP_ENCODING), 0, WP_COM_SUM_LEN),
      'comment' => drupal_convert_to_utf8($com->comment_content, WP_ENCODING),
      'hostname' => drupal_convert_to_utf8($com->comment_author_IP, WP_ENCODING),
      'timestamp' => $com->comment_date_gmt,
      // The INSERT below binds $c['status'], which the original never set
      // (undefined index, stored as 0). Set it explicitly to the same value.
      'status' => COMMENT_PUBLISHED,
      'format' => $is_pingback ? WP_COM_PING_IF : WP_COM_IF,
      'name' => decode_entities(drupal_convert_to_utf8($com->comment_author, WP_ENCODING)),
      'mail' => drupal_convert_to_utf8($com->comment_author_email, WP_ENCODING),
      'homepage' => drupal_convert_to_utf8($com->comment_author_url, WP_ENCODING),
      'thread' => int2vancode(vancode2int($max) + 1) .'/',
      'score' => 0,
      'users' => serialize(array(0 => 0)),
    );

    db_query("INSERT INTO {comments} (cid, nid, pid, uid, subject, comment, format, hostname, "
      . "timestamp, status, score, users, thread, name, mail, homepage) VALUES "
      . "(%d, %d, %d, %d, '%s', '%s', %d, '%s', %d, %d, %d, '%s', '%s', '%s', '%s', '%s')",
      $c['cid'], $c['nid'], $c['pid'], $c['uid'], $c['subject'], $c['comment'], $c['format'], $c['hostname'],
      $c['timestamp'], $c['status'], $c['score'], $c['users'], $c['thread'], $c['name'], $c['mail'], $c['homepage']);

    comment_invoke_comment($c, 'insert');
    watchdog('content', t('Comment: added %subject.', array('%subject' => $c['subject'])), WATCHDOG_NOTICE, l(t('view'), 'node/'. $c['nid'], NULL, NULL, 'comment-'. $c['cid']));
  }
  _comment_update_node_statistics($nid);

  // Attachments:
  $filer = db_query('SELECT post_content, post_mime_type, guid FROM wp_posts WHERE post_type="attachment" AND post_parent=%d ORDER BY post_date_gmt', $wp_id);
  while ($file = db_fetch_object($filer)) {
    $name = upload_munge_filename(drupal_convert_to_utf8(basename($file->guid), WP_ENCODING));

    $f = array(
      'fid' => db_next_id('{files}_fid'),
      'nid' => $nid,
      'filename' => $name,
      'filepath' => WP_FILEPATH . $name,
      'filemime' => drupal_convert_to_utf8($file->post_mime_type, WP_ENCODING),
      // Placeholder; the real size is read after the download below.
      'filesize' => 0,
      'description' => drupal_convert_to_utf8($file->post_content, WP_ENCODING),
      'list' => 0,
    );
    $f = (object) $f;

    // Increment filename? Keep bumping a "-N" suffix before the extension
    // until no row in {files} uses the name.
    while (0 < db_num_rows(db_query("SELECT fid FROM {files} WHERE filename = '%s'", $name))) {
      $m = array();
      if (preg_match('/^(.+)-(\d+)\.([^.]+)$/', $name, $m)) {
        // Already has a numeric suffix: name-3.ext -> name-4.ext
        $name = $m[1] . '-' . (1 + $m[2]) . '.' . $m[3];
      }
      elseif (preg_match('/^(.+)\.([^.]+)$/', $name, $m)) {
        // First collision: name.ext -> name-1.ext
        $name = $m[1] . '-1.' . $m[2];
      }
      else {
        print("Unable to increment filename: $name");
        // Skip this attachment. A bare "continue" would only restart this
        // inner loop with the same $name and spin forever.
        continue 2;
      }
    }
    $f->filename = $name;
    $f->filepath = WP_FILEPATH . $name;

    // Download: a failed copy aborts the whole import.
    copy($file->guid, $f->filepath) || die ("Unable to download " . $file->guid);
    $f->filesize = filesize($f->filepath);

    $node = node_load($nid);

    db_query("INSERT INTO {files} (fid, nid, filename, filepath, filemime, filesize) VALUES (%d, %d, '%s', '%s', '%s', %d)",
      $f->fid, $f->nid, $f->filename, $f->filepath, $f->filemime, $f->filesize);
    db_query("INSERT INTO {file_revisions} (fid, vid, list, description) VALUES (%d, %d, %d, '%s')",
      $f->fid, $node->vid, $f->list, $f->description);

    // Key by the file's own id; the original indexed with an undefined $fid.
    $node->files[$f->fid] = $f;

    // Point references to the old attachment URL at the local copy.
    $node->body = str_replace($file->guid, '/' . $f->filepath, $node->body);

    $node->teaser = $node->body;
    // Trim teaser?
    if (WP_TEASER_LEN > -1) {
      $node->teaser = drupal_substr($node->body, 0, WP_TEASER_LEN);
    }

    node_save($node);
    print("Contains attachments: {$node->path}\n");
  }
}