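# NOTE: the following page text was captured from Pastebin together with the
# script and is not part of the Ruby source:
#   Advertisement
#   Not a member of Pastebin yet? Sign up; it unlocks many cool features!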
require 'securerandom'
require 'set'
require 'tempfile'

require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
- class ImportScripts::VBulletin < ImportScripts::Base
- BATCH_SIZE = 1000
- DBPREFIX = "vb_"
- ROOT_NODE=2
- # CHANGE THESE BEFORE RUNNING THE IMPORTER
- DATABASE = "vb4"
- TIMEZONE = "Australia/Sydney"
- ATTACHMENT_DIR = '/vagrant/attachments'
- AVATAR_DIR = '/vagrant/avatars'
# Prepares importer state: the old->new username map, the source timezone,
# an HTML-entity decoder and the MySQL connection to the vBulletin database.
def initialize
  super

  @old_username_to_new_usernames = {}
  @tz = TZInfo::Timezone.get(TIMEZONE)
  @htmlentities = HTMLEntities.new

  # NOTE(review): credentials are hard-coded for a local database; adjust
  # them together with DATABASE before running the importer.
  connection_settings = {
    host: "localhost",
    username: "root",
    password: "root",
    database: DATABASE
  }
  @client = Mysql2::Client.new(connection_settings)
end
# Runs every import step in dependency order.
#
# Groups come before users (users reference their primary group), categories
# before topics, and topics before posts. close_topics relies on the ids
# collected by import_topics, and post_process_posts rewrites references that
# can only be resolved once every post has been created.
def execute
  import_groups
  import_users
  import_categories
  import_topics
  import_posts
  import_attachments
  close_topics
  post_process_posts
end
# Imports every vBulletin user group, keyed by its usergroupid, using the
# HTML-decoded and stripped title as the group name.
def import_groups
  puts "", "importing groups..."

  group_rows = mysql_query(<<-SQL)
      SELECT usergroupid, title
        FROM #{DBPREFIX}usergroup
    ORDER BY usergroupid
  SQL

  create_groups(group_rows) do |row|
    {
      id: row["usergroupid"],
      name: @htmlentities.decode(row["title"]).strip
    }
  end
end
# Imports users in pages of BATCH_SIZE rows.
#
# For each user the password token is selected according to its scheme,
# members of the 'Administrators' group are flagged as admins, and after the
# Discourse user is created the old->new username mapping is recorded and the
# avatar and profile background are imported.
def import_users
  puts "", "importing users"

  user_count = mysql_query("SELECT COUNT(userid) count FROM #{DBPREFIX}user").first["count"]

  batches(BATCH_SIZE) do |offset|
    users = mysql_query(<<-SQL)
        SELECT u.userid, u.username, u.homepage, u.usertitle, u.usergroupid, u.joindate, u.email,
               CASE WHEN u.scheme='blowfish:10' THEN token
                    WHEN u.scheme='legacy' THEN REPLACE(token, ' ', ':')
               END AS password,
               IF(ug.title = 'Administrators', 1, 0) AS admin
          FROM #{DBPREFIX}user u
     LEFT JOIN #{DBPREFIX}usergroup ug ON ug.usergroupid = u.usergroupid
      ORDER BY userid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    # an empty page means every user has been processed
    break if users.size < 1

    # disabled line below, caused issues
    # next if all_records_exist? :users, users.map {|u| u["userid"].to_i}

    create_users(users, total: user_count, offset: offset) do |user|
      username = @htmlentities.decode(user["username"]).strip

      {
        id: user["userid"],
        name: username,
        username: username,
        email: user["email"].presence || fake_email,
        admin: user['admin'] == 1,
        password: user["password"],
        # to_s guards against a NULL homepage, which previously raised
        website: user["homepage"].to_s.strip,
        title: @htmlentities.decode(user["usertitle"]).strip,
        primary_group_id: group_id_from_imported_group_id(user["usergroupid"]),
        created_at: parse_timestamp(user["joindate"]),
        post_create_action: proc do |created_user|
          # remember the final username so mentions and quotes can be rewritten later
          @old_username_to_new_usernames[user["username"]] = created_user.username
          import_profile_picture(user, created_user)
          import_profile_background(user, created_user)
        end
      }
    end
  end
end
# Imports the most recent custom avatar of a vBulletin user.
#
# old_user      - row from the vBulletin user table (reads "userid").
# imported_user - the Discourse user that was created for it.
#
# The avatar is taken from the `filedata` column when present, otherwise from
# AVATAR_DIR/<filename>. Returns nil when there is no avatar, the file is
# missing or the upload could not be persisted.
def import_profile_picture(old_user, imported_user)
  file = nil

  picture = mysql_query(<<-SQL).first
      SELECT filedata, filename
        FROM #{DBPREFIX}customavatar
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  return if picture.nil?

  if picture['filedata']
    # avatar bytes are stored in the database: spool them to a temp file
    file = Tempfile.new("profile-picture")
    file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
    file.rewind
    upload = Upload.create_for(imported_user.id, file, picture["filename"], file.size)
  else
    # avatar lives on disk
    filename = File.join(AVATAR_DIR, picture['filename'])
    unless File.exist?(filename)
      puts "Avatar file doesn't exist: #{filename}"
      return nil
    end
    upload = create_upload(imported_user.id, filename, picture['filename'])
  end

  return if !upload.persisted?

  imported_user.create_user_avatar
  imported_user.user_avatar.update(custom_upload_id: upload.id)
  imported_user.update(uploaded_avatar_id: upload.id)
ensure
  # Best-effort cleanup of the temp file (only created on the filedata path).
  if file
    begin
      file.close
      file.unlink
    rescue StandardError
      nil
    end
  end
end
# Imports the most recent custom profile picture of a vBulletin user as the
# Discourse profile background.
#
# old_user      - row from the vBulletin user table (reads "userid").
# imported_user - the Discourse user that was created for it.
#
# Returns nil when there is no usable picture or the upload was not persisted.
def import_profile_background(old_user, imported_user)
  file = nil

  background = mysql_query(<<-SQL).first
      SELECT filedata, filename
        FROM #{DBPREFIX}customprofilepic
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  # a row without stored bytes cannot be uploaded (previously raised on nil)
  return if background.nil? || background["filedata"].nil?

  file = Tempfile.new("profile-background")
  file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
  file.rewind

  upload = Upload.create_for(imported_user.id, file, background["filename"], file.size)

  return if !upload.persisted?

  imported_user.user_profile.update(profile_background: upload.url)
ensure
  # Best-effort cleanup of the temp file.
  if file
    begin
      file.close
      file.unlink
    rescue StandardError
      nil
    end
  end
end
# Imports categories in two passes: nodes whose parent is ROOT_NODE become
# top-level categories; the remaining selected nodes become their children.
# Deeper descendants are re-parented onto their top-level ancestor so that the
# resulting tree has at most two levels.
def import_categories
  puts "", "importing top level categories..."

  categories = mysql_query(<<-SQL).to_a
    SELECT nodeid AS forumid, title, description, displayorder, parentid
      FROM #{DBPREFIX}node
     WHERE parentid=#{ROOT_NODE}
     UNION
    SELECT nodeid, title, description, displayorder, parentid
      FROM #{DBPREFIX}node
     WHERE contenttypeid = 20
       AND parentid IN (SELECT nodeid FROM #{DBPREFIX}node WHERE parentid=#{ROOT_NODE})
  SQL

  top_level_categories, children_categories = categories.partition { |c| c["parentid"] == ROOT_NODE }

  create_categories(top_level_categories) do |category|
    {
      id: category["forumid"],
      name: @htmlentities.decode(category["title"]).strip,
      position: category["displayorder"],
      description: @htmlentities.decode(category["description"]).strip
    }
  end

  puts "", "importing child categories..."

  top_level_category_ids = Set.new(top_level_categories.map { |c| c["forumid"] })

  # forumid => parentid index (first row wins), replacing a linear scan per step
  parent_of = {}
  categories.each do |c|
    parent_of[c["forumid"]] = c["parentid"] unless parent_of.key?(c["forumid"])
  end

  # cut down the tree to only 2 levels of categories
  children_categories.each do |child|
    until top_level_category_ids.include?(child["parentid"])
      ancestor = parent_of[child["parentid"]]
      # unknown ancestor: stop climbing instead of raising on nil
      break if ancestor.nil?
      child["parentid"] = ancestor
    end
  end

  create_categories(children_categories) do |category|
    {
      id: category["forumid"],
      name: @htmlentities.decode(category["title"]).strip,
      position: category["displayorder"],
      description: @htmlentities.decode(category["description"]).strip,
      parent_category_id: category_id_from_imported_category_id(category["parentid"])
    }
  end
end
# Imports topics (nodes with contenttypeid 24 whose parent has contenttypeid
# 20) in pages of BATCH_SIZE rows.
#
# Each topic is created under the import id "thread-<nodeid>". Ids of topics
# whose `open` flag is 0 are collected in @closed_topic_ids for close_topics.
# Topics whose text is blank or cannot be preprocessed are skipped.
def import_topics
  puts "", "importing topics..."

  # keep track of closed topics
  @closed_topic_ids = []

  topic_count = mysql_query(<<-SQL).first["cnt"]
    select count(nodeid) cnt from #{DBPREFIX}node where parentid in (
      select nodeid from #{DBPREFIX}node where contenttypeid=20 ) and contenttypeid=24;
  SQL

  batches(BATCH_SIZE) do |offset|
    topics = mysql_query(<<-SQL)
      SELECT t.nodeid AS threadid, t.title, t.parentid AS forumid,t.open,t.userid AS postuserid,t.publishdate AS dateline,
             nv.count views, 1 AS visible, t.sticky,
             CONVERT(CAST(rawtext AS BINARY)USING utf8) AS raw
        FROM #{DBPREFIX}node t
   LEFT JOIN #{DBPREFIX}nodeview nv ON nv.nodeid=t.nodeid
   LEFT JOIN #{DBPREFIX}text txt ON txt.nodeid=t.nodeid
       WHERE t.parentid in ( select nodeid from #{DBPREFIX}node where contenttypeid=20 )
         AND t.contenttypeid = 24
    ORDER BY t.nodeid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if topics.size < 1

    # disabled line below, caused issues
    # next if all_records_exist? :posts, topics.map {|t| "thread-#{topic["threadid"]}" }

    create_posts(topics, total: topic_count, offset: offset) do |topic|
      # best effort: a topic whose text cannot be preprocessed is skipped
      raw = begin
        preprocess_post_raw(topic["raw"])
      rescue StandardError
        nil
      end
      next if raw.blank?

      topic_id = "thread-#{topic["threadid"]}"
      # compare as text so both an Integer 0 and a "0" string are recognised
      @closed_topic_ids << topic_id if topic["open"].to_s == "0"

      created_at = parse_timestamp(topic["dateline"])
      mapped = {
        id: topic_id,
        user_id: user_id_from_imported_user_id(topic["postuserid"]) || Discourse::SYSTEM_USER_ID,
        title: @htmlentities.decode(topic["title"]).strip[0...255],
        category: category_id_from_imported_category_id(topic["forumid"]),
        raw: raw,
        created_at: created_at,
        visible: topic["visible"].to_i == 1,
        views: topic["views"],
      }
      # sticky topics are pinned as of their creation time
      mapped[:pinned_at] = created_at if topic["sticky"].to_i == 1
      mapped
    end
  end
end
# Imports replies (nodes with contenttypeid 20 whose parent is not a
# contenttypeid 23 node) in pages of BATCH_SIZE rows.
#
# A reply is attached to the topic imported as "thread-<parentid>"; rows with
# blank text or without an imported topic are skipped. When the parent id
# itself resolves to an imported post, the reply is linked to that post number.
def import_posts
  puts "", "importing posts..."

  # make sure `firstpostid` is indexed
  # NOTE(review): this targets an un-prefixed `thread` table that no other
  # query in this importer reads; any failure is deliberately ignored.
  begin
    mysql_query("CREATE INDEX firstpostid_index ON thread (firstpostid)")
  rescue StandardError
    nil
  end

  post_count = mysql_query(<<-SQL).first["cnt"]
    SELECT COUNT(nodeid) cnt FROM #{DBPREFIX}node WHERE parentid NOT IN (
      SELECT nodeid FROM #{DBPREFIX}node WHERE contenttypeid=23 ) AND contenttypeid=20;
  SQL

  batches(BATCH_SIZE) do |offset|
    posts = mysql_query(<<-SQL)
      SELECT p.nodeid AS postid, p.userid AS userid, p.parentid AS threadid,
             CONVERT(CAST(rawtext AS BINARY)USING utf8) AS raw, p.publishdate AS dateline,
             1 AS visible, p.parentid AS parentid
        FROM #{DBPREFIX}node p
   LEFT JOIN #{DBPREFIX}nodeview nv ON nv.nodeid=p.nodeid
   LEFT JOIN #{DBPREFIX}text txt ON txt.nodeid=p.nodeid
       WHERE p.parentid NOT IN ( select nodeid from #{DBPREFIX}node where contenttypeid=23 )
         AND p.contenttypeid = 20
    ORDER BY postid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if posts.size < 1

    # disabled line below, caused issues
    # next if all_records_exist? :posts, posts.map {|p| p["postid"] }

    create_posts(posts, total: post_count, offset: offset) do |post|
      raw = preprocess_post_raw(post["raw"])
      next if raw.blank?

      topic = topic_lookup_from_imported_post_id("thread-#{post["threadid"]}")
      next unless topic

      mapped = {
        id: post["postid"],
        user_id: user_id_from_imported_user_id(post["userid"]) || Discourse::SYSTEM_USER_ID,
        topic_id: topic[:topic_id],
        raw: raw,
        created_at: parse_timestamp(post["dateline"]),
        hidden: post["visible"].to_i == 0,
      }

      parent = topic_lookup_from_imported_post_id(post["parentid"])
      mapped[:reply_to_post_number] = parent[:post_number] if parent

      mapped
    end
  end
end
# Finds the attachment record for `attachment_id` and turns it into an upload.
#
# post          - the post that references the attachment (its user owns the upload).
# attachment_id - digits captured from an [ATTACH] tag; matched against attach.nodeid.
#
# The file is read from ATTACHMENT_DIR/<userid digits joined by "/">/<filedataid>.attach;
# when that file is missing but the database holds the bytes, they are written
# to /tmp/attach_<filedataid> and uploaded from there.
#
# Returns [upload, real_filename], or nil when the record or file is missing,
# the upload is invalid, or the query fails.
def find_upload(post, attachment_id)
  sql = "SELECT a.filedataid, a.filename, fd.userid, LENGTH(fd.filedata) AS dbsize, filedata
           FROM #{DBPREFIX}attach a
      LEFT JOIN #{DBPREFIX}filedata fd ON fd.filedataid = a.filedataid
          WHERE a.nodeid = #{attachment_id}"

  row = mysql_query(sql).first
  if row.nil?
    puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  user_path = row['userid'].to_s.split('').join('/')
  filename = File.join(ATTACHMENT_DIR, user_path, "#{row['filedataid']}.attach")

  real_filename = row['filename']
  # a name starting with "." gets a random prefix
  real_filename.prepend SecureRandom.hex if real_filename[0] == '.'

  unless File.exist?(filename)
    if row['dbsize'].to_i == 0
      puts "Attachment file #{row['filedataid']} doesn't exist"
      return nil
    end

    # fall back to the copy stored in the database
    filename = File.join('/tmp/', "attach_#{row['filedataid']}")
    File.open(filename, 'wb') do |f|
      #f.write(PG::Connection.unescape_bytea(row['filedata']))
      f.write(row['filedata'])
    end
  end

  upload = create_upload(post.user.id, filename, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
rescue Mysql2::Error => e
  puts "SQL Error"
  puts e.message
  puts sql
  nil
end
# Replaces every [ATTACH...]n<id>[/ATTACH] tag in every imported post with the
# HTML for the corresponding upload and saves the post as a revision when its
# text changed. Tags whose attachment cannot be found are removed from the text.
#
# Progress is reported against the number of posts, because the loop visits
# every post; a summary of imported and failed attachments is printed at the end.
def import_attachments
  puts '', 'importing attachments...'

  attachment_regex = /\[attach[^\]]*\]n(\d+)\[\/attach\]/i

  current_count = 0
  total_count = Post.count
  success_count = 0
  fail_count = 0

  Post.find_each do |post|
    current_count += 1
    print_status current_count, total_count

    new_raw = post.raw.gsub(attachment_regex) do
      attachment_id = $1
      upload, filename = find_upload(post, attachment_id)

      if upload
        success_count += 1
        html_for_upload(upload, filename)
      else
        fail_count += 1
        # the tag is dropped when the attachment cannot be imported
        ""
      end
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Import attachments from vBulletin' })
    end
  end

  puts "", "attachments imported: #{success_count}, failed: #{fail_count}"
end
# Marks as closed every Discourse topic that contains a post whose `import_id`
# custom field is one of the ids collected in @closed_topic_ids.
# Does nothing beyond printing the banner when no ids were collected.
def close_topics
  puts "", "Closing topics..."

  # nothing was recorded (or import_topics did not run): skip the UPDATE
  return if @closed_topic_ids.nil? || @closed_topic_ids.empty?

  sql = <<-SQL
    WITH closed_topic_ids AS (
      SELECT t.id AS topic_id
        FROM post_custom_fields pcf
        JOIN posts p ON p.id = pcf.post_id
        JOIN topics t ON t.id = p.topic_id
       WHERE pcf.name = 'import_id'
         AND pcf.value IN (?)
    )
    UPDATE topics
       SET closed = true
     WHERE id IN (SELECT topic_id FROM closed_topic_ids)
  SQL

  Topic.exec_sql(sql, @closed_topic_ids)
end
# Runs postprocess_post_raw over every post and saves those whose text
# changed. A PrettyText::JavaScriptError raised for a post is ignored so the
# remaining posts are still processed; progress is printed for every post.
def post_process_posts
  puts "", "Postprocessing posts..."

  processed = 0
  total = Post.count

  Post.find_each do |post|
    begin
      rewritten = postprocess_post_raw(post.raw)
      unless rewritten == post.raw
        post.raw = rewritten
        post.save
      end
    rescue PrettyText::JavaScriptError
      nil
    ensure
      processed += 1
      print_status(processed, total)
    end
  end
end
# Converts vBulletin BBCode in `raw` into the markup stored in Discourse.
#
# Handles escaped whitespace, code-like tags ([HTML], [PHP], [HIGHLIGHT],
# [CODE], [SAMP]), chevron escaping, [URL], [MP3], [MENTION], [USER],
# [COLOR], [FONT], [CENTER], [LIST], [QUOTE], [YOUTUBE] and [VIDEO=youtube].
# Usernames are mapped through @old_username_to_new_usernames when known.
#
# Returns "" for blank input, otherwise the converted text.
def preprocess_post_raw(raw)
  return "" if raw.blank?

  # decode HTML entities
  raw = @htmlentities.decode(raw)

  # fix whitespaces (the text contains literal backslash sequences)
  raw = raw.gsub(/(\\r)?\\n/, "\n")
           .gsub("\\t", "\t")

  # [HTML]...[/HTML]
  raw = raw.gsub(/\[html\]/i, "\n```html\n")
           .gsub(/\[\/html\]/i, "\n```\n")

  # [PHP]...[/PHP]
  raw = raw.gsub(/\[php\]/i, "\n```php\n")
           .gsub(/\[\/php\]/i, "\n```\n")

  # [HIGHLIGHT="..."]
  raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

  # [CODE]...[/CODE]
  # [HIGHLIGHT]...[/HIGHLIGHT]
  raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
           .gsub(/\[\/?highlight\]/i, "\n```\n")

  # [SAMP]...[/SAMP]
  raw = raw.gsub(/\[\/?samp\]/i, "`")

  # replace all chevrons with HTML entities
  # NOTE: must be done
  #  - AFTER all the "code" processing
  #  - BEFORE the "quote" processing
  # Chevrons inside backticks are parked as U+2603 so they survive unescaped.
  raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
           .gsub("<", "&lt;")
           .gsub("\u2603", "<")

  raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
           .gsub(">", "&gt;")
           .gsub("\u2603", ">")

  # [URL=...]...[/URL]
  raw = raw.gsub(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "<a href=\"#{$1}\">#{$2}</a>" }

  # [URL]...[/URL]
  # [MP3]...[/MP3]
  raw = raw.gsub(/\[\/?url\]/i, "")
           .gsub(/\[\/?mp3\]/i, "")

  # [MENTION]<username>[/MENTION]
  raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do
    old_username = $1
    "@#{@old_username_to_new_usernames.fetch(old_username, old_username)}"
  end

  # [USER=<user_id>]<username>[/USER]
  raw = raw.gsub(/\[user="?(\d+)"?\](.+?)\[\/user\]/i) do
    old_username = $2
    "@#{@old_username_to_new_usernames.fetch(old_username, old_username)}"
  end

  # [FONT=blah] and [COLOR=blah] (the /i flag already covers both cases)
  raw = raw.gsub(/\[color=.*?\](.*?)\[\/color\]/im, '\1')
           .gsub(/\[font=.*?\](.*?)\[\/font\]/im, '\1')

  # [CENTER]...[/CENTER]
  raw = raw.gsub(/\[CENTER\](.*?)\[\/CENTER\]/im, '\1')

  # fix LIST
  raw = raw.gsub(/\[LIST\](.*?)\[\/LIST\]/im, '<ul>\1</ul>')
           .gsub(/\[\*\]/im, '<li>')

  # [QUOTE]...[/QUOTE]
  raw = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

  # [QUOTE=<username>]...[/QUOTE]
  raw = raw.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
    old_username, quote = $1, $2
    new_username = @old_username_to_new_usernames.fetch(old_username, old_username)
    "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n"
  end

  # [YOUTUBE]<id>[/YOUTUBE]
  raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }

  # [VIDEO=youtube;<id>]...[/VIDEO]
  raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }

  raw
end
# Rewrites references that can only be resolved after all posts exist.
#
# - [QUOTE=<username>;n<post_id>] becomes a Discourse quote pointing at the
#   imported post (or a plain username quote when the post is unknown).
# - Leftover [ATTACH]<digits>[/ATTACH] tags are removed.
# - [THREAD] and [POST] tags become the looked-up URL, or a markdown link when
#   they carry link text; unresolved tags are left untouched.
#
# Returns the rewritten text.
def postprocess_post_raw(raw)
  # [QUOTE=<username>;<post_id>]...[/QUOTE]
  # The username may not contain "]" so the match cannot start inside an
  # earlier, already converted [quote="..."] block.
  raw = raw.gsub(/\[quote=([^;\]]+);n(\d+)\](.+?)\[\/quote\]/im) do
    old_username, post_id, quote = $1, $2, $3
    username = @old_username_to_new_usernames.fetch(old_username, old_username)

    lookup = topic_lookup_from_imported_post_id(post_id)
    if lookup
      "\n[quote=\"#{username},post:#{lookup[:post_number]},topic:#{lookup[:topic_id]}\"]\n#{quote}\n[/quote]\n"
    else
      "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
    end
  end

  # remove attachments
  raw = raw.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

  # [THREAD]<thread_id>[/THREAD]
  # ==> http://my.discourse.org/t/slug/<topic_id>
  raw = raw.gsub(/\[thread\](\d+)\[\/thread\]/i) do
    original, thread_id = $&, $1
    lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
    lookup ? lookup[:url] : original
  end

  # [THREAD=<thread_id>]...[/THREAD]
  # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
  raw = raw.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do
    original, thread_id, link = $&, $1, $2
    lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
    lookup ? "[#{link}](#{lookup[:url]})" : original
  end

  # [POST]<post_id>[/POST]
  # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
  raw = raw.gsub(/\[post\](\d+)\[\/post\]/i) do
    original, post_id = $&, $1
    lookup = topic_lookup_from_imported_post_id(post_id)
    lookup ? lookup[:url] : original
  end

  # [POST=<post_id>]...[/POST]
  # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
  raw = raw.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) do
    original, post_id, link = $&, $1, $2
    lookup = topic_lookup_from_imported_post_id(post_id)
    lookup ? "[#{link}](#{lookup[:url]})" : original
  end

  raw
end
# Shifts `timestamp` through the configured source timezone (@tz) and wraps
# the result with Time.zone.at.
# NOTE(review): presumably `timestamp` is a Unix epoch integer as stored by
# vBulletin; confirm against the column type.
def parse_timestamp(timestamp)
  local_time = @tz.utc_to_local(timestamp)
  Time.zone.at(local_time)
end
# Builds a random placeholder address ("<32 hex chars>@domain.com") for users
# that have no email in the source database.
def fake_email
  "#{SecureRandom.hex}@domain.com"
end
# Runs `sql` on the vBulletin MySQL connection with row caching disabled and
# returns the result set.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, query_options)
end
- end
- ImportScripts::VBulletin.new.perform
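# NOTE: the following page text was captured from Pastebin together with the
# script and is not part of the Ruby source:
#   Advertisement
#   Add Comment
#   Please sign in to add a comment.
#   Advertisement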