Advertisement
NginUS

wp.rb1

Sep 21st, 2015
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.77 KB | None | 0 0
  1. module JekyllImport
  2. module Importers
  3. class WordPress < Importer
  4.  
  # Loads the libraries this importer needs by passing their names, in
  # order, to JekyllImport.require_with_fallback.
  def self.require_deps
    dependencies = ["rubygems", "sequel", "fileutils", "safe_yaml", "unidecode"]
    JekyllImport.require_with_fallback(dependencies)
  end
  14.  
  # Registers this importer's command-line options on +c+.
  #
  # c:: the command object. Its +option+ method is called once per setting
  #     with the config key, the switch syntax and a help string (the
  #     'status' option additionally passes +Array+ as the value type).
  def self.specify_options(c)
    c.option 'dbname', '--dbname DB', 'Database name (default: "")'
    c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
    c.option 'user', '--user USER', 'Database user name (default: "")'
    # %q keeps both the apostrophe and the embedded "" intact. Written as a
    # plain double-quoted literal, the inner "" split the text into three
    # adjacent literals and the help read "(default: )".
    c.option 'password', '--password PW', %q{Database user's password (default: "")}
    c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
    c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "wp_")'
    c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
    c.option 'comments', '--comments', 'Whether to import comments (default: true)'
    c.option 'categories', '--categories', 'Whether to import categories (default: true)'
    c.option 'tags', '--tags', 'Whether to import tags (default: true)'
    c.option 'more_excerpt', '--more_excerpt', 'Whether to use more excerpt (default: true)'
    c.option 'more_anchor', '--more_anchor', 'Whether to use more anchor (default: true)'
    c.option 'status', '--status STATUS,STATUS2', Array, 'Array of allowed statuses (default: ["publish"], other options: "draft", "private", "revision")'
  end
  30.  
  31. # Main migrator function. Call this to perform the migration.
  32. #
  33. # opts:: A hash of settings with String keys. The connection keys are:
  34. # 'dbname':: The name of the database. Default: ''
  35. # 'user':: The database user name. Default: ''
  36. # 'password':: The database user's password. Default: ''
  37. # 'host':: The address of the MySQL database host. Default: 'localhost'
  38. # 'socket':: The database socket's path. Default: nil
  39. #
  40. # Supported options are:
  41. #
  42. # :table_prefix:: Prefix of database tables used by WordPress.
  43. # Default: 'wp_'
  44. # :clean_entities:: If true, convert non-ASCII characters to HTML
  45. # entities in the posts, comments, titles, and
  46. # names. Requires the 'htmlentities' gem to
  47. # work. Default: true.
  48. # :comments:: If true, migrate post comments too. Comments
  49. # are saved in the post's YAML front matter.
  50. # Default: true.
  51. # :categories:: If true, save the post's categories in its
  52. # YAML front matter.
  53. # :tags:: If true, save the post's tags in its
  54. # YAML front matter.
  55. # :more_excerpt:: If true, when a post has no excerpt but
  56. # does have a <!-- more --> tag, use the
  57. # preceding post content as the excerpt.
  58. # Default: true.
  59. # :more_anchor:: If true, convert a <!-- more --> tag into
  60. # two HTML anchors with ids "more" and
  61. # "more-NNN" (where NNN is the post number).
  62. # Default: true.
  63. # :status:: Array of allowed post statuses. Only
  64. # posts with matching status will be migrated.
  65. # Known statuses are :publish, :draft, :private,
  66. # and :revision. If this is nil or an empty
  67. # array, all posts are migrated regardless of
  68. # status. Default: [:publish].
  69. #
  # Entry point of the importer: reads settings from +opts+, connects to the
  # WordPress database and hands every selected post row to process_post.
  #
  # opts:: a Hash-like object (must respond to +fetch+) keyed by Strings:
  #        'user', 'password', 'host', 'socket', 'dbname', 'table_prefix',
  #        'clean_entities', 'comments', 'categories', 'tags',
  #        'more_excerpt', 'more_anchor' and 'status'. Missing keys fall
  #        back to the defaults visible in the hash below.
  #
  # Side effects: creates "_posts" (and "_drafts" when drafts can be
  # imported) in the current directory, and opens a MySQL connection through
  # Sequel.mysql2.
  #
  # Returns whatever iterating the posts dataset returns.
  def self.process(opts)
    options = {
      :user           => opts.fetch('user', ''),
      :pass           => opts.fetch('password', ''),
      :host           => opts.fetch('host', 'localhost'),
      :socket         => opts.fetch('socket', nil),
      :dbname         => opts.fetch('dbname', ''),
      :table_prefix   => opts.fetch('table_prefix', 'wp_'),
      :clean_entities => opts.fetch('clean_entities', true),
      :comments       => opts.fetch('comments', true),
      :categories     => opts.fetch('categories', true),
      :tags           => opts.fetch('tags', true),
      :more_excerpt   => opts.fetch('more_excerpt', true),
      :more_anchor    => opts.fetch('more_anchor', true),
      # Array() turns an explicit nil into [], which means "no status
      # filter" instead of raising NoMethodError on nil.map.
      :status         => Array(opts.fetch('status', ['publish'])).map(&:to_sym) # :draft, :private, :revision
    }

    # htmlentities is optional: without it the import still runs, just
    # without entity clean-up.
    if options[:clean_entities]
      begin
        require 'htmlentities'
      rescue LoadError
        STDERR.puts "Could not require 'htmlentities', so the " +
                    ":clean_entities option is now disabled."
        options[:clean_entities] = false
      end
    end

    FileUtils.mkdir_p("_posts")
    # An empty status list imports every post, drafts included, so the
    # drafts directory is needed in that case as well.
    if options[:status].empty? || options[:status].include?(:draft)
      FileUtils.mkdir_p("_drafts")
    end

    db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                       :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

    px = options[:table_prefix]

    # Map of page id => slug/parent, used later to build nested page paths.
    page_name_query = <<-SQL
      SELECT
        posts.ID          AS `id`,
        posts.post_title  AS `title`,
        posts.post_name   AS `slug`,
        posts.post_parent AS `parent`
      FROM #{px}posts AS `posts`
      WHERE posts.post_type = 'page'
    SQL

    page_name_list = {}
    db[page_name_query].each do |page|
      page_slug = page[:slug].to_s
      # Pages without a stored slug get one derived from their title.
      page_slug = sluggify(page[:title].to_s) if page_slug.empty?
      page_name_list[page[:id]] = {
        :slug   => page_slug,
        :parent => page[:parent]
      }
    end

    posts_query = <<-SQL
      SELECT
        posts.ID            AS `id`,
        posts.guid          AS `guid`,
        posts.post_type     AS `type`,
        posts.post_status   AS `status`,
        posts.post_title    AS `title`,
        posts.post_name     AS `slug`,
        posts.post_date     AS `date`,
        posts.post_date_gmt AS `date_gmt`,
        posts.post_content  AS `content`,
        posts.post_excerpt  AS `excerpt`,
        posts.comment_count AS `comment_count`,
        users.display_name  AS `author`,
        users.user_login    AS `author_login`,
        users.user_email    AS `author_email`,
        users.user_url      AS `author_url`
      FROM #{px}posts AS `posts`
        LEFT JOIN #{px}users AS `users`
          ON posts.post_author = users.ID
    SQL

    unless options[:status].empty?
      # Statuses come from the command line; db.literal quotes and escapes
      # them instead of pasting them raw into the SQL text.
      allowed = options[:status].map { |wanted| db.literal(wanted.to_s) }
      posts_query += "      WHERE posts.post_status IN (#{allowed.join(', ')})\n"
    end

    db[posts_query].each do |post|
      process_post(post, db, options, page_name_list)
    end
  end
  161.  
  162.  
  # Turns one row of the posts query into a Jekyll source file: YAML front
  # matter, a "---" separator, then the body passed through Util.wpautop.
  #
  # post::           row hash with the symbol keys selected by the posts
  #                  query (:id, :title, :slug, :date, :content, ...).
  # db::             database handle; db[sql] must yield row hashes.
  # options::        the symbol-keyed settings hash built by process.
  # page_name_list:: page id => { :slug, :parent } map used for page paths.
  #
  # Pages are written to "<page path>index.markdown", drafts to
  # "_drafts/<slug>.md", everything else to
  # "_posts/<yyyy-mm-dd>-<slug>.markdown". The target directory is created
  # when missing.
  def self.process_post(post, db, options, page_name_list)
    # to_s lets a row without a title still get an (empty) slug instead of
    # failing inside sluggify.
    title = post[:title].to_s
    title = clean_entities(title) if options[:clean_entities]

    slug = post[:slug].to_s
    slug = sluggify(title) if slug.empty?

    date = post[:date] || Time.now
    name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

    content = post[:content].to_s
    content = clean_entities(content) if options[:clean_entities]

    excerpt = post[:excerpt].to_s
    more_anchor = nil
    more_tag = /<!-- *more *-->/
    more_index = content.index(more_tag)
    if more_index
      # No stored excerpt: use everything before the <!-- more --> marker.
      excerpt = content[0...more_index] if options[:more_excerpt] && excerpt.empty?
      if options[:more_anchor]
        # Record the anchor id so the 'more_anchor' front-matter key is
        # actually written; previously the value went to an unused local.
        more_anchor = "more"
        content = content.sub(more_tag,
                              "<a id=\"more\"></a>" +
                              "<a id=\"more-#{post[:id]}\"></a>")
      end
    end

    categories, tags = post_terms(post, db, options)
    comments = post_comments(post, db, options)

    # Get the relevant fields as a hash, delete empty fields and
    # convert to YAML for the header.
    data = {
      'layout'        => post[:type].to_s,
      'status'        => post[:status].to_s,
      'published'     => post[:status].to_s == 'draft' ? nil : (post[:status].to_s == 'publish'),
      'title'         => title.to_s,
      'author'        => {
        'display_name'=> post[:author].to_s,
        'login'       => post[:author_login].to_s,
        'email'       => post[:author_email].to_s,
        'url'         => post[:author_url].to_s,
      },
      'author_login'  => post[:author_login].to_s,
      'author_email'  => post[:author_email].to_s,
      'author_url'    => post[:author_url].to_s,
      'excerpt'       => excerpt,
      'more_anchor'   => more_anchor,
      'wordpress_id'  => post[:id],
      'wordpress_url' => post[:guid].to_s,
      'date'          => date.to_s,
      'date_gmt'      => post[:date_gmt].to_s,
      'categories'    => options[:categories] ? categories : nil,
      'tags'          => options[:tags] ? tags : nil,
      'comments'      => options[:comments] ? comments : nil,
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    filename =
      if post[:type] == 'page'
        page_path(post[:id], page_name_list) + 'index.markdown'
      elsif post[:status] == 'draft'
        "_drafts/#{slug}.md"
      else
        "_posts/#{name}"
      end
    # Always ensure the directory exists: "_drafts" is not guaranteed to
    # have been created when drafts arrive through an empty status filter.
    FileUtils.mkdir_p(File.dirname(filename))

    # Write out the data and content to file
    File.open(filename, "w") do |f|
      f.puts data
      f.puts "---"
      f.puts Util.wpautop(content)
    end
  end

  # Helper for process_post: returns [categories, tags] (two arrays of
  # names) for the post, honouring the :categories, :tags and
  # :clean_entities options. Runs no query when both kinds are disabled.
  def self.post_terms(post, db, options)
    categories = []
    tags = []
    return [categories, tags] unless options[:categories] || options[:tags]

    px = options[:table_prefix]
    terms_query = <<-SQL
      SELECT
        terms.name    AS `name`,
        ttax.taxonomy AS `type`
      FROM
        #{px}terms              AS `terms`,
        #{px}term_relationships AS `trels`,
        #{px}term_taxonomy      AS `ttax`
      WHERE
        trels.object_id = #{post[:id].to_i} AND
        trels.term_taxonomy_id = ttax.term_taxonomy_id AND
        terms.term_id = ttax.term_id
    SQL

    # Only terms that are kept get cleaned, so other taxonomies are ignored
    # untouched.
    tidy = lambda { |term_name| options[:clean_entities] ? clean_entities(term_name) : term_name }

    db[terms_query].each do |term|
      if options[:categories] && term[:type] == "category"
        categories << tidy.call(term[:name])
      elsif options[:tags] && term[:type] == "post_tag"
        tags << tidy.call(term[:name])
      end
    end

    [categories, tags]
  end

  # Helper for process_post: returns the post's non-spam comments as an
  # array of string-keyed hashes ordered by comment id. Returns [] when
  # comments are disabled or the post's comment_count is zero.
  def self.post_comments(post, db, options)
    return [] unless options[:comments] && post[:comment_count].to_i > 0

    px = options[:table_prefix]
    comments_query = <<-SQL
      SELECT
        comment_ID           AS `id`,
        comment_author       AS `author`,
        comment_author_email AS `author_email`,
        comment_author_url   AS `author_url`,
        comment_date         AS `date`,
        comment_date_gmt     AS `date_gmt`,
        comment_content      AS `content`
      FROM #{px}comments
      WHERE
        comment_post_ID = #{post[:id].to_i} AND
        comment_approved != 'spam'
    SQL

    comments = db[comments_query].map do |comment|
      # Work on a copy so the row's own string is not re-tagged in place.
      body = comment[:content].to_s.dup.force_encoding("UTF-8")
      body = clean_entities(body) if options[:clean_entities]
      author = comment[:author].to_s
      author = clean_entities(author) if options[:clean_entities]

      {
        'id'           => comment[:id].to_i,
        'author'       => author,
        'author_email' => comment[:author_email].to_s,
        'author_url'   => comment[:author_url].to_s,
        'date'         => comment[:date].to_s,
        'date_gmt'     => comment[:date_gmt].to_s,
        'content'      => body,
      }
    end

    comments.sort_by { |comment| comment['id'] }
  end
  325.  
  326.  
  # Encodes +text+ with HTMLEntities (named entities) and then restores the
  # characters HTML markup itself is made of, so tags, attributes and URLs
  # in posts and comments stay intact.
  #
  # text:: the string to clean. When it responds to +force_encoding+ it is
  #        re-tagged as UTF-8 in place before encoding.
  #
  # Returns the encoded string.
  def self.clean_entities( text )
    text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
    encoded = HTMLEntities.new.encode(text, :named)

    # We don't want to convert these, it would break all
    # HTML tags in the post and comments. Order matters: "&amp;" is undone
    # first, exactly as before. The slash entry now decodes "&#47;" back to
    # "/"; the old code did the opposite and rewrote every "/" (closing
    # tags and URLs included) as "&#47;".
    markup_entities = [
      ["&amp;",  "&"],
      ["&lt;",   "<"],
      ["&gt;",   ">"],
      ["&quot;", '"'],
      ["&apos;", "'"],
      ["&#47;",  "/"],
    ]
    markup_entities.inject(encoded) do |result, (entity, char)|
      result.gsub(entity, char)
    end
  end
  342.  
  343.  
  # Builds a URL slug from +title+: transliterates it with +to_ascii+,
  # lowercases it, and joins the remaining runs of ASCII letters and digits
  # with single hyphens. Yields "" when no letters or digits remain.
  def self.sluggify( title )
    words = title.to_ascii.downcase.scan(/[0-9A-Za-z]+/)
    words.join("-")
  end
  347.  
  # Builds the directory path of a page by walking up its parents.
  #
  # page_id::        id of the page to locate.
  # page_name_list:: hash of page id => { :slug => ..., :parent => ... }.
  #
  # Returns the slugs from the top-most known ancestor down to the page,
  # each followed by "/", or "" when +page_id+ is not in the list.
  def self.page_path( page_id, page_name_list )
    return "" unless page_name_list.key?(page_id)

    page = page_name_list[page_id]
    parent_path = page_path(page[:parent], page_name_list)
    "#{parent_path}#{page[:slug]}/"
  end
  359.  
  360. end
  361. end
  362. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement