Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.70 KB | None | 0 0
  1. #!/usr/bin/env node
  2. const gravatar = require('gravatar')
  3. const cheerio = require('cheerio')
  4. const path = require('path')
  5. const fs = require('fs')
  6.  
  // Read the export as text and rewrite the JSON-escaped upload paths
  // (the source text contains literal "\/" sequences) to their new locations
  // before the document is parsed.
  const json = fs
    .readFileSync(path.join(__dirname, 'export.json'), 'utf8')
    .replace(/\\\/blog\\\/wp-content\\\/uploads/g, '\/content\/images')
    .replace(/wp-content\\\/authors/g, 'content\/images\/authors')
    .replace(/wp-content\\\/uploads/g, 'content\/images')

  // Matches a markdown image `![alt](src)`; group 1 is the alt text, group 2 the source.
  const MD_IMAGE_REGEX = /!\[([^\]]*)\]\(([^\)]+)\)/
  13.  
  /**
   * Normalises one markdown fragment taken from the export.
   *
   * - Strips newlines from BOTH ends of the fragment.
   * - Decodes literal `\uXXXX` escape sequences (exactly four hex digits, either
   *   case) into the characters they stand for. Surrogate pairs written as two
   *   consecutive escapes combine into a single character.
   * - Drops a single whitespace character sitting directly before a trailing
   *   run of asterisks, e.g. `**bold **` becomes `**bold**`.
   *
   * @param {string} string - Raw fragment.
   * @returns {string} The cleaned fragment.
   */
  function _unescape(string) {
    return string
      // The `g` flag is required: without it only the first alternative that
      // matches (the leading newlines) would be removed.
      .replace(/^\n+|\n+$/g, '')
      // Limit the escape to four hex digits so that hex-looking letters that
      // follow it (as in "\u00e9cole") are not swallowed into the code point.
      .replace(/\\u([0-9a-fA-F]{4})/g, (match, hex) => String.fromCodePoint(parseInt(hex, 16)))
      .replace(/[\s\n](\*+)$/g, '$1')
  }
  19.  
  /**
   * Reduces a fragment that may contain HTML to its text content.
   *
   * `<div class="quote">` blocks are first rewritten as markdown blockquotes
   * (`> text`); the pattern skips one character on each side of the captured
   * text (presumably the surrounding quotation marks - confirm against the
   * exported data). The result is loaded into cheerio and its text is taken.
   * Finally, whitespace between a leading or trailing run of asterisks and the
   * rest of the text is removed.
   *
   * @param {string} md - Markdown/HTML fragment.
   * @returns {string} The fragment's text content.
   */
  function removeHTML(md) {
    const withBlockquotes = md.replace(/<div\sclass="quote">\s*.([^"“”]+).\s*<\/div>/g, '> $1 \n\n');
    const plainText = cheerio.load(withBlockquotes).text();
    const leadingTightened = plainText.replace(/^(\*+)\s+/, '$1');
    return leadingTightened.replace(/\s+(\*+)$/, '$1');
  }
  24.  
  /**
   * Removes the first markdown image (`![alt](src)`) whose source equals the
   * post's feature image.
   *
   * The source is matched literally: regular-expression metacharacters in it
   * (`.`, `?`, `+`, `(`, `)` ... - all common in URLs) are escaped before the
   * pattern is built. Without escaping, such sources either fail to match or
   * make the RegExp constructor throw.
   *
   * @param {string} md - Markdown fragment.
   * @param {?string} featureImage - Feature image source; when falsy, `md` is
   *   returned unchanged.
   * @returns {string} `md` without the first matching image reference.
   */
  function removeFeatureImage(md, featureImage) {
    if (!featureImage) return md;

    const literalSrc = String(featureImage).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return md.replace(new RegExp(`!\\[[^\\]]*\\]\\(${literalSrc}\\)`), '');
  }
  30.  
  /**
   * Converts a date string into ISO-8601 form.
   *
   * @param {string} str - Any date string `Date.parse` understands.
   * @returns {string} The same instant as produced by `Date#toISOString`.
   * @throws {RangeError} When `str` cannot be parsed (the resulting Date is invalid).
   */
  function convertDate(str) {
    const timestamp = Date.parse(str)
    const date = new Date(timestamp)
    return date.toISOString()
  }
  34.  
  const parsed = JSON.parse(json)

  // Skeleton of the output document. `posts` starts empty and is filled while
  // the exported posts are converted; users, tags and post/tag links are taken
  // from the parsed export.
  const dump = {
    meta: {
      // epoch time in milliseconds
      exported_on: Date.now(),
      // NOTE(review): presumably the version of the target import format - confirm.
      version: '2.14.0'
    },
    data: {
      posts: [],
      users: parsed.data.users.map((user) => {
        // Users without a profile image get a Gravatar URL built from their
        // e-mail address. NOTE(review): `s`/`r` are presumably Gravatar's size
        // and rating options and `true` the protocol switch - confirm against
        // the gravatar package documentation.
        const profile_image = user.profile_image ||
          gravatar.url(user.email, { s: '250', r: 'x' }, true)
        return { ...user, profile_image }
      }),
      tags: parsed.data.tags,
      posts_tags: parsed.data.posts_tags
    }
  }
  56.  
  // Convert every exported post into a mobiledoc-based post record, append it
  // to the dump, and finally write the dump next to this script.
  for (const post of parsed.data.posts) {
    // Undo the JSON-style "\/" escaping, make sure every </figure> ends its
    // own fragment, then split the markdown on blank lines.
    const rawElements = post.markdown
      .replace(/\\\//g, '/')
      .replace(/<\/figure>/g, '</figure>\n\n')
      .split('\n\n')

    const cards = []
    const sections = []
    const mobileDoc = {
      version: '0.3.1',
      atoms: [],
      cards,
      markups: [],
      sections
    }

    // The first <img> in the rendered HTML (if any) becomes the feature image.
    const firstImage = cheerio.load(post.html)('img').get(0)
    const feature_image = firstImage ? firstImage.attribs.src : null

    // Every card gets exactly one section that points at the card's index.
    // NOTE(review): 10 is presumably mobiledoc's card-section marker - confirm.
    const pushCard = (type, payload) => {
      cards.push([type, payload])
      sections.push([10, cards.length - 1])
    }

    const elements = rawElements.map((raw) => _unescape(raw))

    for (const el of elements) {
      if (!el.includes('<figure')) {
        const md = removeFeatureImage(removeHTML(el), feature_image)
        const lastCard = cards[cards.length - 1]

        // Consecutive markdown fragments are merged into one markdown card.
        if (lastCard && lastCard[0] === 'card-markdown') {
          lastCard[1].markdown += '\n\n' + md
        } else {
          pushCard('card-markdown', { cardName: 'card-markdown', markdown: md })
        }
        continue
      }

      // Figure fragments without a markdown image produce no card.
      const imageMatch = el.match(MD_IMAGE_REGEX)
      if (!imageMatch) continue

      const alt = imageMatch[1]
      const src = unescape(imageMatch[2])
      const $ = cheerio.load('<div>' + unescape(el) + '</div>')
      const figcaption = $('figcaption').get(0)

      // The feature image is not repeated inside the post body.
      if (feature_image === src) continue

      pushCard('image', {
        src,
        caption: figcaption ? $(figcaption).text() : undefined,
        alt
      })
    }

    const published_at = post.published_at ? convertDate(post.published_at) : null

    dump.data.posts.push({
      id: post.id,
      title: removeHTML(post.title),
      slug: post.slug,
      mobiledoc: JSON.stringify(mobileDoc),
      feature_image,
      author_id: post.author_id,
      created_at: convertDate(post.created_at),
      created_by: post.created_by,
      updated_at: convertDate(post.updated_at),
      updated_by: post.updated_by,
      published_at,
      published_by: post.published_by,
      status: post.status
    })

    // Progress output: one line per converted post.
    console.log(post.title)
  }

  fs.writeFileSync(path.join(__dirname, 'dump.json'), JSON.stringify(dump))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement