Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env node
- const gravatar = require('gravatar')
- const cheerio = require('cheerio')
- const path = require('path')
- const fs = require('fs')
// Read the export as raw text and rewrite WordPress asset paths before it is
// parsed. The patterns match paths whose slashes are written as `\/`
// (backslash + slash), i.e. the escaped form found inside the JSON text.
// Order matters: the more specific `/blog/wp-content/uploads` rewrite must run
// before the generic `wp-content/uploads` one.
const json = fs.readFileSync(path.join(__dirname, 'export.json')).toString()
  .replace(/\\\/blog\\\/wp-content\\\/uploads/g, '\/content\/images')
  .replace(/wp-content\\\/authors/g, 'content\/images\/authors')
  .replace(/wp-content\\\/uploads/g, 'content\/images');

// Matches a Markdown image `![alt](src)`: group 1 is the alt text, group 2 the
// image source.
const MD_IMAGE_REGEX = /!\[([^\]]*)\]\(([^\)]+)\)/;
/**
 * Normalizes one chunk of exported Markdown text.
 *
 * Steps, in order:
 *  1. strips leading and trailing newlines;
 *  2. decodes literal `\uXXXX` escape sequences (exactly four hex digits,
 *     either case) into the corresponding UTF-16 code unit, so adjacent
 *     surrogate escapes recombine into one character;
 *  3. removes the single whitespace character that precedes a trailing run of
 *     asterisks (`**bold **` -> `**bold**`).
 *
 * @param {string} string - Text to clean up.
 * @returns {string} The cleaned text.
 */
function _unescape(string) {
  return string
    // The `g` flag is required: without it only the first alternative that
    // matches (the leading run) is removed and trailing newlines survive.
    .replace(/^\n+|\n+$/g, '')
    // Exactly four hex digits: a greedy `+` would also swallow hex-looking
    // letters that follow the escape (e.g. the "d" in `they\u2019d`).
    .replace(/\\u([0-9a-fA-F]{4})/g, (match, hex) => String.fromCharCode(parseInt(hex, 16)))
    .replace(/[\s\n](\*+)$/g, '$1');
}
/**
 * Strips HTML markup from a piece of text, keeping only its text content.
 *
 * `<div class="quote">…</div>` blocks are first rewritten as Markdown
 * blockquotes (`> …`). The `.` on each side of the captured group drops one
 * character around the quoted text (presumably the surrounding quotation
 * marks — confirm against real export data). The result is then loaded into
 * cheerio and reduced to its text, and whitespace directly after a leading
 * run of asterisks or directly before a trailing run is removed.
 *
 * @param {string} md - Text that may contain HTML.
 * @returns {string} The text with markup removed.
 */
function removeHTML(md) {
  const withBlockquotes = md.replace(
    /<div\sclass="quote">\s*.([^"“”]+).\s*<\/div>/g,
    '> $1 \n\n'
  );
  return cheerio.load(withBlockquotes).text()
    .replace(/^(\*+)\s+/, '$1')
    .replace(/\s+(\*+)$/, '$1');
}
/**
 * Removes the first Markdown image whose source is exactly `featureImage`.
 *
 * @param {string} md - Markdown text to search.
 * @param {?string} featureImage - Image source to remove; when falsy the text
 *   is returned unchanged.
 * @returns {string} `md` without the first `![…](featureImage)` occurrence.
 */
function removeFeatureImage(md, featureImage) {
  if (!featureImage) return md;
  // URLs routinely contain regex metacharacters (".", "?", "+", "(", …), so
  // the source must be escaped before it is embedded in a pattern; otherwise
  // the match silently fails, matches the wrong image, or the RegExp
  // constructor throws.
  const escapedSource = featureImage.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return md.replace(new RegExp(`!\\[[^\\]]*\\]\\(${escapedSource}\\)`), '');
}
/**
 * Converts a date string into ISO 8601 form (`YYYY-MM-DDTHH:mm:ss.sssZ`).
 *
 * @param {string} str - Any value `Date.parse` understands.
 * @returns {string} The same instant as an ISO 8601 UTC string.
 * @throws {RangeError} When `str` cannot be parsed as a date. The message
 *   names the offending value so the bad record can be found in the export.
 */
function convertDate(str) {
  const timestamp = Date.parse(str);
  if (Number.isNaN(timestamp)) {
    throw new RangeError(`Cannot convert date: ${JSON.stringify(str)} is not a parseable date`);
  }
  return new Date(timestamp).toISOString();
}
// The path-rewritten export, parsed into an object.
const parsed = JSON.parse(json);

// Skeleton of the output file. `posts` starts empty and is filled in later;
// users, tags and post/tag links are carried over from the export.
const dump = {
  meta: {
    // epoch time in milliseconds
    exported_on: Date.now(),
    version: '2.14.0',
  },
  data: {
    posts: [],
    users: parsed.data.users.map((u) => ({
      ...u,
      // Users without a profile image get a Gravatar URL derived from their
      // email (`s`: size, `r`: rating; the trailing `true` presumably selects
      // https — confirm against the gravatar package docs).
      profile_image: u.profile_image || gravatar.url(u.email, {
        s: '250',
        r: 'x',
      }, true),
    })),
    tags: parsed.data.tags,
    posts_tags: parsed.data.posts_tags,
  },
};
// Convert every exported post into a post record whose body is a serialized
// mobiledoc document, and append it to `dump.data.posts`.
parsed.data.posts.forEach((post) => {
  // Split the Markdown into blank-line-separated chunks. A blank line is
  // forced after every `</figure>` so that each figure ends its chunk.
  const elements = post.markdown
    .replace(/\\\//g, '/')
    .replace(/<\/figure>/g, '</figure>\n\n')
    .split('\n\n');

  const mobileDoc = {
    version: '0.3.1',
    atoms: [],
    cards: [],
    markups: [],
    sections: [],
  };

  // The first <img> in the rendered HTML, if any, becomes the feature image.
  let feature_image = null;
  const $readMore = cheerio.load(post.html);
  const image = $readMore('img').get(0);
  if (image) {
    feature_image = image.attribs.src;
  }

  // Every card gets exactly one section that points at it by index, so
  // `sectionIndex` always equals the index of the card just pushed.
  let sectionIndex = 0;
  const addSection = (identifier) => {
    mobileDoc.sections.push([identifier, sectionIndex++]);
  };
  const addCard = (type, card) => {
    mobileDoc.cards.push([type, card]);
    // 10 appears to be mobiledoc's card-section type — confirm against the
    // mobiledoc format specification.
    addSection(10);
  };
  // Consecutive Markdown chunks are merged into one card instead of creating
  // a new card per paragraph.
  const addMarkdownCard = (md) => {
    const prevCard = mobileDoc.cards[mobileDoc.cards.length - 1];
    if (prevCard && prevCard[0] === 'card-markdown') {
      prevCard[1].markdown += '\n\n' + md;
      return;
    }
    addCard('card-markdown', { cardName: 'card-markdown', markdown: md });
  };
  const addImage = (src, alt, caption) => {
    addCard('image', { src, caption, alt });
  };

  elements.map(_unescape).forEach((el) => {
    if (!el.includes('<figure')) {
      addMarkdownCard(removeFeatureImage(removeHTML(el), feature_image));
      return;
    }

    // NOTE(review): a <figure> chunk without a Markdown image is dropped
    // silently — confirm this is intended.
    const m = el.match(MD_IMAGE_REGEX);
    if (!m) return;

    const alt = m[1];
    // NOTE(review): `unescape` here is the legacy built-in global (percent
    // decoding), not the `_unescape` helper — confirm which was intended.
    const src = unescape(m[2]);
    const $ = cheerio.load('<div>' + unescape(el) + '</div>');
    const caption = $('figcaption').get(0);
    // The feature image is shown separately, so it is not repeated in the body.
    if (feature_image !== src) {
      addImage(src, alt, caption ? $(caption).text() : undefined);
    }
  });

  dump.data.posts.push({
    id: post.id,
    title: removeHTML(post.title),
    slug: post.slug,
    mobiledoc: JSON.stringify(mobileDoc),
    feature_image,
    author_id: post.author_id,
    created_at: convertDate(post.created_at),
    created_by: post.created_by,
    updated_at: convertDate(post.updated_at),
    updated_by: post.updated_by,
    published_at: post.published_at ? convertDate(post.published_at) : null,
    published_by: post.published_by,
    status: post.status,
  });

  // Progress output: one line per converted post.
  console.log(post.title);
});
// Write the assembled dump next to this script as `dump.json`.
fs.writeFileSync(path.join(__dirname, 'dump.json'), JSON.stringify(dump));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement