Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- var mongolayer = require("/sv/node_modules/npm/mongolayer/1/node_modules/mongolayer/");
- var asyncLib = require("/sv/node_modules/sv/asyncLib/1/");
- var request = require("request");
- var async = require("/sv/node_modules/npm/async/1/node_modules/async/");
- console.log('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n');
// WordPress REST endpoints the import reads from, one per resource type.
let wordpressApi = {
    posts : 'http://manitobahot.com/wp-json/wp/v2/posts',
    media : 'http://manitobahot.com/wp-json/wp/v2/media',
    categories : 'http://manitobahot.com/wp-json/wp/v2/categories',
    tags : 'http://manitobahot.com/wp-json/wp/v2/tags'
};

// Import configuration shared by every step below:
//   blog_name - key of the target blog (also used to look up its API),
//   api       - source endpoints,
//   tag       - ObjectId stamped on every imported post and asset.
let settings = {
    blog_name : 'leisure_blog',
    api : wordpressApi,
    tag : new mongolayer.ObjectId('5a5eb261bef518101cf78f2e')
};
/**
 * Convert an HTML-encoded string (such as a WordPress `rendered` title) to
 * plain text.
 *
 * - Numeric character references (`&#8211;`, `&#x2014;`, `&#038;`) are decoded
 *   generically, so emoji and punctuation no longer need one rule each.
 * - The named references corresponding to the characters the original rule
 *   list handled (& < > " – — ‘ ’ ‚ “ ” „ • … ™) are decoded.
 * - Unknown or out-of-range references are left untouched.
 * - As before, two stray glyphs and literal `<br>` tags are removed.
 *
 * Decoding happens in a single pass, so `&amp;lt;` becomes `&lt;` and is not
 * decoded a second time.
 *
 * @param {string} str - Text to decode.
 * @returns {string} The decoded text.
 */
let convertAscii = function(str) {
    const namedReferences = {
        amp : "&",
        lt : "<",
        gt : ">",
        quot : "\"",
        ndash : "\u2013",
        mdash : "\u2014",
        lsquo : "\u2018",
        rsquo : "\u2019",
        sbquo : "\u201a",
        ldquo : "\u201c",
        rdquo : "\u201d",
        bdquo : "\u201e",
        bull : "\u2022",
        hellip : "\u2026",
        trade : "\u2122"
    };

    const decodeReference = (whole, body) => {
        if (body[0] === "#") {
            const isHex = body[1] === "x" || body[1] === "X";
            const code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
            // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
            if (Number.isNaN(code) || code > 0x10ffff) return whole;
            return String.fromCodePoint(code);
        }
        return Object.prototype.hasOwnProperty.call(namedReferences, body)
            ? namedReferences[body]
            : whole;
    };

    return str
        .replace(/&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/g, decodeReference)
        // These two glyphs were stripped by the original rule list; kept as-is.
        .replace(/🣸/g, "")
        .replace(/🇮/g, "")
        .replace(/<br>/g, "");
};
- let flow = new asyncLib.Flow({ timers : true });
- flow.series({
- posts(cb) {
- console.log("Collecting posts");
- let posts = [];
- let options = {
- url : settings.api.posts,
- qs : {
- page : 1,
- per_page : 2,
- "_embed" : 1
- },
- json : true
- };
- let paginate = () => {
- request(options, (err, response, body) => {
- if (err) return cb(err);
- posts = posts.concat(body);
- // if (body.length === 100) {
- // options.qs.page = options.qs.page + 1;
- // return paginate();
- // }
- process();
- });
- };
- let process = () => {
- let raw = posts;
- let processed = raw.map(post => {
- let categories_ids = post.categories.map(category => String(category));
- let tags_ids = post.tags.map(tag => String(tag));
- let title = convertAscii(post.title.rendered);
- let slug = post.slug.replace(/[^a-zA-Z0-9-]/ig, '');
- return {
- blog : settings.blog_name,
- author_id : String(post.author),
- image_id : String(post.featured_media || 0),
- categories_ids : categories_ids,
- title : title,
- slug : slug,
- description : post.content.rendered,
- enabled : true,
- active : true,
- enable_comments : (post.comment_status === "open") ? true : false,
- publish_start : new Date(post.date),
- created : new Date(post.date),
- updated : new Date(post.modified),
- tags_ids : tags_ids,
- cms_tags_ids : [settings.tag],
- legacy_id : String(post.id)
- };
- });
- raw = raw;
- processed = processed;
- cb(null, { raw : raw, processed : processed });
- };
- paginate();
- },
- assets(cb) {
- console.log("Collecting assets");
- async.series({
- attachments(cb) {
- let options = {
- url : settings.api.media,
- qs : {
- page : 1,
- per_page : 100
- },
- json : true
- };
- let paginate = (post, cb) => {
- let postid = Number(post.legacy_id);
- options.qs.parent = postid;
- request(options, (err, response, body) => {
- if (err) return cb(err);
- if (body.length === 100) {
- options.qs.page = options.qs.page + 1;
- return paginate(postid, cb);
- }
- cb(null, body);
- });
- };
- async.mapLimit(flow.data.posts.processed, 5, (post, cb) => {
- paginate(post, cb);
- }, (error, assets) => {
- if (error) return cb(error);
- cb(null, assets);
- });
- },
- contents(cb) {
- let assetids = [];
- let attachment = /<div(?:[^>]+?)?id="attachment_([0-9]+)"(?:[^>]+?)?>/ig
- let inlineimage = /<img(?:[^>]+?)?class="(?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?"(?:[^>]+?)?src="http:\/\/manitobahot\.com\/(?:[^>]+?)?>/ig
- let attachmentimage = /<img(?:[^>]+?)?data-attachment-id="([0-9]+)"(?:[^>]+?)?>/ig
- flow.data.posts.processed.forEach(post => {
- if (attachment.test(post.description)) {
- post.description.replace(attachment, (img, id) => {
- assetids.push(id);
- });
- }
- if (inlineimage.test(post.description)) {
- post.description.replace(inlineimage, (img, id) => {
- assetids.push(id);
- });
- }
- if (attachmentimage.test(post.description)) {
- post.description.replace(attachmentimage, (img, id) => {
- assetids.push(id);
- });
- }
- });
- if (assetids.length > 0) {
- async.mapLimit(assetids, 5, (id, cb) => {
- let options = {
- url : settings.api.media + '/' + id,
- json : true
- };
- request(options, (err, response, body) => {
- if (err) return cb(err);
- if (response.statusCode === 403) {
- console.log(`[Assets] Forbidden to collect media asset (${id})`);
- }
- cb(null, body);
- });
- }, (error, assets) => {
- if (error) return cb(error);
- cb(null, assets);
- });
- }
- }
- }, (err, combined) => {
- if (err) return cb(err);
- let unique = {};
- Object.keys(combined).forEach(key => {
- combined[key].forEach(asset => {
- if (Array.isArray(asset)) {
- if (asset.length > 0) {
- asset.forEach(asset => {
- if (asset.title !== undefined) {
- unique[asset.id] = asset;
- } else {
- console.log('[Asset Array] Failed asset:');
- console.log(asset);
- }
- });
- }
- } else {
- if (asset.title !== undefined) {
- unique[asset.id] = asset;
- } else {
- console.log('[Asset Object] Failed asset:');
- console.log(asset);
- }
- }
- });
- });
- let raw = Object.keys(unique).map(key => unique[key]);
- let processed = raw.map(asset => {
- if (asset.legacy_id == 101386) {
- console.log(asset);
- }
- let title = (asset.title.rendered !== "") ? asset.title.rendered : 'NO TITLE ' + asset.source_url;
- return {
- title : title,
- title_sort : title,
- imageurl : asset.source_url,
- notes : asset.description || undefined,
- created : asset.date ? new Date(asset.date) : undefined,
- content_owner : 'default',
- tags_ids : [settings.tag],
- legacy_id : String(asset.id)
- };
- });
- cb(null, { raw : raw, processed : processed });
- });
- },
- insert(cb) {
- console.log("Inserting data");
- var imagesApi = site.plugins.assets.apis.images;
- var blogApi = site.plugins.blog.apis[settings.blog_name];
- async.series({
- assets(cb) {
- console.log("Insert assets");
- let assets = flow.data.assets.processed;
- if (!assets.length) return cb(null);
- async.mapLimit(assets, 5, (asset, cb) => {
- imagesApi.insert(asset, (err, asset) => {
- if (err) return cb(err);
- cb(null, asset);
- });
- }, (error, assets) => {
- if (error) return cb(error);
- flow.data.assets.inserted = assets;
- console.log("Assets inserted")
- cb(null, assets);
- });
- },
- posts(cb) {
- console.log("Insert posts");
- let posts = flow.data.posts.processed;
- if (!posts.length) return cb(null);
- let assets = flow.data.assets.inserted;
- let anchorimage = /<a(?:[^>]+?)?href="(http:\/\/manitobahot.com\/[^"]+?[\.jpg|\.png|\.gif])"(?:[^>]+?)?><img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
- let inlineimage = /<img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
- let attachmentimage = /<img(?:[^>]+?)data-attachment-id="([^"]+?)"(?:[^>]+?)?src="(http:\/\/i[0-9]\.wp\.com\/[^>]+?)"(?:[^>]+?)?alt="([^]+?)"(?:[^>]+?)?style="([^>]+?)"(?:[^>]+?)?>/ig
- let anchors = /<a(?:[^>]+?)?(?:href="http:\/\/manitobahot\.com\/)(?:[^>]+?)?>(.+?)<\/a>/ig
- posts.forEach(function(post) {
- if (post.image_id) {
- let image = assets.find(asset => asset.legacy_id === post.image_id);
- if (image) {
- post.image_id = image._id;
- } else {
- console.log(`[ImageAssignment] No image? ${post.image_id}`);
- delete post.image_id;
- }
- }
- if (post.author_id) {
- async.series({
- author(cb) {
- blogApi.authors.find({ legacy_id: post.author_id }, {}, function(err, docs) {
- if (err) { return cb(err); }
- cb(null, docs);
- });
- }
- }, function(err, res) {
- console.log(res.author[0]._id);
- post.author_id = res.author[0]._id;
- });
- }
- if (post.categories_ids) {
- async.series({
- categories(cb) {
- blogApi.categories.find({}, {}, function(err, docs) {
- if (err) { return cb(err); }
- cb(null, docs);
- });
- }
- }, function(err, res) {
- var cats = [];
- res.categories.forEach(function(c) {
- if (post.categories_ids.includes(c.legacy_id)) {
- cats.push(c._id);
- }
- });
- post.categories_ids = cats;
- });
- }
- if (post.tags_ids) {
- async.series({
- tags(cb) {
- blogApi.tags.find({}, {}, function(err, docs) {
- if (err) { return cb(err); }
- cb(null, docs);
- });
- }
- }, function(err, res) {
- var tags = [];
- res.tags.forEach(function(t) {
- if (post.tags_ids.includes(t.legacy_id)) {
- tags.push(t._id);
- }
- });
- post.tags_ids = tags;
- });
- }
- if (anchorimage.test(post.description)) {
- post.description = post.description.replace(anchorimage, (whole, href, imgclass, id, src, alt, dimensions) => {
- let asset = assets.find(asset => asset.legacy_id === String(id));
- if (asset) {
- let newsrc = asset.resource.getUrl();
- return '<a href="' + href + '"><img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
- }
- console.log(`[anchorimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
- return whole;
- });
- }
- if (inlineimage.test(post.description)) {
- post.description = post.description.replace(inlineimage, (whole, imgclass, id, src, alt, dimensions) => {
- let asset = assets.find(asset => asset.legacy_id === String(id));
- if (asset) {
- let newsrc = asset.resource.getUrl();
- return '<img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
- }
- console.log(`[inlineimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
- return whole;
- });
- }
- if (attachmentimage.test(post.description)) {
- post.description = post.description.replace(attachmentimage, (whole, id, src, alt, style) => {
- let asset = assets.find(asset => asset.legacy_id === String(id));
- if (asset) {
- let newsrc = asset.resource.getUrl();
- return '<img data-attachment-id="' + id + '" src="' + newsrc + '" alt="' + alt + '" style="' + style + '">';
- }
- console.log(`[attachmentimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
- return whole;
- });
- }
- if (anchors.test(post.description)) {
- post.description = post.description.replace(anchors, (whole, inner) => {
- return inner;
- });
- }
- });
- console.log("INSERT!!!!!!");
- blogApi.posts.insert(posts, (err, posts) => {
- if (err) return cb(err);
- flow.data.posts.inserted = posts;
- console.log("Posts inserted");
- cb(null, posts);
- });
- }
- }, (err, results) => {
- if (err) return cb(err);
- cb(null, results);
- });
- }
- }, (err, data) => {
- if (err) return cb(err);
- //console.log('data', data);
- console.log('import done');
- cb(null, data);
- });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement