Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.06 KB | None | 0 0
  1. // ===== Self-hosted Wordpress -> cms3x blog =========================================
  2. // This script imports a self-hosted WordPress installation into a cms3x blog.
  3. // The self-hosted WordPress site is required to have the "Json rest v2" plugin
  4. // installed and activated so the correct API endpoints are accessible to the script.
  5. // Before running, make sure you change the settings.tag id (cms_tags); this
  6. // is useful when you need to find the imported data after the script runs.
  7. // ===================================================================================
  8.  
  9. var mongolayer = require("/sv/node_modules/npm/mongolayer/1/node_modules/mongolayer/");
  10. var asyncLib = require("/sv/node_modules/sv/asyncLib/1/");
  11. var request = require("request");
  12. var async = require("/sv/node_modules/npm/async/1/node_modules/async/");
  13.  
  14. console.log('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n');
  15.  
  16. let settings = {
  17. blog_name : 'leisure_blog',
  18. api : {
  19. posts : 'http://manitobahot.com/wp-json/wp/v2/posts',
  20. media : 'http://manitobahot.com/wp-json/wp/v2/media',
  21. categories : 'http://manitobahot.com/wp-json/wp/v2/categories',
  22. tags : 'http://manitobahot.com/wp-json/wp/v2/tags'
  23. },
  24. tag : new mongolayer.ObjectId('5a5eb261bef518101cf78f2e')
  25. };
  26.  
  27. let convertAscii = function(str) {
  28. return str
  29. .replace(/&/g, "&")
  30. .replace(/&/g, "&")
  31. .replace(/&lt;/g, "<")
  32. .replace(/&gt;/g, ">")
  33. .replace(/&quot;/g, "\"")
  34. .replace(/&#8211;/g, "–")
  35. .replace(/&#8212;/g, "—")
  36. .replace(/&#8216;/g, "‘")
  37. .replace(/&#8217;/g, "’")
  38. .replace(/&#8218;/g, "‚")
  39. .replace(/&#8220;/g, "“")
  40. .replace(/&#8221;/g, "”")
  41. .replace(/&#8222;/g, "„")
  42. .replace(/&#8226;/g, "•")
  43. .replace(/&#8230;/g, "…")
  44. .replace(/&#8482;/g, "™")
  45. .replace(/&#x1f341;/g, "🍁")
  46. .replace(/&#x1f384;/g, "🎄")
  47. .replace(/&#x1f480;/g, "💀")
  48. .replace(/&#x1f47b;/g, "👻")
  49. .replace(/&#x1f342;/g, "🍂")
  50. .replace(/&#x1f33d;/g, "🌽")
  51. .replace(/&#x1f353;/g, "🍓")
  52. .replace(/&#x1f345;/g, "🍅")
  53. .replace(/&#x1f308;/g, "🌈")
  54. .replace(/&#x1f3f3;/g, "🏳")
  55. .replace(/&#x1f8f8;/g, "")
  56. .replace(/&#x1f1ee;/g, "")
  57. .replace(/\<br\>/g, "")
  58. };
  59.  
// Import pipeline. The object passed to flow.series defines three steps — `posts`, `assets`
// and `insert`. Later steps read earlier results through flow.data.<step> (for example
// flow.data.posts.processed), so the steps are presumably run one after another in the
// order they are declared.
// NOTE(review): asyncLib.Flow is project-local; the meaning of `timers : true` is not
// visible in this script — confirm against the asyncLib documentation.
  60. let flow = new asyncLib.Flow({ timers : true });
  61. flow.series({
  62. posts(cb) {
  63. console.log("Collecting posts");
  64. let posts = [];
  65. let options = {
  66. url : settings.api.posts,
  67. qs : {
  68. page : 1,
  69. per_page : 2,
  70. "_embed" : 1
  71. },
  72. json : true
  73. };
  74. let paginate = () => {
  75. request(options, (err, response, body) => {
  76. if (err) return cb(err);
  77. posts = posts.concat(body);
  78. // if (body.length === 100) {
  79. // options.qs.page = options.qs.page + 1;
  80. // return paginate();
  81. // }
  82. process();
  83. });
  84. };
  85. let process = () => {
  86. let raw = posts;
  87. let processed = raw.map(post => {
  88. let categories_ids = post.categories.map(category => String(category));
  89. let tags_ids = post.tags.map(tag => String(tag));
  90. let title = convertAscii(post.title.rendered);
  91. let slug = post.slug.replace(/[^a-zA-Z0-9-]/ig, '');
  92. return {
  93. blog : settings.blog_name,
  94. author_id : String(post.author),
  95. image_id : String(post.featured_media || 0),
  96. categories_ids : categories_ids,
  97. title : title,
  98. slug : slug,
  99. description : post.content.rendered,
  100. enabled : true,
  101. active : true,
  102. enable_comments : (post.comment_status === "open") ? true : false,
  103. publish_start : new Date(post.date),
  104. created : new Date(post.date),
  105. updated : new Date(post.modified),
  106. tags_ids : tags_ids,
  107. cms_tags_ids : [settings.tag],
  108. legacy_id : String(post.id)
  109. };
  110. });
  111. raw = raw;
  112. processed = processed;
  113. cb(null, { raw : raw, processed : processed });
  114. };
  115. paginate();
  116. },
  117. assets(cb) {
  118. console.log("Collecting assets");
  119. async.series({
  120. attachments(cb) {
  121. let options = {
  122. url : settings.api.media,
  123. qs : {
  124. page : 1,
  125. per_page : 100
  126. },
  127. json : true
  128. };
  129. let paginate = (post, cb) => {
  130. let postid = Number(post.legacy_id);
  131. options.qs.parent = postid;
  132. request(options, (err, response, body) => {
  133. if (err) return cb(err);
  134. if (body.length === 100) {
  135. options.qs.page = options.qs.page + 1;
  136. return paginate(postid, cb);
  137. }
  138. cb(null, body);
  139. });
  140. };
  141. async.mapLimit(flow.data.posts.processed, 5, (post, cb) => {
  142. paginate(post, cb);
  143. }, (error, assets) => {
  144. if (error) return cb(error);
  145. cb(null, assets);
  146. });
  147. },
  148. contents(cb) {
  149. let assetids = [];
  150. let attachment = /<div(?:[^>]+?)?id="attachment_([0-9]+)"(?:[^>]+?)?>/ig
  151. let inlineimage = /<img(?:[^>]+?)?class="(?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?"(?:[^>]+?)?src="http:\/\/manitobahot\.com\/(?:[^>]+?)?>/ig
  152. let attachmentimage = /<img(?:[^>]+?)?data-attachment-id="([0-9]+)"(?:[^>]+?)?>/ig
  153. flow.data.posts.processed.forEach(post => {
  154. if (attachment.test(post.description)) {
  155. post.description.replace(attachment, (img, id) => {
  156. assetids.push(id);
  157. });
  158. }
  159. if (inlineimage.test(post.description)) {
  160. post.description.replace(inlineimage, (img, id) => {
  161. assetids.push(id);
  162. });
  163. }
  164. if (attachmentimage.test(post.description)) {
  165. post.description.replace(attachmentimage, (img, id) => {
  166. assetids.push(id);
  167. });
  168. }
  169. });
  170. if (assetids.length > 0) {
  171. async.mapLimit(assetids, 5, (id, cb) => {
  172. let options = {
  173. url : settings.api.media + '/' + id,
  174. json : true
  175. };
  176. request(options, (err, response, body) => {
  177. if (err) return cb(err);
  178. if (response.statusCode === 403) {
  179. console.log(`[Assets] Forbidden to collect media asset (${id})`);
  180. }
  181. cb(null, body);
  182. });
  183. }, (error, assets) => {
  184. if (error) return cb(error);
  185. cb(null, assets);
  186. });
  187. }
  188. }
  189. }, (err, combined) => {
  190. if (err) return cb(err);
  191. let unique = {};
  192. Object.keys(combined).forEach(key => {
  193. combined[key].forEach(asset => {
  194. if (Array.isArray(asset)) {
  195. if (asset.length > 0) {
  196. asset.forEach(asset => {
  197. if (asset.title !== undefined) {
  198. unique[asset.id] = asset;
  199. } else {
  200. console.log('[Asset Array] Failed asset:');
  201. console.log(asset);
  202. }
  203. });
  204. }
  205. } else {
  206. if (asset.title !== undefined) {
  207. unique[asset.id] = asset;
  208. } else {
  209. console.log('[Asset Object] Failed asset:');
  210. console.log(asset);
  211. }
  212. }
  213. });
  214. });
  215. let raw = Object.keys(unique).map(key => unique[key]);
  216. let processed = raw.map(asset => {
  217. if (asset.legacy_id == 101386) {
  218. console.log(asset);
  219. }
  220. let title = (asset.title.rendered !== "") ? asset.title.rendered : 'NO TITLE ' + asset.source_url;
  221. return {
  222. title : title,
  223. title_sort : title,
  224. imageurl : asset.source_url,
  225. notes : asset.description || undefined,
  226. created : asset.date ? new Date(asset.date) : undefined,
  227. content_owner : 'default',
  228. tags_ids : [settings.tag],
  229. legacy_id : String(asset.id)
  230. };
  231. });
  232. cb(null, { raw : raw, processed : processed });
  233. });
  234. },
  235. insert(cb) {
  236. console.log("Inserting data");
  237. var imagesApi = site.plugins.assets.apis.images;
  238. var blogApi = site.plugins.blog.apis[settings.blog_name];
  239. async.series({
  240. assets(cb) {
  241. console.log("Insert assets");
  242. let assets = flow.data.assets.processed;
  243. if (!assets.length) return cb(null);
  244. async.mapLimit(assets, 5, (asset, cb) => {
  245. imagesApi.insert(asset, (err, asset) => {
  246. if (err) return cb(err);
  247. cb(null, asset);
  248. });
  249. }, (error, assets) => {
  250. if (error) return cb(error);
  251. flow.data.assets.inserted = assets;
  252. console.log("Assets inserted")
  253. cb(null, assets);
  254. });
  255. },
  256. posts(cb) {
  257. console.log("Insert posts");
  258. let posts = flow.data.posts.processed;
  259. if (!posts.length) return cb(null);
  260.  
  261. let assets = flow.data.assets.inserted;
  262.  
  263. let anchorimage = /<a(?:[^>]+?)?href="(http:\/\/manitobahot.com\/[^"]+?[\.jpg|\.png|\.gif])"(?:[^>]+?)?><img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
  264. let inlineimage = /<img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
  265. let attachmentimage = /<img(?:[^>]+?)data-attachment-id="([^"]+?)"(?:[^>]+?)?src="(http:\/\/i[0-9]\.wp\.com\/[^>]+?)"(?:[^>]+?)?alt="([^]+?)"(?:[^>]+?)?style="([^>]+?)"(?:[^>]+?)?>/ig
  266. let anchors = /<a(?:[^>]+?)?(?:href="http:\/\/manitobahot\.com\/)(?:[^>]+?)?>(.+?)<\/a>/ig
  267.  
  268. posts.forEach(function(post) {
  269. if (post.image_id) {
  270. let image = assets.find(asset => asset.legacy_id === post.image_id);
  271. if (image) {
  272. post.image_id = image._id;
  273. } else {
  274. console.log(`[ImageAssignment] No image? ${post.image_id}`);
  275. delete post.image_id;
  276. }
  277. }
  278. if (post.author_id) {
  279. async.series({
  280. author(cb) {
  281. blogApi.authors.find({ legacy_id: post.author_id }, {}, function(err, docs) {
  282. if (err) { return cb(err); }
  283. cb(null, docs);
  284. });
  285. }
  286. }, function(err, res) {
  287. console.log(res.author[0]._id);
  288. post.author_id = res.author[0]._id;
  289. });
  290. }
  291. if (post.categories_ids) {
  292. async.series({
  293. categories(cb) {
  294. blogApi.categories.find({}, {}, function(err, docs) {
  295. if (err) { return cb(err); }
  296. cb(null, docs);
  297. });
  298. }
  299. }, function(err, res) {
  300. var cats = [];
  301. res.categories.forEach(function(c) {
  302. if (post.categories_ids.includes(c.legacy_id)) {
  303. cats.push(c._id);
  304. }
  305. });
  306. post.categories_ids = cats;
  307. });
  308. }
  309. if (post.tags_ids) {
  310. async.series({
  311. tags(cb) {
  312. blogApi.tags.find({}, {}, function(err, docs) {
  313. if (err) { return cb(err); }
  314. cb(null, docs);
  315. });
  316. }
  317. }, function(err, res) {
  318. var tags = [];
  319. res.tags.forEach(function(t) {
  320. if (post.tags_ids.includes(t.legacy_id)) {
  321. tags.push(t._id);
  322. }
  323. });
  324. post.tags_ids = tags;
  325. });
  326. }
  327. if (anchorimage.test(post.description)) {
  328. post.description = post.description.replace(anchorimage, (whole, href, imgclass, id, src, alt, dimensions) => {
  329. let asset = assets.find(asset => asset.legacy_id === String(id));
  330. if (asset) {
  331. let newsrc = asset.resource.getUrl();
  332. return '<a href="' + href + '"><img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
  333. }
  334. console.log(`[anchorimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  335. return whole;
  336. });
  337. }
  338. if (inlineimage.test(post.description)) {
  339. post.description = post.description.replace(inlineimage, (whole, imgclass, id, src, alt, dimensions) => {
  340. let asset = assets.find(asset => asset.legacy_id === String(id));
  341. if (asset) {
  342. let newsrc = asset.resource.getUrl();
  343. return '<img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
  344. }
  345. console.log(`[inlineimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  346. return whole;
  347. });
  348. }
  349. if (attachmentimage.test(post.description)) {
  350. post.description = post.description.replace(attachmentimage, (whole, id, src, alt, style) => {
  351. let asset = assets.find(asset => asset.legacy_id === String(id));
  352. if (asset) {
  353. let newsrc = asset.resource.getUrl();
  354. return '<img data-attachment-id="' + id + '" src="' + newsrc + '" alt="' + alt + '" style="' + style + '">';
  355. }
  356. console.log(`[attachmentimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  357. return whole;
  358. });
  359. }
  360. if (anchors.test(post.description)) {
  361. post.description = post.description.replace(anchors, (whole, inner) => {
  362. return inner;
  363. });
  364. }
  365. });
  366. console.log("INSERT!!!!!!");
  367. blogApi.posts.insert(posts, (err, posts) => {
  368. if (err) return cb(err);
  369. flow.data.posts.inserted = posts;
  370. console.log("Posts inserted");
  371. cb(null, posts);
  372. });
  373. }
  374. }, (err, results) => {
  375. if (err) return cb(err);
  376. cb(null, results);
  377. });
  378. }
  379. }, (err, data) => {
// Completion handler for the whole pipeline: forwards the first error reported by a step,
// otherwise logs completion and passes the collected step results on.
// NOTE(review): `cb` is not defined anywhere in the visible script; presumably the host
// that runs this script supplies it (as with `site` in the insert step) — confirm.
  380. if (err) return cb(err);
  381. //console.log('data', data);
  382. console.log('import done');
  383. cb(null, data);
  384. });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement