Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.51 KB | None | 0 0
  1. var mongolayer = require("/sv/node_modules/npm/mongolayer/1/node_modules/mongolayer/");
  2. var asyncLib = require("/sv/node_modules/sv/asyncLib/1/");
  3. var request = require("request");
  4. var async = require("/sv/node_modules/npm/async/1/node_modules/async/");
  5.  
  6. console.log('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n');
  7.  
  // WordPress REST (wp/v2) endpoints of the source site, keyed by resource type.
  const wpEndpoints = {
    posts : 'http://manitobahot.com/wp-json/wp/v2/posts',
    media : 'http://manitobahot.com/wp-json/wp/v2/media',
    categories : 'http://manitobahot.com/wp-json/wp/v2/categories',
    tags : 'http://manitobahot.com/wp-json/wp/v2/tags'
  };

  // Import configuration shared by every stage of the script:
  //   blog_name - key of the target blog API (site.plugins.blog.apis[blog_name]) and
  //               the value written to each imported post's `blog` field
  //   api       - source endpoints (see wpEndpoints)
  //   tag       - ObjectId attached to every imported post (cms_tags_ids) and asset (tags_ids)
  let settings = {
    blog_name : 'leisure_blog',
    api : wpEndpoints,
    tag : new mongolayer.ObjectId('5a5eb261bef518101cf78f2e')
  };
  18.  
  /**
   * Decode the HTML entities found in WordPress-rendered text into plain characters.
   *
   * Handles the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and every numeric
   * entity (`&#8217;`, `&#038;`, `&#x1f341;`, ...). Literal `<br>` tags are removed.
   * The two code points the previous implementation mapped to an empty string
   * (U+1F8F8 and U+1F1EE) are still removed rather than decoded.
   *
   * Decoding happens in a single pass, so text produced by one replacement is never
   * decoded a second time (`&amp;lt;` becomes `&lt;`, not `<`).
   *
   * @param {string} str - Text that may contain HTML entities.
   * @returns {string} The decoded text. Unknown or invalid entities are left untouched.
   */
  let convertAscii = function(str) {
    const namedEntities = { amp : "&", lt : "<", gt : ">", quot : "\"" };
    // Code points that are dropped from the output instead of being decoded.
    const strippedCodePoints = new Set([0x1f8f8, 0x1f1ee]);
    const entityOrBreak = /&(?:(amp|lt|gt|quot)|#([0-9]+)|#[xX]([0-9a-fA-F]+));|<br>/g;

    return str.replace(entityOrBreak, (match, name, decimal, hex) => {
      if (match === "<br>") {
        return "";
      }
      if (name !== undefined) {
        return namedEntities[name];
      }
      const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
      if (strippedCodePoints.has(codePoint)) {
        return "";
      }
      // String.fromCodePoint throws on out-of-range values; NUL and lone surrogates
      // are not meaningful text either, so such entities are passed through as-is.
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    });
  };
  51.  
  52. let flow = new asyncLib.Flow({ timers : true });
  53. flow.series({
  54. posts(cb) {
  55. console.log("Collecting posts");
  56. let posts = [];
  57. let options = {
  58. url : settings.api.posts,
  59. qs : {
  60. page : 1,
  61. per_page : 2,
  62. "_embed" : 1
  63. },
  64. json : true
  65. };
  66. let paginate = () => {
  67. request(options, (err, response, body) => {
  68. if (err) return cb(err);
  69. posts = posts.concat(body);
  70. // if (body.length === 100) {
  71. // options.qs.page = options.qs.page + 1;
  72. // return paginate();
  73. // }
  74. process();
  75. });
  76. };
  77. let process = () => {
  78. let raw = posts;
  79. let processed = raw.map(post => {
  80. let categories_ids = post.categories.map(category => String(category));
  81. let tags_ids = post.tags.map(tag => String(tag));
  82. let title = convertAscii(post.title.rendered);
  83. let slug = post.slug.replace(/[^a-zA-Z0-9-]/ig, '');
  84. return {
  85. blog : settings.blog_name,
  86. author_id : String(post.author),
  87. image_id : String(post.featured_media || 0),
  88. categories_ids : categories_ids,
  89. title : title,
  90. slug : slug,
  91. description : post.content.rendered,
  92. enabled : true,
  93. active : true,
  94. enable_comments : (post.comment_status === "open") ? true : false,
  95. publish_start : new Date(post.date),
  96. created : new Date(post.date),
  97. updated : new Date(post.modified),
  98. tags_ids : tags_ids,
  99. cms_tags_ids : [settings.tag],
  100. legacy_id : String(post.id)
  101. };
  102. });
  103. raw = raw;
  104. processed = processed;
  105. cb(null, { raw : raw, processed : processed });
  106. };
  107. paginate();
  108. },
  109. assets(cb) {
  110. console.log("Collecting assets");
  111. async.series({
  112. attachments(cb) {
  113. let options = {
  114. url : settings.api.media,
  115. qs : {
  116. page : 1,
  117. per_page : 100
  118. },
  119. json : true
  120. };
  121. let paginate = (post, cb) => {
  122. let postid = Number(post.legacy_id);
  123. options.qs.parent = postid;
  124. request(options, (err, response, body) => {
  125. if (err) return cb(err);
  126. if (body.length === 100) {
  127. options.qs.page = options.qs.page + 1;
  128. return paginate(postid, cb);
  129. }
  130. cb(null, body);
  131. });
  132. };
  133. async.mapLimit(flow.data.posts.processed, 5, (post, cb) => {
  134. paginate(post, cb);
  135. }, (error, assets) => {
  136. if (error) return cb(error);
  137. cb(null, assets);
  138. });
  139. },
  140. contents(cb) {
  141. let assetids = [];
  142. let attachment = /<div(?:[^>]+?)?id="attachment_([0-9]+)"(?:[^>]+?)?>/ig
  143. let inlineimage = /<img(?:[^>]+?)?class="(?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?"(?:[^>]+?)?src="http:\/\/manitobahot\.com\/(?:[^>]+?)?>/ig
  144. let attachmentimage = /<img(?:[^>]+?)?data-attachment-id="([0-9]+)"(?:[^>]+?)?>/ig
  145. flow.data.posts.processed.forEach(post => {
  146. if (attachment.test(post.description)) {
  147. post.description.replace(attachment, (img, id) => {
  148. assetids.push(id);
  149. });
  150. }
  151. if (inlineimage.test(post.description)) {
  152. post.description.replace(inlineimage, (img, id) => {
  153. assetids.push(id);
  154. });
  155. }
  156. if (attachmentimage.test(post.description)) {
  157. post.description.replace(attachmentimage, (img, id) => {
  158. assetids.push(id);
  159. });
  160. }
  161. });
  162. if (assetids.length > 0) {
  163. async.mapLimit(assetids, 5, (id, cb) => {
  164. let options = {
  165. url : settings.api.media + '/' + id,
  166. json : true
  167. };
  168. request(options, (err, response, body) => {
  169. if (err) return cb(err);
  170. if (response.statusCode === 403) {
  171. console.log(`[Assets] Forbidden to collect media asset (${id})`);
  172. }
  173. cb(null, body);
  174. });
  175. }, (error, assets) => {
  176. if (error) return cb(error);
  177. cb(null, assets);
  178. });
  179. }
  180. }
  181. }, (err, combined) => {
  182. if (err) return cb(err);
  183. let unique = {};
  184. Object.keys(combined).forEach(key => {
  185. combined[key].forEach(asset => {
  186. if (Array.isArray(asset)) {
  187. if (asset.length > 0) {
  188. asset.forEach(asset => {
  189. if (asset.title !== undefined) {
  190. unique[asset.id] = asset;
  191. } else {
  192. console.log('[Asset Array] Failed asset:');
  193. console.log(asset);
  194. }
  195. });
  196. }
  197. } else {
  198. if (asset.title !== undefined) {
  199. unique[asset.id] = asset;
  200. } else {
  201. console.log('[Asset Object] Failed asset:');
  202. console.log(asset);
  203. }
  204. }
  205. });
  206. });
  207. let raw = Object.keys(unique).map(key => unique[key]);
  208. let processed = raw.map(asset => {
  209. if (asset.legacy_id == 101386) {
  210. console.log(asset);
  211. }
  212. let title = (asset.title.rendered !== "") ? asset.title.rendered : 'NO TITLE ' + asset.source_url;
  213. return {
  214. title : title,
  215. title_sort : title,
  216. imageurl : asset.source_url,
  217. notes : asset.description || undefined,
  218. created : asset.date ? new Date(asset.date) : undefined,
  219. content_owner : 'default',
  220. tags_ids : [settings.tag],
  221. legacy_id : String(asset.id)
  222. };
  223. });
  224. cb(null, { raw : raw, processed : processed });
  225. });
  226. },
  227. insert(cb) {
  228. console.log("Inserting data");
  229. var imagesApi = site.plugins.assets.apis.images;
  230. var blogApi = site.plugins.blog.apis[settings.blog_name];
  231. async.series({
  232. assets(cb) {
  233. console.log("Insert assets");
  234. let assets = flow.data.assets.processed;
  235. if (!assets.length) return cb(null);
  236. async.mapLimit(assets, 5, (asset, cb) => {
  237. imagesApi.insert(asset, (err, asset) => {
  238. if (err) return cb(err);
  239. cb(null, asset);
  240. });
  241. }, (error, assets) => {
  242. if (error) return cb(error);
  243. flow.data.assets.inserted = assets;
  244. console.log("Assets inserted")
  245. cb(null, assets);
  246. });
  247. },
  248. posts(cb) {
  249. console.log("Insert posts");
  250. let posts = flow.data.posts.processed;
  251. if (!posts.length) return cb(null);
  252.  
  253. let assets = flow.data.assets.inserted;
  254.  
  255. let anchorimage = /<a(?:[^>]+?)?href="(http:\/\/manitobahot.com\/[^"]+?[\.jpg|\.png|\.gif])"(?:[^>]+?)?><img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
  256. let inlineimage = /<img(?:[^>]+?)?class="((?:[^>]+?)?wp-image-([0-9]+)(?:[^>]+?)?)"(?:[^>]+?)?src="(http:\/\/manitobahot\.com\/[^>]+?)"(?:[^>]+?)?(?:alt="([^>]+?)")?(?:[^>]+?)(width="[0-9]+?" height="[0-9]+?")(?:[^>]+?)?>/ig
  257. let attachmentimage = /<img(?:[^>]+?)data-attachment-id="([^"]+?)"(?:[^>]+?)?src="(http:\/\/i[0-9]\.wp\.com\/[^>]+?)"(?:[^>]+?)?alt="([^]+?)"(?:[^>]+?)?style="([^>]+?)"(?:[^>]+?)?>/ig
  258. let anchors = /<a(?:[^>]+?)?(?:href="http:\/\/manitobahot\.com\/)(?:[^>]+?)?>(.+?)<\/a>/ig
  259.  
  260. posts.forEach(function(post) {
  261. if (post.image_id) {
  262. let image = assets.find(asset => asset.legacy_id === post.image_id);
  263. if (image) {
  264. post.image_id = image._id;
  265. } else {
  266. console.log(`[ImageAssignment] No image? ${post.image_id}`);
  267. delete post.image_id;
  268. }
  269. }
  270. if (post.author_id) {
  271. async.series({
  272. author(cb) {
  273. blogApi.authors.find({ legacy_id: post.author_id }, {}, function(err, docs) {
  274. if (err) { return cb(err); }
  275. cb(null, docs);
  276. });
  277. }
  278. }, function(err, res) {
  279. console.log(res.author[0]._id);
  280. post.author_id = res.author[0]._id;
  281. });
  282. }
  283. if (post.categories_ids) {
  284. async.series({
  285. categories(cb) {
  286. blogApi.categories.find({}, {}, function(err, docs) {
  287. if (err) { return cb(err); }
  288. cb(null, docs);
  289. });
  290. }
  291. }, function(err, res) {
  292. var cats = [];
  293. res.categories.forEach(function(c) {
  294. if (post.categories_ids.includes(c.legacy_id)) {
  295. cats.push(c._id);
  296. }
  297. });
  298. post.categories_ids = cats;
  299. });
  300. }
  301. if (post.tags_ids) {
  302. async.series({
  303. tags(cb) {
  304. blogApi.tags.find({}, {}, function(err, docs) {
  305. if (err) { return cb(err); }
  306. cb(null, docs);
  307. });
  308. }
  309. }, function(err, res) {
  310. var tags = [];
  311. res.tags.forEach(function(t) {
  312. if (post.tags_ids.includes(t.legacy_id)) {
  313. tags.push(t._id);
  314. }
  315. });
  316. post.tags_ids = tags;
  317. });
  318. }
  319. if (anchorimage.test(post.description)) {
  320. post.description = post.description.replace(anchorimage, (whole, href, imgclass, id, src, alt, dimensions) => {
  321. let asset = assets.find(asset => asset.legacy_id === String(id));
  322. if (asset) {
  323. let newsrc = asset.resource.getUrl();
  324. return '<a href="' + href + '"><img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
  325. }
  326. console.log(`[anchorimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  327. return whole;
  328. });
  329. }
  330. if (inlineimage.test(post.description)) {
  331. post.description = post.description.replace(inlineimage, (whole, imgclass, id, src, alt, dimensions) => {
  332. let asset = assets.find(asset => asset.legacy_id === String(id));
  333. if (asset) {
  334. let newsrc = asset.resource.getUrl();
  335. return '<img class="' + imgclass + '" src="' + newsrc + '" alt="' + alt + '" ' + dimensions + '>';
  336. }
  337. console.log(`[inlineimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  338. return whole;
  339. });
  340. }
  341. if (attachmentimage.test(post.description)) {
  342. post.description = post.description.replace(attachmentimage, (whole, id, src, alt, style) => {
  343. let asset = assets.find(asset => asset.legacy_id === String(id));
  344. if (asset) {
  345. let newsrc = asset.resource.getUrl();
  346. return '<img data-attachment-id="' + id + '" src="' + newsrc + '" alt="' + alt + '" style="' + style + '">';
  347. }
  348. console.log(`[attachmentimage] could not find asset with legacy_id ${id} for post with legacy_id ${post.legacy_id} - ${post.title}`);
  349. return whole;
  350. });
  351. }
  352. if (anchors.test(post.description)) {
  353. post.description = post.description.replace(anchors, (whole, inner) => {
  354. return inner;
  355. });
  356. }
  357. });
  358. console.log("INSERT!!!!!!");
  359. blogApi.posts.insert(posts, (err, posts) => {
  360. if (err) return cb(err);
  361. flow.data.posts.inserted = posts;
  362. console.log("Posts inserted");
  363. cb(null, posts);
  364. });
  365. }
  366. }, (err, results) => {
  367. if (err) return cb(err);
  368. cb(null, results);
  369. });
  370. }
  371. }, (err, data) => {
  372. if (err) return cb(err);
  373. //console.log('data', data);
  374. console.log('import done');
  375. cb(null, data);
  376. });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement