Advertisement
Guest User

Untitled

a guest
Dec 7th, 2016
155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.85 KB | None | 0 0
  1. /**
  2. * Created by ASUS-PC on 11/8/2016.
  3. */
  4. var FeedParser = require('feedparser'),
  5. request = require('request'),
  6. http = require('http'),
  7. https = require('https'),
  8. striptags = require('striptags'),
  9. sizeOf = require('image-size'),
  10. size = require('request-image-size'),
  11. Slack = require('slack-node'),
  12. mysql = require('mysql'),
  13. crypto = require('crypto'),
  14. Emitter = require('tiny-emitter'),
  15. errors = []
  16. ;
  17.  
  // Event bus that decouples channel discovery (start) from feed downloads (fetch).
  var emitter = new Emitter();

  // ---- Slack notifications ----
  // NOTE(review): a real-looking webhook URL used to be kept at this spot inside
  // a comment. It has been redacted here; treat it as leaked and rotate it.
  // Declared with `var` so it is no longer an implicit global.
  var webhookUri = "";

  var slack = new Slack();
  slack.setWebhook(webhookUri);
  // ---- end Slack ----

  // ---- MySQL ----
  // NOTE(review): an alternative user/password pair was also kept here in a
  // comment and has been redacted for the same reason.
  var connection = mysql.createConnection({
    host: "localhost",
    user: "root",
    password: '',
    database: "intrafeed",
    charset: 'utf8mb4'
  });

  connection.connect(function (err) {
    if (err) {
      // Include the underlying error so a failed connection can be diagnosed.
      console.log('Error connecting to Db', err);
      return;
    }
    console.log('Connection established');
  });

  // Endpoint queried by start(); its JSON answer is read as { channels: [...] }.
  var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";
  50.  
  51. /*
  52. End settings and params
  53. */
  54.  
  55. //fetch('http://rss.detik.com/index.php/detikcom_nasional', 63, '416a5423119a4ec081ca4dd4d948f27f');
  56.  
  // Run one poll right away, then repeat it every five minutes.
  start();
  setInterval(start, 5 * 60 * 1000);
  62.  
  63.  
  /**
   * Downloads the channel list from `intrafeedLinks` and emits one "fetch"
   * event per channel.
   *
   * The response body is read as `{ channels: [{ link, channelid, token }] }`.
   * A request error, a non-200 status, or a body without a `channels` array is
   * logged and the poll is skipped instead of throwing inside the callback.
   */
  function start() {
    request({url: intrafeedLinks, json: true}, function (error, response, body) {
      if (error) {
        console.log("Error requesting channel list: " + error);
        return;
      }
      if (!response || response.statusCode !== 200) {
        console.log("Unexpected status for channel list: " + (response && response.statusCode));
        return;
      }

      console.log(body); // dump of the channel list that was received

      var channels = body && Array.isArray(body.channels) ? body.channels : [];
      channels.forEach(function (channel) {
        if (channel == null) {
          return;
        }
        emitter.emit('fetch', channel.link, channel.channelid, channel.token);
      });
    });
  }
  80.  
  // Bridge from the "fetch" event (emitted by start) to the fetch() worker:
  // each event triggers one feed download.
  emitter.on("fetch", function (feedUrl, id, token) {
    fetch(feedUrl, id, token);
  });
  84.  
  /**
   * Downloads one feed, parses it and hands its items, sorted oldest first, to
   * fetchingAll().
   *
   * Failures (non-200 response, parser error) are counted per channel in the
   * module-level `errors` table; once a channel reaches 10 failures a message
   * is posted to Slack and that channel's counter is reset to 0.
   *
   * @param {string} link           Feed URL to download.
   * @param {number|string} channelId  Channel id; also the key into `errors`.
   * @param {string} channelToken   Passed through unchanged to fetchingAll().
   */
  function fetch(link, channelId, channelToken) {
    var req = request(link);
    var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});

    var itemArray = [];
    var countItemArray = 0;

    // Bumps this channel's failure counter and alerts Slack on the 10th failure.
    function reportFailure(txt) {
      if (isNaN(errors[channelId])) {
        errors[channelId] = 1;
      } else {
        errors[channelId]++;
      }
      if (errors[channelId] >= 10) {
        slack.webhook({
          text: txt
        }, function (errs, response) {
          // delivery result is deliberately ignored (best effort)
        });
        errors[channelId] = 0;
      }
      console.log("errors counter : " + channelId + " " + errors[channelId]);
    }

    req.on('error', function (error) {
      // Log instead of swallowing; the listener must stay registered so an
      // 'error' event on the request does not become an uncaught exception.
      console.log("Error requesting feed " + link + " channelid :" + channelId + " error result: " + error);
    });

    req.on('response', function (res) {
      var stream = this;

      if (res.statusCode != 200) {
        // There is no error object in this scope; only the status is reported.
        reportFailure("Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId);
        return stream.emit('error', new Error('Bad status code'));
      }

      stream.pipe(feedparser);
    });

    feedparser.on('error', function (err) {
      var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
      console.log(txt);
      reportFailure(txt);
    });

    feedparser.on('readable', function () {
      var stream = this;
      var item;

      while ((item = stream.read())) {
        // Items without a date are stamped with the time they were read.
        if (item.date == null) {
          item.date = new Date().toString();
        }
        if (item.pubDate == null) {
          item.pubDate = new Date().toString();
        }

        // Normalise to epoch milliseconds; an unparsable date falls back to
        // "now" so later sorting and formatting never see NaN.
        var stamp = new Date(item.date).getTime();
        item.date = isNaN(stamp) ? Date.now() : stamp;

        itemArray.push(item);
        countItemArray++;
      }
    });

    feedparser.on("end", function () {
      console.log(" ++++++++" + link + " total : " + countItemArray + "+++++++++");

      // Sort a copy so items are stored in chronological order.
      var byDate = itemArray.slice(0);
      byDate.sort(function (a, b) {
        return a.date < b.date ? -1 : a.date > b.date ? 1 : 0;
      });

      fetchingAll(byDate, link, channelId, channelToken);
    });
  }
  /**
   * Stores every parsed feed item, one after another, in the order given.
   *
   * For each item: build the row with store(), try to add the image
   * dimensions with asyncImage(), then persist the row with mysqlSave().
   * If the image lookup fails the row is saved anyway, without dimensions.
   * A failure on one item is logged and does not stop the following items.
   *
   * @param {Array<Object>} item  Parsed feed items (an array, despite the name).
   * @param {string} link          Feed URL the items came from.
   * @param {number|string} channelId
   * @param {string} channelToken
   * @returns {Promise<void>} Settles (never rejects) after the last item.
   */
  function fetchingAll(item, link, channelId, channelToken) {
    var sequence = Promise.resolve();

    item.forEach(function (feedItem) {
      sequence = sequence
        .then(function () {
          var readStore = store(feedItem, link, channelId, channelToken);
          console.log(readStore);

          // asyncImage() yields a Promise; wait for it instead of passing the
          // Promise itself to mysqlSave() as if it were the row.
          return Promise.resolve()
            .then(function () {
              return asyncImage(readStore.data);
            })
            .catch(function (err) {
              console.log("image size lookup failed for " + readStore.data.imagelink + ": " + err);
              return readStore.data;
            });
        })
        .then(function (data) {
          return mysqlSave(link, channelId, channelToken, data);
        })
        .catch(function (err) {
          console.log(err + ' failed to load!');
        });
    });

    return sequence;
  }
  233.  
  234.  
  /**
   * Looks up the pixel dimensions of `data.imagelink` with the module-level
   * `size` helper and records them on `data`.
   *
   * @param {Object} data  Row object carrying `imagelink`; `imagewidth` and
   *                       `imageheight` are written onto it in place.
   * @returns {Promise<Object>} Resolves with the same `data` object once the
   *   dimensions are set; rejects with the lookup error, or with an Error when
   *   the lookup reports no dimensions.
   */
  function asyncImage(data) {
    return new Promise(function (resolve, reject) {
      size(data.imagelink, function (err, dimensions, length) {
        if (err) {
          return reject(err);
        }
        if (!dimensions) {
          return reject(new Error("No image dimensions for " + data.imagelink));
        }

        data.imagewidth = dimensions.width;
        data.imageheight = dimensions.height;

        console.log("insert image size, width:" + data.imagewidth + " | height: " + data.imageheight);

        resolve(data);
      });
    });
  }
  251.  
  252. // promise end
  // "store" event hook: builds the row for a single item via store(). The value
  // store() returns is discarded here. NOTE(review): nothing visible in this
  // file emits "store" - confirm whether this listener is still needed.
  emitter.on('store', function (feedItem, feedUrl, id, token) {
    store(feedItem, feedUrl, id, token);
  });
  256.  
  // True when `value` is a string long enough to be a URL (same threshold of 5
  // characters the original length checks used).
  function isUsableImageLink(value) {
    return typeof value === "string" && value.length >= 5;
  }

  // Removes CDATA markers, quotes, &quot; entities and markup from an image URL.
  function cleanImageLink(imagelink) {
    return striptags(
      imagelink
        .replace(/<!\[CDATA\[/g, "")
        .replace(/\]\]>/g, "")
        .replace(/['"]/g, "")
        .replace(/&quot;/g, "")
    );
  }

  // Returns the first usable image URL found on a parsed item, or "" if none.
  function findImageLink(item, description) {
    var sources = [
      // 1. the item's own image element
      function () {
        return item.image && item.image.url;
      },
      // 2. first <img>/src found in the description HTML
      function () {
        return typeof description === "string" ? getImage(description) : "";
      },
      // 3. first enclosure
      function () {
        var enclosures = item.enclosures;
        return enclosures && enclosures.length > 0 && enclosures[0] && enclosures[0].url;
      },
      // 4. raw <image><url> node; its text content sits under "#"
      function () {
        var rssImage = item["rss:image"];
        return rssImage && rssImage.url && rssImage.url["#"];
      },
      // 5. feed-level image (only present when the parser attaches meta)
      function () {
        return item.meta && item.meta.image && item.meta.image.url;
      },
      // 6. <media:content url="...">; attributes sit under "@"
      function () {
        var media = item["media:content"];
        if (Array.isArray(media)) {
          media = media[0];
        }
        return media && media["@"] && media["@"].url;
      },
      // 7. first image inside <content:encoded>
      function () {
        var encoded = item["content:encoded"];
        return encoded && typeof encoded["#"] === "string" ? getImage(encoded["#"]) : "";
      },
      // 8. last resort: the article link itself. It used to be tried third,
      //    which made every source after it unreachable.
      function () {
        return item.link;
      }
    ];

    for (var i = 0; i < sources.length; i++) {
      var candidate = sources[i]();
      if (isUsableImageLink(candidate)) {
        return candidate;
      }
    }
    return "";
  }

  /**
   * Converts one parsed feed item into the row that mysqlSave() inserts.
   *
   * @param {Object} item           Parsed feed item. Reads `link`, `title`,
   *   `description`, `date`, `author` and the image-bearing fields listed in
   *   findImageLink().
   * @param {string} link           Feed URL; returned unchanged.
   * @param {number|string} channelId  Stored as `data.channelid`.
   * @param {string} channelToken   Returned unchanged.
   * @returns {{link: string, channelId: *, channelToken: string, data: Object}}
   *   `data.createdDate` is "YYYY-MM-DD HH:MM:SS" taken from the ISO (UTC) form
   *   of the item date; `data.linkhash` is the hex SHA-256 of the item link;
   *   image width and height start at 0.
   */
  function store(item, link, channelId, channelToken) {
    var linkItem = item.link;
    var description = item.description;

    // toISOString() throws on an invalid date, so fall back to "now".
    var published = item.date == null ? new Date() : new Date(item.date);
    if (isNaN(published.getTime())) {
      published = new Date();
    }
    var createdDate = published.toISOString().slice(0, 19).replace('T', ' ');

    // 'hex' is the digest encoding; the previous 'sha256' is not an encoding.
    var linkhash = crypto
      .createHash('sha256')
      .update(String(linkItem == null ? "" : linkItem))
      .digest('hex');

    var imagelink = findImageLink(item, description);
    if (imagelink.length > 0) {
      imagelink = cleanImageLink(imagelink);
    }

    var data = {
      title: item.title,
      description: striptags(description),
      link: linkItem,
      imagelink: imagelink,
      imagewidth: 0,
      imageheight: 0,
      createdDate: createdDate,
      channelid: channelId,
      username: item.author || "",
      linkhash: linkhash,
      active: 1
    };

    return {link: link, channelId: channelId, channelToken: channelToken, data: data};
  }
  404.  
  405.  
  // "mysql-save" event hook: forwards the row to mysqlSave() unchanged.
  // NOTE(review): nothing visible in this file emits "mysql-save" - confirm
  // whether this listener is still needed.
  emitter.on('mysql-save', function (feedUrl, id, token, row) {
    mysqlSave(feedUrl, id, token, row);
  });
  410.  
  /**
   * Inserts one feed row unless a row with the same channel id and link hash
   * already exists (the INSERT ... WHERE NOT EXISTS statement below).
   *
   * A query error is logged and posted to Slack; it is not thrown.
   *
   * @param {string} link           Feed URL (not used by the query).
   * @param {number|string} channelId  Written to `channelid` and used in the
   *                                duplicate check.
   * @param {string} channelToken   Not used by the query.
   * @param {Object} data           Row as built by store().
   * @returns {Promise<*>} Resolves (never rejects) once the query has
   *   completed, with the query result or undefined on error / missing data.
   */
  function mysqlSave(link, channelId, channelToken, data) {
    if (data == null) {
      console.log("mysqlSave called without data for channel " + channelId);
      return Promise.resolve();
    }

    var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';

    // Eleven values for the inserted columns, then two for the duplicate check.
    var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];

    return new Promise(function (resolve) {
      connection.query(sql, arrayValue, function (errs, result) {
        if (errs) {
          console.log("error dibagian:" + errs);

          var txt = data.title + " " + data.link + " " + data.description;
          slack.webhook({
            text: txt + errs.toString()
          }, function (slackErr, response) {
            // delivery result is deliberately ignored (best effort)
          });
          return resolve();
        }
        resolve(result);
      });
    });
  }
  433.  
  434.  
  /**
   * Extracts an image URL from a fragment of HTML.
   *
   * The `src` of the first `<img>` tag wins (double-quoted, single-quoted or
   * unquoted, self-closing or not). If there is no `<img>`, or its `src`
   * contains ".js", the first `src` attribute of any tag is used instead.
   *
   * @param {string} string  HTML fragment; anything that is not a string
   *                         (null, undefined, objects, numbers) yields "".
   * @returns {string} The URL found, or "" when there is none. Never undefined.
   */
  function getImage(string) {
    if (typeof string !== "string") {
      return "";
    }

    // Drop every CDATA marker and <figure> wrapper, and turn line breaks and
    // tabs into spaces so attributes split across lines still match.
    var html = string
      .replace(/<!\[CDATA\[/g, "")
      .replace(/\]\]>/g, "")
      .replace(/CDATA/g, "")
      .replace(/<\/?figure>/g, "")
      .replace(/[\r\n\t]+/g, " ");

    // src of an <img> tag: "double", 'single' or unquoted (up to space, > or />).
    var imgSrc = /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+?)(?=\/?>|\s|$))/i;
    // src of any tag: src='a' OR src="a" OR src=a
    var anySrc = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i;

    var img = "";
    var found = imgSrc.exec(html);
    if (found) {
      img = found[1] || found[2] || found[3] || "";
    }

    // The dot is escaped so only a literal ".js" triggers the fallback.
    if (img === "" || /\.js/.test(img)) {
      var fallback = anySrc.exec(html);
      if (fallback) {
        img = fallback[1] || fallback[2] || fallback[3] || "";
      }
    }

    return img;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement