Advertisement
Guest User

intra

a guest
Nov 24th, 2016
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.28 KB | None | 0 0
  1. /**
  2. * Created by ASUS-PC on 11/8/2016.
  3. */
  4. var FeedParser = require('feedparser'),
  5. request = require('request'),
  6. http = require('http'),
  7. https = require('https'),
  8. striptags = require('striptags'),
  9. size = require('request-image-size'),
  10. Slack = require('slack-node'),
  11. mysql = require('mysql'),
  12. crypto = require('crypto'),
  13. Emitter = require('tiny-emitter'),
  14. errors = []
  15. ;
  16.  
  17. var emitter = new Emitter();
  18.  
  19. //this is for slack
  20. //webhookUri = "https://hooks.slack.com/services/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";
  21. webhookUri = "";
  22.  
  23. slack = new Slack();
  24. slack.setWebhook(webhookUri);
  25. // end slack
  26.  
  27. // setting mysql config
  28. var connection = mysql.createConnection({
  29. host: "localhost",
  30. user: "root",
  31. password: '',
  32. //user: "intrafeed",
  33. //password: 'Palmerah$$$i9i7',
  34. database: "intrafeed",
  35. charset: 'utf8mb4'
  36. //waitForConnections : true
  37. // multipleStatements: true
  38. });
  39.  
  40. connection.connect(function (err) {
  41. if (err) {
  42. console.log('Error connecting to Db');
  43. return;
  44. }
  45. console.log('Connection established');
  46. });
  47.  
  48. var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";
  49.  
  50. /*
  51. End settings and params
  52. */
  53.  
  54. //fetch('http://rss.detik.com/index.php/detikcom_nasional', 63, '416a5423119a4ec081ca4dd4d948f27f');
  55.  
  56. start();
  57. //5 minutes
  58. setInterval(function () {
  59. start();
  60. }, 5 * 60 * 1000);
  61.  
  62.  
  63. function start() {
  64. var channels = null;
  65.  
  66. request({url: intrafeedLinks, json: true}, function (error, response, body) {
  67. if (!error && response.statusCode == 200) {
  68. console.log(body); // Show the HTML for the Google homepage.
  69. channels = body.channels;
  70. for (i = 0; i < channels.length; i++) {
  71.  
  72. //fetch link
  73. //fetch(channels[i].link, channels[i].channelid, channels[i].token);
  74. emitter.emit('fetch', channels[i].link, channels[i].channelid, channels[i].token);
  75. }
  76. }
  77. });
  78. }
  79.  
  80. emitter.on("fetch", function (link, channelId, channelToken) {
  81. fetch(link, channelId, channelToken);
  82. });
  83.  
  84. function fetch(link, channelId, channelToken) {
  85. var req = request(link);
  86. //var feedparser = new FeedParser();
  87. var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
  88.  
  89. req.on('error', function (error) {
  90. // handle any request errors
  91. });
  92.  
  93. var itemArray = [];
  94. var countItemArray = 0;
  95.  
  96. req.on('response', function (res) {
  97. var stream = this;
  98.  
  99. if (res.statusCode != 200) {
  100. var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId + " error result: " + err;
  101. if (isNaN(errors[channelId])) {
  102. errors[channelId] = 1;
  103. } else {
  104. errors[channelId]++;
  105. }
  106. if (errors[channelId] >= 10) {
  107. slack.webhook({
  108. text: txt
  109. }, function (errs, response) {
  110. // console.log(response);
  111. });
  112. errors[channelId] = 0;
  113. }
  114. console.log("errors counter : " + channelId + " " + errors[channelId]);
  115.  
  116. return this.emit('error', new Error('Bad status code'));
  117. }
  118.  
  119. stream.pipe(feedparser);
  120. });
  121.  
  122. feedparser.on('error', function (err) {
  123. var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
  124. console.log(txt);
  125. if (isNaN(errors[channelId])) {
  126. errors[channelId] = 1;
  127. } else {
  128. errors[channelId]++;
  129. }
  130.  
  131. if (errors[channelId] >= 10) {
  132. slack.webhook({
  133. text: txt
  134. }, function (errs, response) {
  135. // console.log(response);
  136. });
  137. errors[channelId] = 0;
  138. }
  139. console.log("errors counter : " + channelId + " " + errors[channelId]);
  140. });
  141.  
  142. feedparser.on('readable', function () {
  143. // This is where the action is!
  144. var stream = this
  145. , meta = this.meta // **NOTE** the "meta" is always available in the context of the feedparser instance
  146. , item;
  147.  
  148. while (item = stream.read()) {
  149. ///store(item, link, channelId, channelToken);
  150.  
  151. if (item.date == null) {
  152. item.date = new Date().toString();
  153. // console.log("date is :"+item.date);
  154. // return;
  155. }
  156.  
  157. if (item.pubDate == null) {
  158. item.pubDate = new Date().toString();
  159. // console.log("date is :"+item.date);
  160. //return;
  161. }
  162.  
  163. item.date = new Date(item.date).getTime();
  164.  
  165. //console.log("date is :"+item.date);
  166.  
  167. itemArray.push(item);
  168. //console.log(" -------++----------"+ item +"==============");
  169. //emitter.emit('store', item, link, channelId, channelToken);
  170. countItemArray++;
  171. }
  172. });
  173. // promise
  174. // end promise
  175.  
  176. feedparser.on("end", function () {
  177. console.log(" ++++++++" + link + " total : " + countItemArray + "+++++++++");
  178.  
  179. var byDate = itemArray.slice(0);
  180. byDate.sort(function (a, b) {
  181. var x = a.date;
  182. var y = b.date;
  183. return x < y ? -1 : x > y ? 1 : 0;
  184. });
  185.  
  186. //buat fungsi baru fechingAll
  187. fetchingAll(byDate , link, channelId, channelToken);
  188.  
  189. });
  190. }
  191.  
  192. function fetchingAll(item, link, channelId, channelToken){
  193. var fetchpromises = item;
  194. var sequence = Promise.resolve()
  195.  
  196. fetchpromises.forEach(function(index,value){
  197. sequence = sequence.then(function(){
  198. return store(index, link, channelId, channelToken)
  199. //console.log(index)
  200. }).then(function(value){
  201. // success method
  202. //console.log(value + '.' + url + ' success to load!')
  203. mysqlSave(value.link, value.channelId, value.channelToken, value.data)
  204. //console.log(value);
  205.  
  206. }).catch(function(err){
  207. console.log(err + ' failed to load!')
  208. })
  209. })
  210. }
  211.  
  212. emitter.on('store', function (item, link, channelId, channelToken) {
  213. store(item, link, channelId, channelToken);
  214. });
  215.  
  216. function store(item, link, channelId, channelToken) {
  217.  
  218. var linkOri = link;
  219. var linkItem = item.link;
  220. var title = item.title;
  221. var description = item.description;
  222. var pubDate = item.date;
  223. /* var newDate = new Date(pubDate);
  224. var tz = newDate.toString().match(/([-\+][0-9]+)\s/)[1];
  225. var t = newDate.toJSON();
  226. t = t.substring(0, 19) + tz;
  227. t = t.replace("+", "-");*/
  228.  
  229. //console.log("pubDate is :" + pubDate + "link at" + link);
  230. /* if (pubDate == null) {
  231. pubDate = new Date().toString();
  232. }*/
  233. var createdDate = new Date(pubDate).toISOString().slice(0, 19).replace('T', ' ');
  234. //console.log(createdDate);
  235. var author = item.author || "";
  236. var imagewidth = 0;
  237. var imageheight = 0;
  238.  
  239. var isItemImage = item.image;
  240.  
  241. var itemImage = "";
  242. var imagelink = "a";
  243.  
  244. var linkhash = crypto.createHash('sha256').update(linkItem).digest('sha256').toString('hex');
  245.  
  246. if (isItemImage.hasOwnProperty("url") && isItemImage.url != null) {
  247. itemImage = isItemImage.url;
  248. }
  249. if (itemImage.length > 0) {
  250. imagelink = itemImage;
  251. }
  252.  
  253. if (imagelink.length < 5) {
  254. imagelink = getImage(description);
  255. }
  256.  
  257. if (imagelink.length < 5) {
  258. if (item.hasOwnProperty("link") && item.link != null) {
  259. imagelink = item.link;
  260. }
  261. }
  262.  
  263.  
  264. //get from enclosures
  265. if (imagelink.length < 5) {
  266. if (item.hasOwnProperty("enclosures") && item.enclosures != null) {
  267. var itemEnclosures = item.enclosures;
  268. if (itemEnclosures.length > 0) {
  269. if (itemEnclosures[0].hasOwnProperty("url")) {
  270. imagelink = itemEnclosures[0].url;
  271. }
  272. }
  273. }
  274. }
  275.  
  276. //get from tag rss:image
  277. if (imagelink.length < 5) {
  278. if (item.hasOwnProperty("rss:image") && item["rss:image"] != null) {
  279. var imageRss = item["rss:image"];
  280. if (imageRss.hasOwnProperty("url")) {
  281. var imageRssUrl = imageRss["url"];
  282. if (imageRssUrl.hasOwnProperty("#")) {
  283. var imageRssUrlCrash = imageRssUrl["#"];
  284. if (imageRssUrlCrash.length > 1) {
  285. imagelink = imageRss
  286. } else {
  287. var meta = item.meta;
  288. if (meta.hasOwnProperty("image") && meta.image != null) {
  289. var metaImage = meta.image;
  290. if (metaImage.hasOwnProperty("url")) {
  291. imagelink = meta["image"]["url"];
  292. }
  293. }
  294. }
  295. }
  296. }
  297.  
  298. }
  299. }
  300.  
  301. //item meta image url
  302. if (imagelink.length < 5) {
  303. var meta = item.meta;
  304. if (meta != null) {
  305. if (meta.hasOwnProperty("image")) {
  306. var metaImage = meta.image;
  307. if (metaImage.hasOwnProperty("url")) {
  308. imagelink = meta["image"]["url"];
  309. }
  310. }
  311. }
  312. }
  313.  
  314. //get from tag media:content
  315. if (imagelink < 5) {
  316. if (item.hasOwnProperty("media:content") && item["media:content"] != null) {
  317. var mediaContent = item["media:content"];
  318. imagelink = mediaContent.getAttribute("url");
  319. }
  320. }
  321.  
  322. if (imagelink < 5) {
  323. if (item.hasOwnProperty("content:encoded")) {
  324. var contentEncoded = item["content:encoded"];
  325. if (contentEncoded.hasOwnProperty("#")) {
  326. var contentCrash = contentEncoded["#"];
  327. imagelink = getImage(contentCrash);
  328. }
  329. }
  330. }
  331.  
  332. var data = {
  333. title: title,
  334. description: striptags(description),
  335. link: linkItem,
  336. imagelink: imagelink,
  337. imagewidth: imagewidth || 0,
  338. imageheight: imageheight || 0,
  339. createdDate: createdDate,
  340. channelid: channelId,
  341. username: author,
  342. linkhash: linkhash,
  343. active: 1
  344. };
  345.  
  346. if (imagelink.length > 5) {
  347. imagelink = imagelink.replace("<![CDATA[", "").replace("]]>", "");
  348. imagelink = imagelink.replace("'", "").replace('"', "");
  349. imagelink = imagelink.replace("'", "");
  350. imagelink = imagelink.replace('"', "");
  351. imagelink = imagelink.replace("&quot;", "");
  352. imagelink = striptags(imagelink);
  353.  
  354. if (imagelink.match(/.js/)) {
  355. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  356. //mysqlSave(link, channelId, channelToken, data);
  357. } else {
  358. size(imagelink, function (err, dimensions, length) {
  359. if (err) {
  360. slack.webhook({
  361. text: err
  362. }, function (errs, response) {
  363. // console.log(response);
  364. });
  365. return true;
  366. }
  367. if (!err) {
  368. if (dimensions != undefined) {
  369. imagewidth = dimensions.width;
  370. imageheight = dimensions.height;
  371. }
  372.  
  373. data = {
  374. title: title,
  375. description: striptags(description),
  376. link: linkItem,
  377. imagelink: imagelink,
  378. imagewidth: imagewidth || 0,
  379. imageheight: imageheight || 0,
  380. createdDate: createdDate,
  381. channelid: channelId,
  382. username: author,
  383. linkhash: linkhash,
  384. active: 1
  385. };
  386.  
  387. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  388. //mysqlSave(link, channelId, channelToken, data);
  389. }
  390.  
  391.  
  392. });
  393. }
  394.  
  395.  
  396. } else {
  397. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  398. //mysqlSave(link, channelId, channelToken, data);
  399.  
  400. }
  401.  
  402.  
  403. var fetch = {link:link,channelId:channelId,channelToken:channelToken,data:data};
  404. return fetch;
  405. }
  406.  
  407.  
  408. emitter.on('mysql-save', function (link, channelId, channelToken, data) {
  409. // var sql2 = '⁠⁠⁠insert into feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username) SELECT * FROM (SELECT ? as f1, ? as f2, ? as f3, ? as f4, 125 as f5,125 as f6,84 as f7,1 as f8, ? as f9, ? as f10) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? and linkhash= ?) LIMIT 1;';
  410. mysqlSave(link, channelId, channelToken, data);
  411. });
  412.  
  413. function mysqlSave(link, channelId, channelToken, data) {
  414. var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';
  415.  
  416. var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];
  417. var query = connection.query(sql, arrayValue, function (errs, resulr) {
  418. var txt = data.title + " " + data.link + " " + data.description;
  419. if (errs) {
  420. //console.log(errs.toString());
  421. //console.log(arrayValue.toString());
  422. //console.log(query);
  423.  
  424. slack.webhook({
  425. text: txt + errs.toString()
  426. }, function (errs, response) {
  427. // console.log(response);
  428. });
  429. }
  430. });
  431. }
  432.  
  433.  
  434. function getImage(string) {
  435. if (string != null) {
  436. string = string.replace("<![CDATA[", "").replace("]]>", "");
  437. string = string.replace("CDATA", "");
  438. string = string.replace("<figure>", "");
  439. string = string.replace("</figure>", "");
  440. string = string.replace("\n", "");
  441. string = string.replace("\t", "");
  442.  
  443. var re = /<img[^>]+src="?([^"\s]+)"?[^>]*\/>/g;
  444. var rex = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i; // match src='a' OR src="a" OR src=a
  445. var results = null;
  446. var img = "";
  447.  
  448. results = re.exec(string);
  449. if (results) {
  450. img = results[1];
  451. if (img.match(/.js/)) {
  452. var res = string.match(rex);
  453. if (res != null) {
  454. img = res[1] || res[2] || res[3]; // get the one that matched
  455. }
  456. }
  457.  
  458. } else {
  459. var res = string.match(rex);
  460. if (res != null) {
  461. img = res[1] || res[2] || res[3]; // get the one that matched
  462. }
  463. }
  464.  
  465. return img;
  466. } else {
  467. return "";
  468. }
  469.  
  470. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement