Advertisement
diagnoze

nodejs

Nov 29th, 2016
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.41 KB | None | 0 0
  1. /**
  2. * Created by ASUS-PC on 11/8/2016.
  3. */
  4. var FeedParser = require('feedparser'),
  5. request = require('request'),
  6. http = require('http'),
  7. https = require('https'),
  8. striptags = require('striptags'),
  9. sizeOf = require('image-size'),
  10. size = require('request-image-size'),
  11. Slack = require('slack-node'),
  12. mysql = require('mysql'),
  13. crypto = require('crypto'),
  14. Emitter = require('tiny-emitter'),
  15. errors = []
  16. ;
  17.  
  18. var emitter = new Emitter();
  19.  
  20. //this is for slack
  21. //webhookUri = "https://hooks.slack.com/services/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";
  22. webhookUri = "";
  23.  
  24. slack = new Slack();
  25. slack.setWebhook(webhookUri);
  26. // end slack
  27.  
  28. // setting mysql config
  29. var connection = mysql.createConnection({
  30. host: "localhost",
  31. user: "root",
  32. password: '',
  33. //user: "intrafeed",
  34. //password: 'Palmerah$$$i9i7',
  35. database: "intrafeed",
  36. charset: 'utf8mb4'
  37. //waitForConnections : true
  38. // multipleStatements: true
  39. });
  40.  
  41. connection.connect(function (err) {
  42. if (err) {
  43. console.log('Error connecting to Db');
  44. return;
  45. }
  46. console.log('Connection established');
  47. });
  48.  
  49. var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";
  50.  
  51. /*
  52. End settings and params
  53. */
  54.  
  55. //fetch('http://rss.detik.com/index.php/detikcom_nasional', 63, '416a5423119a4ec081ca4dd4d948f27f');
  56.  
  57. start();
  58. //5 minutes
  59. setInterval(function () {
  60. start();
  61. }, 5 * 60 * 1000);
  62.  
  63.  
  64. function start() {
  65. var channels = null;
  66.  
  67. request({url: intrafeedLinks, json: true}, function (error, response, body) {
  68. if (!error && response.statusCode == 200) {
  69. console.log(body); // Show the HTML for the Google homepage.
  70. channels = body.channels;
  71. for (i = 0; i < channels.length; i++) {
  72.  
  73. //fetch link
  74. //fetch(channels[i].link, channels[i].channelid, channels[i].token);
  75. emitter.emit('fetch', channels[i].link, channels[i].channelid, channels[i].token);
  76. }
  77. }
  78. });
  79. }
  80.  
  81. emitter.on("fetch", function (link, channelId, channelToken) {
  82. fetch(link, channelId, channelToken);
  83. });
  84.  
  85. function fetch(link, channelId, channelToken) {
  86. var req = request(link);
  87. //var feedparser = new FeedParser();
  88. var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
  89.  
  90. req.on('error', function (error) {
  91. // handle any request errors
  92. });
  93.  
  94. var itemArray = [];
  95. var countItemArray = 0;
  96.  
  97. req.on('response', function (res) {
  98. var stream = this;
  99.  
  100. if (res.statusCode != 200) {
  101. var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId + " error result: " + err;
  102. if (isNaN(errors[channelId])) {
  103. errors[channelId] = 1;
  104. } else {
  105. errors[channelId]++;
  106. }
  107. if (errors[channelId] >= 10) {
  108. slack.webhook({
  109. text: txt
  110. }, function (errs, response) {
  111. // console.log(response);
  112. });
  113. errors[channelId] = 0;
  114. }
  115. console.log("errors counter : " + channelId + " " + errors[channelId]);
  116.  
  117. return this.emit('error', new Error('Bad status code'));
  118. }
  119.  
  120. stream.pipe(feedparser);
  121. });
  122.  
  123. feedparser.on('error', function (err) {
  124. var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
  125. console.log(txt);
  126. if (isNaN(errors[channelId])) {
  127. errors[channelId] = 1;
  128. } else {
  129. errors[channelId]++;
  130. }
  131.  
  132. if (errors[channelId] >= 10) {
  133. slack.webhook({
  134. text: txt
  135. }, function (errs, response) {
  136. // console.log(response);
  137. });
  138. errors[channelId] = 0;
  139. }
  140. console.log("errors counter : " + channelId + " " + errors[channelId]);
  141. });
  142.  
  143. feedparser.on('readable', function () {
  144. // This is where the action is!
  145. var stream = this
  146. , meta = this.meta // **NOTE** the "meta" is always available in the context of the feedparser instance
  147. , item;
  148.  
  149. while (item = stream.read()) {
  150. ///store(item, link, channelId, channelToken);
  151.  
  152. if (item.date == null) {
  153. item.date = new Date().toString();
  154. // console.log("date is :"+item.date);
  155. // return;
  156. }
  157.  
  158. if (item.pubDate == null) {
  159. item.pubDate = new Date().toString();
  160. // console.log("date is :"+item.date);
  161. //return;
  162. }
  163.  
  164. item.date = new Date(item.date).getTime();
  165.  
  166. //console.log("date is :"+item.date);
  167.  
  168. itemArray.push(item);
  169. //console.log(" -------++----------"+ item +"==============");
  170. //emitter.emit('store', item, link, channelId, channelToken);
  171. countItemArray++;
  172. }
  173. });
  174. // promise start
  175.  
  176. feedparser.on("end", function () {
  177. console.log(" ++++++++" + link + " total : " + countItemArray + "+++++++++");
  178.  
  179. var byDate = itemArray.slice(0);
  180. byDate.sort(function (a, b) {
  181. var x = a.date;
  182. var y = b.date;
  183. return x < y ? -1 : x > y ? 1 : 0;
  184. });
  185.  
  186. //fechingAll
  187. fetchingAll(byDate , link, channelId, channelToken);
  188.  
  189. });
  190. }
  191. function fetchingAll(item, link, channelId, channelToken){
  192. var fetchpromises = item;
  193. var sequence = Promise.resolve()
  194.  
  195. fetchpromises.forEach(function(index,value){
  196. sequence = sequence.then(function(){
  197. return store(index, link, channelId, channelToken)
  198. //console.log(index)
  199. }).then(function(value){
  200.  
  201. //var dimensions = sizeOf(value.data[imagelink]);
  202. // console.log( "ini link image:"+ value.data);
  203. // success method
  204. console.log(value + '.' + link + ' success to load!')
  205. return mysqlSave(value.link, value.channelId, value.channelToken, value.data)
  206. //console.log(value);
  207.  
  208. }).
  209.  
  210. catch(function(err){
  211. console.log(err + ' failed to load!')
  212. })
  213. })
  214. }
  215.  
  216.  
  217. // promise end
  218. emitter.on('store', function (item, link, channelId, channelToken) {
  219. store(item, link, channelId, channelToken);
  220. });
  221.  
  222. function store(item, link, channelId, channelToken) {
  223.  
  224. var linkOri = link;
  225. var linkItem = item.link;
  226. var title = item.title;
  227. var description = item.description;
  228. var pubDate = item.date;
  229. /* var newDate = new Date(pubDate);
  230. var tz = newDate.toString().match(/([-\+][0-9]+)\s/)[1];
  231. var t = newDate.toJSON();
  232. t = t.substring(0, 19) + tz;
  233. t = t.replace("+", "-");*/
  234.  
  235. //console.log("pubDate is :" + pubDate + "link at" + link);
  236. /* if (pubDate == null) {
  237. pubDate = new Date().toString();
  238. }*/
  239. var createdDate = new Date(pubDate).toISOString().slice(0, 19).replace('T', ' ');
  240. //console.log(createdDate);
  241. var author = item.author || "";
  242. var imagewidth = 0;
  243. var imageheight = 0;
  244.  
  245. var isItemImage = item.image;
  246.  
  247. var itemImage = "";
  248. var imagelink = "a";
  249.  
  250. var linkhash = crypto.createHash('sha256').update(linkItem).digest('sha256').toString('hex');
  251.  
  252. if (isItemImage.hasOwnProperty("url") && isItemImage.url != null) {
  253. itemImage = isItemImage.url;
  254. }
  255. if (itemImage.length > 0) {
  256. imagelink = itemImage;
  257. }
  258.  
  259. if (imagelink.length < 5) {
  260. imagelink = getImage(description);
  261. }
  262.  
  263. if (imagelink.length < 5) {
  264. if (item.hasOwnProperty("link") && item.link != null) {
  265. imagelink = item.link;
  266. }
  267. }
  268.  
  269.  
  270. //get from enclosures
  271. if (imagelink.length < 5) {
  272. if (item.hasOwnProperty("enclosures") && item.enclosures != null) {
  273. var itemEnclosures = item.enclosures;
  274. if (itemEnclosures.length > 0) {
  275. if (itemEnclosures[0].hasOwnProperty("url")) {
  276. imagelink = itemEnclosures[0].url;
  277. }
  278. }
  279. }
  280. }
  281.  
  282. //get from tag rss:image
  283. if (imagelink.length < 5) {
  284. if (item.hasOwnProperty("rss:image") && item["rss:image"] != null) {
  285. var imageRss = item["rss:image"];
  286. if (imageRss.hasOwnProperty("url")) {
  287. var imageRssUrl = imageRss["url"];
  288. if (imageRssUrl.hasOwnProperty("#")) {
  289. var imageRssUrlCrash = imageRssUrl["#"];
  290. if (imageRssUrlCrash.length > 1) {
  291. imagelink = imageRss
  292. } else {
  293. var meta = item.meta;
  294. if (meta.hasOwnProperty("image") && meta.image != null) {
  295. var metaImage = meta.image;
  296. if (metaImage.hasOwnProperty("url")) {
  297. imagelink = meta["image"]["url"];
  298. }
  299. }
  300. }
  301. }
  302. }
  303.  
  304. }
  305. }
  306.  
  307. //item meta image url
  308. if (imagelink.length < 5) {
  309. var meta = item.meta;
  310. if (meta != null) {
  311. if (meta.hasOwnProperty("image")) {
  312. var metaImage = meta.image;
  313. if (metaImage.hasOwnProperty("url")) {
  314. imagelink = meta["image"]["url"];
  315. }
  316. }
  317. }
  318. }
  319.  
  320. //get from tag media:content
  321. if (imagelink < 5) {
  322. if (item.hasOwnProperty("media:content") && item["media:content"] != null) {
  323. var mediaContent = item["media:content"];
  324. imagelink = mediaContent.getAttribute("url");
  325. }
  326. }
  327.  
  328. if (imagelink < 5) {
  329. if (item.hasOwnProperty("content:encoded")) {
  330. var contentEncoded = item["content:encoded"];
  331. if (contentEncoded.hasOwnProperty("#")) {
  332. var contentCrash = contentEncoded["#"];
  333. imagelink = getImage(contentCrash);
  334. }
  335. }
  336. }
  337.  
  338. var data = {
  339. title: title,
  340. description: striptags(description),
  341. link: linkItem,
  342. imagelink: imagelink,
  343. imagewidth: imagewidth || 0,
  344. imageheight: imageheight || 0,
  345. createdDate: createdDate,
  346. channelid: channelId,
  347. username: author,
  348. linkhash: linkhash,
  349. active: 1
  350. };
  351.  
  352. if (imagelink.length > 5) {
  353. imagelink = imagelink.replace("<![CDATA[", "").replace("]]>", "");
  354. imagelink = imagelink.replace("'", "").replace('"', "");
  355. imagelink = imagelink.replace("'", "");
  356. imagelink = imagelink.replace('"', "");
  357. imagelink = imagelink.replace("&quot;", "");
  358. imagelink = striptags(imagelink);
  359.  
  360. if (imagelink.match(/.js/)) {
  361. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  362. //mysqlSave(link, channelId, channelToken, data);
  363. } else {
  364. size(imagelink, function (err, dimensions, length) {
  365. if (err) {
  366. slack.webhook({
  367. text: err
  368. }, function (errs, response) {
  369. // console.log(response);
  370. });
  371. return true;
  372. }
  373. if (!err) {
  374. if (dimensions != undefined) {
  375. imagewidth = dimensions.width;
  376. imageheight = dimensions.height;
  377. }
  378.  
  379. data = {
  380. title: title,
  381. description: striptags(description),
  382. link: linkItem,
  383. imagelink: imagelink,
  384. imagewidth: imagewidth || 0,
  385. imageheight: imageheight || 0,
  386. createdDate: createdDate,
  387. channelid: channelId,
  388. username: author,
  389. linkhash: linkhash,
  390. active: 1
  391. };
  392.  
  393. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  394. //mysqlSave(link, channelId, channelToken, data);
  395. }
  396.  
  397.  
  398. });
  399. }
  400.  
  401.  
  402. } else {
  403. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  404. //mysqlSave(link, channelId, channelToken, data);
  405.  
  406. }
  407.  
  408.  
  409. var fetch = {link:link,channelId:channelId,channelToken:channelToken,data:data};
  410. return fetch;
  411. }
  412.  
  413.  
  414. emitter.on('mysql-save', function (link, channelId, channelToken, data) {
  415. // var sql2 = '⁠⁠⁠insert into feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username) SELECT * FROM (SELECT ? as f1, ? as f2, ? as f3, ? as f4, 125 as f5,125 as f6,84 as f7,1 as f8, ? as f9, ? as f10) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? and linkhash= ?) LIMIT 1;';
  416. mysqlSave(link, channelId, channelToken, data);
  417. });
  418.  
  419. function mysqlSave(link, channelId, channelToken, data) {
  420. var id = 0;
  421. var results;
  422. var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';
  423.  
  424. var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];
  425. var query = connection.query(sql, arrayValue, function (errs, resulr) {
  426. var txt = data.title + " " + data.link + " " + data.description;
  427. if (errs) {
  428. console.log("error dibagian:"+ errs);
  429. //console.log(arrayValue.toString());
  430. //console.log(query);
  431.  
  432. slack.webhook({
  433. text: txt + errs.toString()
  434. }, function (errs, response) {
  435. // console.log(response);
  436. });
  437. }
  438.  
  439. //console.log('res ID:'+ results.id);
  440. size(data.imagelink, function (err, dimensions, length) {
  441. if (err) {
  442. slack.webhook({
  443. text: err
  444. }, function (errs, response) {
  445. // console.log(response);
  446. });
  447. return true;
  448. }
  449. //console.log("width:" + dimensions.width + "height:" + dimensions.height);
  450. if (!err) {
  451. //imagewidth = dimensions.width;
  452. //imageheight = dimensions.height;
  453. connection.query('UPDATE feed SET imagewidth = ?, imageheight = ? Where id = ?',[dimensions.width,dimensions.height, resulr.insertId]);
  454. }
  455.  
  456.  
  457. });
  458. //console.log("LAST ID: " + resulr.insertId);
  459. //id = resulr.insertId;
  460. });
  461.  
  462. //results = {link:link,channelId:channelId,channelToken:channelToken,data:data,id:id}
  463.  
  464. //return results;
  465.  
  466. }
  467.  
  468.  
  469. function getImage(string) {
  470. if (string != null) {
  471. string = string.replace("<![CDATA[", "").replace("]]>", "");
  472. string = string.replace("CDATA", "");
  473. string = string.replace("<figure>", "");
  474. string = string.replace("</figure>", "");
  475. string = string.replace("\n", "");
  476. string = string.replace("\t", "");
  477.  
  478. var re = /<img[^>]+src="?([^"\s]+)"?[^>]*\/>/g;
  479. var rex = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i; // match src='a' OR src="a" OR src=a
  480. var results = null;
  481. var img = "";
  482.  
  483. results = re.exec(string);
  484. if (results) {
  485. img = results[1];
  486. if (img.match(/.js/)) {
  487. var res = string.match(rex);
  488. if (res != null) {
  489. img = res[1] || res[2] || res[3]; // get the one that matched
  490. }
  491. }
  492.  
  493. } else {
  494. var res = string.match(rex);
  495. if (res != null) {
  496. img = res[1] || res[2] || res[3]; // get the one that matched
  497. }
  498. }
  499.  
  500. return img;
  501. } else {
  502. return "";
  503. }
  504.  
  505. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement