Advertisement
diagnoze

app7-node

Dec 4th, 2016
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.07 KB | None | 0 0
  1. /**
  2. * Created by ASUS-PC on 11/8/2016.
  3. */
  4. var FeedParser = require('feedparser'),
  5. request = require('request'),
  6. http = require('http'),
  7. https = require('https'),
  8. striptags = require('striptags'),
  9. sizeOf = require('image-size'),
  10. size = require('request-image-size'),
  11. Slack = require('slack-node'),
  12. mysql = require('mysql'),
  13. crypto = require('crypto'),
  14. Emitter = require('tiny-emitter'),
  15. errors = []
  16. ;
  17.  
  // Event bus used to decouple the channel poller from the per-feed workers.
  var emitter = new Emitter();

  // Slack incoming-webhook used for error alerts.
  // The URL is a secret: supply it through the SLACK_WEBHOOK_URI environment
  // variable instead of committing it. When unset it stays empty, exactly as before.
  var webhookUri = process.env.SLACK_WEBHOOK_URI || "";

  var slack = new Slack();
  slack.setWebhook(webhookUri);

  // MySQL connection shared by every insert in this file.
  // NOTE(review): production credentials used to sit here in commented-out form;
  // they were removed and should be rotated and loaded from configuration.
  var connection = mysql.createConnection({
      host: "localhost",
      user: "root",
      password: '',
      database: "intrafeed",
      charset: 'utf8mb4'
  });

  connection.connect(function (err) {
      if (err) {
          // Include the driver error so a failed start-up can actually be diagnosed.
          console.log('Error connecting to Db: ' + err);
          return;
      }
      console.log('Connection established');
  });

  // Endpoint that returns the list of RSS channels to poll.
  var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";

  /*
   End settings and params
   */

  // Poll once immediately, then every 5 minutes.
  start();
  setInterval(function () {
      start();
  }, 5 * 60 * 1000);
  62.  
  63.  
  /**
   * Download the channel list from `intrafeedLinks` and emit one "fetch"
   * event per channel with (link, channelid, token).
   *
   * Failures (transport error, non-200 status, response without a `channels`
   * array) are logged and the run is skipped; nothing is thrown, so the next
   * scheduled run can try again.
   */
  function start() {
      request({url: intrafeedLinks, json: true}, function (error, response, body) {
          if (error) {
              console.log("Failed to load channel list from " + intrafeedLinks + " : " + error);
              return;
          }
          if (!response || response.statusCode != 200) {
              console.log("Unexpected status while loading channel list: " + (response && response.statusCode));
              return;
          }

          console.log(body); // Dump the decoded channel list for debugging.

          var channels = body && body.channels;
          if (!Array.isArray(channels)) {
              console.log("Channel list response has no 'channels' array");
              return;
          }

          // forEach keeps the index local; the original loop leaked `i` as a global.
          channels.forEach(function (channel) {
              emitter.emit('fetch', channel.link, channel.channelid, channel.token);
          });
      });
  }
  80.  
  /**
   * Listener for the "fetch" event: hands the channel's feed URL, id and
   * token straight to fetch().
   */
  function handleFetchEvent(feedUrl, id, token) {
      fetch(feedUrl, id, token);
  }

  emitter.on("fetch", handleFetchEvent);
  84.  
  /**
   * Count one failure for a channel and alert Slack on every 10th failure.
   * The per-channel counter lives in the file-level `errors` array and is
   * reset to 0 after an alert is sent.
   */
  function reportFeedError(channelId, txt) {
      console.log(txt);
      errors[channelId] = (errors[channelId] || 0) + 1;

      if (errors[channelId] >= 10) {
          slack.webhook({
              text: txt
          }, function (slackErr) {
              if (slackErr) {
                  console.log("Slack notification failed: " + slackErr);
              }
          });
          errors[channelId] = 0;
      }
      console.log("errors counter : " + channelId + " " + errors[channelId]);
  }

  /**
   * Download and parse one RSS/Atom feed, then pass its items, sorted by
   * date ascending, to fetchingAll().
   *
   * @param {string} link          feed URL
   * @param {*}      channelId     id of the channel the feed belongs to
   * @param {*}      channelToken  token forwarded unchanged to fetchingAll()
   */
  function fetch(link, channelId, channelToken) {
      var req = request(link);
      var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
      var itemArray = [];

      req.on('error', function (error) {
          // Previously swallowed silently; at least leave a trace in the log.
          console.log("Request error for " + link + " channelid :" + channelId + " : " + error);
      });

      req.on('response', function (res) {
          if (res.statusCode != 200) {
              // The original message concatenated an undefined `err` variable,
              // which threw a ReferenceError before anything was reported.
              reportFeedError(channelId, "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId);
              return req.emit('error', new Error('Bad status code'));
          }

          req.pipe(feedparser);
      });

      feedparser.on('error', function (err) {
          reportFeedError(channelId, "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err);
      });

      feedparser.on('readable', function () {
          var item;

          while ((item = feedparser.read())) {
              if (item.pubDate == null) {
                  item.pubDate = new Date().toString();
              }

              // Normalise the date to epoch milliseconds. Missing or unparseable
              // dates fall back to "now" so sorting and later formatting cannot
              // hit NaN.
              var timestamp = item.date == null ? NaN : new Date(item.date).getTime();
              item.date = isNaN(timestamp) ? Date.now() : timestamp;

              itemArray.push(item);
          }
      });

      feedparser.on("end", function () {
          console.log(" ++++++++" + link + " total : " + itemArray.length + "+++++++++");

          // Oldest first, so rows are inserted in publication order.
          var byDate = itemArray.slice(0);
          byDate.sort(function (a, b) {
              return a.date - b.date;
          });

          fetchingAll(byDate, link, channelId, channelToken);
      });
  }
  /**
   * Measure an image and resolve with its `{width, height}` object, or with
   * null when the link is empty, the lookup fails, or `size` throws.
   * Never rejects, so a broken image cannot prevent the row from being saved.
   */
  function lookupImageSize(imagelink) {
      return new Promise(function (resolve) {
          if (typeof imagelink !== "string" || imagelink.length === 0) {
              resolve(null);
              return;
          }
          try {
              size(imagelink, function (err, dimensions) {
                  resolve(err || !dimensions ? null : dimensions);
              });
          } catch (err) {
              resolve(null);
          }
      });
  }

  /**
   * Process feed items strictly one after another: build the row with
   * store(), measure its image, then persist it with mysqlSave().
   *
   * The original called mysqlSave() right after starting the asynchronous
   * size lookup, so the measured width/height were never part of the saved
   * row. The save now waits for the lookup to finish.
   *
   * @param {Array}  item          feed items, already in the order to insert
   * @param {string} link          feed URL
   * @param {*}      channelId     channel id
   * @param {*}      channelToken  channel token
   * @returns {Promise} settles once every item was handled; per-item failures
   *                    are logged and do not stop the remaining items
   */
  function fetchingAll(item, link, channelId, channelToken) {
      var sequence = Promise.resolve();

      item.forEach(function (feedItem) {
          sequence = sequence
              .then(function () {
                  return store(feedItem, link, channelId, channelToken);
              })
              .then(function (stored) {
                  return lookupImageSize(stored.data.imagelink).then(function (dimensions) {
                      if (dimensions) {
                          stored.data.imagewidth = dimensions.width;
                          stored.data.imageheight = dimensions.height;
                          console.log("insert image size, width:" + stored.data.imagewidth + " | height: " + stored.data.imageheight);
                      }
                      return mysqlSave(stored.link, stored.channelId, stored.channelToken, stored.data);
                  });
              })
              .catch(function (err) {
                  console.log(err + ' failed to load!');
              });
      });

      return sequence;
  }
  226.  
  227.  
  228. // promise end
  /**
   * Listener for the 'store' event: forwards the feed item together with its
   * feed URL, channel id and channel token to store().
   */
  function handleStoreEvent(feedItem, feedUrl, id, token) {
      store(feedItem, feedUrl, id, token);
  }

  emitter.on('store', handleStoreEvent);
  232.  
  /**
   * Choose the best image URL for a feed item.
   *
   * Sources are tried in order and the first string of at least 5 characters
   * wins: item.image.url, first <img> in the description, first enclosure,
   * rss:image, feed-level meta image, media:content, first <img> in
   * content:encoded. If none qualifies, the article link is returned.
   *
   * The article link used to be tried third, which made every later source
   * unreachable for any item that had a link; it is now the last resort.
   */
  function resolveImageLink(item, description) {
      var sources = [
          function () {
              return item.image && item.image.url;
          },
          function () {
              return getImage(description);
          },
          function () {
              var enclosures = item.enclosures;
              return enclosures && enclosures.length > 0 && enclosures[0] && enclosures[0].url;
          },
          function () {
              // Namespaced elements keep their text under "#".
              var rssImage = item["rss:image"];
              return rssImage && rssImage.url && rssImage.url["#"];
          },
          function () {
              // item.meta is absent when the parser runs with addmeta: false.
              return item.meta && item.meta.image && item.meta.image.url;
          },
          function () {
              // Parsed items are plain objects: attributes live under "@",
              // there is no DOM-style getAttribute().
              var media = item["media:content"];
              if (Array.isArray(media)) {
                  media = media[0];
              }
              return media && media["@"] && media["@"].url;
          },
          function () {
              var encoded = item["content:encoded"];
              return encoded && encoded["#"] ? getImage(encoded["#"]) : "";
          }
      ];

      for (var i = 0; i < sources.length; i++) {
          var candidate = sources[i]();
          if (typeof candidate === "string" && candidate.length >= 5) {
              return candidate;
          }
      }
      return String(item.link);
  }

  /**
   * Strip CDATA wrappers, quote characters, &quot; entities and HTML tags
   * from an image URL. All occurrences are removed, not only the first.
   */
  function sanitizeImageLink(imagelink) {
      var cleaned = imagelink
          .replace(/<!\[CDATA\[/g, "")
          .replace(/\]\]>/g, "")
          .replace(/['"]/g, "")
          .replace(/&quot;/g, "");
      return striptags(cleaned);
  }

  /**
   * Convert one parsed feed item into the record that mysqlSave() expects.
   *
   * imagewidth and imageheight are always 0 in the returned data: this
   * function is synchronous and cannot wait for an image lookup. The lookup
   * that used to be started here could never update the returned object
   * (it reassigned a local variable after the function had already
   * returned), so it was removed. Callers that need real dimensions must
   * measure the image before saving.
   *
   * @param {Object} item          feed item; `item.date` must be parseable by Date
   * @param {string} link          feed URL, returned unchanged
   * @param {*}      channelId     channel id, returned and copied into data.channelid
   * @param {*}      channelToken  channel token, returned unchanged
   * @returns {{link: string, channelId: *, channelToken: *, data: Object}}
   * @throws {Error} when the item has no link (the link is the de-duplication key)
   */
  function store(item, link, channelId, channelToken) {
      var linkItem = item.link;
      if (linkItem == null || linkItem === "") {
          throw new Error("feed item has no link; cannot compute linkhash");
      }

      var description = item.description;

      // MySQL DATETIME text in UTC; fall back to "now" for missing/invalid dates
      // because toISOString() throws on an invalid Date.
      var created = item.date == null ? new Date() : new Date(item.date);
      if (isNaN(created.getTime())) {
          created = new Date();
      }
      var createdDate = created.toISOString().slice(0, 19).replace('T', ' ');

      // 'hex' is the valid digest encoding; the original passed 'sha256' there.
      var linkhash = crypto.createHash('sha256').update(String(linkItem)).digest('hex');

      var imagelink = resolveImageLink(item, description);
      if (imagelink.length > 5) {
          // Clean before building the record so the stored value is the clean one.
          imagelink = sanitizeImageLink(imagelink);
      }

      var data = {
          title: item.title,
          description: striptags(description),
          link: linkItem,
          imagelink: imagelink,
          imagewidth: 0,
          imageheight: 0,
          createdDate: createdDate,
          channelid: channelId,
          username: item.author || "",
          linkhash: linkhash,
          active: 1
      };

      return {link: link, channelId: channelId, channelToken: channelToken, data: data};
  }
  423.  
  424.  
  /**
   * Listener for the 'mysql-save' event: passes the feed URL, channel id,
   * channel token and prepared row through to mysqlSave().
   */
  function handleMysqlSaveEvent(feedUrl, id, token, row) {
      mysqlSave(feedUrl, id, token, row);
  }

  emitter.on('mysql-save', handleMysqlSaveEvent);
  429.  
  /**
   * Insert one feed row unless a row with the same channel id and link hash
   * already exists (INSERT ... SELECT ... WHERE NOT EXISTS).
   *
   * All values are bound through placeholders. A failed query is logged and
   * reported to Slack; nothing is thrown.
   *
   * The image-size request that used to run after every insert was removed:
   * its callback was empty, so the result was discarded, and a non-URL
   * imagelink could make it throw inside the query callback.
   *
   * @param {string} link          feed URL (unused here, kept for the call signature)
   * @param {*}      channelId     channel id, used for the row and the duplicate check
   * @param {*}      channelToken  channel token (unused here, kept for the call signature)
   * @param {Object} data          row fields: title, description, link, imagelink,
   *                               imagewidth, imageheight, linkhash, username, createdDate
   */
  function mysqlSave(link, channelId, channelToken, data) {
      var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';

      // 11 column values followed by the 2 values of the NOT EXISTS check.
      var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];

      connection.query(sql, arrayValue, function (queryErr) {
          if (!queryErr) {
              return;
          }

          console.log("error dibagian:" + queryErr);

          var txt = data.title + " " + data.link + " " + data.description;
          slack.webhook({
              text: txt + queryErr.toString()
          }, function (slackErr) {
              if (slackErr) {
                  console.log("Slack notification failed: " + slackErr);
              }
          });
      });
  }
  459.  
  460.  
  /**
   * Extract the first usable image URL from an HTML fragment.
   *
   * The fragment is first stripped of CDATA wrappers and <figure> tags, and
   * newlines/tabs are turned into spaces (every occurrence; the original
   * string-based replace() only touched the first one, and deleting a newline
   * could fuse "<img" to the following attribute).
   *
   * <img> tags are scanned in order and the first `src` that does not point
   * at a JavaScript file is returned. Quoted and unquoted attribute values
   * are supported. If no <img> qualifies, the first `src` attribute of any
   * tag is returned, as before.
   *
   * @param {string} string  HTML fragment; null, undefined and non-strings yield ""
   * @returns {string} the image URL, or "" when none was found
   */
  function getImage(string) {
      if (typeof string !== "string" || string.length === 0) {
          return "";
      }

      var html = string
          .replace(/<!\[CDATA\[/g, "")
          .replace(/\]\]>/g, "")
          .replace(/CDATA/g, "")
          .replace(/<\/?figure>/g, "")
          .replace(/[\n\t]+/g, " ");

      // src='a' OR src="a" OR src=a inside an <img> tag.
      var imgTag = /<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;
      // Escaped dot: the original /.js/ also matched names such as "ajs.png".
      var scriptLike = /\.js(?:[?#]|$)/i;

      var match;
      while ((match = imgTag.exec(html)) !== null) {
          var src = match[1] || match[2] || match[3] || "";
          if (src.length > 0 && !scriptLike.test(src)) {
              return src;
          }
      }

      // Fallback kept from the original: first src attribute anywhere in the markup.
      var anySrc = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i.exec(html);
      if (anySrc === null) {
          return "";
      }
      return anySrc[1] || anySrc[2] || anySrc[3] || "";
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement